diff options
-rw-r--r-- | Makefile | 129 | ||||
-rw-r--r-- | NNET.DAT | 210 | ||||
-rw-r--r-- | assignment.c | 562 | ||||
-rw-r--r-- | bitfield.c | 335 | ||||
-rw-r--r-- | emfloat.h | 4 | ||||
-rw-r--r-- | fourier.c | 302 | ||||
-rw-r--r-- | fpemulation.c | 147 | ||||
-rw-r--r-- | huffman.c | 557 | ||||
-rw-r--r-- | idea.c | 392 | ||||
-rw-r--r-- | linear.c | 541 | ||||
-rw-r--r-- | nbench0.h | 2 | ||||
-rw-r--r-- | nbench1.c | 4445 | ||||
-rw-r--r-- | nbench1.h | 428 | ||||
-rw-r--r-- | neural.c | 815 | ||||
-rw-r--r-- | nmglobal.h | 3 | ||||
-rw-r--r-- | numsort.c | 292 | ||||
-rw-r--r-- | stringsort.c | 575 | ||||
-rw-r--r-- | wordcat.h | 81 |
18 files changed, 4813 insertions, 5007 deletions
@@ -19,10 +19,16 @@ default: nbench # You should leave -static in the CFLAGS so that your sysinfo can be # compiled into the executable. -CC = gcc +CCC=gcc +GCC=gcc # generic options for gcc -CFLAGS = -s -static -Wall -O3 +#CFLAGS=-O3 -mcpu=ev67 -mieee -funroll-loops -ftree-vectorize -fprefetch-loop-arrays -pipe -static -msmall-data -msmall-text +#CFLAGS=-O3 -mcpu=ev67 -mieee -pipe -msmall-data -msmall-text -funroll-loops -ftree-vectorize -static +GCCFLAGS=-O3 -march=k8 -msse3 -ftree-vectorize -funroll-loops -pipe +#CCCFLAGS=-fast -unroll 0 -inline speed +CCCFLAGS=${GCCFLAGS} +LINKFLAGS= # if your gcc lets you do it, then try this one #CFLAGS = -s -static -Wall -O3 -fomit-frame-pointer -funroll-loops @@ -91,15 +97,15 @@ MACHINE= # For any Unix flavor you need -DLINUX # You also need -DLINUX to get the new indices -DEFINES= -DLINUX $(NO_UNAME) +DEFINES=$(NO_UNAME) -DLINUX ########################################################################## # For LINUX-like systems with gcc sysinfoc.c: Makefile - ./sysinfo.sh $(CC) $(MACHINE) $(DEFINES) $(CFLAGS) + ./sysinfo.sh $(GCC) $(DEFINES) $(GCCFLAGS) sysinfo.c: Makefile - ./sysinfo.sh $(CC) $(MACHINE) $(DEFINES) $(CFLAGS) + ./sysinfo.sh $(GCC) $(DEFINES) $(GCCFLAGS) ########################################################################## # For non-LINUX systems @@ -107,47 +113,82 @@ sysinfo.c: Makefile # and take sysinfo.c and sysinfoc.c out of the dependencies for nbench0.o hardware.o: hardware.c hardware.h Makefile - $(CC) $(MACHINE) $(DEFINES) $(CFLAGS)\ - -c hardware.c - -nbench0.o: nbench0.h nbench0.c nmglobal.h pointer.h hardware.h\ - Makefile sysinfo.c sysinfoc.c - $(CC) $(MACHINE) $(DEFINES) $(CFLAGS)\ - -c nbench0.c - -emfloat.o: emfloat.h emfloat.c nmglobal.h pointer.h Makefile - $(CC) $(MACHINE) $(DEFINES) $(CFLAGS)\ - -c emfloat.c - -pointer.h: pointer Makefile - $(CC) $(MACHINE) $(DEFINES) $(CFLAGS)\ - -o pointer pointer.c - rm -f pointer.h - if [ "4" = `./pointer` ] ; then touch pointer.h ;\ - else echo "#define LONG64" >pointer.h ; fi - -misc.o: misc.h misc.c Makefile - $(CC) $(MACHINE) $(DEFINES) $(CFLAGS)\ - -c misc.c - -nbench1.o: nbench1.h nbench1.c wordcat.h nmglobal.h pointer.h Makefile - $(CC) $(MACHINE) $(DEFINES) $(CFLAGS)\ - -c nbench1.c - -sysspec.o: sysspec.h sysspec.c nmglobal.h pointer.h Makefile - $(CC) $(MACHINE) $(DEFINES) $(CFLAGS)\ - -c sysspec.c - -nbench: emfloat.o misc.o nbench0.o nbench1.o sysspec.o hardware.o - $(CC) $(MACHINE) $(DEFINES) $(CFLAGS) $(LINKFLAGS)\ - emfloat.o misc.o nbench0.o nbench1.o sysspec.o hardware.o\ - -o nbench -lm + $(GCC) $(DEFINES) $(GCCFLAGS) -c hardware.c + +nbench0.o: nbench0.h nbench0.c nmglobal.h hardware.h Makefile sysinfo.c sysinfoc.c + $(GCC) $(DEFINES) $(GCCFLAGS) -c nbench0.c +# Segfault before first test + +emfloat.o: emfloat.h emfloat.c nmglobal.h Makefile + $(CCC) $(DEFINES) $(CCCFLAGS) -c emfloat.c + +misc.o: misc.c Makefile + $(CCC) $(DEFINES) $(CCCFLAGS) -c misc.c + +numsort.o: numsort.c + $(CCC) $(DEFINES) $(CCCFLAGS) -c numsort.c +# gcc - 2.95 +# ccc - 5.08 + +stringsort.o: stringsort.c + $(CCC) $(DEFINES) $(CCCFLAGS) -c stringsort.c +# gcc - 6.77 +# ccc - 7.24 + +bitfield.o: bitfield.c + $(CCC) $(DEFINES) $(CCCFLAGS) -c bitfield.c +# gcc - 4.93, 4.90, 4.93, 4.93 +# ccc - 4.79, 4.85, 4.81, + +fpemulation.o: fpemulation.c + $(CCC) $(DEFINES) $(CCCFLAGS) -c fpemulation.c +# gcc - 13.90 +# ccc - 13.90 + +fourier.o: fourier.c + $(CCC) $(DEFINES) $(CCCFLAGS) -c fourier.c +# gcc - 15.96 +# ccc - 19.30 + +assignment.o: assignment.c + $(GCC) $(DEFINES) $(GCCFLAGS) -c assignment.c +# gcc - 16.51 +# ccc - 7.98 + +idea.o: idea.c + $(CCC) $(DEFINES) $(CCCFLAGS) -c idea.c +# gcc - 8.79 +# ccc - 9.10 + +huffman.o: huffman.c + $(GCC) $(DEFINES) $(GCCFLAGS) -c huffman.c +# $(CCC) $(DEFINES) -O1 -host -unroll 0 -inline speed -c huffman.c -g3 +# $(CCC) $(DEFINES) $(CCCFLAGS) -c huffman.c -g3 +# gcc - 6.94 +# ccc - segfault +# ccc -O1 -host - 5.14 +# ccc -O2 -host - segfault +# ccc -O1 -host -unroll 0 -inline speed - 5.95 + +neural.o: neural.c + $(CCC) $(DEFINES) $(CCCFLAGS) -c neural.c +# gcc - 12.00 +# ccc - 14.20 + +linear.o: linear.c + $(CCC) $(DEFINES) $(CCCFLAGS) -c linear.c +# gcc - 16.38 +# ccc - 23.29 + +sysspec.o: sysspec.h sysspec.c nmglobal.h Makefile + $(GCC) $(DEFINES) $(GCCFLAGS) -c sysspec.c + +nbench: emfloat.o misc.o nbench0.o numsort.o sysspec.o hardware.o stringsort.o bitfield.o fpemulation.o fourier.o assignment.o idea.o huffman.o neural.o linear.o + $(GCC) emfloat.o misc.o nbench0.o numsort.o sysspec.o hardware.o stringsort.o bitfield.o fpemulation.o fourier.o assignment.o idea.o huffman.o neural.o linear.o -o nbench -lm ########################################################################## clean: - - /bin/rm -f *.o *~ \#* core a.out hello sysinfo.c sysinfoc.c \ - bug pointer pointer.h debugbit.dat + - /bin/rm -f *.o nbench sysinfo.c sysinfoc.c -mrproper: clean - - /bin/rm -f nbench +remake: clean nbench diff --git a/NNET.DAT b/NNET.DAT new file mode 100644 index 0000000..5711730 --- /dev/null +++ b/NNET.DAT @@ -0,0 +1,210 @@ +5 7 8 +26 +0 0 1 0 0 +0 1 0 1 0 +1 0 0 0 1 +1 0 0 0 1 +1 1 1 1 1 +1 0 0 0 1 +1 0 0 0 1 +0 1 0 0 0 0 0 1 +1 1 1 1 0 +1 0 0 0 1 +1 0 0 0 1 +1 1 1 1 0 +1 0 0 0 1 +1 0 0 0 1 +1 1 1 1 0 +0 1 0 0 0 0 1 0 +0 1 1 1 0 +1 0 0 0 1 +1 0 0 0 0 +1 0 0 0 0 +1 0 0 0 0 +1 0 0 0 1 +0 1 1 1 0 +0 1 0 0 0 0 1 1 +1 1 1 1 0 +1 0 0 0 1 +1 0 0 0 1 +1 0 0 0 1 +1 0 0 0 1 +1 0 0 0 1 +1 1 1 1 0 +0 1 0 0 0 1 0 0 +1 1 1 1 1 +1 0 0 0 0 +1 0 0 0 0 +1 1 1 0 0 +1 0 0 0 0 +1 0 0 0 0 +1 1 1 1 1 +0 1 0 0 0 1 0 1 +1 1 1 1 1 +1 0 0 0 0 +1 0 0 0 0 +1 1 1 0 0 +1 0 0 0 0 +1 0 0 0 0 +1 0 0 0 0 +0 1 0 0 0 1 1 0 +0 1 1 1 0 +1 0 0 0 1 +1 0 0 0 0 +1 0 0 0 0 +1 0 0 1 1 +1 0 0 0 1 +0 1 1 1 0 +0 1 0 0 0 1 1 1 +1 0 0 0 1 +1 0 0 0 1 +1 0 0 0 1 +1 1 1 1 1 +1 0 0 0 1 +1 0 0 0 1 +1 0 0 0 1 +0 1 0 0 1 0 0 0 +0 1 1 1 0 +0 0 1 0 0 +0 0 1 0 0 +0 0 1 0 0 +0 0 1 0 0 +0 0 1 0 0 +0 1 1 1 0 +0 1 0 0 1 0 0 1 +0 0 0 0 1 +0 0 0 0 1 +0 0 0 0 1 +0 0 0 0 1 +1 0 0 0 1 +1 0 0 0 1 +0 1 1 1 0 +0 1 0 0 1 0 1 0 +1 0 0 0 1 +1 0 0 1 0 +1 0 1 0 0 +1 1 0 0 0 +1 0 1 0 0 +1 0 0 1 0 +1 0 0 0 1 +0 1 0 0 1 0 1 1 +1 0 0 0 0 +1 0 0 0 0 +1 0 0 0 0 +1 0 0 0 0 +1 0 0 0 0 +1 0 0 0 0 +1 1 1 1 1 +0 1 0 0 1 1 0 0 +1 0 0 0 1 +1 1 0 1 1 +1 0 1 0 1 +1 0 1 0 1 +1 0 0 0 1 +1 0 0 0 1 +1 0 0 0 1 +0 1 0 0 1 1 0 1 +1 0 0 0 1 +1 1 0 0 1 +1 0 1 0 1 +1 0 1 0 1 +1 0 1 0 1 +1 0 0 1 1 +1 0 0 0 1 +0 1 0 0 1 1 1 0 +0 1 1 1 0 +1 0 0 0 1 +1 0 0 0 1 +1 0 0 0 1 +1 0 0 0 1 +1 0 0 0 1 +0 1 1 1 0 +0 1 0 0 1 1 1 1 +1 1 1 1 0 +1 0 0 0 1 +1 0 0 0 1 +1 1 1 1 0 +1 0 0 0 0 +1 0 0 0 0 +1 0 0 0 0 +0 1 0 1 0 0 0 0 +0 1 1 1 0 +1 0 0 0 1 +1 0 0 0 1 +1 0 0 0 1 +1 0 1 0 1 +1 0 0 1 1 +0 1 1 1 1 +0 1 0 1 0 0 0 1 +1 1 1 1 0 +1 0 0 0 1 +1 0 0 0 1 +1 1 1 1 0 +1 0 1 0 0 +1 0 0 1 0 +1 0 0 0 1 +0 1 0 1 0 0 1 0 +0 1 1 1 1 +1 0 0 0 0 +1 0 0 0 0 +0 1 1 1 0 +0 0 0 0 1 +0 0 0 0 1 +1 1 1 1 0 +0 1 0 1 0 0 1 1 +1 1 1 1 1 +0 0 1 0 0 +0 0 1 0 0 +0 0 1 0 0 +0 0 1 0 0 +0 0 1 0 0 +0 0 1 0 0 +0 1 0 1 0 1 0 0 +1 0 0 0 1 +1 0 0 0 1 +1 0 0 0 1 +1 0 0 0 1 +1 0 0 0 1 +1 0 0 0 1 +0 1 1 1 0 +0 1 0 1 0 1 0 1 +1 0 0 0 1 +1 0 0 0 1 +0 1 0 1 0 +0 1 0 1 0 +0 1 0 1 0 +0 1 0 1 0 +0 0 1 0 0 +0 1 0 1 0 1 1 0 +1 0 0 0 1 +1 0 0 0 1 +1 0 0 0 1 +1 0 1 0 1 +1 0 1 0 1 +1 0 1 0 1 +0 1 0 1 0 +0 1 0 1 0 1 1 1 +1 0 0 0 1 +0 1 0 1 0 +0 1 0 1 0 +0 0 1 0 0 +0 1 0 1 0 +0 1 0 1 0 +1 0 0 0 1 +0 1 0 1 1 0 0 0 +1 0 0 0 1 +0 1 0 1 0 +0 1 0 1 0 +0 0 1 0 0 +0 0 1 0 0 +0 0 1 0 0 +0 0 1 0 0 +0 1 0 1 1 0 0 1 +1 1 1 1 1 +0 0 0 1 0 +0 0 0 1 0 +0 0 1 0 0 +0 1 0 0 0 +0 1 0 0 0 +1 1 1 1 1 +0 1 0 1 1 0 1 0 diff --git a/assignment.c b/assignment.c new file mode 100644 index 0000000..7313d78 --- /dev/null +++ b/assignment.c @@ -0,0 +1,562 @@ +#include <string.h> +#include "nmglobal.h" +#include "nbench1.h" + +/************************* +** ASSIGNMENT ALGORITHM ** +*************************/ + +/************* +** DoAssign ** +************** +** Perform an assignment algorithm. +** The algorithm was adapted from the step by step guide found +** in "Quantitative Decision Making for Business" (Gordon, +** Pressman, and Cohn; Prentice-Hall) +** +** +** NOTES: +** 1. Even though the algorithm distinguishes between +** ASSIGNROWS and ASSIGNCOLS, as though the two might +** be different, it does presume a square matrix. +** I.E., ASSIGNROWS and ASSIGNCOLS must be the same. +** This makes for some algorithmically-correct but +** probably non-optimal constructs. +** +*/ +void DoAssign(void) +{ +AssignStruct *locassignstruct; /* Local structure ptr */ +farlong *arraybase; +char *errorcontext; +int systemerror; +ulong accumtime; +double iterations; + +/* +** Link to global structure +*/ +locassignstruct=&global_assignstruct; + +/* +** Set the error context string. +*/ +errorcontext="CPU:Assignment"; + +/* +** See if we need to do self adjustment code. +*/ +if(locassignstruct->adjust==0) +{ + /* + ** Self-adjustment code. The system begins by working on 1 + ** array. If it does that in no time, then two arrays + ** are built. This process continues until + ** enough arrays are built to handle the tolerance. + */ + locassignstruct->numarrays=1; + while(1) + { + /* + ** Allocate space for arrays + */ + arraybase=(farlong *) AllocateMemory(sizeof(long)* + ASSIGNROWS*ASSIGNCOLS*locassignstruct->numarrays, + &systemerror); + if(systemerror) + { ReportError(errorcontext,systemerror); + FreeMemory((farvoid *)arraybase, + &systemerror); + ErrorExit(); + } + + /* + ** Do an iteration of the assignment alg. If the + ** elapsed time is less than or equal to the permitted + ** minimum, then allocate for more arrays and + ** try again. + */ + if(DoAssignIteration(arraybase, + locassignstruct->numarrays)>global_min_ticks) + break; /* We're ok...exit */ + + FreeMemory((farvoid *)arraybase, &systemerror); + locassignstruct->numarrays++; + } +} +else +{ /* + ** Allocate space for arrays + */ + arraybase=(farlong *)AllocateMemory(sizeof(long)* + ASSIGNROWS*ASSIGNCOLS*locassignstruct->numarrays, + &systemerror); + if(systemerror) + { ReportError(errorcontext,systemerror); + FreeMemory((farvoid *)arraybase, + &systemerror); + ErrorExit(); + } +} + +/* +** All's well if we get here. Do the tests. +*/ +accumtime=0L; +iterations=(double)0.0; + +do { + accumtime+=DoAssignIteration(arraybase, + locassignstruct->numarrays); + iterations+=(double)1.0; +} while(TicksToSecs(accumtime)<locassignstruct->request_secs); + +/* +** Clean up, calculate results, and go home. Be sure to +** show that we don't have to rerun adjustment code. +*/ +FreeMemory((farvoid *)arraybase,&systemerror); + +locassignstruct->iterspersec=iterations * + (double)locassignstruct->numarrays / TicksToFracSecs(accumtime); + +if(locassignstruct->adjust==0) + locassignstruct->adjust=1; + +return; + +} + +/********************** +** DoAssignIteration ** +*********************** +** This routine executes one iteration of the assignment test. +** It returns the number of ticks elapsed in the iteration. +*/ +static ulong DoAssignIteration(farlong *arraybase, + ulong numarrays) +{ +longptr abase; /* local pointer */ +ulong elapsed; /* Elapsed ticks */ +ulong i; + +/* +** Set up local pointer +*/ +abase.ptrs.p=arraybase; + +/* +** Load up the arrays with a random table. +*/ +LoadAssignArrayWithRand(arraybase,numarrays); + +/* +** Start the stopwatch +*/ +elapsed=StartStopwatch(); + +/* +** Execute assignment algorithms +*/ +for(i=0;i<numarrays;i++) +{ /* abase.ptrs.p+=i*ASSIGNROWS*ASSIGNCOLS; */ + /* Fixed by Eike Dierks */ + Assignment(*abase.ptrs.ap); + abase.ptrs.p+=ASSIGNROWS*ASSIGNCOLS; +} + +/* +** Get elapsed time +*/ +return(StopStopwatch(elapsed)); +} + +/**************************** +** LoadAssignArrayWithRand ** +***************************** +** Load the assignment arrays with random numbers. All positive. +** These numbers represent costs. +*/ +static void LoadAssignArrayWithRand(farlong *arraybase, + ulong numarrays) +{ +longptr abase,abase1; /* Local for array pointer */ +ulong i; + +/* +** Set local array pointer +*/ +abase.ptrs.p=arraybase; +abase1.ptrs.p=arraybase; + +/* +** Set up the first array. Then just copy it into the +** others. +*/ +LoadAssign(*(abase.ptrs.ap)); +if(numarrays>1) + for(i=1;i<numarrays;i++) + { /* abase1.ptrs.p+=i*ASSIGNROWS*ASSIGNCOLS; */ + /* Fixed by Eike Dierks */ + abase1.ptrs.p+=ASSIGNROWS*ASSIGNCOLS; + CopyToAssign(*(abase.ptrs.ap),*(abase1.ptrs.ap)); + } + +return; +} + +/*************** +** LoadAssign ** +**************** +** The array given by arraybase is loaded with positive random +** numbers. Elements in the array are capped at 5,000,000. +*/ +static void LoadAssign(farlong arraybase[][ASSIGNCOLS]) +{ +ushort i,j; + +/* +** Reset random number generator so things repeat. +*/ +/* randnum(13L); */ +randnum((int32)13); + +for(i=0;i<ASSIGNROWS;i++) + for(j=0;j<ASSIGNROWS;j++){ + /* arraybase[i][j]=abs_randwc(5000000L);*/ + arraybase[i][j]=abs_randwc((int32)5000000); + } + +return; +} + +/***************** +** CopyToAssign ** +****************** +** Copy the contents of one array to another. This is called by +** the routine that builds the initial array, and is used to copy +** the contents of the intial array into all following arrays. +*/ +static void CopyToAssign(farlong arrayfrom[ASSIGNROWS][ASSIGNCOLS], + farlong arrayto[ASSIGNROWS][ASSIGNCOLS]) +{ +ushort i,j; + +for(i=0;i<ASSIGNROWS;i++) + for(j=0;j<ASSIGNCOLS;j++) + arrayto[i][j]=arrayfrom[i][j]; + +return; +} + +/*************** +** Assignment ** +***************/ +static void Assignment(farlong arraybase[][ASSIGNCOLS]) +{ +short assignedtableau[ASSIGNROWS][ASSIGNCOLS]; + +/* +** First, calculate minimum costs +*/ +calc_minimum_costs(arraybase); + +/* +** Repeat following until the number of rows selected +** equals the number of rows in the tableau. +*/ +while(first_assignments(arraybase,assignedtableau)!=ASSIGNROWS) +{ second_assignments(arraybase,assignedtableau); +} + +#ifdef DEBUG +{ + int i,j; + printf("\nColumn choices for each row\n"); + for(i=0;i<ASSIGNROWS;i++) + { + printf("R%03d: ",i); + for(j=0;j<ASSIGNCOLS;j++) + if(assignedtableau[i][j]==1) + printf("%03d ",j); + } +} +#endif + +return; +} + +/*********************** +** calc_minimum_costs ** +************************ +** Revise the tableau by calculating the minimum costs on a +** row and column basis. These minima are subtracted from +** their rows and columns, creating a new tableau. +*/ +static void calc_minimum_costs(long tableau[][ASSIGNCOLS]) +{ +ushort i,j; /* Index variables */ +long currentmin; /* Current minimum */ +/* +** Determine minimum costs on row basis. This is done by +** subtracting -- on a row-per-row basis -- the minum value +** for that row. +*/ +for(i=0;i<ASSIGNROWS;i++) +{ + currentmin=MAXPOSLONG; /* Initialize minimum */ + for(j=0;j<ASSIGNCOLS;j++) + if(tableau[i][j]<currentmin) + currentmin=tableau[i][j]; + + for(j=0;j<ASSIGNCOLS;j++) + tableau[i][j]-=currentmin; +} + +/* +** Determine minimum cost on a column basis. This works +** just as above, only now we step through the array +** column-wise +*/ +for(j=0;j<ASSIGNCOLS;j++) +{ + currentmin=MAXPOSLONG; /* Initialize minimum */ + for(i=0;i<ASSIGNROWS;i++) + if(tableau[i][j]<currentmin) + currentmin=tableau[i][j]; + + /* + ** Here, we'll take the trouble to see if the current + ** minimum is zero. This is likely worth it, since the + ** preceding loop will have created at least one zero in + ** each row. We can save ourselves a few iterations. + */ + if(currentmin!=0) + for(i=0;i<ASSIGNROWS;i++) + tableau[i][j]-=currentmin; +} + +return; +} + +/********************** +** first_assignments ** +*********************** +** Do first assignments. +** The assignedtableau[] array holds a set of values that +** indicate the assignment of a value, or its elimination. +** The values are: +** 0 = Item is neither assigned nor eliminated. +** 1 = Item is assigned +** 2 = Item is eliminated +** Returns the number of selections made. If this equals +** the number of rows, then an optimum has been determined. +*/ +static int first_assignments(long tableau[][ASSIGNCOLS], + short assignedtableau[][ASSIGNCOLS]) +{ +ushort i,j,k; /* Index variables */ +ushort numassigns; /* # of assignments */ +ushort totnumassigns; /* Total # of assignments */ +ushort numzeros; /* # of zeros in row */ +int selected=0; /* Flag used to indicate selection */ + +/* +** Clear the assignedtableau, setting all members to show that +** no one is yet assigned, eliminated, or anything. +*/ +for(i=0;i<ASSIGNROWS;i++) + for(j=0;j<ASSIGNCOLS;j++) + assignedtableau[i][j]=0; + +totnumassigns=0; +do { + numassigns=0; + /* + ** Step through rows. For each one that is not currently + ** assigned, see if the row has only one zero in it. If so, + ** mark that as an assigned row/col. Eliminate other zeros + ** in the same column. + */ + for(i=0;i<ASSIGNROWS;i++) + { numzeros=0; + for(j=0;j<ASSIGNCOLS;j++) + if(tableau[i][j]==0L) + if(assignedtableau[i][j]==0) + { numzeros++; + selected=j; + } + if(numzeros==1) + { numassigns++; + totnumassigns++; + assignedtableau[i][selected]=1; + for(k=0;k<ASSIGNROWS;k++) + if((k!=i) && + (tableau[k][selected]==0)) + assignedtableau[k][selected]=2; + } + } + /* + ** Step through columns, doing same as above. Now, be careful + ** of items in the other rows of a selected column. + */ + for(j=0;j<ASSIGNCOLS;j++) + { numzeros=0; + for(i=0;i<ASSIGNROWS;i++) + if(tableau[i][j]==0L) + if(assignedtableau[i][j]==0) + { numzeros++; + selected=i; + } + if(numzeros==1) + { numassigns++; + totnumassigns++; + assignedtableau[selected][j]=1; + for(k=0;k<ASSIGNCOLS;k++) + if((k!=j) && + (tableau[selected][k]==0)) + assignedtableau[selected][k]=2; + } + } + /* + ** Repeat until no more assignments to be made. + */ +} while(numassigns!=0); + +/* +** See if we can leave at this point. +*/ +if(totnumassigns==ASSIGNROWS) return(totnumassigns); + +/* +** Now step through the array by row. If you find any unassigned +** zeros, pick the first in the row. Eliminate all zeros from +** that same row & column. This occurs if there are multiple optima... +** possibly. +*/ +for(i=0;i<ASSIGNROWS;i++) +{ selected=-1; + for(j=0;j<ASSIGNCOLS;j++) + if((tableau[i][j]==0L) && + (assignedtableau[i][j]==0)) + { selected=j; + break; + } + if(selected!=-1) + { assignedtableau[i][selected]=1; + totnumassigns++; + for(k=0;k<ASSIGNCOLS;k++) + if((k!=selected) && + (tableau[i][k]==0L)) + assignedtableau[i][k]=2; + for(k=0;k<ASSIGNROWS;k++) + if((k!=i) && + (tableau[k][selected]==0L)) + assignedtableau[k][selected]=2; + } +} + +return(totnumassigns); +} + +/*********************** +** second_assignments ** +************************ +** This section of the algorithm creates the revised +** tableau, and is difficult to explain. I suggest you +** refer to the algorithm's source, mentioned in comments +** toward the beginning of the program. +*/ +static void second_assignments(long tableau[][ASSIGNCOLS], + short assignedtableau[][ASSIGNCOLS]) +{ +int i,j; /* Indexes */ +short linesrow[ASSIGNROWS]; +short linescol[ASSIGNCOLS]; +long smallest; /* Holds smallest value */ +ushort numassigns; /* Number of assignments */ +ushort newrows; /* New rows to be considered */ +/* +** Clear the linesrow and linescol arrays. +*/ +for(i=0;i<ASSIGNROWS;i++) + linesrow[i]=0; +for(i=0;i<ASSIGNCOLS;i++) + linescol[i]=0; + +/* +** Scan rows, flag each row that has no assignment in it. +*/ +for(i=0;i<ASSIGNROWS;i++) +{ numassigns=0; + for(j=0;j<ASSIGNCOLS;j++) + if(assignedtableau[i][j]==1) + { numassigns++; + break; + } + if(numassigns==0) linesrow[i]=1; +} + +do { + + newrows=0; + /* + ** For each row checked above, scan for any zeros. If found, + ** check the associated column. + */ + for(i=0;i<ASSIGNROWS;i++) + { if(linesrow[i]==1) + for(j=0;j<ASSIGNCOLS;j++) + if(tableau[i][j]==0) + linescol[j]=1; + } + + /* + ** Now scan checked columns. If any contain assigned zeros, check + ** the associated row. + */ + for(j=0;j<ASSIGNCOLS;j++) + if(linescol[j]==1) + for(i=0;i<ASSIGNROWS;i++) + if((assignedtableau[i][j]==1) && + (linesrow[i]!=1)) + { + linesrow[i]=1; + newrows++; + } +} while(newrows!=0); + +/* +** linesrow[n]==0 indicate rows covered by imaginary line +** linescol[n]==1 indicate cols covered by imaginary line +** For all cells not covered by imaginary lines, determine smallest +** value. +*/ +smallest=MAXPOSLONG; +for(i=0;i<ASSIGNROWS;i++) + if(linesrow[i]!=0) + for(j=0;j<ASSIGNCOLS;j++) + if(linescol[j]!=1) + if(tableau[i][j]<smallest) + smallest=tableau[i][j]; + +/* +** Subtract smallest from all cells in the above set. +*/ +for(i=0;i<ASSIGNROWS;i++) + if(linesrow[i]!=0) + for(j=0;j<ASSIGNCOLS;j++) + if(linescol[j]!=1) + tableau[i][j]-=smallest; + +/* +** Add smallest to all cells covered by two lines. +*/ +for(i=0;i<ASSIGNROWS;i++) + if(linesrow[i]==0) + for(j=0;j<ASSIGNCOLS;j++) + if(linescol[j]==1) + tableau[i][j]+=smallest; + +return; +} diff --git a/bitfield.c b/bitfield.c new file mode 100644 index 0000000..78aba9e --- /dev/null +++ b/bitfield.c @@ -0,0 +1,335 @@ +#include <stdint.h> +/* +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <strings.h> +#include <math.h>*/ +#include "nmglobal.h" +#include "nbench1.h" + +#ifdef DEBUG +static int numsort_status=0; +static int stringsort_status=0; +#endif + +/************************ +** BITFIELD OPERATIONS ** +*************************/ + +/************* +** DoBitops ** +************** +** Perform the bit operations test portion of the CPU +** benchmark. Returns the iterations per second. +*/ +void DoBitops(void) +{ +BitOpStruct *locbitopstruct; /* Local bitop structure */ +unsigned long *bitarraybase; /* Base of bitmap array */ +unsigned long *bitoparraybase; /* Base of bitmap operations array */ +unsigned long nbitops; /* # of bitfield operations */ +unsigned long accumtime; /* Accumulated time in ticks */ +double iterations; /* # of iterations */ +char *errorcontext; /* Error context string */ +int systemerror; /* For holding error codes */ +int ticks; + +/* +** Link to global structure. +*/ +locbitopstruct=&global_bitopstruct; + +/* +** Set the error context. +*/ +errorcontext="CPU:Bitfields"; + +/* +** See if we need to run adjustment code. +*/ +if(locbitopstruct->adjust==0) +{ + bitarraybase=(unsigned long *)AllocateMemory(locbitopstruct->bitfieldarraysize * + sizeof(unsigned long),&systemerror); + if(systemerror) + { ReportError(errorcontext,systemerror); + } + + /* + ** Initialize bitfield operations array to [2,30] elements + */ + locbitopstruct->bitoparraysize=30L; + + while(1) + { + /* + ** Allocate space for operations array + */ + bitoparraybase=(unsigned long *)AllocateMemory(locbitopstruct->bitoparraysize*2L* + sizeof(unsigned long), + &systemerror); + if(systemerror) + { ReportError(errorcontext,systemerror); + FreeMemory((void *)bitarraybase,&systemerror); + } + /* + ** Do an iteration of the bitmap test. If the + ** elapsed time is less than or equal to the permitted + ** minimum, then de-allocate the array, reallocate a + ** larger version, and try again. + */ + ticks=DoBitfieldIteration(bitarraybase, + bitoparraybase, + locbitopstruct->bitoparraysize, + &nbitops); +#ifdef DEBUG +#ifdef LINUX + if (locbitopstruct->bitoparraysize==30L){ + /* this is the first loop, write a debug file */ + FILE *file; + unsigned long *running_base; /* same as unsigned long */ + long counter; + file=fopen("debugbit.dat","w"); + running_base=bitarraybase; + for (counter=0;counter<(long)(locbitopstruct->bitfieldarraysize);counter++){ +#ifdef _LP64 + fprintf(file,"%08X",(unsigned int)(*running_base&0xFFFFFFFFL)); + fprintf(file,"%08X",(unsigned int)((*running_base>>32)&0xFFFFFFFFL)); + if ((counter+1)%4==0) fprintf(file,"\n"); +#else + fprintf(file,"%08lX",*running_base); + if ((counter+1)%8==0) fprintf(file,"\n"); +#endif + running_base=running_base+1; + } + fclose(file); + printf("\nWrote the file debugbit.dat, you may want to compare it to debugbit.good\n"); + } +#endif +#endif + + if (ticks>global_min_ticks) break; /* We're ok...exit */ + + FreeMemory((void *)bitoparraybase,&systemerror); + locbitopstruct->bitoparraysize+=100L; + } +} +else +{ + /* + ** Don't need to do self adjustment, just allocate + ** the array space. + */ + bitarraybase=(unsigned long *)AllocateMemory(locbitopstruct->bitfieldarraysize * + sizeof(unsigned long),&systemerror); + if(systemerror) + { ReportError(errorcontext,systemerror); + } + bitoparraybase=(unsigned long *)AllocateMemory(locbitopstruct->bitoparraysize*2L* + sizeof(unsigned long), + &systemerror); + if(systemerror) + { ReportError(errorcontext,systemerror); + FreeMemory((void *)bitarraybase,&systemerror); + } +} + +/* +** All's well if we get here. Repeatedly perform bitops until the +** accumulated elapsed time is greater than # of seconds requested. +*/ +accumtime=0L; +iterations=(double)0.0; +do { + accumtime+=DoBitfieldIteration(bitarraybase, + bitoparraybase, + locbitopstruct->bitoparraysize,&nbitops); + iterations+=(double)nbitops; +} while(TicksToSecs(accumtime)<locbitopstruct->request_secs); + +/* +** Clean up, calculate results, and go home. +** Also, set adjustment flag to show that we don't have +** to do self adjusting in the future. +*/ +FreeMemory((void *)bitarraybase,&systemerror); +FreeMemory((void *)bitoparraybase,&systemerror); +locbitopstruct->bitopspersec=iterations /TicksToFracSecs(accumtime); +if(locbitopstruct->adjust==0) + locbitopstruct->adjust=1; + +return; +} + +/************************ +** DoBitfieldIteration ** +************************* +** Perform a single iteration of the bitfield benchmark. +** Return the # of ticks accumulated by the operation. +*/ +static unsigned long DoBitfieldIteration(unsigned long *bitarraybase, + unsigned long *bitoparraybase, + long bitoparraysize, + unsigned long *nbitops) +{ +long i; /* Index */ +unsigned long bitoffset; /* Offset into bitmap */ +unsigned long elapsed; /* Time to execute */ +/* +** Clear # bitops counter +*/ +*nbitops=0L; + +/* +** Construct a set of bitmap offsets and run lengths. +** The offset can be any random number from 0 to the +** size of the bitmap (in bits). The run length can +** be any random number from 1 to the number of bits +** between the offset and the end of the bitmap. +** Note that the bitmap has 8192 * 32 bits in it. +** (262,144 bits) +*/ +/* +** Reset random number generator so things repeat. +** Also reset the bit array we work on. +** added by Uwe F. Mayer +*/ +randnum((int32_t)13); + +for (i=0;i<global_bitopstruct.bitfieldarraysize;i++) +{ +#ifdef _LP64 + *(bitarraybase+i)=(unsigned long)0x5555555555555555; +#else + *(bitarraybase+i)=(unsigned long)0x55555555; +#endif +} + +randnum((int32_t)13); +/* end of addition of code */ + +for (i=0;i<bitoparraysize;i++) +{ + /* First item is offset */ + /* *(bitoparraybase+i+i)=bitoffset=abs_randwc(262140L); */ + *(bitoparraybase+i+i)=bitoffset=abs_randwc((int32_t)262140); + + /* Next item is run length */ + /* *nbitops+=*(bitoparraybase+i+i+1L)=abs_randwc(262140L-bitoffset);*/ + *nbitops+=*(bitoparraybase+i+i+1L)=abs_randwc((int32_t)262140-bitoffset); +} + +/* +** Array of offset and lengths built...do an iteration of +** the test. +** Start the stopwatch. +*/ +elapsed=StartStopwatch(); + +/* +** Loop through array off offset/run length pairs. +** Execute operation based on modulus of index. +*/ +for(i = 0; i < bitoparraysize; i++) { + int asdf = i % 3; + switch(asdf) { + case 2: /* Complement run of bits */ + FlipBitRun(bitarraybase, + *(bitoparraybase+i+i), + *(bitoparraybase+i+i+1)); + break; + default: + ToggleBitRun(bitarraybase, + *(bitoparraybase+i+i), + *(bitoparraybase+i+i+1), + !i); + break; + } +} + +/* +** Return elapsed time +*/ +return(StopStopwatch(elapsed)); +} + +/*************** +** FlipBitRun ** +**************** +** Complements a run of bits. +*/ +static void FlipBitRun(unsigned long *bitmap, /* Bit map */ + unsigned long bit_addr, /* Bit address */ + unsigned long nbits) /* # of bits to flip */ +{ +unsigned long bindex; /* Index into array */ +unsigned long bitnumb; /* Bit number */ + +while(nbits--) +{ +#ifdef _LP64 + bindex=bit_addr>>6; /* Index is number /64 */ + bitnumb=bit_addr % 64; /* Bit number in longword */ +#else + bindex=bit_addr>>5; /* Index is number /32 */ + bitnumb=bit_addr % 32; /* Bit number in longword */ +#endif + bitmap[bindex]^=(1L<<bitnumb); + bit_addr++; +} + +return; +} + +/***************************** +** ToggleBitRun * +****************************** +** Set or clear a run of nbits starting at +** bit_addr in bitmap. +*/ +void ToggleBitRun(unsigned long *bitmap, /* Bitmap */ + unsigned long bit_addr, /* Address of bits to set */ + unsigned long nbits, /* # of bits to set/clr */ + unsigned int val) /* 1 or 0 */ +{ + unsigned long bindex; /* Index into array */ + unsigned long bitnumb; /* Bit number */ + +#if 0 + while (nbits != 0) { + nbits--; + + bindex = bit_addr >> 6; /* Index is number /64 */ + bitnumb = bit_addr % 64; /* Bit number in word */ + + if (val) { + bitmap[bindex] |= (1L << bitnumb); + } else { + bitmap[bindex] &= ~(1L << bitnumb); + } + bit_addr++; + } +#else + if (val) { + for (; nbits != 0; nbits--) { + bindex = bit_addr >> 6; + bitnumb = bit_addr % 64; + + bitmap[bindex] |= (1L << bitnumb); + + bit_addr++; + } + } else { + for (; nbits != 0; nbits--) { + bindex = bit_addr >> 6; + bitnumb = bit_addr % 64; + + bitmap[bindex] &= ~(1L << bitnumb); + + bit_addr++; + } + } +#endif + return; +} @@ -27,10 +27,6 @@ #include <stdio.h> -/* Is this a 64 bit architecture? If so, this will define LONG64 */ -/* Uwe F. Mayer 15 November 1997 */ -#include "pointer.h" - /* ** DEFINES */ diff --git a/fourier.c b/fourier.c new file mode 100644 index 0000000..1c95cb7 --- /dev/null +++ b/fourier.c @@ -0,0 +1,302 @@ +/* +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <strings.h>*/ +#include <math.h> +#include "nmglobal.h" +#include "nbench1.h" + +/************************* +** FOURIER COEFFICIENTS ** +*************************/ + +/************** +** DoFourier ** +*************** +** Perform the transcendental/trigonometric portion of the +** benchmark. This benchmark calculates the first n +** fourier coefficients of the function (x+1)^x defined +** on the interval 0,2. +*/ +void DoFourier(void) +{ +FourierStruct *locfourierstruct; /* Local fourier struct */ +fardouble *abase; /* Base of A[] coefficients array */ +fardouble *bbase; /* Base of B[] coefficients array */ +unsigned long accumtime; /* Accumulated time in ticks */ +double iterations; /* # of iterations */ +char *errorcontext; /* Error context string pointer */ +int systemerror; /* For error code */ + +/* +** Link to global structure +*/ +locfourierstruct=&global_fourierstruct; + +/* +** Set error context string +*/ +errorcontext="FPU:Transcendental"; + +/* +** See if we need to do self-adjustment code. +*/ +if(locfourierstruct->adjust==0) +{ + locfourierstruct->arraysize=100L; /* Start at 100 elements */ + while(1) + { + + abase=(fardouble *)AllocateMemory(locfourierstruct->arraysize*sizeof(double), + &systemerror); + if(systemerror) + { ReportError(errorcontext,systemerror); + ErrorExit(); + } + + bbase=(fardouble *)AllocateMemory(locfourierstruct->arraysize*sizeof(double), + &systemerror); + if(systemerror) + { ReportError(errorcontext,systemerror); + FreeMemory((void *)abase,&systemerror); + ErrorExit(); + } + /* + ** Do an iteration of the tests. If the elapsed time is + ** less than or equal to the permitted minimum, re-allocate + ** larger arrays and try again. + */ + if(DoFPUTransIteration(abase,bbase, + locfourierstruct->arraysize)>global_min_ticks) + break; /* We're ok...exit */ + + /* + ** Make bigger arrays and try again. + */ + FreeMemory((farvoid *)abase,&systemerror); + FreeMemory((farvoid *)bbase,&systemerror); + locfourierstruct->arraysize+=50L; + } +} +else +{ /* + ** Don't need self-adjustment. Just allocate the + ** arrays, and go. + */ + abase=(fardouble *)AllocateMemory(locfourierstruct->arraysize*sizeof(double), + &systemerror); + if(systemerror) + { ReportError(errorcontext,systemerror); + ErrorExit(); + } + + bbase=(fardouble *)AllocateMemory(locfourierstruct->arraysize*sizeof(double), + &systemerror); + if(systemerror) + { ReportError(errorcontext,systemerror); + FreeMemory((void *)abase,&systemerror); + ErrorExit(); + } +} +/* +** All's well if we get here. Repeatedly perform integration +** tests until the accumulated time is greater than the +** # of seconds requested. +*/ +accumtime=0L; +iterations=(double)0.0; +do { + accumtime+=DoFPUTransIteration(abase,bbase,locfourierstruct->arraysize); + iterations+=(double)locfourierstruct->arraysize*(double)2.0-(double)1.0; +} while(TicksToSecs(accumtime)<locfourierstruct->request_secs); + + +/* +** Clean up, calculate results, and go home. +** Also set adjustment flag to indicate no adjust code needed. +*/ +FreeMemory((farvoid *)abase,&systemerror); +FreeMemory((farvoid *)bbase,&systemerror); + +locfourierstruct->fflops=iterations/(double)TicksToFracSecs(accumtime); + +if(locfourierstruct->adjust==0) + locfourierstruct->adjust=1; + +return; +} + +/************************ +** DoFPUTransIteration ** +************************* +** Perform an iteration of the FPU Transcendental/trigonometric +** benchmark. Here, an iteration consists of calculating the +** first n fourier coefficients of the function (x+1)^x on +** the interval 0,2. n is given by arraysize. +** NOTE: The # of integration steps is fixed at +** 200. +*/ +static ulong DoFPUTransIteration(fardouble *abase, /* A coeffs. */ + fardouble *bbase, /* B coeffs. */ + ulong arraysize) /* # of coeffs */ +{ +double omega; /* Fundamental frequency */ +unsigned long i; /* Index */ +unsigned long elapsed; /* Elapsed time */ + +/* +** Start the stopwatch +*/ +elapsed=StartStopwatch(); + +/* +** Calculate the fourier series. Begin by +** calculating A[0]. +*/ + +*abase=TrapezoidIntegrate((double)0.0, + (double)2.0, + 200, + (double)0.0, /* No omega * n needed */ + 0 )/(double)2.0; + +/* +** Calculate the fundamental frequency. +** ( 2 * pi ) / period...and since the period +** is 2, omega is simply pi. +*/ +omega=(double)3.1415926535897932; + +for(i=1;i<arraysize;i++) +{ + + /* + ** Calculate A[i] terms. Note, once again, that we + ** can ignore the 2/period term outside the integral + ** since the period is 2 and the term cancels itself + ** out. + */ + *(abase+i)=TrapezoidIntegrate((double)0.0, + (double)2.0, + 200, + omega * (double)i, + 1); + + /* + ** Calculate the B[i] terms. + */ + *(bbase+i)=TrapezoidIntegrate((double)0.0, + (double)2.0, + 200, + omega * (double)i, + 2); + +} +#ifdef DEBUG +{ + int i; + printf("\nA[i]=\n"); + for (i=0;i<arraysize;i++) printf("%7.3g ",abase[i]); + printf("\nB[i]=\n(undefined) "); + for (i=1;i<arraysize;i++) printf("%7.3g ",bbase[i]); +} +#endif +/* +** All done, stop the stopwatch +*/ +return(StopStopwatch(elapsed)); +} + +/*********************** +** TrapezoidIntegrate ** +************************ +** Perform a simple trapezoid integration on the +** function (x+1)**x. +** x0,x1 set the lower and upper bounds of the +** integration. +** nsteps indicates # of trapezoidal sections +** omegan is the fundamental frequency times +** the series member # +** select = 0 for the A[0] term, 1 for cosine terms, and +** 2 for sine terms. +** Returns the value. +*/ +static double TrapezoidIntegrate( double x0, /* Lower bound */ + double x1, /* Upper bound */ + int nsteps, /* # of steps */ + double omegan, /* omega * n */ + int select) +{ +double x; /* Independent variable */ +double dx; /* Stepsize */ +double rvalue; /* Return value */ + + +/* +** Initialize independent variable +*/ +x=x0; + +/* +** Calculate stepsize +*/ +dx=(x1 - x0) / (double)nsteps; + +/* +** Initialize the return value. +*/ +rvalue=thefunction(x0,omegan,select)/(double)2.0; + +/* +** Compute the other terms of the integral. +*/ +if(nsteps!=1) +{ --nsteps; /* Already done 1 step */ + while(--nsteps ) + { + x+=dx; + rvalue+=thefunction(x,omegan,select); + } +} +/* +** Finish computation +*/ +rvalue=(rvalue+thefunction(x1,omegan,select)/(double)2.0)*dx; + +return(rvalue); +} + +/**************** +** thefunction ** +***************** +** This routine selects the function to be used +** in the Trapezoid integration. +** x is the independent variable +** omegan is omega * n +** select chooses which of the sine/cosine functions +** are used. note the special case for select=0. +*/ +static double thefunction(double x, /* Independent variable */ + double omegan, /* Omega * term */ + int select) /* Choose term */ +{ + +/* +** Use select to pick which function we call. +*/ +switch(select) +{ + case 0: return(pow(x+(double)1.0,x)); + + case 1: return(pow(x+(double)1.0,x) * cos(omegan * x)); + + case 2: return(pow(x+(double)1.0,x) * sin(omegan * x)); +} + +/* +** We should never reach this point, but the following +** keeps compilers from issuing a warning message. +*/ +return(0.0); +} diff --git a/fpemulation.c b/fpemulation.c new file mode 100644 index 0000000..d4332a3 --- /dev/null +++ b/fpemulation.c @@ -0,0 +1,147 @@ +#include <stdio.h> +/* +#include <stdlib.h> +#include <string.h> +#include <strings.h> +#include <math.h>*/ +#include "nmglobal.h" +#include "nbench1.h" + + +/***************************** +** FLOATING-POINT EMULATION ** +*****************************/ + +/************** +** DoEmFloat ** +*************** +** Perform the floating-point emulation routines portion of the +** CPU benchmark. Returns the operations per second. +*/ +void DoEmFloat(void) +{ +EmFloatStruct *locemfloatstruct; /* Local structure */ +InternalFPF *abase; /* Base of A array */ +InternalFPF *bbase; /* Base of B array */ +InternalFPF *cbase; /* Base of C array */ +unsigned long accumtime; /* Accumulated time in ticks */ +double iterations; /* # of iterations */ +unsigned long tickcount; /* # of ticks */ +char *errorcontext; /* Error context string pointer */ +int systemerror; /* For holding error code */ +unsigned long loops; /* # of loops */ + +/* +** Link to global structure +*/ +locemfloatstruct=&global_emfloatstruct; + +/* +** Set the error context +*/ +errorcontext="CPU:Floating Emulation"; + + +/* +** Test the emulation routines. +*/ +#ifdef DEBUG +#endif + +abase=(InternalFPF *)AllocateMemory(locemfloatstruct->arraysize*sizeof(InternalFPF), + &systemerror); +if(systemerror) +{ ReportError(errorcontext,systemerror); + ErrorExit(); +} + +bbase=(InternalFPF *)AllocateMemory(locemfloatstruct->arraysize*sizeof(InternalFPF), + &systemerror); +if(systemerror) +{ ReportError(errorcontext,systemerror); + FreeMemory((void *)abase,&systemerror); + ErrorExit(); +} + +cbase=(InternalFPF *)AllocateMemory(locemfloatstruct->arraysize*sizeof(InternalFPF), + &systemerror); +if(systemerror) +{ ReportError(errorcontext,systemerror); + FreeMemory((void *)abase,&systemerror); + FreeMemory((void *)bbase,&systemerror); + ErrorExit(); +} + +/* +** Set up the arrays +*/ +SetupCPUEmFloatArrays(abase,bbase,cbase,locemfloatstruct->arraysize); + +/* +** See if we need to do self-adjusting code. +*/ +if(locemfloatstruct->adjust==0) +{ + locemfloatstruct->loops=0; + + /* + ** Do an iteration of the tests. If the elapsed time is + ** less than minimum, increase the loop count and try + ** again. + */ + for(loops=1;loops<CPUEMFLOATLOOPMAX;loops+=loops) + { tickcount=DoEmFloatIteration(abase,bbase,cbase, + locemfloatstruct->arraysize, + loops); + if(tickcount>global_min_ticks) + { locemfloatstruct->loops=loops; + break; + } + } +} + +/* +** Verify that selft adjustment code worked. +*/ +if(locemfloatstruct->loops==0) +{ printf("CPU:EMFPU -- CMPUEMFLOATLOOPMAX limit hit\n"); + FreeMemory((void *)abase,&systemerror); + FreeMemory((void *)bbase,&systemerror); + FreeMemory((void *)cbase,&systemerror); + ErrorExit(); +} + +/* +** All's well if we get here. Repeatedly perform floating +** tests until the accumulated time is greater than the +** # of seconds requested. +** Each iteration performs arraysize * 3 operations. +*/ +accumtime=0L; +iterations=(double)0.0; +do { + accumtime+=DoEmFloatIteration(abase,bbase,cbase, + locemfloatstruct->arraysize, + locemfloatstruct->loops); + iterations+=(double)1.0; +} while(TicksToSecs(accumtime)<locemfloatstruct->request_secs); + + +/* +** Clean up, calculate results, and go home. +** Also, indicate that adjustment is done. +*/ +FreeMemory((void *)abase,&systemerror); +FreeMemory((void *)bbase,&systemerror); +FreeMemory((void *)cbase,&systemerror); + +locemfloatstruct->emflops=(iterations*(double)locemfloatstruct->loops)/ + (double)TicksToFracSecs(accumtime); +if(locemfloatstruct->adjust==0) + locemfloatstruct->adjust=1; + +#ifdef DEBUG +printf("----------------------------------------------------------------------------\n"); +#endif +return; +} diff --git a/huffman.c b/huffman.c new file mode 100644 index 0000000..961fc08 --- /dev/null +++ b/huffman.c @@ -0,0 +1,557 @@ +#include <string.h> +#include <stdint.h> +#include "nmglobal.h" +#include "nbench1.h" + +/* +** Word catalog +*/ +#define WORDCATSIZE 50 + +char *wordcatarray[WORDCATSIZE] = +{ "Hello", + "He", + "Him", + "the", + "this", + "that", + "though", + "rough", + "cough", + "obviously", + "But", + "but", + "bye", + "begin", + "beginning", + "beginnings", + "of", + "our", + "ourselves", + "yourselves", + "to", + "together", + "togetherness", + "from", + "either", + "I", + "A", + "return", + "However", + "that", + "example", + "yet", + "quickly", + "all", + "if", + "were", + "includes", + "always", + "never", + "not", + "small", + "returns", + "set", + "basic", + "Entered", + "with", + "used", + "shown", + "you", + "know" }; + + +/************************ +** HUFFMAN COMPRESSION ** +************************/ + +/************** +** DoHuffman ** +*************** +** Execute a huffman compression on a block of plaintext. +** Note that (as with IDEA encryption) an iteration of the +** Huffman test includes a compression AND a decompression. +** Also, the compression cycle includes building the +** Huffman tree. +*/ +void DoHuffman(void) +{ +HuffStruct *lochuffstruct; /* Loc pointer to global data */ +char *errorcontext; +int systemerror; +unsigned long accumtime; +double iterations; +char *comparray; +char *decomparray; +char *plaintext; + +/* +** Link to global data +*/ +lochuffstruct=&global_huffstruct; + +/* +** Set error context. +*/ +errorcontext="CPU:Huffman"; + +/* +** Allocate memory for the plaintext and the compressed text. +** We'll be really pessimistic here, and allocate equal amounts +** for both (though we know...well, we PRESUME) the compressed +** stuff will take less than the plain stuff. +** Also note that we'll build a 3rd buffer to decompress +** into, and we preallocate space for the huffman tree. +** (We presume that the Huffman tree will grow no larger +** than 512 bytes. This is actually a super-conservative +** estimate...but, who cares?) +*/ +plaintext=(char *)AllocateMemory(lochuffstruct->arraysize,&systemerror); +if(systemerror) +{ ReportError(errorcontext,systemerror); +} +comparray=(char *)AllocateMemory(lochuffstruct->arraysize,&systemerror); +if(systemerror) +{ ReportError(errorcontext,systemerror); + FreeMemory(plaintext,&systemerror); +} +decomparray=(char *)AllocateMemory(lochuffstruct->arraysize,&systemerror); +if(systemerror) +{ ReportError(errorcontext,systemerror); + FreeMemory(plaintext,&systemerror); + FreeMemory(comparray,&systemerror); +} + +hufftree=(huff_node *)AllocateMemory(sizeof(huff_node) * 512, + &systemerror); +if(systemerror) +{ ReportError(errorcontext,systemerror); + FreeMemory(plaintext,&systemerror); + FreeMemory(comparray,&systemerror); + FreeMemory(decomparray,&systemerror); +} + +/* +** Build the plaintext buffer. Since we want this to +** actually be able to compress, we'll use the +** wordcatalog to build the plaintext stuff. +*/ +/* +** Reset random number generator so things repeat. +** added by Uwe F. Mayer +*/ +randnum((int32_t)13); +create_text_block(plaintext,lochuffstruct->arraysize-1,(unsigned short)500); +plaintext[lochuffstruct->arraysize-1L]='\0'; +plaintextlen=lochuffstruct->arraysize; + +/* +** See if we need to perform self adjustment loop. +*/ +if(lochuffstruct->adjust==0) +{ + /* + ** Do self-adjustment. This involves initializing the + ** # of loops and increasing the loop count until we + ** get a number of loops that we can use. + */ + for(lochuffstruct->loops=100L; + lochuffstruct->loops<MAXHUFFLOOPS; + lochuffstruct->loops+=10L) + if(DoHuffIteration(plaintext, + comparray, + decomparray, + lochuffstruct->arraysize, + lochuffstruct->loops, + hufftree)>global_min_ticks) break; +} + +/* +** All's well if we get here. Do the test. +*/ +accumtime=0L; +iterations=(double)0.0; + +do { + accumtime+=DoHuffIteration(plaintext, + comparray, + decomparray, + lochuffstruct->arraysize, + lochuffstruct->loops, + hufftree); + iterations+=(double)lochuffstruct->loops; +} while(TicksToSecs(accumtime)<lochuffstruct->request_secs); + +/* +** Clean up, calculate results, and go home. Be sure to +** show that we don't have to rerun adjustment code. +*/ +FreeMemory((void *)plaintext,&systemerror); +FreeMemory((void *)comparray,&systemerror); +FreeMemory((void *)decomparray,&systemerror); +FreeMemory((void *)hufftree,&systemerror); +lochuffstruct->iterspersec=iterations / TicksToFracSecs(accumtime); + +if(lochuffstruct->adjust==0) + lochuffstruct->adjust=1; + +} + +/********************* +** create_text_line ** +********************** +** Create a random line of text, stored at *dt. The line may be +** no more than nchars long. +*/ +static void create_text_line(char *dt, + long nchars) +{ +long charssofar; /* # of characters so far */ +long tomove; /* # of characters to move */ +char myword[40]; /* Local buffer for words */ +char *wordptr; /* Pointer to word from catalog */ + +charssofar=0; + +do { +/* +** Grab a random word from the wordcatalog +*/ +/* wordptr=wordcatarray[abs_randwc((long)WORDCATSIZE)];*/ +wordptr=wordcatarray[abs_randwc((int32_t)WORDCATSIZE)]; +MoveMemory((void *)myword, + (void *)wordptr, + (unsigned long)strlen(wordptr)+1); + +/* +** Append a blank. +*/ +tomove=strlen(myword)+1; +myword[tomove-1]=' '; + +/* +** See how long it is. If its length+charssofar > nchars, we have +** to trim it. +*/ +if((tomove+charssofar)>nchars) + tomove=nchars-charssofar; +/* +** Attach the word to the current line. Increment counter. +*/ +MoveMemory((void *)dt,(void *)myword,(unsigned long)tomove); +charssofar+=tomove; +dt+=tomove; + +/* +** If we're done, bail out. Otherwise, go get another word. +*/ +} while(charssofar<nchars); + +return; +} + +/********************** +** create_text_block ** +*********************** +** Build a block of text randomly loaded with words. The words +** come from the wordcatalog (which must be loaded before you +** call this). +** *tb points to the memory where the text is to be built. +** tblen is the # of bytes to put into the text block +** maxlinlen is the maximum length of any line (line end indicated +** by a carriage return). +*/ +static void create_text_block(char *tb, + unsigned long tblen, + unsigned short maxlinlen) +{ +unsigned long bytessofar; /* # of bytes so far */ +unsigned long linelen; /* Line length */ + +bytessofar=0L; +do { + +/* +** Pick a random length for a line and fill the line. +** Make sure the line can fit (haven't exceeded tablen) and also +** make sure you leave room to append a carriage return. +*/ +linelen=abs_randwc(maxlinlen-6)+6; +if((linelen+bytessofar)>tblen) + linelen=tblen-bytessofar; + +if(linelen>1) +{ + create_text_line(tb,linelen); +} +tb+=linelen-1; /* Add the carriage return */ +*tb++='\n'; + +bytessofar+=linelen; + +} while(bytessofar<tblen); + +} + +/******************** +** DoHuffIteration ** +********************* +** Perform the huffman benchmark. This routine +** (a) Builds the huffman tree +** (b) Compresses the text +** (c) Decompresses the text and verifies correct decompression +*/ +static unsigned long DoHuffIteration(char *plaintext, + char *comparray, + char *decomparray, + unsigned long arraysize, + unsigned long nloops, + huff_node *hufftree) +{ +int i; /* Index */ +long j; /* Bigger index */ +int root; /* Pointer to huffman tree root */ +float lowfreq1, lowfreq2; /* Low frequency counters */ +int lowidx1, lowidx2; /* Indexes of low freq. elements */ +long bitoffset; /* Bit offset into text */ +long textoffset; /* Char offset into text */ +long maxbitoffset; /* Holds limit of bit offset */ +long bitstringlen; /* Length of bitstring */ +int c; /* Character from plaintext */ +char bitstring[30]; /* Holds bitstring */ +unsigned long elapsed; /* For stopwatch */ +#ifdef DEBUG +int status=0; +#endif + +/* +** Start the stopwatch +*/ +elapsed=StartStopwatch(); + +/* +** Do everything for nloops +*/ +while(nloops--) +{ + +/* +** Calculate the frequency of each byte value. Store the +** results in what will become the "leaves" of the +** Huffman tree. Interior nodes will be built in those +** nodes greater than node #255. +*/ +for(i=0;i<256;i++) +{ + hufftree[i].freq=(float)0.0; + hufftree[i].c=(unsigned char)i; +} + +for(j=0;j<arraysize;j++) + hufftree[(int)plaintext[j]].freq+=(float)1.0; + +for(i=0;i<256;i++) + if(hufftree[i].freq != (float)0.0) + hufftree[i].freq/=(float)arraysize; + +/* Reset the second half of the tree. Otherwise the loop below that +** compares the frequencies up to index 512 makes no sense. Some +** systems automatically zero out memory upon allocation, others (like +** for example DEC Unix) do not. Depending on this the loop below gets +** different data and different run times. On our alpha the data that +** was arbitrarily assigned led to an underflow error at runtime. We +** use that zeroed-out bits are in fact 0 as a float. +** Uwe F. Mayer */ +bzero((char *)&(hufftree[256]),sizeof(huff_node)*256); +/* +** Build the huffman tree. First clear all the parent +** pointers and left/right pointers. Also, discard all +** nodes that have a frequency of true 0. */ +for(i=0;i<512;i++) +{ if(hufftree[i].freq==(float)0.0) + hufftree[i].parent=EXCLUDED; + else { + hufftree[i].right = -1; + hufftree[i].left = -1; + hufftree[i].parent = -1; + } +} + +/* +** Go through the tree. Finding nodes of really low +** frequency. +*/ +root=255; /* Starting root node-1 */ +while(1) +{ + lowfreq1=(float)2.0; lowfreq2=(float)2.0; + lowidx1=-1; lowidx2=-1; + /* + ** Find first lowest frequency. + */ + for(i=0;i<=root;i++) + if(hufftree[i].parent<0) + if(hufftree[i].freq<lowfreq1) + { lowfreq1=hufftree[i].freq; + lowidx1=i; + } + + /* + ** Did we find a lowest value? If not, the + ** tree is done. + */ + if(lowidx1==-1) break; + + /* + ** Find next lowest frequency + */ + for(i=0;i<=root;i++) + if((hufftree[i].parent<0) && (i!=lowidx1)) + if(hufftree[i].freq<lowfreq2) + { lowfreq2=hufftree[i].freq; + lowidx2=i; + } + + /* + ** If we could only find one item, then that + ** item is surely the root, and (as above) the + ** tree is done. + */ + if(lowidx2==-1) break; + + /* + ** Attach the two new nodes to the current root, and + ** advance the current root. + */ + root++; /* New root */ + hufftree[lowidx1].parent=root; + hufftree[lowidx2].parent=root; + hufftree[root].freq=lowfreq1+lowfreq2; + hufftree[root].left=lowidx1; + hufftree[root].right=lowidx2; + hufftree[root].parent=-2; /* Show root */ +} + +/* +** Huffman tree built...compress the plaintext +*/ +bitoffset=0L; /* Initialize bit offset */ +for(i=0;i<arraysize;i++) +{ + c=(int)plaintext[i]; /* Fetch character */ + /* + ** Build a bit string for byte c + */ + bitstringlen=0; + while(hufftree[c].parent!=-2) + { if(hufftree[hufftree[c].parent].left==c) + bitstring[bitstringlen]='0'; + else + bitstring[bitstringlen]='1'; + c=hufftree[c].parent; + bitstringlen++; + } + + /* + ** Step backwards through the bit string, setting + ** bits in the compressed array as you go. + */ + while(bitstringlen--) + { SetCompBit((uint8_t *)comparray,(uint32_t)bitoffset,bitstring[bitstringlen]); + bitoffset++; + } +} + +/* +** Compression done. Perform de-compression. +*/ +maxbitoffset=bitoffset; +bitoffset=0; +textoffset=0; +do { + i=root; + while(hufftree[i].left!=-1) + { if(GetCompBit((uint8_t *)comparray,(uint32_t)bitoffset)==0) + i=hufftree[i].left; + else + i=hufftree[i].right; + bitoffset++; + } + decomparray[textoffset]=hufftree[i].c; + +#ifdef DEBUG + if(hufftree[i].c != plaintext[textoffset]) + { + /* Show error */ + printf("Error at textoffset %ld\n",textoffset); + status=1; + } +#endif + textoffset++; +} while(bitoffset<maxbitoffset); + +} /* End the big while(nloops--) from above */ + +/* +** All done +*/ +#ifdef DEBUG + if (status==0) printf("Huffman: OK\n"); +#endif +return(StopStopwatch(elapsed)); +} + +/*************** +** SetCompBit ** +**************** +** Set a bit in the compression array. The value of the +** bit is set according to char bitchar. +*/ +static void SetCompBit(uint8_t *comparray, + uint32_t bitoffset, + char bitchar) +{ +uint32_t byteoffset; +int bitnumb; + +/* +** First calculate which element in the comparray to +** alter. and the bitnumber. +*/ +byteoffset=bitoffset>>3; +bitnumb=bitoffset % 8; + +/* +** Set or clear +*/ +if(bitchar=='1') + comparray[byteoffset]|=(1<<bitnumb); +else + comparray[byteoffset]&=~(1<<bitnumb); + +return; +} + +/*************** +** GetCompBit ** +**************** +** Return the bit value of a bit in the comparession array. +** Returns 0 if the bit is clear, nonzero otherwise. +*/ +static int GetCompBit(uint8_t *comparray, + uint32_t bitoffset) +{ +uint32_t byteoffset; +int bitnumb; + +/* +** Calculate byte offset and bit number. +*/ +byteoffset=bitoffset>>3; +bitnumb=bitoffset % 8; + +/* +** Fetch +*/ +return((1<<bitnumb) & comparray[byteoffset] ); +} @@ -0,0 +1,392 @@ +#include "nmglobal.h" +#include "nbench1.h" + +/******************** +** IDEA Encryption ** +********************* +** IDEA - International Data Encryption Algorithm. +** Based on code presented in Applied Cryptography by Bruce Schneier. +** Which was based on code developed by Xuejia Lai and James L. Massey. +** Other modifications made by Colin Plumb. +** +*/ + +/*********** +** DoIDEA ** +************ +** Perform IDEA encryption. Note that we time encryption & decryption +** time as being a single loop. +*/ +void DoIDEA(void) +{ +IDEAStruct *locideastruct; /* Loc pointer to global structure */ +int i; +IDEAkey Z,DK; +u16 userkey[8]; +ulong accumtime; +double iterations; +char *errorcontext; +int systemerror; +faruchar *plain1; /* First plaintext buffer */ +faruchar *crypt1; /* Encryption buffer */ +faruchar *plain2; /* Second plaintext buffer */ + +/* +** Link to global data +*/ +locideastruct=&global_ideastruct; + +/* +** Set error context +*/ +errorcontext="CPU:IDEA"; + +/* +** Re-init random-number generator. +*/ +/* randnum(3L); */ +randnum((int32)3); + +/* +** Build an encryption/decryption key +*/ +for (i=0;i<8;i++) + /* userkey[i]=(u16)(abs_randwc(60000L) & 0xFFFF); */ + userkey[i]=(u16)(abs_randwc((int32)60000) & 0xFFFF); +for(i=0;i<KEYLEN;i++) + Z[i]=0; + +/* +** Compute encryption/decryption subkeys +*/ +en_key_idea(userkey,Z); +de_key_idea(Z,DK); + +/* +** Allocate memory for buffers. We'll make 3, called plain1, +** crypt1, and plain2. It works like this: +** plain1 >>encrypt>> crypt1 >>decrypt>> plain2. +** So, plain1 and plain2 should match. +** Also, fill up plain1 with sample text. +*/ +plain1=(faruchar *)AllocateMemory(locideastruct->arraysize,&systemerror); +if(systemerror) +{ + ReportError(errorcontext,systemerror); + ErrorExit(); +} + +crypt1=(faruchar *)AllocateMemory(locideastruct->arraysize,&systemerror); +if(systemerror) +{ + ReportError(errorcontext,systemerror); + FreeMemory((farvoid *)plain1,&systemerror); + ErrorExit(); +} + +plain2=(faruchar *)AllocateMemory(locideastruct->arraysize,&systemerror); +if(systemerror) +{ + ReportError(errorcontext,systemerror); + FreeMemory((farvoid *)plain1,&systemerror); + FreeMemory((farvoid *)crypt1,&systemerror); + ErrorExit(); +} +/* +** Note that we build the "plaintext" by simply loading +** the array up with random numbers. +*/ +for(i=0;i<locideastruct->arraysize;i++) + plain1[i]=(uchar)(abs_randwc(255) & 0xFF); + +/* +** See if we need to perform self adjustment loop. +*/ +if(locideastruct->adjust==0) +{ + /* + ** Do self-adjustment. This involves initializing the + ** # of loops and increasing the loop count until we + ** get a number of loops that we can use. + */ + for(locideastruct->loops=100L; + locideastruct->loops<MAXIDEALOOPS; + locideastruct->loops+=10L) + if(DoIDEAIteration(plain1,crypt1,plain2, + locideastruct->arraysize, + locideastruct->loops, + Z,DK)>global_min_ticks) break; +} + +/* +** All's well if we get here. Do the test. +*/ +accumtime=0L; +iterations=(double)0.0; + +do { + accumtime+=DoIDEAIteration(plain1,crypt1,plain2, + locideastruct->arraysize, + locideastruct->loops,Z,DK); + iterations+=(double)locideastruct->loops; +} while(TicksToSecs(accumtime)<locideastruct->request_secs); + +/* +** Clean up, calculate results, and go home. Be sure to +** show that we don't have to rerun adjustment code. +*/ +FreeMemory((farvoid *)plain1,&systemerror); +FreeMemory((farvoid *)crypt1,&systemerror); +FreeMemory((farvoid *)plain2,&systemerror); +locideastruct->iterspersec=iterations / TicksToFracSecs(accumtime); + +if(locideastruct->adjust==0) + locideastruct->adjust=1; + +return; + +} + +/******************** +** DoIDEAIteration ** +********************* +** Execute a single iteration of the IDEA encryption algorithm. +** Actually, a single iteration is one encryption and one +** decryption. +*/ +static ulong DoIDEAIteration(faruchar *plain1, + faruchar *crypt1, + faruchar *plain2, + ulong arraysize, + ulong nloops, + IDEAkey Z, + IDEAkey DK) +{ +register ulong i; +register ulong j; +ulong elapsed; +#ifdef DEBUG +int status=0; +#endif + +/* +** Start the stopwatch. +*/ +elapsed=StartStopwatch(); + +/* +** Do everything for nloops. +*/ +for(i=0;i<nloops;i++) +{ + for(j=0;j<arraysize;j+=(sizeof(u16)*4)) + cipher_idea((u16 *)(plain1+j),(u16 *)(crypt1+j),Z); /* Encrypt */ + + for(j=0;j<arraysize;j+=(sizeof(u16)*4)) + cipher_idea((u16 *)(crypt1+j),(u16 *)(plain2+j),DK); /* Decrypt */ +} + +#ifdef DEBUG +for(j=0;j<arraysize;j++) + if(*(plain1+j)!=*(plain2+j)){ + printf("IDEA Error! \n"); + status=1; + } +if (status==0) printf("IDEA: OK\n"); +#endif + +/* +** Get elapsed time. +*/ +return(StopStopwatch(elapsed)); +} + +/******** +** mul ** +********* +** Performs multiplication, modulo (2**16)+1. This code is structured +** on the assumption that untaken branches are cheaper than taken +** branches, and that the compiler doesn't schedule branches. +*/ +static u16 mul(register u16 a, register u16 b) +{ +register u32 p; +if(a) +{ if(b) + { p=(u32)(a*b); + b=low16(p); + a=(u16)(p>>16); + return(b-a+(b<a)); + } + else + return(1-a); +} +else + return(1-b); +} + +/******** +** inv ** +********* +** Compute multiplicative inverse of x, modulo (2**16)+1 +** using Euclid's GCD algorithm. It is unrolled twice +** to avoid swapping the meaning of the registers. And +** some subtracts are changed to adds. +*/ +static u16 inv(u16 x) +{ +u16 t0, t1; +u16 q, y; + +if(x<=1) + return(x); /* 0 and 1 are self-inverse */ +t1=0x10001 / x; +y=0x10001 % x; +if(y==1) + return(low16(1-t1)); +t0=1; +do { + q=x/y; + x=x%y; + t0+=q*t1; + if(x==1) return(t0); + q=y/x; + y=y%x; + t1+=q*t0; +} while(y!=1); +return(low16(1-t1)); +} + +/**************** +** en_key_idea ** +***************** +** Compute IDEA encryption subkeys Z +*/ +static void en_key_idea(u16 *userkey, u16 *Z) +{ +int i,j; + +/* +** shifts +*/ +for(j=0;j<8;j++) + Z[j]=*userkey++; +for(i=0;j<KEYLEN;j++) +{ i++; + Z[i+7]=(Z[i&7]<<9)| (Z[(i+1) & 7] >> 7); + Z+=i&8; + i&=7; +} +return; +} + +/**************** +** de_key_idea ** +***************** +** Compute IDEA decryption subkeys DK from encryption +** subkeys Z. +*/ +static void de_key_idea(IDEAkey Z, IDEAkey DK) +{ +IDEAkey TT; +int j; +u16 t1, t2, t3; +u16 *p; +p=(u16 *)(TT+KEYLEN); + +t1=inv(*Z++); +t2=-*Z++; +t3=-*Z++; +*--p=inv(*Z++); +*--p=t3; +*--p=t2; +*--p=t1; + +for(j=1;j<ROUNDS;j++) +{ t1=*Z++; + *--p=*Z++; + *--p=t1; + t1=inv(*Z++); + t2=-*Z++; + t3=-*Z++; + *--p=inv(*Z++); + *--p=t2; + *--p=t3; + *--p=t1; +} +t1=*Z++; +*--p=*Z++; +*--p=t1; +t1=inv(*Z++); +t2=-*Z++; +t3=-*Z++; +*--p=inv(*Z++); +*--p=t3; +*--p=t2; +*--p=t1; +/* +** Copy and destroy temp copy +*/ +for(j=0,p=TT;j<KEYLEN;j++) +{ *DK++=*p; + *p++=0; +} + +return; +} + +/* +** MUL(x,y) +** This #define creates a macro that computes x=x*y modulo 0x10001. +** Requires temps t16 and t32. Also requires y to be strictly 16 +** bits. Here, I am using the simplest form. May not be the +** fastest. -- RG +*/ +/* #define MUL(x,y) (x=mul(low16(x),y)) */ + +/**************** +** cipher_idea ** +***************** +** IDEA encryption/decryption algorithm. +*/ +static void cipher_idea(u16 in[4], + u16 out[4], + register IDEAkey Z) +{ +register u16 x1, x2, x3, x4, t1, t2; +/* register u16 t16; +register u16 t32; */ +int r=ROUNDS; + +x1=*in++; +x2=*in++; +x3=*in++; +x4=*in; + +do { + MUL(x1,*Z++); + x2+=*Z++; + x3+=*Z++; + MUL(x4,*Z++); + + t2=x1^x3; + MUL(t2,*Z++); + t1=t2+(x2^x4); + MUL(t1,*Z++); + t2=t1+t2; + + x1^=t1; + x4^=t2; + + t2^=x2; + x2=x3^t1; + x3=t2; +} while(--r); +MUL(x1,*Z++); +*out++=x1; +*out++=x3+*Z++; +*out++=x2+*Z++; +MUL(x4,*Z); +*out=x4; +return; +} diff --git a/linear.c b/linear.c new file mode 100644 index 0000000..ff1b1d7 --- /dev/null +++ b/linear.c @@ -0,0 +1,541 @@ +#include <stdio.h> +/*#include <stdlib.h> +#include <string.h> +#include <strings.h>*/ +#include <math.h> +#include <stddef.h> +#include "nmglobal.h" +#include "nbench1.h" + +/*********************** +** LU DECOMPOSITION ** +** (Linear Equations) ** +************************ +** These routines come from "Numerical Recipes in Pascal". +** Note that, as in the assignment algorithm, though we +** separately define LUARRAYROWS and LUARRAYCOLS, the two +** must be the same value (this routine depends on a square +** matrix). +*/ + +/********* +** DoLU ** +********** +** Perform the LU decomposition benchmark. +*/ +void DoLU(void) +{ +LUStruct *loclustruct; /* Local pointer to global data */ +char *errorcontext; +int systemerror; +fardouble *a; +fardouble *b; +fardouble *abase; +fardouble *bbase; +LUdblptr ptra; +int n; +int i; +ulong accumtime; +double iterations; + +/* +** Link to global data +*/ +loclustruct=&global_lustruct; + +/* +** Set error context. +*/ +errorcontext="FPU:LU"; + +/* +** Our first step is to build a "solvable" problem. This +** will become the "seed" set that all others will be +** derived from. (I.E., we'll simply copy these arrays +** into the others. +*/ +a=(fardouble *)AllocateMemory(sizeof(double) * LUARRAYCOLS * LUARRAYROWS, + &systemerror); +b=(fardouble *)AllocateMemory(sizeof(double) * LUARRAYROWS, + &systemerror); +n=LUARRAYROWS; + +/* +** We need to allocate a temp vector that is used by the LU +** algorithm. This removes the allocation routine from the +** timing. +*/ +LUtempvv=(fardouble *)AllocateMemory(sizeof(double)*LUARRAYROWS, + &systemerror); + +/* +** Build a problem to be solved. +*/ +ptra.ptrs.p=a; /* Gotta coerce linear array to 2D array */ +build_problem(*ptra.ptrs.ap,n,b); + +/* +** Now that we have a problem built, see if we need to do +** auto-adjust. If so, repeatedly call the DoLUIteration routine, +** increasing the number of solutions per iteration as you go. +*/ +if(loclustruct->adjust==0) +{ + loclustruct->numarrays=0; + for(i=1;i<=MAXLUARRAYS;i++) + { + abase=(fardouble *)AllocateMemory(sizeof(double) * + LUARRAYCOLS*LUARRAYROWS*(i+1),&systemerror); + if(systemerror) + { ReportError(errorcontext,systemerror); + LUFreeMem(a,b,(fardouble *)NULL,(fardouble *)NULL); + ErrorExit(); + } + bbase=(fardouble *)AllocateMemory(sizeof(double) * + LUARRAYROWS*(i+1),&systemerror); + if(systemerror) + { ReportError(errorcontext,systemerror); + LUFreeMem(a,b,abase,(fardouble *)NULL); + ErrorExit(); + } + if(DoLUIteration(a,b,abase,bbase,i)>global_min_ticks) + { loclustruct->numarrays=i; + break; + } + /* + ** Not enough arrays...free them all and try again + */ + FreeMemory((farvoid *)abase,&systemerror); + FreeMemory((farvoid *)bbase,&systemerror); + } + /* + ** Were we able to do it? + */ + if(loclustruct->numarrays==0) + { printf("FPU:LU -- Array limit reached\n"); + LUFreeMem(a,b,abase,bbase); + ErrorExit(); + } +} +else +{ /* + ** Don't need to adjust -- just allocate the proper + ** number of arrays and proceed. + */ + abase=(fardouble *)AllocateMemory(sizeof(double) * + LUARRAYCOLS*LUARRAYROWS*loclustruct->numarrays, + &systemerror); + if(systemerror) + { ReportError(errorcontext,systemerror); + LUFreeMem(a,b,(fardouble *)NULL,(fardouble *)NULL); + ErrorExit(); + } + bbase=(fardouble *)AllocateMemory(sizeof(double) * + LUARRAYROWS*loclustruct->numarrays,&systemerror); + if(systemerror) + { + ReportError(errorcontext,systemerror); + LUFreeMem(a,b,abase,(fardouble *)NULL); + ErrorExit(); + } +} +/* +** All's well if we get here. Do the test. +*/ +accumtime=0L; +iterations=(double)0.0; + +do { + accumtime+=DoLUIteration(a,b,abase,bbase, + loclustruct->numarrays); + iterations+=(double)loclustruct->numarrays; +} while(TicksToSecs(accumtime)<loclustruct->request_secs); + +/* +** Clean up, calculate results, and go home. Be sure to +** show that we don't have to rerun adjustment code. +*/ +loclustruct->iterspersec=iterations / TicksToFracSecs(accumtime); + +if(loclustruct->adjust==0) + loclustruct->adjust=1; + +LUFreeMem(a,b,abase,bbase); +return; +} + +/************** +** LUFreeMem ** +*************** +** Release memory associated with LU benchmark. +*/ +static void LUFreeMem(fardouble *a, fardouble *b, + fardouble *abase,fardouble *bbase) +{ +int systemerror; + +FreeMemory((farvoid *)a,&systemerror); +FreeMemory((farvoid *)b,&systemerror); +FreeMemory((farvoid *)LUtempvv,&systemerror); + +if(abase!=(fardouble *)NULL) FreeMemory((farvoid *)abase,&systemerror); +if(bbase!=(fardouble *)NULL) FreeMemory((farvoid *)bbase,&systemerror); +return; +} + +/****************** +** DoLUIteration ** +******************* +** Perform an iteration of the LU decomposition benchmark. +** An iteration refers to the repeated solution of several +** identical matrices. +*/ +static ulong DoLUIteration(fardouble *a,fardouble *b, + fardouble *abase, fardouble *bbase, + ulong numarrays) +{ +fardouble *locabase; +fardouble *locbbase; +LUdblptr ptra; /* For converting ptr to 2D array */ +ulong elapsed; +ulong j,i; /* Indexes */ + + +/* +** Move the seed arrays (a & b) into the destination +** arrays; +*/ +for(j=0;j<numarrays;j++) +{ locabase=abase+j*LUARRAYROWS*LUARRAYCOLS; + locbbase=bbase+j*LUARRAYROWS; + for(i=0;i<LUARRAYROWS*LUARRAYCOLS;i++) + *(locabase+i)=*(a+i); + for(i=0;i<LUARRAYROWS;i++) + *(locbbase+i)=*(b+i); +} + +/* +** Do test...begin timing. +*/ +elapsed=StartStopwatch(); +for(i=0;i<numarrays;i++) +{ locabase=abase+i*LUARRAYROWS*LUARRAYCOLS; + locbbase=bbase+i*LUARRAYROWS; + ptra.ptrs.p=locabase; + lusolve(*ptra.ptrs.ap,LUARRAYROWS,locbbase); +} + +return(StopStopwatch(elapsed)); +} + +/****************** +** build_problem ** +******************* +** Constructs a solvable set of linear equations. It does this by +** creating an identity matrix, then loading the solution vector +** with random numbers. After that, the identity matrix and +** solution vector are randomly "scrambled". Scrambling is +** done by (a) randomly selecting a row and multiplying that +** row by a random number and (b) adding one randomly-selected +** row to another. +*/ +static void build_problem(double a[][LUARRAYCOLS], + int n, + double b[LUARRAYROWS]) +{ +long i,j,k,k1; /* Indexes */ +double rcon; /* Random constant */ + +/* +** Reset random number generator +*/ +/* randnum(13L); */ +randnum((int32)13); + +/* +** Build an identity matrix. +** We'll also use this as a chance to load the solution +** vector. +*/ +for(i=0;i<n;i++) +{ /* b[i]=(double)(abs_randwc(100L)+1L); */ + b[i]=(double)(abs_randwc((int32)100)+(int32)1); + for(j=0;j<n;j++) + if(i==j) + /* a[i][j]=(double)(abs_randwc(1000L)+1L); */ + a[i][j]=(double)(abs_randwc((int32)1000)+(int32)1); + else + a[i][j]=(double)0.0; +} + +#ifdef DEBUG +printf("Problem:\n"); +for(i=0;i<n;i++) +{ +/* + for(j=0;j<n;j++) + printf("%6.2f ",a[i][j]); +*/ + printf("%.0f/%.0f=%.2f\t",b[i],a[i][i],b[i]/a[i][i]); +/* + printf("\n"); +*/ +} +#endif + +/* +** Scramble. Do this 8n times. See comment above for +** a description of the scrambling process. +*/ + +for(i=0;i<8*n;i++) +{ + /* + ** Pick a row and a random constant. Multiply + ** all elements in the row by the constant. + */ + /* k=abs_randwc((long)n); + rcon=(double)(abs_randwc(20L)+1L); + for(j=0;j<n;j++) + a[k][j]=a[k][j]*rcon; + b[k]=b[k]*rcon; +*/ + /* + ** Pick two random rows and add second to + ** first. Note that we also occasionally multiply + ** by minus 1 so that we get a subtraction operation. + */ + /* k=abs_randwc((long)n); */ + /* k1=abs_randwc((long)n); */ + k=abs_randwc((int32)n); + k1=abs_randwc((int32)n); + if(k!=k1) + { + if(k<k1) rcon=(double)1.0; + else rcon=(double)-1.0; + for(j=0;j<n;j++) + a[k][j]+=a[k1][j]*rcon;; + b[k]+=b[k1]*rcon; + } +} + +return; +} + + +/*********** +** ludcmp ** +************ +** From the procedure of the same name in "Numerical Recipes in Pascal", +** by Press, Flannery, Tukolsky, and Vetterling. +** Given an nxn matrix a[], this routine replaces it by the LU +** decomposition of a rowwise permutation of itself. a[] and n +** are input. a[] is output, modified as follows: +** -- -- +** | b(1,1) b(1,2) b(1,3)... | +** | a(2,1) b(2,2) b(2,3)... | +** | a(3,1) a(3,2) b(3,3)... | +** | a(4,1) a(4,2) a(4,3)... | +** | ... | +** -- -- +** +** Where the b(i,j) elements form the upper triangular matrix of the +** LU decomposition, and the a(i,j) elements form the lower triangular +** elements. The LU decomposition is calculated so that we don't +** need to store the a(i,i) elements (which would have laid along the +** diagonal and would have all been 1). +** +** indx[] is an output vector that records the row permutation +** effected by the partial pivoting; d is output as +/-1 depending +** on whether the number of row interchanges was even or odd, +** respectively. +** Returns 0 if matrix singular, else returns 1. +*/ +static int ludcmp(double a[][LUARRAYCOLS], + int n, + int indx[], + int *d) +{ + +double big; /* Holds largest element value */ +double sum; +double dum; /* Holds dummy value */ +int i,j,k; /* Indexes */ +int imax=0; /* Holds max index value */ +double tiny; /* A really small number */ + +tiny=(double)1.0e-20; + +*d=1; /* No interchanges yet */ + +for(i=0;i<n;i++) +{ big=(double)0.0; + for(j=0;j<n;j++) + if((double)fabs(a[i][j]) > big) + big=fabs(a[i][j]); + /* Bail out on singular matrix */ + if(big==(double)0.0) return(0); + LUtempvv[i]=1.0/big; +} + +/* +** Crout's algorithm...loop over columns. +*/ +for(j=0;j<n;j++) +{ if(j!=0) + for(i=0;i<j;i++) + { sum=a[i][j]; + if(i!=0) + for(k=0;k<i;k++) + sum-=(a[i][k]*a[k][j]); + a[i][j]=sum; + } + big=(double)0.0; + for(i=j;i<n;i++) + { sum=a[i][j]; + if(j!=0) + for(k=0;k<j;k++) + sum-=a[i][k]*a[k][j]; + a[i][j]=sum; + dum=LUtempvv[i]*fabs(sum); + if(dum>=big) + { big=dum; + imax=i; + } + } + if(j!=imax) /* Interchange rows if necessary */ + { for(k=0;k<n;k++) + { dum=a[imax][k]; + a[imax][k]=a[j][k]; + a[j][k]=dum; + } + *d=-*d; /* Change parity of d */ + dum=LUtempvv[imax]; + LUtempvv[imax]=LUtempvv[j]; /* Don't forget scale factor */ + LUtempvv[j]=dum; + } + indx[j]=imax; + /* + ** If the pivot element is zero, the matrix is singular + ** (at least as far as the precision of the machine + ** is concerned.) We'll take the original author's + ** recommendation and replace 0.0 with "tiny". + */ + if(a[j][j]==(double)0.0) + a[j][j]=tiny; + + if(j!=(n-1)) + { dum=1.0/a[j][j]; + for(i=j+1;i<n;i++) + a[i][j]=a[i][j]*dum; + } +} + +return(1); +} + +/*********** +** lubksb ** +************ +** Also from "Numerical Recipes in Pascal". +** This routine solves the set of n linear equations A X = B. +** Here, a[][] is input, not as the matrix A, but as its +** LU decomposition, created by the routine ludcmp(). +** Indx[] is input as the permutation vector returned by ludcmp(). +** b[] is input as the right-hand side an returns the +** solution vector X. +** a[], n, and indx are not modified by this routine and +** can be left in place for different values of b[]. +** This routine takes into account the possibility that b will +** begin with many zero elements, so it is efficient for use in +** matrix inversion. +*/ +static void lubksb( double a[][LUARRAYCOLS], + int n, + int indx[LUARRAYROWS], + double b[LUARRAYROWS]) +{ + +int i,j; /* Indexes */ +int ip; /* "pointer" into indx */ +int ii; +double sum; + +/* +** When ii is set to a positive value, it will become +** the index of the first nonvanishing element of b[]. +** We now do the forward substitution. The only wrinkle +** is to unscramble the permutation as we go. +*/ +ii=-1; +for(i=0;i<n;i++) +{ ip=indx[i]; + sum=b[ip]; + b[ip]=b[i]; + if(ii!=-1) + for(j=ii;j<i;j++) + sum=sum-a[i][j]*b[j]; + else + /* + ** If a nonzero element is encountered, we have + ** to do the sums in the loop above. + */ + if(sum!=(double)0.0) + ii=i; + b[i]=sum; +} +/* +** Do backsubstitution +*/ +for(i=(n-1);i>=0;i--) +{ + sum=b[i]; + if(i!=(n-1)) + for(j=(i+1);j<n;j++) + sum=sum-a[i][j]*b[j]; + b[i]=sum/a[i][i]; +} +return; +} + +/************ +** lusolve ** +************* +** Solve a linear set of equations: A x = b +** Original matrix A will be destroyed by this operation. +** Returns 0 if matrix is singular, 1 otherwise. +*/ +static int lusolve(double a[][LUARRAYCOLS], + int n, + double b[LUARRAYROWS]) +{ +int indx[LUARRAYROWS]; +int d; +#ifdef DEBUG +int i,j; +#endif + +if(ludcmp(a,n,indx,&d)==0) return(0); + +/* Matrix not singular -- proceed */ +lubksb(a,n,indx,b); + +#ifdef DEBUG +printf("Solution:\n"); +for(i=0;i<n;i++) +{ + for(j=0;j<n;j++){ + /* + printf("%6.2f ",a[i][j]); + */ + } + printf("%6.2f\t",b[i]); + /* + printf("\n"); + */ +} +printf("\n"); +#endif + +return(1); +} @@ -336,8 +336,6 @@ extern void DoHuffman(void); extern void DoNNET(void); extern void DoLU(void); -extern void ErrorExit(void); /* From SYSSPEC */ - /* ** Array of pointers to the benchmark functions. */ diff --git a/nbench1.c b/nbench1.c deleted file mode 100644 index 05c35df..0000000 --- a/nbench1.c +++ /dev/null @@ -1,4445 +0,0 @@ - -/* -** nbench1.c -*/ - -/******************************** -** BYTEmark (tm) ** -** BYTE NATIVE MODE BENCHMARKS ** -** VERSION 2 ** -** ** -** Included in this source ** -** file: ** -** Numeric Heapsort ** -** String Heapsort ** -** Bitfield test ** -** Floating point emulation ** -** Fourier coefficients ** -** Assignment algorithm ** -** IDEA Encyption ** -** Huffman compression ** -** Back prop. neural net ** -** LU Decomposition ** -** (linear equations) ** -** ---------- ** -** Rick Grehan, BYTE Magazine ** -********************************* -** -** BYTEmark (tm) -** BYTE's Native Mode Benchmarks -** Rick Grehan, BYTE Magazine -** -** Creation: -** Revision: 3/95;10/95 -** 10/95 - Removed allocation that was taking place inside -** the LU Decomposition benchmark. Though it didn't seem to -** make a difference on systems we ran it on, it nonetheless -** removes an operating system dependency that probably should -** not have been there. -** -** DISCLAIMER -** The source, executable, and documentation files that comprise -** the BYTEmark benchmarks are made available on an "as is" basis. -** This means that we at BYTE Magazine have made every reasonable -** effort to verify that the there are no errors in the source and -** executable code. We cannot, however, guarantee that the programs -** are error-free. Consequently, McGraw-HIll and BYTE Magazine make -** no claims in regard to the fitness of the source code, executable -** code, and documentation of the BYTEmark. -** Furthermore, BYTE Magazine, McGraw-Hill, and all employees -** of McGraw-Hill cannot be held responsible for any damages resulting -** from the use of this code or the results obtained from using -** this code. -*/ - -/* -** INCLUDES -*/ -#include <stdio.h> -#include <stdlib.h> -#include <string.h> -#include <strings.h> -#include <math.h> -#include "nmglobal.h" -#include "nbench1.h" -#include "wordcat.h" - -#ifdef DEBUG -static int numsort_status=0; -static int stringsort_status=0; -#endif - -/********************* -** NUMERIC HEAPSORT ** -********************** -** This test implements a heapsort algorithm, performed on an -** array of longs. -*/ - -/************** -** DoNumSort ** -*************** -** This routine performs the CPU numeric sort test. -** NOTE: Last version incorrectly stated that the routine -** returned result in # of longword sorted per second. -** Not so; the routine returns # of iterations per sec. -*/ - -void DoNumSort(void) -{ -SortStruct *numsortstruct; /* Local pointer to global struct */ -farlong *arraybase; /* Base pointers of array */ -long accumtime; /* Accumulated time */ -double iterations; /* Iteration counter */ -char *errorcontext; /* Error context string pointer */ -int systemerror; /* For holding error codes */ - -/* -** Link to global structure -*/ -numsortstruct=&global_numsortstruct; - -/* -** Set the error context string. -*/ -errorcontext="CPU:Numeric Sort"; - -/* -** See if we need to do self adjustment code. -*/ -if(numsortstruct->adjust==0) -{ - /* - ** Self-adjustment code. The system begins by sorting 1 - ** array. If it does that in no time, then two arrays - ** are built and sorted. This process continues until - ** enough arrays are built to handle the tolerance. - */ - numsortstruct->numarrays=1; - while(1) - { - /* - ** Allocate space for arrays - */ - arraybase=(farlong *)AllocateMemory(sizeof(long) * - numsortstruct->numarrays * numsortstruct->arraysize, - &systemerror); - if(systemerror) - { ReportError(errorcontext,systemerror); - FreeMemory((farvoid *)arraybase, - &systemerror); - ErrorExit(); - } - - /* - ** Do an iteration of the numeric sort. If the - ** elapsed time is less than or equal to the permitted - ** minimum, then allocate for more arrays and - ** try again. - */ - if(DoNumSortIteration(arraybase, - numsortstruct->arraysize, - numsortstruct->numarrays)>global_min_ticks) - break; /* We're ok...exit */ - - FreeMemory((farvoid *)arraybase,&systemerror); - if(numsortstruct->numarrays++>NUMNUMARRAYS) - { printf("CPU:NSORT -- NUMNUMARRAYS hit.\n"); - ErrorExit(); - } - } -} -else -{ /* - ** Allocate space for arrays - */ - arraybase=(farlong *)AllocateMemory(sizeof(long) * - numsortstruct->numarrays * numsortstruct->arraysize, - &systemerror); - if(systemerror) - { ReportError(errorcontext,systemerror); - FreeMemory((farvoid *)arraybase, - &systemerror); - ErrorExit(); - } - -} -/* -** All's well if we get here. Repeatedly perform sorts until the -** accumulated elapsed time is greater than # of seconds requested. -*/ -accumtime=0L; -iterations=(double)0.0; - -do { - accumtime+=DoNumSortIteration(arraybase, - numsortstruct->arraysize, - numsortstruct->numarrays); - iterations+=(double)1.0; -} while(TicksToSecs(accumtime)<numsortstruct->request_secs); - -/* -** Clean up, calculate results, and go home. Be sure to -** show that we don't have to rerun adjustment code. -*/ -FreeMemory((farvoid *)arraybase,&systemerror); - -numsortstruct->sortspersec=iterations * - (double)numsortstruct->numarrays / TicksToFracSecs(accumtime); - -if(numsortstruct->adjust==0) - numsortstruct->adjust=1; - -#ifdef DEBUG -if (numsort_status==0) printf("Numeric sort: OK\n"); -numsort_status=0; -#endif -return; -} - -/*********************** -** DoNumSortIteration ** -************************ -** This routine executes one iteration of the numeric -** sort benchmark. It returns the number of ticks -** elapsed for the iteration. -*/ -static ulong DoNumSortIteration(farlong *arraybase, - ulong arraysize, - uint numarrays) -{ -ulong elapsed; /* Elapsed ticks */ -ulong i; -/* -** Load up the array with random numbers -*/ -LoadNumArrayWithRand(arraybase,arraysize,numarrays); - -/* -** Start the stopwatch -*/ -elapsed=StartStopwatch(); - -/* -** Execute a heap of heapsorts -*/ -for(i=0;i<numarrays;i++) - NumHeapSort(arraybase+i*arraysize,0L,arraysize-1L); - -/* -** Get elapsed time -*/ -elapsed=StopStopwatch(elapsed); -#ifdef DEBUG -{ - for(i=0;i<arraysize-1;i++) - { /* - ** Compare to check for proper - ** sort. - */ - if(arraybase[i+1]<arraybase[i]) - { printf("Sort Error\n"); - numsort_status=1; - break; - } - } -} -#endif - -return(elapsed); -} - -/************************* -** LoadNumArrayWithRand ** -************************** -** Load up an array with random longs. -*/ -static void LoadNumArrayWithRand(farlong *array, /* Pointer to arrays */ - ulong arraysize, - uint numarrays) /* # of elements in array */ -{ -long i; /* Used for index */ -farlong *darray; /* Destination array pointer */ -/* -** Initialize the random number generator -*/ -/* randnum(13L); */ -randnum((int32)13); - -/* -** Load up first array with randoms -*/ -for(i=0L;i<arraysize;i++) - /* array[i]=randnum(0L); */ - array[i]=randnum((int32)0); - -/* -** Now, if there's more than one array to load, copy the -** first into each of the others. -*/ -darray=array; -while(--numarrays) -{ darray+=arraysize; - for(i=0L;i<arraysize;i++) - darray[i]=array[i]; -} - -return; -} - -/**************** -** NumHeapSort ** -***************** -** Pass this routine a pointer to an array of long -** integers. Also pass in minimum and maximum offsets. -** This routine performs a heap sort on that array. -*/ -static void NumHeapSort(farlong *array, - ulong bottom, /* Lower bound */ - ulong top) /* Upper bound */ -{ -ulong temp; /* Used to exchange elements */ -ulong i; /* Loop index */ - -/* -** First, build a heap in the array -*/ -for(i=(top/2L); i>0; --i) - NumSift(array,i,top); - -/* -** Repeatedly extract maximum from heap and place it at the -** end of the array. When we get done, we'll have a sorted -** array. -*/ -for(i=top; i>0; --i) -{ NumSift(array,bottom,i); - temp=*array; /* Perform exchange */ - *array=*(array+i); - *(array+i)=temp; -} -return; -} - -/************ -** NumSift ** -************* -** Peforms the sift operation on a numeric array, -** constructing a heap in the array. -*/ -static void NumSift(farlong *array, /* Array of numbers */ - ulong i, /* Minimum of array */ - ulong j) /* Maximum of array */ -{ -unsigned long k; -long temp; /* Used for exchange */ - -while((i+i)<=j) -{ - k=i+i; - if(k<j) - if(array[k]<array[k+1L]) - ++k; - if(array[i]<array[k]) - { - temp=array[k]; - array[k]=array[i]; - array[i]=temp; - i=k; - } - else - i=j+1; -} -return; -} - -/******************** -** STRING HEAPSORT ** -********************/ - -/***************** -** DoStringSort ** -****************** -** This routine performs the CPU string sort test. -** Arguments: -** requested_secs = # of seconds to execute test -** stringspersec = # of strings per second sorted (RETURNED) -*/ -void DoStringSort(void) -{ - -SortStruct *strsortstruct; /* Local for sort structure */ -faruchar *arraybase; /* Base pointer of char array */ -long accumtime; /* Accumulated time */ -double iterations; /* # of iterations */ -char *errorcontext; /* Error context string pointer */ -int systemerror; /* For holding error code */ - -/* -** Link to global structure -*/ -strsortstruct=&global_strsortstruct; - -/* -** Set the error context -*/ -errorcontext="CPU:String Sort"; - -/* -** See if we have to perform self-adjustment code -*/ -if(strsortstruct->adjust==0) -{ - /* - ** Initialize the number of arrays. - */ - strsortstruct->numarrays=1; - while(1) - { - /* - ** Allocate space for array. We'll add an extra 100 - ** bytes to protect memory as strings move around - ** (this can happen during string adjustment) - */ - arraybase=(faruchar *)AllocateMemory((strsortstruct->arraysize+100L) * - (long)strsortstruct->numarrays,&systemerror); - if(systemerror) - { ReportError(errorcontext,systemerror); - ErrorExit(); - } - - /* - ** Do an iteration of the string sort. If the - ** elapsed time is less than or equal to the permitted - ** minimum, then de-allocate the array, reallocate a - ** an additional array, and try again. - */ - if(DoStringSortIteration(arraybase, - strsortstruct->numarrays, - strsortstruct->arraysize)>global_min_ticks) - break; /* We're ok...exit */ - - FreeMemory((farvoid *)arraybase,&systemerror); - strsortstruct->numarrays+=1; - } -} -else -{ - /* - ** We don't have to perform self adjustment code. - ** Simply allocate the space for the array. - */ - arraybase=(faruchar *)AllocateMemory((strsortstruct->arraysize+100L) * - (long)strsortstruct->numarrays,&systemerror); - if(systemerror) - { ReportError(errorcontext,systemerror); - ErrorExit(); - } -} -/* -** All's well if we get here. Repeatedly perform sorts until the -** accumulated elapsed time is greater than # of seconds requested. -*/ -accumtime=0L; -iterations=(double)0.0; - -do { - accumtime+=DoStringSortIteration(arraybase, - strsortstruct->numarrays, - strsortstruct->arraysize); - iterations+=(double)strsortstruct->numarrays; -} while(TicksToSecs(accumtime)<strsortstruct->request_secs); - -/* -** Clean up, calculate results, and go home. -** Set flag to show we don't need to rerun adjustment code. -*/ -FreeMemory((farvoid *)arraybase,&systemerror); -strsortstruct->sortspersec=iterations / (double)TicksToFracSecs(accumtime); -if(strsortstruct->adjust==0) - strsortstruct->adjust=1; -#ifdef DEBUG -if (stringsort_status==0) printf("String sort: OK\n"); -stringsort_status=0; -#endif -return; -} - -/************************** -** DoStringSortIteration ** -*************************** -** This routine executes one iteration of the string -** sort benchmark. It returns the number of ticks -** Note that this routine also builds the offset pointer -** array. -*/ -static ulong DoStringSortIteration(faruchar *arraybase, - uint numarrays,ulong arraysize) -{ -farulong *optrarray; /* Offset pointer array */ -unsigned long elapsed; /* Elapsed ticks */ -unsigned long nstrings; /* # of strings in array */ -int syserror; /* System error code */ -unsigned int i; /* Index */ -farulong *tempobase; /* Temporary offset pointer base */ -faruchar *tempsbase; /* Temporary string base pointer */ - -/* -** Load up the array(s) with random numbers -*/ -optrarray=LoadStringArray(arraybase,numarrays,&nstrings,arraysize); - -/* -** Set temp base pointers...they will be modified as the -** benchmark proceeds. -*/ -tempobase=optrarray; -tempsbase=arraybase; - -/* -** Start the stopwatch -*/ -elapsed=StartStopwatch(); - -/* -** Execute heapsorts -*/ -for(i=0;i<numarrays;i++) -{ StrHeapSort(tempobase,tempsbase,nstrings,0L,nstrings-1); - tempobase+=nstrings; /* Advance base pointers */ - tempsbase+=arraysize+100; -} - -/* -** Record elapsed time -*/ -elapsed=StopStopwatch(elapsed); - -#ifdef DEBUG -{ - unsigned long i; - for(i=0;i<nstrings-1;i++) - { /* - ** Compare strings to check for proper - ** sort. - */ - if(str_is_less(optrarray,arraybase,nstrings,i+1,i)) - { printf("Sort Error\n"); - stringsort_status=1; - break; - } - } -} -#endif - -/* -** Release the offset pointer array built by -** LoadStringArray() -*/ -FreeMemory((farvoid *)optrarray,&syserror); - -/* -** Return elapsed ticks. -*/ -return(elapsed); -} - -/******************** -** LoadStringArray ** -********************* -** Initialize the string array with random strings of -** varying sizes. -** Returns the pointer to the offset pointer array. -** Note that since we're creating a number of arrays, this -** routine builds one array, then copies it into the others. -*/ -static farulong *LoadStringArray(faruchar *strarray, /* String array */ - uint numarrays, /* # of arrays */ - ulong *nstrings, /* # of strings */ - ulong arraysize) /* Size of array */ -{ -faruchar *tempsbase; /* Temporary string base pointer */ -farulong *optrarray; /* Local for pointer */ -farulong *tempobase; /* Temporary offset pointer base pointer */ -unsigned long curroffset; /* Current offset */ -int fullflag; /* Indicates full array */ -unsigned char stringlength; /* Length of string */ -unsigned char i; /* Index */ -unsigned long j; /* Another index */ -unsigned int k; /* Yet another index */ -unsigned int l; /* Ans still one more index */ -int systemerror; /* For holding error code */ - -/* -** Initialize random number generator. -*/ -/* randnum(13L); */ -randnum((int32)13); - -/* -** Start with no strings. Initialize our current offset pointer -** to 0. -*/ -*nstrings=0L; -curroffset=0L; -fullflag=0; - -do -{ - /* - ** Allocate a string with a random length no - ** shorter than 4 bytes and no longer than - ** 80 bytes. Note we have to also make sure - ** there's room in the array. - */ - /* stringlength=(unsigned char)((1+abs_randwc(76L)) & 0xFFL);*/ - stringlength=(unsigned char)((1+abs_randwc((int32)76)) & 0xFFL); - if((unsigned long)stringlength+curroffset+1L>=arraysize) - { stringlength=(unsigned char)((arraysize-curroffset-1L) & - 0xFF); - fullflag=1; /* Indicates a full */ - } - - /* - ** Store length at curroffset and advance current offset. - */ - *(strarray+curroffset)=stringlength; - curroffset++; - - /* - ** Fill up the rest of the string with random bytes. - */ - for(i=0;i<stringlength;i++) - { *(strarray+curroffset)= - /* (unsigned char)(abs_randwc((long)0xFE)); */ - (unsigned char)(abs_randwc((int32)0xFE)); - curroffset++; - } - - /* - ** Increment the # of strings counter. - */ - *nstrings+=1L; - -} while(fullflag==0); - -/* -** We now have initialized a single full array. If there -** is more than one array, copy the original into the -** others. -*/ -k=1; -tempsbase=strarray; -while(k<numarrays) -{ tempsbase+=arraysize+100; /* Set base */ - for(l=0;l<arraysize;l++) - tempsbase[l]=strarray[l]; - k++; -} - -/* -** Now the array is full, allocate enough space for an -** offset pointer array. -*/ -optrarray=(farulong *)AllocateMemory(*nstrings * sizeof(unsigned long) * - numarrays, - &systemerror); -if(systemerror) -{ ReportError("CPU:Stringsort",systemerror); - FreeMemory((void *)strarray,&systemerror); - ErrorExit(); -} - -/* -** Go through the newly-built string array, building -** offsets and putting them into the offset pointer -** array. -*/ -curroffset=0; -for(j=0;j<*nstrings;j++) -{ *(optrarray+j)=curroffset; - curroffset+=(unsigned long)(*(strarray+curroffset))+1L; -} - -/* -** As above, we've made one copy of the offset pointers, -** so duplicate this array in the remaining ones. -*/ -k=1; -tempobase=optrarray; -while(k<numarrays) -{ tempobase+=*nstrings; - for(l=0;l<*nstrings;l++) - tempobase[l]=optrarray[l]; - k++; -} - -/* -** All done...go home. Pass local pointer back. -*/ -return(optrarray); -} - -/************** -** stradjust ** -*************** -** Used by the string heap sort. Call this routine to adjust the -** string at offset i to length l. The members of the string array -** are moved accordingly and the length of the string at offset i -** is set to l. -*/ -static void stradjust(farulong *optrarray, /* Offset pointer array */ - faruchar *strarray, /* String array */ - ulong nstrings, /* # of strings */ - ulong i, /* Offset to adjust */ - uchar l) /* New length */ -{ -unsigned long nbytes; /* # of bytes to move */ -unsigned long j; /* Index */ -int direction; /* Direction indicator */ -unsigned char adjamount; /* Adjustment amount */ - -/* -** If new length is less than old length, the direction is -** down. If new length is greater than old length, the -** direction is up. -*/ -direction=(int)l - (int)*(strarray+*(optrarray+i)); -adjamount=(unsigned char)abs(direction); - -/* -** See if the adjustment is being made to the last -** string in the string array. If so, we don't have to -** do anything more than adjust the length field. -*/ -if(i==(nstrings-1L)) -{ *(strarray+*(optrarray+i))=l; - return; -} - -/* -** Calculate the total # of bytes in string array from -** location i+1 to end of array. Whether we're moving "up" or -** down, this is how many bytes we'll have to move. -*/ -nbytes=*(optrarray+nstrings-1L) + - (unsigned long)*(strarray+*(optrarray+nstrings-1L)) + 1L - - *(optrarray+i+1L); - -/* -** Calculate the source and the destination. Source is -** string position i+1. Destination is string position i+l -** (i+"ell"...don't confuse 1 and l). -** Hand this straight to memmove and let it handle the -** "overlap" problem. -*/ -MoveMemory((farvoid *)(strarray+*(optrarray+i)+l+1), - (farvoid *)(strarray+*(optrarray+i+1)), - (unsigned long)nbytes); - -/* -** We have to adjust the offset pointer array. -** This covers string i+1 to numstrings-1. -*/ -for(j=i+1;j<nstrings;j++) - if(direction<0) - *(optrarray+j)=*(optrarray+j)-adjamount; - else - *(optrarray+j)=*(optrarray+j)+adjamount; - -/* -** Store the new length and go home. -*/ -*(strarray+*(optrarray+i))=l; -return; -} - -/**************** -** strheapsort ** -***************** -** Pass this routine a pointer to an array of unsigned char. -** The array is presumed to hold strings occupying at most -** 80 bytes (counts a byte count). -** This routine also needs a pointer to an array of offsets -** which represent string locations in the array, and -** an unsigned long indicating the number of strings -** in the array. -*/ -static void StrHeapSort(farulong *optrarray, /* Offset pointers */ - faruchar *strarray, /* Strings array */ - ulong numstrings, /* # of strings in array */ - ulong bottom, /* Region to sort...bottom */ - ulong top) /* Region to sort...top */ -{ -unsigned char temp[80]; /* Used to exchange elements */ -unsigned char tlen; /* Temp to hold length */ -unsigned long i; /* Loop index */ - - -/* -** Build a heap in the array -*/ -for(i=(top/2L); i>0; --i) - strsift(optrarray,strarray,numstrings,i,top); - -/* -** Repeatedly extract maximum from heap and place it at the -** end of the array. When we get done, we'll have a sorted -** array. -*/ -for(i=top; i>0; --i) -{ - strsift(optrarray,strarray,numstrings,0,i); - - /* temp = string[0] */ - tlen=*strarray; - MoveMemory((farvoid *)&temp[0], /* Perform exchange */ - (farvoid *)strarray, - (unsigned long)(tlen+1)); - - - /* string[0]=string[i] */ - tlen=*(strarray+*(optrarray+i)); - stradjust(optrarray,strarray,numstrings,0,tlen); - MoveMemory((farvoid *)strarray, - (farvoid *)(strarray+*(optrarray+i)), - (unsigned long)(tlen+1)); - - /* string[i]=temp */ - tlen=temp[0]; - stradjust(optrarray,strarray,numstrings,i,tlen); - MoveMemory((farvoid *)(strarray+*(optrarray+i)), - (farvoid *)&temp[0], - (unsigned long)(tlen+1)); - -} -return; -} - -/**************** -** str_is_less ** -***************** -** Pass this function: -** 1) A pointer to an array of offset pointers -** 2) A pointer to a string array -** 3) The number of elements in the string array -** 4) Offsets to two strings (a & b) -** This function returns TRUE if string a is < string b. -*/ -static int str_is_less(farulong *optrarray, /* Offset pointers */ - faruchar *strarray, /* String array */ - ulong numstrings, /* # of strings */ - ulong a, ulong b) /* Offsets */ -{ -int slen; /* String length */ - -/* -** Determine which string has the minimum length. Use that -** to call strncmp(). If they match up to that point, the -** string with the longer length wins. -*/ -slen=(int)*(strarray+*(optrarray+a)); -if(slen > (int)*(strarray+*(optrarray+b))) - slen=(int)*(strarray+*(optrarray+b)); - -slen=strncmp((char *)(strarray+*(optrarray+a)), - (char *)(strarray+*(optrarray+b)),slen); - -if(slen==0) -{ - /* - ** They match. Return true if the length of a - ** is greater than the length of b. - */ - if(*(strarray+*(optrarray+a)) > - *(strarray+*(optrarray+b))) - return(TRUE); - return(FALSE); -} - -if(slen<0) return(TRUE); /* a is strictly less than b */ - -return(FALSE); /* Only other possibility */ -} - -/************ -** strsift ** -************* -** Pass this function: -** 1) A pointer to an array of offset pointers -** 2) A pointer to a string array -** 3) The number of elements in the string array -** 4) Offset within which to sort. -** Sift the array within the bounds of those offsets (thus -** building a heap). -*/ -static void strsift(farulong *optrarray, /* Offset pointers */ - faruchar *strarray, /* String array */ - ulong numstrings, /* # of strings */ - ulong i, ulong j) /* Offsets */ -{ -unsigned long k; /* Temporaries */ -unsigned char temp[80]; -unsigned char tlen; /* For string lengths */ - - -while((i+i)<=j) -{ - k=i+i; - if(k<j) - if(str_is_less(optrarray,strarray,numstrings,k,k+1L)) - ++k; - if(str_is_less(optrarray,strarray,numstrings,i,k)) - { - /* temp=string[k] */ - tlen=*(strarray+*(optrarray+k)); - MoveMemory((farvoid *)&temp[0], - (farvoid *)(strarray+*(optrarray+k)), - (unsigned long)(tlen+1)); - - /* string[k]=string[i] */ - tlen=*(strarray+*(optrarray+i)); - stradjust(optrarray,strarray,numstrings,k,tlen); - MoveMemory((farvoid *)(strarray+*(optrarray+k)), - (farvoid *)(strarray+*(optrarray+i)), - (unsigned long)(tlen+1)); - - /* string[i]=temp */ - tlen=temp[0]; - stradjust(optrarray,strarray,numstrings,i,tlen); - MoveMemory((farvoid *)(strarray+*(optrarray+i)), - (farvoid *)&temp[0], - (unsigned long)(tlen+1)); - i=k; - } - else - i=j+1; -} -return; -} - -/************************ -** BITFIELD OPERATIONS ** -*************************/ - -/************* -** DoBitops ** -************** -** Perform the bit operations test portion of the CPU -** benchmark. Returns the iterations per second. -*/ -void DoBitops(void) -{ -BitOpStruct *locbitopstruct; /* Local bitop structure */ -farulong *bitarraybase; /* Base of bitmap array */ -farulong *bitoparraybase; /* Base of bitmap operations array */ -ulong nbitops; /* # of bitfield operations */ -ulong accumtime; /* Accumulated time in ticks */ -double iterations; /* # of iterations */ -char *errorcontext; /* Error context string */ -int systemerror; /* For holding error codes */ -int ticks; - -/* -** Link to global structure. -*/ -locbitopstruct=&global_bitopstruct; - -/* -** Set the error context. -*/ -errorcontext="CPU:Bitfields"; - -/* -** See if we need to run adjustment code. -*/ -if(locbitopstruct->adjust==0) -{ - bitarraybase=(farulong *)AllocateMemory(locbitopstruct->bitfieldarraysize * - sizeof(ulong),&systemerror); - if(systemerror) - { ReportError(errorcontext,systemerror); - ErrorExit(); - } - - /* - ** Initialize bitfield operations array to [2,30] elements - */ - locbitopstruct->bitoparraysize=30L; - - while(1) - { - /* - ** Allocate space for operations array - */ - bitoparraybase=(farulong *)AllocateMemory(locbitopstruct->bitoparraysize*2L* - sizeof(ulong), - &systemerror); - if(systemerror) - { ReportError(errorcontext,systemerror); - FreeMemory((farvoid *)bitarraybase,&systemerror); - ErrorExit(); - } - /* - ** Do an iteration of the bitmap test. If the - ** elapsed time is less than or equal to the permitted - ** minimum, then de-allocate the array, reallocate a - ** larger version, and try again. - */ - ticks=DoBitfieldIteration(bitarraybase, - bitoparraybase, - locbitopstruct->bitoparraysize, - &nbitops); -#ifdef DEBUG -#ifdef LINUX - if (locbitopstruct->bitoparraysize==30L){ - /* this is the first loop, write a debug file */ - FILE *file; - unsigned long *running_base; /* same as farulong */ - long counter; - file=fopen("debugbit.dat","w"); - running_base=bitarraybase; - for (counter=0;counter<(long)(locbitopstruct->bitfieldarraysize);counter++){ -#ifdef LONG64 - fprintf(file,"%08X",(unsigned int)(*running_base&0xFFFFFFFFL)); - fprintf(file,"%08X",(unsigned int)((*running_base>>32)&0xFFFFFFFFL)); - if ((counter+1)%4==0) fprintf(file,"\n"); -#else - fprintf(file,"%08lX",*running_base); - if ((counter+1)%8==0) fprintf(file,"\n"); -#endif - running_base=running_base+1; - } - fclose(file); - printf("\nWrote the file debugbit.dat, you may want to compare it to debugbit.good\n"); - } -#endif -#endif - - if (ticks>global_min_ticks) break; /* We're ok...exit */ - - FreeMemory((farvoid *)bitoparraybase,&systemerror); - locbitopstruct->bitoparraysize+=100L; - } -} -else -{ - /* - ** Don't need to do self adjustment, just allocate - ** the array space. - */ - bitarraybase=(farulong *)AllocateMemory(locbitopstruct->bitfieldarraysize * - sizeof(ulong),&systemerror); - if(systemerror) - { ReportError(errorcontext,systemerror); - ErrorExit(); - } - bitoparraybase=(farulong *)AllocateMemory(locbitopstruct->bitoparraysize*2L* - sizeof(ulong), - &systemerror); - if(systemerror) - { ReportError(errorcontext,systemerror); - FreeMemory((farvoid *)bitarraybase,&systemerror); - ErrorExit(); - } -} - -/* -** All's well if we get here. Repeatedly perform bitops until the -** accumulated elapsed time is greater than # of seconds requested. -*/ -accumtime=0L; -iterations=(double)0.0; -do { - accumtime+=DoBitfieldIteration(bitarraybase, - bitoparraybase, - locbitopstruct->bitoparraysize,&nbitops); - iterations+=(double)nbitops; -} while(TicksToSecs(accumtime)<locbitopstruct->request_secs); - -/* -** Clean up, calculate results, and go home. -** Also, set adjustment flag to show that we don't have -** to do self adjusting in the future. -*/ -FreeMemory((farvoid *)bitarraybase,&systemerror); -FreeMemory((farvoid *)bitoparraybase,&systemerror); -locbitopstruct->bitopspersec=iterations /TicksToFracSecs(accumtime); -if(locbitopstruct->adjust==0) - locbitopstruct->adjust=1; - -return; -} - -/************************ -** DoBitfieldIteration ** -************************* -** Perform a single iteration of the bitfield benchmark. -** Return the # of ticks accumulated by the operation. -*/ -static ulong DoBitfieldIteration(farulong *bitarraybase, - farulong *bitoparraybase, - long bitoparraysize, - ulong *nbitops) -{ -long i; /* Index */ -ulong bitoffset; /* Offset into bitmap */ -ulong elapsed; /* Time to execute */ -/* -** Clear # bitops counter -*/ -*nbitops=0L; - -/* -** Construct a set of bitmap offsets and run lengths. -** The offset can be any random number from 0 to the -** size of the bitmap (in bits). The run length can -** be any random number from 1 to the number of bits -** between the offset and the end of the bitmap. -** Note that the bitmap has 8192 * 32 bits in it. -** (262,144 bits) -*/ -/* -** Reset random number generator so things repeat. -** Also reset the bit array we work on. -** added by Uwe F. Mayer -*/ -randnum((int32)13); -for (i=0;i<global_bitopstruct.bitfieldarraysize;i++) -{ -#ifdef LONG64 - *(bitarraybase+i)=(ulong)0x5555555555555555; -#else - *(bitarraybase+i)=(ulong)0x55555555; -#endif -} -randnum((int32)13); -/* end of addition of code */ - -for (i=0;i<bitoparraysize;i++) -{ - /* First item is offset */ - /* *(bitoparraybase+i+i)=bitoffset=abs_randwc(262140L); */ - *(bitoparraybase+i+i)=bitoffset=abs_randwc((int32)262140); - - /* Next item is run length */ - /* *nbitops+=*(bitoparraybase+i+i+1L)=abs_randwc(262140L-bitoffset);*/ - *nbitops+=*(bitoparraybase+i+i+1L)=abs_randwc((int32)262140-bitoffset); -} - -/* -** Array of offset and lengths built...do an iteration of -** the test. -** Start the stopwatch. -*/ -elapsed=StartStopwatch(); - -/* -** Loop through array off offset/run length pairs. -** Execute operation based on modulus of index. -*/ -for(i=0;i<bitoparraysize;i++) -{ - switch(i % 3) - { - - case 0: /* Set run of bits */ - ToggleBitRun(bitarraybase, - *(bitoparraybase+i+i), - *(bitoparraybase+i+i+1), - 1); - break; - - case 1: /* Clear run of bits */ - ToggleBitRun(bitarraybase, - *(bitoparraybase+i+i), - *(bitoparraybase+i+i+1), - 0); - break; - - case 2: /* Complement run of bits */ - FlipBitRun(bitarraybase, - *(bitoparraybase+i+i), - *(bitoparraybase+i+i+1)); - break; - } -} - -/* -** Return elapsed time -*/ -return(StopStopwatch(elapsed)); -} - - -/***************************** -** ToggleBitRun * -****************************** -** Set or clear a run of nbits starting at -** bit_addr in bitmap. -*/ -static void ToggleBitRun(farulong *bitmap, /* Bitmap */ - ulong bit_addr, /* Address of bits to set */ - ulong nbits, /* # of bits to set/clr */ - uint val) /* 1 or 0 */ -{ -unsigned long bindex; /* Index into array */ -unsigned long bitnumb; /* Bit number */ - -while(nbits--) -{ -#ifdef LONG64 - bindex=bit_addr>>6; /* Index is number /64 */ - bitnumb=bit_addr % 64; /* Bit number in word */ -#else - bindex=bit_addr>>5; /* Index is number /32 */ - bitnumb=bit_addr % 32; /* bit number in word */ -#endif - if(val) - bitmap[bindex]|=(1L<<bitnumb); - else - bitmap[bindex]&=~(1L<<bitnumb); - bit_addr++; -} -return; -} - -/*************** -** FlipBitRun ** -**************** -** Complements a run of bits. -*/ -static void FlipBitRun(farulong *bitmap, /* Bit map */ - ulong bit_addr, /* Bit address */ - ulong nbits) /* # of bits to flip */ -{ -unsigned long bindex; /* Index into array */ -unsigned long bitnumb; /* Bit number */ - -while(nbits--) -{ -#ifdef LONG64 - bindex=bit_addr>>6; /* Index is number /64 */ - bitnumb=bit_addr % 64; /* Bit number in longword */ -#else - bindex=bit_addr>>5; /* Index is number /32 */ - bitnumb=bit_addr % 32; /* Bit number in longword */ -#endif - bitmap[bindex]^=(1L<<bitnumb); - bit_addr++; -} - -return; -} - -/***************************** -** FLOATING-POINT EMULATION ** -*****************************/ - -/************** -** DoEmFloat ** -*************** -** Perform the floating-point emulation routines portion of the -** CPU benchmark. Returns the operations per second. -*/ -void DoEmFloat(void) -{ -EmFloatStruct *locemfloatstruct; /* Local structure */ -InternalFPF *abase; /* Base of A array */ -InternalFPF *bbase; /* Base of B array */ -InternalFPF *cbase; /* Base of C array */ -ulong accumtime; /* Accumulated time in ticks */ -double iterations; /* # of iterations */ -ulong tickcount; /* # of ticks */ -char *errorcontext; /* Error context string pointer */ -int systemerror; /* For holding error code */ -ulong loops; /* # of loops */ - -/* -** Link to global structure -*/ -locemfloatstruct=&global_emfloatstruct; - -/* -** Set the error context -*/ -errorcontext="CPU:Floating Emulation"; - - -/* -** Test the emulation routines. -*/ -#ifdef DEBUG -#endif - -abase=(InternalFPF *)AllocateMemory(locemfloatstruct->arraysize*sizeof(InternalFPF), - &systemerror); -if(systemerror) -{ ReportError(errorcontext,systemerror); - ErrorExit(); -} - -bbase=(InternalFPF *)AllocateMemory(locemfloatstruct->arraysize*sizeof(InternalFPF), - &systemerror); -if(systemerror) -{ ReportError(errorcontext,systemerror); - FreeMemory((farvoid *)abase,&systemerror); - ErrorExit(); -} - -cbase=(InternalFPF *)AllocateMemory(locemfloatstruct->arraysize*sizeof(InternalFPF), - &systemerror); -if(systemerror) -{ ReportError(errorcontext,systemerror); - FreeMemory((farvoid *)abase,&systemerror); - FreeMemory((farvoid *)bbase,&systemerror); - ErrorExit(); -} - -/* -** Set up the arrays -*/ -SetupCPUEmFloatArrays(abase,bbase,cbase,locemfloatstruct->arraysize); - -/* -** See if we need to do self-adjusting code. -*/ -if(locemfloatstruct->adjust==0) -{ - locemfloatstruct->loops=0; - - /* - ** Do an iteration of the tests. If the elapsed time is - ** less than minimum, increase the loop count and try - ** again. - */ - for(loops=1;loops<CPUEMFLOATLOOPMAX;loops+=loops) - { tickcount=DoEmFloatIteration(abase,bbase,cbase, - locemfloatstruct->arraysize, - loops); - if(tickcount>global_min_ticks) - { locemfloatstruct->loops=loops; - break; - } - } -} - -/* -** Verify that selft adjustment code worked. -*/ -if(locemfloatstruct->loops==0) -{ printf("CPU:EMFPU -- CMPUEMFLOATLOOPMAX limit hit\n"); - FreeMemory((farvoid *)abase,&systemerror); - FreeMemory((farvoid *)bbase,&systemerror); - FreeMemory((farvoid *)cbase,&systemerror); - ErrorExit(); -} - -/* -** All's well if we get here. Repeatedly perform floating -** tests until the accumulated time is greater than the -** # of seconds requested. -** Each iteration performs arraysize * 3 operations. -*/ -accumtime=0L; -iterations=(double)0.0; -do { - accumtime+=DoEmFloatIteration(abase,bbase,cbase, - locemfloatstruct->arraysize, - locemfloatstruct->loops); - iterations+=(double)1.0; -} while(TicksToSecs(accumtime)<locemfloatstruct->request_secs); - - -/* -** Clean up, calculate results, and go home. -** Also, indicate that adjustment is done. -*/ -FreeMemory((farvoid *)abase,&systemerror); -FreeMemory((farvoid *)bbase,&systemerror); -FreeMemory((farvoid *)cbase,&systemerror); - -locemfloatstruct->emflops=(iterations*(double)locemfloatstruct->loops)/ - (double)TicksToFracSecs(accumtime); -if(locemfloatstruct->adjust==0) - locemfloatstruct->adjust=1; - -#ifdef DEBUG -printf("----------------------------------------------------------------------------\n"); -#endif -return; -} - -/************************* -** FOURIER COEFFICIENTS ** -*************************/ - -/************** -** DoFourier ** -*************** -** Perform the transcendental/trigonometric portion of the -** benchmark. This benchmark calculates the first n -** fourier coefficients of the function (x+1)^x defined -** on the interval 0,2. -*/ -void DoFourier(void) -{ -FourierStruct *locfourierstruct; /* Local fourier struct */ -fardouble *abase; /* Base of A[] coefficients array */ -fardouble *bbase; /* Base of B[] coefficients array */ -unsigned long accumtime; /* Accumulated time in ticks */ -double iterations; /* # of iterations */ -char *errorcontext; /* Error context string pointer */ -int systemerror; /* For error code */ - -/* -** Link to global structure -*/ -locfourierstruct=&global_fourierstruct; - -/* -** Set error context string -*/ -errorcontext="FPU:Transcendental"; - -/* -** See if we need to do self-adjustment code. -*/ -if(locfourierstruct->adjust==0) -{ - locfourierstruct->arraysize=100L; /* Start at 100 elements */ - while(1) - { - - abase=(fardouble *)AllocateMemory(locfourierstruct->arraysize*sizeof(double), - &systemerror); - if(systemerror) - { ReportError(errorcontext,systemerror); - ErrorExit(); - } - - bbase=(fardouble *)AllocateMemory(locfourierstruct->arraysize*sizeof(double), - &systemerror); - if(systemerror) - { ReportError(errorcontext,systemerror); - FreeMemory((void *)abase,&systemerror); - ErrorExit(); - } - /* - ** Do an iteration of the tests. If the elapsed time is - ** less than or equal to the permitted minimum, re-allocate - ** larger arrays and try again. - */ - if(DoFPUTransIteration(abase,bbase, - locfourierstruct->arraysize)>global_min_ticks) - break; /* We're ok...exit */ - - /* - ** Make bigger arrays and try again. - */ - FreeMemory((farvoid *)abase,&systemerror); - FreeMemory((farvoid *)bbase,&systemerror); - locfourierstruct->arraysize+=50L; - } -} -else -{ /* - ** Don't need self-adjustment. Just allocate the - ** arrays, and go. - */ - abase=(fardouble *)AllocateMemory(locfourierstruct->arraysize*sizeof(double), - &systemerror); - if(systemerror) - { ReportError(errorcontext,systemerror); - ErrorExit(); - } - - bbase=(fardouble *)AllocateMemory(locfourierstruct->arraysize*sizeof(double), - &systemerror); - if(systemerror) - { ReportError(errorcontext,systemerror); - FreeMemory((void *)abase,&systemerror); - ErrorExit(); - } -} -/* -** All's well if we get here. Repeatedly perform integration -** tests until the accumulated time is greater than the -** # of seconds requested. -*/ -accumtime=0L; -iterations=(double)0.0; -do { - accumtime+=DoFPUTransIteration(abase,bbase,locfourierstruct->arraysize); - iterations+=(double)locfourierstruct->arraysize*(double)2.0-(double)1.0; -} while(TicksToSecs(accumtime)<locfourierstruct->request_secs); - - -/* -** Clean up, calculate results, and go home. -** Also set adjustment flag to indicate no adjust code needed. -*/ -FreeMemory((farvoid *)abase,&systemerror); -FreeMemory((farvoid *)bbase,&systemerror); - -locfourierstruct->fflops=iterations/(double)TicksToFracSecs(accumtime); - -if(locfourierstruct->adjust==0) - locfourierstruct->adjust=1; - -return; -} - -/************************ -** DoFPUTransIteration ** -************************* -** Perform an iteration of the FPU Transcendental/trigonometric -** benchmark. Here, an iteration consists of calculating the -** first n fourier coefficients of the function (x+1)^x on -** the interval 0,2. n is given by arraysize. -** NOTE: The # of integration steps is fixed at -** 200. -*/ -static ulong DoFPUTransIteration(fardouble *abase, /* A coeffs. */ - fardouble *bbase, /* B coeffs. */ - ulong arraysize) /* # of coeffs */ -{ -double omega; /* Fundamental frequency */ -unsigned long i; /* Index */ -unsigned long elapsed; /* Elapsed time */ - -/* -** Start the stopwatch -*/ -elapsed=StartStopwatch(); - -/* -** Calculate the fourier series. Begin by -** calculating A[0]. -*/ - -*abase=TrapezoidIntegrate((double)0.0, - (double)2.0, - 200, - (double)0.0, /* No omega * n needed */ - 0 )/(double)2.0; - -/* -** Calculate the fundamental frequency. -** ( 2 * pi ) / period...and since the period -** is 2, omega is simply pi. -*/ -omega=(double)3.1415926535897932; - -for(i=1;i<arraysize;i++) -{ - - /* - ** Calculate A[i] terms. Note, once again, that we - ** can ignore the 2/period term outside the integral - ** since the period is 2 and the term cancels itself - ** out. - */ - *(abase+i)=TrapezoidIntegrate((double)0.0, - (double)2.0, - 200, - omega * (double)i, - 1); - - /* - ** Calculate the B[i] terms. - */ - *(bbase+i)=TrapezoidIntegrate((double)0.0, - (double)2.0, - 200, - omega * (double)i, - 2); - -} -#ifdef DEBUG -{ - int i; - printf("\nA[i]=\n"); - for (i=0;i<arraysize;i++) printf("%7.3g ",abase[i]); - printf("\nB[i]=\n(undefined) "); - for (i=1;i<arraysize;i++) printf("%7.3g ",bbase[i]); -} -#endif -/* -** All done, stop the stopwatch -*/ -return(StopStopwatch(elapsed)); -} - -/*********************** -** TrapezoidIntegrate ** -************************ -** Perform a simple trapezoid integration on the -** function (x+1)**x. -** x0,x1 set the lower and upper bounds of the -** integration. -** nsteps indicates # of trapezoidal sections -** omegan is the fundamental frequency times -** the series member # -** select = 0 for the A[0] term, 1 for cosine terms, and -** 2 for sine terms. -** Returns the value. -*/ -static double TrapezoidIntegrate( double x0, /* Lower bound */ - double x1, /* Upper bound */ - int nsteps, /* # of steps */ - double omegan, /* omega * n */ - int select) -{ -double x; /* Independent variable */ -double dx; /* Stepsize */ -double rvalue; /* Return value */ - - -/* -** Initialize independent variable -*/ -x=x0; - -/* -** Calculate stepsize -*/ -dx=(x1 - x0) / (double)nsteps; - -/* -** Initialize the return value. -*/ -rvalue=thefunction(x0,omegan,select)/(double)2.0; - -/* -** Compute the other terms of the integral. -*/ -if(nsteps!=1) -{ --nsteps; /* Already done 1 step */ - while(--nsteps ) - { - x+=dx; - rvalue+=thefunction(x,omegan,select); - } -} -/* -** Finish computation -*/ -rvalue=(rvalue+thefunction(x1,omegan,select)/(double)2.0)*dx; - -return(rvalue); -} - -/**************** -** thefunction ** -***************** -** This routine selects the function to be used -** in the Trapezoid integration. -** x is the independent variable -** omegan is omega * n -** select chooses which of the sine/cosine functions -** are used. note the special case for select=0. -*/ -static double thefunction(double x, /* Independent variable */ - double omegan, /* Omega * term */ - int select) /* Choose term */ -{ - -/* -** Use select to pick which function we call. -*/ -switch(select) -{ - case 0: return(pow(x+(double)1.0,x)); - - case 1: return(pow(x+(double)1.0,x) * cos(omegan * x)); - - case 2: return(pow(x+(double)1.0,x) * sin(omegan * x)); -} - -/* -** We should never reach this point, but the following -** keeps compilers from issuing a warning message. -*/ -return(0.0); -} - -/************************* -** ASSIGNMENT ALGORITHM ** -*************************/ - -/************* -** DoAssign ** -************** -** Perform an assignment algorithm. -** The algorithm was adapted from the step by step guide found -** in "Quantitative Decision Making for Business" (Gordon, -** Pressman, and Cohn; Prentice-Hall) -** -** -** NOTES: -** 1. Even though the algorithm distinguishes between -** ASSIGNROWS and ASSIGNCOLS, as though the two might -** be different, it does presume a square matrix. -** I.E., ASSIGNROWS and ASSIGNCOLS must be the same. -** This makes for some algorithmically-correct but -** probably non-optimal constructs. -** -*/ -void DoAssign(void) -{ -AssignStruct *locassignstruct; /* Local structure ptr */ -farlong *arraybase; -char *errorcontext; -int systemerror; -ulong accumtime; -double iterations; - -/* -** Link to global structure -*/ -locassignstruct=&global_assignstruct; - -/* -** Set the error context string. -*/ -errorcontext="CPU:Assignment"; - -/* -** See if we need to do self adjustment code. -*/ -if(locassignstruct->adjust==0) -{ - /* - ** Self-adjustment code. The system begins by working on 1 - ** array. If it does that in no time, then two arrays - ** are built. This process continues until - ** enough arrays are built to handle the tolerance. - */ - locassignstruct->numarrays=1; - while(1) - { - /* - ** Allocate space for arrays - */ - arraybase=(farlong *) AllocateMemory(sizeof(long)* - ASSIGNROWS*ASSIGNCOLS*locassignstruct->numarrays, - &systemerror); - if(systemerror) - { ReportError(errorcontext,systemerror); - FreeMemory((farvoid *)arraybase, - &systemerror); - ErrorExit(); - } - - /* - ** Do an iteration of the assignment alg. If the - ** elapsed time is less than or equal to the permitted - ** minimum, then allocate for more arrays and - ** try again. - */ - if(DoAssignIteration(arraybase, - locassignstruct->numarrays)>global_min_ticks) - break; /* We're ok...exit */ - - FreeMemory((farvoid *)arraybase, &systemerror); - locassignstruct->numarrays++; - } -} -else -{ /* - ** Allocate space for arrays - */ - arraybase=(farlong *)AllocateMemory(sizeof(long)* - ASSIGNROWS*ASSIGNCOLS*locassignstruct->numarrays, - &systemerror); - if(systemerror) - { ReportError(errorcontext,systemerror); - FreeMemory((farvoid *)arraybase, - &systemerror); - ErrorExit(); - } -} - -/* -** All's well if we get here. Do the tests. -*/ -accumtime=0L; -iterations=(double)0.0; - -do { - accumtime+=DoAssignIteration(arraybase, - locassignstruct->numarrays); - iterations+=(double)1.0; -} while(TicksToSecs(accumtime)<locassignstruct->request_secs); - -/* -** Clean up, calculate results, and go home. Be sure to -** show that we don't have to rerun adjustment code. -*/ -FreeMemory((farvoid *)arraybase,&systemerror); - -locassignstruct->iterspersec=iterations * - (double)locassignstruct->numarrays / TicksToFracSecs(accumtime); - -if(locassignstruct->adjust==0) - locassignstruct->adjust=1; - -return; - -} - -/********************** -** DoAssignIteration ** -*********************** -** This routine executes one iteration of the assignment test. -** It returns the number of ticks elapsed in the iteration. -*/ -static ulong DoAssignIteration(farlong *arraybase, - ulong numarrays) -{ -longptr abase; /* local pointer */ -ulong elapsed; /* Elapsed ticks */ -ulong i; - -/* -** Set up local pointer -*/ -abase.ptrs.p=arraybase; - -/* -** Load up the arrays with a random table. -*/ -LoadAssignArrayWithRand(arraybase,numarrays); - -/* -** Start the stopwatch -*/ -elapsed=StartStopwatch(); - -/* -** Execute assignment algorithms -*/ -for(i=0;i<numarrays;i++) -{ /* abase.ptrs.p+=i*ASSIGNROWS*ASSIGNCOLS; */ - /* Fixed by Eike Dierks */ - Assignment(*abase.ptrs.ap); - abase.ptrs.p+=ASSIGNROWS*ASSIGNCOLS; -} - -/* -** Get elapsed time -*/ -return(StopStopwatch(elapsed)); -} - -/**************************** -** LoadAssignArrayWithRand ** -***************************** -** Load the assignment arrays with random numbers. All positive. -** These numbers represent costs. -*/ -static void LoadAssignArrayWithRand(farlong *arraybase, - ulong numarrays) -{ -longptr abase,abase1; /* Local for array pointer */ -ulong i; - -/* -** Set local array pointer -*/ -abase.ptrs.p=arraybase; -abase1.ptrs.p=arraybase; - -/* -** Set up the first array. Then just copy it into the -** others. -*/ -LoadAssign(*(abase.ptrs.ap)); -if(numarrays>1) - for(i=1;i<numarrays;i++) - { /* abase1.ptrs.p+=i*ASSIGNROWS*ASSIGNCOLS; */ - /* Fixed by Eike Dierks */ - abase1.ptrs.p+=ASSIGNROWS*ASSIGNCOLS; - CopyToAssign(*(abase.ptrs.ap),*(abase1.ptrs.ap)); - } - -return; -} - -/*************** -** LoadAssign ** -**************** -** The array given by arraybase is loaded with positive random -** numbers. Elements in the array are capped at 5,000,000. -*/ -static void LoadAssign(farlong arraybase[][ASSIGNCOLS]) -{ -ushort i,j; - -/* -** Reset random number generator so things repeat. -*/ -/* randnum(13L); */ -randnum((int32)13); - -for(i=0;i<ASSIGNROWS;i++) - for(j=0;j<ASSIGNROWS;j++){ - /* arraybase[i][j]=abs_randwc(5000000L);*/ - arraybase[i][j]=abs_randwc((int32)5000000); - } - -return; -} - -/***************** -** CopyToAssign ** -****************** -** Copy the contents of one array to another. This is called by -** the routine that builds the initial array, and is used to copy -** the contents of the intial array into all following arrays. -*/ -static void CopyToAssign(farlong arrayfrom[ASSIGNROWS][ASSIGNCOLS], - farlong arrayto[ASSIGNROWS][ASSIGNCOLS]) -{ -ushort i,j; - -for(i=0;i<ASSIGNROWS;i++) - for(j=0;j<ASSIGNCOLS;j++) - arrayto[i][j]=arrayfrom[i][j]; - -return; -} - -/*************** -** Assignment ** -***************/ -static void Assignment(farlong arraybase[][ASSIGNCOLS]) -{ -short assignedtableau[ASSIGNROWS][ASSIGNCOLS]; - -/* -** First, calculate minimum costs -*/ -calc_minimum_costs(arraybase); - -/* -** Repeat following until the number of rows selected -** equals the number of rows in the tableau. -*/ -while(first_assignments(arraybase,assignedtableau)!=ASSIGNROWS) -{ second_assignments(arraybase,assignedtableau); -} - -#ifdef DEBUG -{ - int i,j; - printf("\nColumn choices for each row\n"); - for(i=0;i<ASSIGNROWS;i++) - { - printf("R%03d: ",i); - for(j=0;j<ASSIGNCOLS;j++) - if(assignedtableau[i][j]==1) - printf("%03d ",j); - } -} -#endif - -return; -} - -/*********************** -** calc_minimum_costs ** -************************ -** Revise the tableau by calculating the minimum costs on a -** row and column basis. These minima are subtracted from -** their rows and columns, creating a new tableau. -*/ -static void calc_minimum_costs(long tableau[][ASSIGNCOLS]) -{ -ushort i,j; /* Index variables */ -long currentmin; /* Current minimum */ -/* -** Determine minimum costs on row basis. This is done by -** subtracting -- on a row-per-row basis -- the minum value -** for that row. -*/ -for(i=0;i<ASSIGNROWS;i++) -{ - currentmin=MAXPOSLONG; /* Initialize minimum */ - for(j=0;j<ASSIGNCOLS;j++) - if(tableau[i][j]<currentmin) - currentmin=tableau[i][j]; - - for(j=0;j<ASSIGNCOLS;j++) - tableau[i][j]-=currentmin; -} - -/* -** Determine minimum cost on a column basis. This works -** just as above, only now we step through the array -** column-wise -*/ -for(j=0;j<ASSIGNCOLS;j++) -{ - currentmin=MAXPOSLONG; /* Initialize minimum */ - for(i=0;i<ASSIGNROWS;i++) - if(tableau[i][j]<currentmin) - currentmin=tableau[i][j]; - - /* - ** Here, we'll take the trouble to see if the current - ** minimum is zero. This is likely worth it, since the - ** preceding loop will have created at least one zero in - ** each row. We can save ourselves a few iterations. - */ - if(currentmin!=0) - for(i=0;i<ASSIGNROWS;i++) - tableau[i][j]-=currentmin; -} - -return; -} - -/********************** -** first_assignments ** -*********************** -** Do first assignments. -** The assignedtableau[] array holds a set of values that -** indicate the assignment of a value, or its elimination. -** The values are: -** 0 = Item is neither assigned nor eliminated. -** 1 = Item is assigned -** 2 = Item is eliminated -** Returns the number of selections made. If this equals -** the number of rows, then an optimum has been determined. -*/ -static int first_assignments(long tableau[][ASSIGNCOLS], - short assignedtableau[][ASSIGNCOLS]) -{ -ushort i,j,k; /* Index variables */ -ushort numassigns; /* # of assignments */ -ushort totnumassigns; /* Total # of assignments */ -ushort numzeros; /* # of zeros in row */ -int selected=0; /* Flag used to indicate selection */ - -/* -** Clear the assignedtableau, setting all members to show that -** no one is yet assigned, eliminated, or anything. -*/ -for(i=0;i<ASSIGNROWS;i++) - for(j=0;j<ASSIGNCOLS;j++) - assignedtableau[i][j]=0; - -totnumassigns=0; -do { - numassigns=0; - /* - ** Step through rows. For each one that is not currently - ** assigned, see if the row has only one zero in it. If so, - ** mark that as an assigned row/col. Eliminate other zeros - ** in the same column. - */ - for(i=0;i<ASSIGNROWS;i++) - { numzeros=0; - for(j=0;j<ASSIGNCOLS;j++) - if(tableau[i][j]==0L) - if(assignedtableau[i][j]==0) - { numzeros++; - selected=j; - } - if(numzeros==1) - { numassigns++; - totnumassigns++; - assignedtableau[i][selected]=1; - for(k=0;k<ASSIGNROWS;k++) - if((k!=i) && - (tableau[k][selected]==0)) - assignedtableau[k][selected]=2; - } - } - /* - ** Step through columns, doing same as above. Now, be careful - ** of items in the other rows of a selected column. - */ - for(j=0;j<ASSIGNCOLS;j++) - { numzeros=0; - for(i=0;i<ASSIGNROWS;i++) - if(tableau[i][j]==0L) - if(assignedtableau[i][j]==0) - { numzeros++; - selected=i; - } - if(numzeros==1) - { numassigns++; - totnumassigns++; - assignedtableau[selected][j]=1; - for(k=0;k<ASSIGNCOLS;k++) - if((k!=j) && - (tableau[selected][k]==0)) - assignedtableau[selected][k]=2; - } - } - /* - ** Repeat until no more assignments to be made. - */ -} while(numassigns!=0); - -/* -** See if we can leave at this point. -*/ -if(totnumassigns==ASSIGNROWS) return(totnumassigns); - -/* -** Now step through the array by row. If you find any unassigned -** zeros, pick the first in the row. Eliminate all zeros from -** that same row & column. This occurs if there are multiple optima... -** possibly. -*/ -for(i=0;i<ASSIGNROWS;i++) -{ selected=-1; - for(j=0;j<ASSIGNCOLS;j++) - if((tableau[i][j]==0L) && - (assignedtableau[i][j]==0)) - { selected=j; - break; - } - if(selected!=-1) - { assignedtableau[i][selected]=1; - totnumassigns++; - for(k=0;k<ASSIGNCOLS;k++) - if((k!=selected) && - (tableau[i][k]==0L)) - assignedtableau[i][k]=2; - for(k=0;k<ASSIGNROWS;k++) - if((k!=i) && - (tableau[k][selected]==0L)) - assignedtableau[k][selected]=2; - } -} - -return(totnumassigns); -} - -/*********************** -** second_assignments ** -************************ -** This section of the algorithm creates the revised -** tableau, and is difficult to explain. I suggest you -** refer to the algorithm's source, mentioned in comments -** toward the beginning of the program. -*/ -static void second_assignments(long tableau[][ASSIGNCOLS], - short assignedtableau[][ASSIGNCOLS]) -{ -int i,j; /* Indexes */ -short linesrow[ASSIGNROWS]; -short linescol[ASSIGNCOLS]; -long smallest; /* Holds smallest value */ -ushort numassigns; /* Number of assignments */ -ushort newrows; /* New rows to be considered */ -/* -** Clear the linesrow and linescol arrays. -*/ -for(i=0;i<ASSIGNROWS;i++) - linesrow[i]=0; -for(i=0;i<ASSIGNCOLS;i++) - linescol[i]=0; - -/* -** Scan rows, flag each row that has no assignment in it. -*/ -for(i=0;i<ASSIGNROWS;i++) -{ numassigns=0; - for(j=0;j<ASSIGNCOLS;j++) - if(assignedtableau[i][j]==1) - { numassigns++; - break; - } - if(numassigns==0) linesrow[i]=1; -} - -do { - - newrows=0; - /* - ** For each row checked above, scan for any zeros. If found, - ** check the associated column. - */ - for(i=0;i<ASSIGNROWS;i++) - { if(linesrow[i]==1) - for(j=0;j<ASSIGNCOLS;j++) - if(tableau[i][j]==0) - linescol[j]=1; - } - - /* - ** Now scan checked columns. If any contain assigned zeros, check - ** the associated row. - */ - for(j=0;j<ASSIGNCOLS;j++) - if(linescol[j]==1) - for(i=0;i<ASSIGNROWS;i++) - if((assignedtableau[i][j]==1) && - (linesrow[i]!=1)) - { - linesrow[i]=1; - newrows++; - } -} while(newrows!=0); - -/* -** linesrow[n]==0 indicate rows covered by imaginary line -** linescol[n]==1 indicate cols covered by imaginary line -** For all cells not covered by imaginary lines, determine smallest -** value. -*/ -smallest=MAXPOSLONG; -for(i=0;i<ASSIGNROWS;i++) - if(linesrow[i]!=0) - for(j=0;j<ASSIGNCOLS;j++) - if(linescol[j]!=1) - if(tableau[i][j]<smallest) - smallest=tableau[i][j]; - -/* -** Subtract smallest from all cells in the above set. -*/ -for(i=0;i<ASSIGNROWS;i++) - if(linesrow[i]!=0) - for(j=0;j<ASSIGNCOLS;j++) - if(linescol[j]!=1) - tableau[i][j]-=smallest; - -/* -** Add smallest to all cells covered by two lines. -*/ -for(i=0;i<ASSIGNROWS;i++) - if(linesrow[i]==0) - for(j=0;j<ASSIGNCOLS;j++) - if(linescol[j]==1) - tableau[i][j]+=smallest; - -return; -} - -/******************** -** IDEA Encryption ** -********************* -** IDEA - International Data Encryption Algorithm. -** Based on code presented in Applied Cryptography by Bruce Schneier. -** Which was based on code developed by Xuejia Lai and James L. Massey. -** Other modifications made by Colin Plumb. -** -*/ - -/*********** -** DoIDEA ** -************ -** Perform IDEA encryption. Note that we time encryption & decryption -** time as being a single loop. -*/ -void DoIDEA(void) -{ -IDEAStruct *locideastruct; /* Loc pointer to global structure */ -int i; -IDEAkey Z,DK; -u16 userkey[8]; -ulong accumtime; -double iterations; -char *errorcontext; -int systemerror; -faruchar *plain1; /* First plaintext buffer */ -faruchar *crypt1; /* Encryption buffer */ -faruchar *plain2; /* Second plaintext buffer */ - -/* -** Link to global data -*/ -locideastruct=&global_ideastruct; - -/* -** Set error context -*/ -errorcontext="CPU:IDEA"; - -/* -** Re-init random-number generator. -*/ -/* randnum(3L); */ -randnum((int32)3); - -/* -** Build an encryption/decryption key -*/ -for (i=0;i<8;i++) - /* userkey[i]=(u16)(abs_randwc(60000L) & 0xFFFF); */ - userkey[i]=(u16)(abs_randwc((int32)60000) & 0xFFFF); -for(i=0;i<KEYLEN;i++) - Z[i]=0; - -/* -** Compute encryption/decryption subkeys -*/ -en_key_idea(userkey,Z); -de_key_idea(Z,DK); - -/* -** Allocate memory for buffers. We'll make 3, called plain1, -** crypt1, and plain2. It works like this: -** plain1 >>encrypt>> crypt1 >>decrypt>> plain2. -** So, plain1 and plain2 should match. -** Also, fill up plain1 with sample text. -*/ -plain1=(faruchar *)AllocateMemory(locideastruct->arraysize,&systemerror); -if(systemerror) -{ - ReportError(errorcontext,systemerror); - ErrorExit(); -} - -crypt1=(faruchar *)AllocateMemory(locideastruct->arraysize,&systemerror); -if(systemerror) -{ - ReportError(errorcontext,systemerror); - FreeMemory((farvoid *)plain1,&systemerror); - ErrorExit(); -} - -plain2=(faruchar *)AllocateMemory(locideastruct->arraysize,&systemerror); -if(systemerror) -{ - ReportError(errorcontext,systemerror); - FreeMemory((farvoid *)plain1,&systemerror); - FreeMemory((farvoid *)crypt1,&systemerror); - ErrorExit(); -} -/* -** Note that we build the "plaintext" by simply loading -** the array up with random numbers. -*/ -for(i=0;i<locideastruct->arraysize;i++) - plain1[i]=(uchar)(abs_randwc(255) & 0xFF); - -/* -** See if we need to perform self adjustment loop. -*/ -if(locideastruct->adjust==0) -{ - /* - ** Do self-adjustment. This involves initializing the - ** # of loops and increasing the loop count until we - ** get a number of loops that we can use. - */ - for(locideastruct->loops=100L; - locideastruct->loops<MAXIDEALOOPS; - locideastruct->loops+=10L) - if(DoIDEAIteration(plain1,crypt1,plain2, - locideastruct->arraysize, - locideastruct->loops, - Z,DK)>global_min_ticks) break; -} - -/* -** All's well if we get here. Do the test. -*/ -accumtime=0L; -iterations=(double)0.0; - -do { - accumtime+=DoIDEAIteration(plain1,crypt1,plain2, - locideastruct->arraysize, - locideastruct->loops,Z,DK); - iterations+=(double)locideastruct->loops; -} while(TicksToSecs(accumtime)<locideastruct->request_secs); - -/* -** Clean up, calculate results, and go home. Be sure to -** show that we don't have to rerun adjustment code. -*/ -FreeMemory((farvoid *)plain1,&systemerror); -FreeMemory((farvoid *)crypt1,&systemerror); -FreeMemory((farvoid *)plain2,&systemerror); -locideastruct->iterspersec=iterations / TicksToFracSecs(accumtime); - -if(locideastruct->adjust==0) - locideastruct->adjust=1; - -return; - -} - -/******************** -** DoIDEAIteration ** -********************* -** Execute a single iteration of the IDEA encryption algorithm. -** Actually, a single iteration is one encryption and one -** decryption. -*/ -static ulong DoIDEAIteration(faruchar *plain1, - faruchar *crypt1, - faruchar *plain2, - ulong arraysize, - ulong nloops, - IDEAkey Z, - IDEAkey DK) -{ -register ulong i; -register ulong j; -ulong elapsed; -#ifdef DEBUG -int status=0; -#endif - -/* -** Start the stopwatch. -*/ -elapsed=StartStopwatch(); - -/* -** Do everything for nloops. -*/ -for(i=0;i<nloops;i++) -{ - for(j=0;j<arraysize;j+=(sizeof(u16)*4)) - cipher_idea((u16 *)(plain1+j),(u16 *)(crypt1+j),Z); /* Encrypt */ - - for(j=0;j<arraysize;j+=(sizeof(u16)*4)) - cipher_idea((u16 *)(crypt1+j),(u16 *)(plain2+j),DK); /* Decrypt */ -} - -#ifdef DEBUG -for(j=0;j<arraysize;j++) - if(*(plain1+j)!=*(plain2+j)){ - printf("IDEA Error! \n"); - status=1; - } -if (status==0) printf("IDEA: OK\n"); -#endif - -/* -** Get elapsed time. -*/ -return(StopStopwatch(elapsed)); -} - -/******** -** mul ** -********* -** Performs multiplication, modulo (2**16)+1. This code is structured -** on the assumption that untaken branches are cheaper than taken -** branches, and that the compiler doesn't schedule branches. -*/ -static u16 mul(register u16 a, register u16 b) -{ -register u32 p; -if(a) -{ if(b) - { p=(u32)(a*b); - b=low16(p); - a=(u16)(p>>16); - return(b-a+(b<a)); - } - else - return(1-a); -} -else - return(1-b); -} - -/******** -** inv ** -********* -** Compute multiplicative inverse of x, modulo (2**16)+1 -** using Euclid's GCD algorithm. It is unrolled twice -** to avoid swapping the meaning of the registers. And -** some subtracts are changed to adds. -*/ -static u16 inv(u16 x) -{ -u16 t0, t1; -u16 q, y; - -if(x<=1) - return(x); /* 0 and 1 are self-inverse */ -t1=0x10001 / x; -y=0x10001 % x; -if(y==1) - return(low16(1-t1)); -t0=1; -do { - q=x/y; - x=x%y; - t0+=q*t1; - if(x==1) return(t0); - q=y/x; - y=y%x; - t1+=q*t0; -} while(y!=1); -return(low16(1-t1)); -} - -/**************** -** en_key_idea ** -***************** -** Compute IDEA encryption subkeys Z -*/ -static void en_key_idea(u16 *userkey, u16 *Z) -{ -int i,j; - -/* -** shifts -*/ -for(j=0;j<8;j++) - Z[j]=*userkey++; -for(i=0;j<KEYLEN;j++) -{ i++; - Z[i+7]=(Z[i&7]<<9)| (Z[(i+1) & 7] >> 7); - Z+=i&8; - i&=7; -} -return; -} - -/**************** -** de_key_idea ** -***************** -** Compute IDEA decryption subkeys DK from encryption -** subkeys Z. -*/ -static void de_key_idea(IDEAkey Z, IDEAkey DK) -{ -IDEAkey TT; -int j; -u16 t1, t2, t3; -u16 *p; -p=(u16 *)(TT+KEYLEN); - -t1=inv(*Z++); -t2=-*Z++; -t3=-*Z++; -*--p=inv(*Z++); -*--p=t3; -*--p=t2; -*--p=t1; - -for(j=1;j<ROUNDS;j++) -{ t1=*Z++; - *--p=*Z++; - *--p=t1; - t1=inv(*Z++); - t2=-*Z++; - t3=-*Z++; - *--p=inv(*Z++); - *--p=t2; - *--p=t3; - *--p=t1; -} -t1=*Z++; -*--p=*Z++; -*--p=t1; -t1=inv(*Z++); -t2=-*Z++; -t3=-*Z++; -*--p=inv(*Z++); -*--p=t3; -*--p=t2; -*--p=t1; -/* -** Copy and destroy temp copy -*/ -for(j=0,p=TT;j<KEYLEN;j++) -{ *DK++=*p; - *p++=0; -} - -return; -} - -/* -** MUL(x,y) -** This #define creates a macro that computes x=x*y modulo 0x10001. -** Requires temps t16 and t32. Also requires y to be strictly 16 -** bits. Here, I am using the simplest form. May not be the -** fastest. -- RG -*/ -/* #define MUL(x,y) (x=mul(low16(x),y)) */ - -/**************** -** cipher_idea ** -***************** -** IDEA encryption/decryption algorithm. -*/ -static void cipher_idea(u16 in[4], - u16 out[4], - register IDEAkey Z) -{ -register u16 x1, x2, x3, x4, t1, t2; -/* register u16 t16; -register u16 t32; */ -int r=ROUNDS; - -x1=*in++; -x2=*in++; -x3=*in++; -x4=*in; - -do { - MUL(x1,*Z++); - x2+=*Z++; - x3+=*Z++; - MUL(x4,*Z++); - - t2=x1^x3; - MUL(t2,*Z++); - t1=t2+(x2^x4); - MUL(t1,*Z++); - t2=t1+t2; - - x1^=t1; - x4^=t2; - - t2^=x2; - x2=x3^t1; - x3=t2; -} while(--r); -MUL(x1,*Z++); -*out++=x1; -*out++=x3+*Z++; -*out++=x2+*Z++; -MUL(x4,*Z); -*out=x4; -return; -} - -/************************ -** HUFFMAN COMPRESSION ** -************************/ - -/************** -** DoHuffman ** -*************** -** Execute a huffman compression on a block of plaintext. -** Note that (as with IDEA encryption) an iteration of the -** Huffman test includes a compression AND a decompression. -** Also, the compression cycle includes building the -** Huffman tree. -*/ -void DoHuffman(void) -{ -HuffStruct *lochuffstruct; /* Loc pointer to global data */ -char *errorcontext; -int systemerror; -ulong accumtime; -double iterations; -farchar *comparray; -farchar *decomparray; -farchar *plaintext; - -/* -** Link to global data -*/ -lochuffstruct=&global_huffstruct; - -/* -** Set error context. -*/ -errorcontext="CPU:Huffman"; - -/* -** Allocate memory for the plaintext and the compressed text. -** We'll be really pessimistic here, and allocate equal amounts -** for both (though we know...well, we PRESUME) the compressed -** stuff will take less than the plain stuff. -** Also note that we'll build a 3rd buffer to decompress -** into, and we preallocate space for the huffman tree. -** (We presume that the Huffman tree will grow no larger -** than 512 bytes. This is actually a super-conservative -** estimate...but, who cares?) -*/ -plaintext=(farchar *)AllocateMemory(lochuffstruct->arraysize,&systemerror); -if(systemerror) -{ ReportError(errorcontext,systemerror); - ErrorExit(); -} -comparray=(farchar *)AllocateMemory(lochuffstruct->arraysize,&systemerror); -if(systemerror) -{ ReportError(errorcontext,systemerror); - FreeMemory(plaintext,&systemerror); - ErrorExit(); -} -decomparray=(farchar *)AllocateMemory(lochuffstruct->arraysize,&systemerror); -if(systemerror) -{ ReportError(errorcontext,systemerror); - FreeMemory(plaintext,&systemerror); - FreeMemory(comparray,&systemerror); - ErrorExit(); -} - -hufftree=(huff_node *)AllocateMemory(sizeof(huff_node) * 512, - &systemerror); -if(systemerror) -{ ReportError(errorcontext,systemerror); - FreeMemory(plaintext,&systemerror); - FreeMemory(comparray,&systemerror); - FreeMemory(decomparray,&systemerror); - ErrorExit(); -} - -/* -** Build the plaintext buffer. Since we want this to -** actually be able to compress, we'll use the -** wordcatalog to build the plaintext stuff. -*/ -/* -** Reset random number generator so things repeat. -** added by Uwe F. Mayer -*/ -randnum((int32)13); -create_text_block(plaintext,lochuffstruct->arraysize-1,(ushort)500); -plaintext[lochuffstruct->arraysize-1L]='\0'; -plaintextlen=lochuffstruct->arraysize; - -/* -** See if we need to perform self adjustment loop. -*/ -if(lochuffstruct->adjust==0) -{ - /* - ** Do self-adjustment. This involves initializing the - ** # of loops and increasing the loop count until we - ** get a number of loops that we can use. - */ - for(lochuffstruct->loops=100L; - lochuffstruct->loops<MAXHUFFLOOPS; - lochuffstruct->loops+=10L) - if(DoHuffIteration(plaintext, - comparray, - decomparray, - lochuffstruct->arraysize, - lochuffstruct->loops, - hufftree)>global_min_ticks) break; -} - -/* -** All's well if we get here. Do the test. -*/ -accumtime=0L; -iterations=(double)0.0; - -do { - accumtime+=DoHuffIteration(plaintext, - comparray, - decomparray, - lochuffstruct->arraysize, - lochuffstruct->loops, - hufftree); - iterations+=(double)lochuffstruct->loops; -} while(TicksToSecs(accumtime)<lochuffstruct->request_secs); - -/* -** Clean up, calculate results, and go home. Be sure to -** show that we don't have to rerun adjustment code. -*/ -FreeMemory((farvoid *)plaintext,&systemerror); -FreeMemory((farvoid *)comparray,&systemerror); -FreeMemory((farvoid *)decomparray,&systemerror); -FreeMemory((farvoid *)hufftree,&systemerror); -lochuffstruct->iterspersec=iterations / TicksToFracSecs(accumtime); - -if(lochuffstruct->adjust==0) - lochuffstruct->adjust=1; - -} - -/********************* -** create_text_line ** -********************** -** Create a random line of text, stored at *dt. The line may be -** no more than nchars long. -*/ -static void create_text_line(farchar *dt, - long nchars) -{ -long charssofar; /* # of characters so far */ -long tomove; /* # of characters to move */ -char myword[40]; /* Local buffer for words */ -farchar *wordptr; /* Pointer to word from catalog */ - -charssofar=0; - -do { -/* -** Grab a random word from the wordcatalog -*/ -/* wordptr=wordcatarray[abs_randwc((long)WORDCATSIZE)];*/ -wordptr=wordcatarray[abs_randwc((int32)WORDCATSIZE)]; -MoveMemory((farvoid *)myword, - (farvoid *)wordptr, - (unsigned long)strlen(wordptr)+1); - -/* -** Append a blank. -*/ -tomove=strlen(myword)+1; -myword[tomove-1]=' '; - -/* -** See how long it is. If its length+charssofar > nchars, we have -** to trim it. -*/ -if((tomove+charssofar)>nchars) - tomove=nchars-charssofar; -/* -** Attach the word to the current line. Increment counter. -*/ -MoveMemory((farvoid *)dt,(farvoid *)myword,(unsigned long)tomove); -charssofar+=tomove; -dt+=tomove; - -/* -** If we're done, bail out. Otherwise, go get another word. -*/ -} while(charssofar<nchars); - -return; -} - -/********************** -** create_text_block ** -*********************** -** Build a block of text randomly loaded with words. The words -** come from the wordcatalog (which must be loaded before you -** call this). -** *tb points to the memory where the text is to be built. -** tblen is the # of bytes to put into the text block -** maxlinlen is the maximum length of any line (line end indicated -** by a carriage return). -*/ -static void create_text_block(farchar *tb, - ulong tblen, - ushort maxlinlen) -{ -ulong bytessofar; /* # of bytes so far */ -ulong linelen; /* Line length */ - -bytessofar=0L; -do { - -/* -** Pick a random length for a line and fill the line. -** Make sure the line can fit (haven't exceeded tablen) and also -** make sure you leave room to append a carriage return. -*/ -linelen=abs_randwc(maxlinlen-6)+6; -if((linelen+bytessofar)>tblen) - linelen=tblen-bytessofar; - -if(linelen>1) -{ - create_text_line(tb,linelen); -} -tb+=linelen-1; /* Add the carriage return */ -*tb++='\n'; - -bytessofar+=linelen; - -} while(bytessofar<tblen); - -} - -/******************** -** DoHuffIteration ** -********************* -** Perform the huffman benchmark. This routine -** (a) Builds the huffman tree -** (b) Compresses the text -** (c) Decompresses the text and verifies correct decompression -*/ -static ulong DoHuffIteration(farchar *plaintext, - farchar *comparray, - farchar *decomparray, - ulong arraysize, - ulong nloops, - huff_node *hufftree) -{ -int i; /* Index */ -long j; /* Bigger index */ -int root; /* Pointer to huffman tree root */ -float lowfreq1, lowfreq2; /* Low frequency counters */ -int lowidx1, lowidx2; /* Indexes of low freq. elements */ -long bitoffset; /* Bit offset into text */ -long textoffset; /* Char offset into text */ -long maxbitoffset; /* Holds limit of bit offset */ -long bitstringlen; /* Length of bitstring */ -int c; /* Character from plaintext */ -char bitstring[30]; /* Holds bitstring */ -ulong elapsed; /* For stopwatch */ -#ifdef DEBUG -int status=0; -#endif - -/* -** Start the stopwatch -*/ -elapsed=StartStopwatch(); - -/* -** Do everything for nloops -*/ -while(nloops--) -{ - -/* -** Calculate the frequency of each byte value. Store the -** results in what will become the "leaves" of the -** Huffman tree. Interior nodes will be built in those -** nodes greater than node #255. -*/ -for(i=0;i<256;i++) -{ - hufftree[i].freq=(float)0.0; - hufftree[i].c=(unsigned char)i; -} - -for(j=0;j<arraysize;j++) - hufftree[(int)plaintext[j]].freq+=(float)1.0; - -for(i=0;i<256;i++) - if(hufftree[i].freq != (float)0.0) - hufftree[i].freq/=(float)arraysize; - -/* Reset the second half of the tree. Otherwise the loop below that -** compares the frequencies up to index 512 makes no sense. Some -** systems automatically zero out memory upon allocation, others (like -** for example DEC Unix) do not. Depending on this the loop below gets -** different data and different run times. On our alpha the data that -** was arbitrarily assigned led to an underflow error at runtime. We -** use that zeroed-out bits are in fact 0 as a float. -** Uwe F. Mayer */ -bzero((char *)&(hufftree[256]),sizeof(huff_node)*256); -/* -** Build the huffman tree. First clear all the parent -** pointers and left/right pointers. Also, discard all -** nodes that have a frequency of true 0. */ -for(i=0;i<512;i++) -{ if(hufftree[i].freq==(float)0.0) - hufftree[i].parent=EXCLUDED; - else - hufftree[i].parent=hufftree[i].left=hufftree[i].right=-1; -} - -/* -** Go through the tree. Finding nodes of really low -** frequency. -*/ -root=255; /* Starting root node-1 */ -while(1) -{ - lowfreq1=(float)2.0; lowfreq2=(float)2.0; - lowidx1=-1; lowidx2=-1; - /* - ** Find first lowest frequency. - */ - for(i=0;i<=root;i++) - if(hufftree[i].parent<0) - if(hufftree[i].freq<lowfreq1) - { lowfreq1=hufftree[i].freq; - lowidx1=i; - } - - /* - ** Did we find a lowest value? If not, the - ** tree is done. - */ - if(lowidx1==-1) break; - - /* - ** Find next lowest frequency - */ - for(i=0;i<=root;i++) - if((hufftree[i].parent<0) && (i!=lowidx1)) - if(hufftree[i].freq<lowfreq2) - { lowfreq2=hufftree[i].freq; - lowidx2=i; - } - - /* - ** If we could only find one item, then that - ** item is surely the root, and (as above) the - ** tree is done. - */ - if(lowidx2==-1) break; - - /* - ** Attach the two new nodes to the current root, and - ** advance the current root. - */ - root++; /* New root */ - hufftree[lowidx1].parent=root; - hufftree[lowidx2].parent=root; - hufftree[root].freq=lowfreq1+lowfreq2; - hufftree[root].left=lowidx1; - hufftree[root].right=lowidx2; - hufftree[root].parent=-2; /* Show root */ -} - -/* -** Huffman tree built...compress the plaintext -*/ -bitoffset=0L; /* Initialize bit offset */ -for(i=0;i<arraysize;i++) -{ - c=(int)plaintext[i]; /* Fetch character */ - /* - ** Build a bit string for byte c - */ - bitstringlen=0; - while(hufftree[c].parent!=-2) - { if(hufftree[hufftree[c].parent].left==c) - bitstring[bitstringlen]='0'; - else - bitstring[bitstringlen]='1'; - c=hufftree[c].parent; - bitstringlen++; - } - - /* - ** Step backwards through the bit string, setting - ** bits in the compressed array as you go. - */ - while(bitstringlen--) - { SetCompBit((u8 *)comparray,(u32)bitoffset,bitstring[bitstringlen]); - bitoffset++; - } -} - -/* -** Compression done. Perform de-compression. -*/ -maxbitoffset=bitoffset; -bitoffset=0; -textoffset=0; -do { - i=root; - while(hufftree[i].left!=-1) - { if(GetCompBit((u8 *)comparray,(u32)bitoffset)==0) - i=hufftree[i].left; - else - i=hufftree[i].right; - bitoffset++; - } - decomparray[textoffset]=hufftree[i].c; - -#ifdef DEBUG - if(hufftree[i].c != plaintext[textoffset]) - { - /* Show error */ - printf("Error at textoffset %ld\n",textoffset); - status=1; - } -#endif - textoffset++; -} while(bitoffset<maxbitoffset); - -} /* End the big while(nloops--) from above */ - -/* -** All done -*/ -#ifdef DEBUG - if (status==0) printf("Huffman: OK\n"); -#endif -return(StopStopwatch(elapsed)); -} - -/*************** -** SetCompBit ** -**************** -** Set a bit in the compression array. The value of the -** bit is set according to char bitchar. -*/ -static void SetCompBit(u8 *comparray, - u32 bitoffset, - char bitchar) -{ -u32 byteoffset; -int bitnumb; - -/* -** First calculate which element in the comparray to -** alter. and the bitnumber. -*/ -byteoffset=bitoffset>>3; -bitnumb=bitoffset % 8; - -/* -** Set or clear -*/ -if(bitchar=='1') - comparray[byteoffset]|=(1<<bitnumb); -else - comparray[byteoffset]&=~(1<<bitnumb); - -return; -} - -/*************** -** GetCompBit ** -**************** -** Return the bit value of a bit in the comparession array. -** Returns 0 if the bit is clear, nonzero otherwise. -*/ -static int GetCompBit(u8 *comparray, - u32 bitoffset) -{ -u32 byteoffset; -int bitnumb; - -/* -** Calculate byte offset and bit number. -*/ -byteoffset=bitoffset>>3; -bitnumb=bitoffset % 8; - -/* -** Fetch -*/ -return((1<<bitnumb) & comparray[byteoffset] ); -} - -/******************************** -** BACK PROPAGATION NEURAL NET ** -********************************* -** This code is a modified version of the code -** that was submitted to BYTE Magazine by -** Maureen Caudill. It accomanied an article -** that I CANNOT NOW RECALL. -** The author's original heading/comment was -** as follows: -** -** Backpropagation Network -** Written by Maureen Caudill -** in Think C 4.0 on a Macintosh -** -** (c) Maureen Caudill 1988-1991 -** This network will accept 5x7 input patterns -** and produce 8 bit output patterns. -** The source code may be copied or modified without restriction, -** but no fee may be charged for its use. -** -** ++++++++++++++ -** I have modified the code so that it will work -** on systems other than a Macintosh -- RG -*/ - -/*********** -** DoNNet ** -************ -** Perform the neural net benchmark. -** Note that this benchmark is one of the few that -** requires an input file. That file is "NNET.DAT" and -** should be on the local directory (from which the -** benchmark program in launched). -*/ -void DoNNET(void) -{ -NNetStruct *locnnetstruct; /* Local ptr to global data */ -char *errorcontext; -ulong accumtime; -double iterations; - -/* -** Link to global data -*/ -locnnetstruct=&global_nnetstruct; - -/* -** Set error context -*/ -errorcontext="CPU:NNET"; - -/* -** Init random number generator. -** NOTE: It is important that the random number generator -** be re-initialized for every pass through this test. -** The NNET algorithm uses the random number generator -** to initialize the net. Results are sensitive to -** the initial neural net state. -*/ -/* randnum(3L); */ -randnum((int32)3); - -/* -** Read in the input and output patterns. We'll do this -** only once here at the beginning. These values don't -** change once loaded. -*/ -if(read_data_file()!=0) - ErrorExit(); - - -/* -** See if we need to perform self adjustment loop. -*/ -if(locnnetstruct->adjust==0) -{ - /* - ** Do self-adjustment. This involves initializing the - ** # of loops and increasing the loop count until we - ** get a number of loops that we can use. - */ - for(locnnetstruct->loops=1L; - locnnetstruct->loops<MAXNNETLOOPS; - locnnetstruct->loops++) - { /*randnum(3L); */ - randnum((int32)3); - if(DoNNetIteration(locnnetstruct->loops) - >global_min_ticks) break; - } -} - -/* -** All's well if we get here. Do the test. -*/ -accumtime=0L; -iterations=(double)0.0; - -do { - /* randnum(3L); */ /* Gotta do this for Neural Net */ - randnum((int32)3); /* Gotta do this for Neural Net */ - accumtime+=DoNNetIteration(locnnetstruct->loops); - iterations+=(double)locnnetstruct->loops; -} while(TicksToSecs(accumtime)<locnnetstruct->request_secs); - -/* -** Clean up, calculate results, and go home. Be sure to -** show that we don't have to rerun adjustment code. -*/ -locnnetstruct->iterspersec=iterations / TicksToFracSecs(accumtime); - -if(locnnetstruct->adjust==0) - locnnetstruct->adjust=1; - - -return; -} - -/******************** -** DoNNetIteration ** -********************* -** Do a single iteration of the neural net benchmark. -** By iteration, we mean a "learning" pass. -*/ -static ulong DoNNetIteration(ulong nloops) -{ -ulong elapsed; /* Elapsed time */ -int patt; - -/* -** Run nloops learning cycles. Notice that, counted with -** the learning cycle is the weight randomization and -** zeroing of changes. This should reduce clock jitter, -** since we don't have to stop and start the clock for -** each iteration. -*/ -elapsed=StartStopwatch(); -while(nloops--) -{ - randomize_wts(); - zero_changes(); - iteration_count=1; - learned = F; - numpasses = 0; - while (learned == F) - { - for (patt=0; patt<numpats; patt++) - { - worst_error = 0.0; /* reset this every pass through data */ - move_wt_changes(); /* move last pass's wt changes to momentum array */ - do_forward_pass(patt); - do_back_pass(patt); - iteration_count++; - } - numpasses ++; - learned = check_out_error(); - } -#ifdef DEBUG -printf("Learned in %d passes\n",numpasses); -#endif -} -return(StopStopwatch(elapsed)); -} - -/************************* -** do_mid_forward(patt) ** -************************** -** Process the middle layer's forward pass -** The activation of middle layer's neurode is the weighted -** sum of the inputs from the input pattern, with sigmoid -** function applied to the inputs. -**/ -static void do_mid_forward(int patt) -{ -double sum; -int neurode, i; - -for (neurode=0;neurode<MID_SIZE; neurode++) -{ - sum = 0.0; - for (i=0; i<IN_SIZE; i++) - { /* compute weighted sum of input signals */ - sum += mid_wts[neurode][i]*in_pats[patt][i]; - } - /* - ** apply sigmoid function f(x) = 1/(1+exp(-x)) to weighted sum - */ - sum = 1.0/(1.0+exp(-sum)); - mid_out[neurode] = sum; -} -return; -} - -/********************* -** do_out_forward() ** -********************** -** process the forward pass through the output layer -** The activation of the output layer is the weighted sum of -** the inputs (outputs from middle layer), modified by the -** sigmoid function. -**/ -static void do_out_forward() -{ -double sum; -int neurode, i; - -for (neurode=0; neurode<OUT_SIZE; neurode++) -{ - sum = 0.0; - for (i=0; i<MID_SIZE; i++) - { /* - ** compute weighted sum of input signals - ** from middle layer - */ - sum += out_wts[neurode][i]*mid_out[i]; - } - /* - ** Apply f(x) = 1/(1+exp(-x)) to weighted input - */ - sum = 1.0/(1.0+exp(-sum)); - out_out[neurode] = sum; -} -return; -} - -/************************* -** display_output(patt) ** -************************** -** Display the actual output vs. the desired output of the -** network. -** Once the training is complete, and the "learned" flag set -** to TRUE, then display_output sends its output to both -** the screen and to a text output file. -** -** NOTE: This routine has been disabled in the benchmark -** version. -- RG -**/ -/* -void display_output(int patt) -{ -int i; - - fprintf(outfile,"\n Iteration # %d",iteration_count); - fprintf(outfile,"\n Desired Output: "); - - for (i=0; i<OUT_SIZE; i++) - { - fprintf(outfile,"%6.3f ",out_pats[patt][i]); - } - fprintf(outfile,"\n Actual Output: "); - - for (i=0; i<OUT_SIZE; i++) - { - fprintf(outfile,"%6.3f ",out_out[i]); - } - fprintf(outfile,"\n"); - return; -} -*/ - -/********************** -** do_forward_pass() ** -*********************** -** control function for the forward pass through the network -** NOTE: I have disabled the call to display_output() in -** the benchmark version -- RG. -**/ -static void do_forward_pass(int patt) -{ -do_mid_forward(patt); /* process forward pass, middle layer */ -do_out_forward(); /* process forward pass, output layer */ -/* display_output(patt); ** display results of forward pass */ -return; -} - -/*********************** -** do_out_error(patt) ** -************************ -** Compute the error for the output layer neurodes. -** This is simply Desired - Actual. -**/ -static void do_out_error(int patt) -{ -int neurode; -double error,tot_error, sum; - -tot_error = 0.0; -sum = 0.0; -for (neurode=0; neurode<OUT_SIZE; neurode++) -{ - out_error[neurode] = out_pats[patt][neurode] - out_out[neurode]; - /* - ** while we're here, also compute magnitude - ** of total error and worst error in this pass. - ** We use these to decide if we are done yet. - */ - error = out_error[neurode]; - if (error <0.0) - { - sum += -error; - if (-error > tot_error) - tot_error = -error; /* worst error this pattern */ - } - else - { - sum += error; - if (error > tot_error) - tot_error = error; /* worst error this pattern */ - } -} -avg_out_error[patt] = sum/OUT_SIZE; -tot_out_error[patt] = tot_error; -return; -} - -/*********************** -** worst_pass_error() ** -************************ -** Find the worst and average error in the pass and save it -**/ -static void worst_pass_error() -{ -double error,sum; - -int i; - -error = 0.0; -sum = 0.0; -for (i=0; i<numpats; i++) -{ - if (tot_out_error[i] > error) error = tot_out_error[i]; - sum += avg_out_error[i]; -} -worst_error = error; -average_error = sum/numpats; -return; -} - -/******************* -** do_mid_error() ** -******************** -** Compute the error for the middle layer neurodes -** This is based on the output errors computed above. -** Note that the derivative of the sigmoid f(x) is -** f'(x) = f(x)(1 - f(x)) -** Recall that f(x) is merely the output of the middle -** layer neurode on the forward pass. -**/ -static void do_mid_error() -{ -double sum; -int neurode, i; - -for (neurode=0; neurode<MID_SIZE; neurode++) -{ - sum = 0.0; - for (i=0; i<OUT_SIZE; i++) - sum += out_wts[i][neurode]*out_error[i]; - - /* - ** apply the derivative of the sigmoid here - ** Because of the choice of sigmoid f(I), the derivative - ** of the sigmoid is f'(I) = f(I)(1 - f(I)) - */ - mid_error[neurode] = mid_out[neurode]*(1-mid_out[neurode])*sum; -} -return; -} - -/********************* -** adjust_out_wts() ** -********************** -** Adjust the weights of the output layer. The error for -** the output layer has been previously propagated back to -** the middle layer. -** Use the Delta Rule with momentum term to adjust the weights. -**/ -static void adjust_out_wts() -{ -int weight, neurode; -double learn,delta,alph; - -learn = BETA; -alph = ALPHA; -for (neurode=0; neurode<OUT_SIZE; neurode++) -{ - for (weight=0; weight<MID_SIZE; weight++) - { - /* standard delta rule */ - delta = learn * out_error[neurode] * mid_out[weight]; - - /* now the momentum term */ - delta += alph * out_wt_change[neurode][weight]; - out_wts[neurode][weight] += delta; - - /* keep track of this pass's cum wt changes for next pass's momentum */ - out_wt_cum_change[neurode][weight] += delta; - } -} -return; -} - -/************************* -** adjust_mid_wts(patt) ** -************************** -** Adjust the middle layer weights using the previously computed -** errors. -** We use the Generalized Delta Rule with momentum term -**/ -static void adjust_mid_wts(int patt) -{ -int weight, neurode; -double learn,alph,delta; - -learn = BETA; -alph = ALPHA; -for (neurode=0; neurode<MID_SIZE; neurode++) -{ - for (weight=0; weight<IN_SIZE; weight++) - { - /* first the basic delta rule */ - delta = learn * mid_error[neurode] * in_pats[patt][weight]; - - /* with the momentum term */ - delta += alph * mid_wt_change[neurode][weight]; - mid_wts[neurode][weight] += delta; - - /* keep track of this pass's cum wt changes for next pass's momentum */ - mid_wt_cum_change[neurode][weight] += delta; - } -} -return; -} - -/******************* -** do_back_pass() ** -******************** -** Process the backward propagation of error through network. -**/ -void do_back_pass(int patt) -{ - -do_out_error(patt); -do_mid_error(); -adjust_out_wts(); -adjust_mid_wts(patt); - -return; -} - - -/********************** -** move_wt_changes() ** -*********************** -** Move the weight changes accumulated last pass into the wt-change -** array for use by the momentum term in this pass. Also zero out -** the accumulating arrays after the move. -**/ -static void move_wt_changes() -{ -int i,j; - -for (i = 0; i<MID_SIZE; i++) - for (j = 0; j<IN_SIZE; j++) - { - mid_wt_change[i][j] = mid_wt_cum_change[i][j]; - /* - ** Zero it out for next pass accumulation. - */ - mid_wt_cum_change[i][j] = 0.0; - } - -for (i = 0; i<OUT_SIZE; i++) - for (j=0; j<MID_SIZE; j++) - { - out_wt_change[i][j] = out_wt_cum_change[i][j]; - out_wt_cum_change[i][j] = 0.0; - } - -return; -} - -/********************** -** check_out_error() ** -*********************** -** Check to see if the error in the output layer is below -** MARGIN*OUT_SIZE for all output patterns. If so, then -** assume the network has learned acceptably well. This -** is simply an arbitrary measure of how well the network -** has learned -- many other standards are possible. -**/ -static int check_out_error() -{ -int result,i,error; - -result = T; -error = F; -worst_pass_error(); /* identify the worst error in this pass */ - -/* -#ifdef DEBUG -printf("\n Iteration # %d",iteration_count); -#endif -*/ -for (i=0; i<numpats; i++) -{ -/* printf("\n Error pattern %d: Worst: %8.3f; Average: %8.3f", - i+1,tot_out_error[i], avg_out_error[i]); - fprintf(outfile, - "\n Error pattern %d: Worst: %8.3f; Average: %8.3f", - i+1,tot_out_error[i]); -*/ - - if (worst_error >= STOP) result = F; - if (tot_out_error[i] >= 16.0) error = T; -} - -if (error == T) result = ERR; - - -#ifdef DEBUG -/* printf("\n Error this pass thru data: Worst: %8.3f; Average: %8.3f", - worst_error,average_error); -*/ -/* fprintf(outfile, - "\n Error this pass thru data: Worst: %8.3f; Average: %8.3f", - worst_error, average_error); */ -#endif - -return(result); -} - - -/******************* -** zero_changes() ** -******************** -** Zero out all the wt change arrays -**/ -static void zero_changes() -{ -int i,j; - -for (i = 0; i<MID_SIZE; i++) -{ - for (j=0; j<IN_SIZE; j++) - { - mid_wt_change[i][j] = 0.0; - mid_wt_cum_change[i][j] = 0.0; - } -} - -for (i = 0; i< OUT_SIZE; i++) -{ - for (j=0; j<MID_SIZE; j++) - { - out_wt_change[i][j] = 0.0; - out_wt_cum_change[i][j] = 0.0; - } -} -return; -} - - -/******************** -** randomize_wts() ** -********************* -** Intialize the weights in the middle and output layers to -** random values between -0.25..+0.25 -** Function rand() returns a value between 0 and 32767. -** -** NOTE: Had to make alterations to how the random numbers were -** created. -- RG. -**/ -static void randomize_wts() -{ -int neurode,i; -double value; - -/* -** Following not used int benchmark version -- RG -** -** printf("\n Please enter a random number seed (1..32767): "); -** scanf("%d", &i); -** srand(i); -*/ - -for (neurode = 0; neurode<MID_SIZE; neurode++) -{ - for(i=0; i<IN_SIZE; i++) - { - /* value=(double)abs_randwc(100000L); */ - value=(double)abs_randwc((int32)100000); - value=value/(double)100000.0 - (double) 0.5; - mid_wts[neurode][i] = value/2; - } -} -for (neurode=0; neurode<OUT_SIZE; neurode++) -{ - for(i=0; i<MID_SIZE; i++) - { - /* value=(double)abs_randwc(100000L); */ - value=(double)abs_randwc((int32)100000); - value=value/(double)10000.0 - (double) 0.5; - out_wts[neurode][i] = value/2; - } -} - -return; -} - - -/********************* -** read_data_file() ** -********************** -** Read in the input data file and store the patterns in -** in_pats and out_pats. -** The format for the data file is as follows: -** -** line# data expected -** ----- ------------------------------ -** 1 In-X-size,in-y-size,out-size -** 2 number of patterns in file -** 3 1st X row of 1st input pattern -** 4.. following rows of 1st input pattern pattern -** in-x+2 y-out pattern -** 1st X row of 2nd pattern -** etc. -** -** Each row of data is separated by commas or spaces. -** The data is expected to be ascii text corresponding to -** either a +1 or a 0. -** -** Sample input for a 1-pattern file (The comments to the -** right may NOT be in the file unless more sophisticated -** parsing of the input is done.): -** -** 5,7,8 input is 5x7 grid, output is 8 bits -** 1 one pattern in file -** 0,1,1,1,0 beginning of pattern for "O" -** 1,0,0,0,1 -** 1,0,0,0,1 -** 1,0,0,0,1 -** 1,0,0,0,1 -** 1,0,0,0,0 -** 0,1,1,1,0 -** 0,1,0,0,1,1,1,1 ASCII code for "O" -- 0100 1111 -** -** Clearly, this simple scheme can be expanded or enhanced -** any way you like. -** -** Returns -1 if any file error occurred, otherwise 0. -**/ -static int read_data_file() -{ -FILE *infile; - -int xinsize,yinsize,youtsize; -int patt, element, i, row; -int vals_read; -int val1,val2,val3,val4,val5,val6,val7,val8; - -/* printf("\n Opening and retrieving data from file."); */ - -infile = fopen(inpath, "r"); -if (infile == NULL) -{ - printf("\n CPU:NNET--error in opening file!"); - return -1 ; -} -vals_read =fscanf(infile,"%d %d %d",&xinsize,&yinsize,&youtsize); -if (vals_read != 3) -{ - printf("\n CPU:NNET -- Should read 3 items in line one; did read %d",vals_read); - return -1; -} -vals_read=fscanf(infile,"%d",&numpats); -if (vals_read !=1) -{ - printf("\n CPU:NNET -- Should read 1 item in line 2; did read %d",vals_read); - return -1; -} -if (numpats > MAXPATS) - numpats = MAXPATS; - -for (patt=0; patt<numpats; patt++) -{ - element = 0; - for (row = 0; row<yinsize; row++) - { - vals_read = fscanf(infile,"%d %d %d %d %d", - &val1, &val2, &val3, &val4, &val5); - if (vals_read != 5) - { - printf ("\n CPU:NNET -- failure in reading input!"); - return -1; - } - element=row*xinsize; - - in_pats[patt][element] = (double) val1; element++; - in_pats[patt][element] = (double) val2; element++; - in_pats[patt][element] = (double) val3; element++; - in_pats[patt][element] = (double) val4; element++; - in_pats[patt][element] = (double) val5; element++; - } - for (i=0;i<IN_SIZE; i++) - { - if (in_pats[patt][i] >= 0.9) - in_pats[patt][i] = 0.9; - if (in_pats[patt][i] <= 0.1) - in_pats[patt][i] = 0.1; - } - element = 0; - vals_read = fscanf(infile,"%d %d %d %d %d %d %d %d", - &val1, &val2, &val3, &val4, &val5, &val6, &val7, &val8); - - out_pats[patt][element] = (double) val1; element++; - out_pats[patt][element] = (double) val2; element++; - out_pats[patt][element] = (double) val3; element++; - out_pats[patt][element] = (double) val4; element++; - out_pats[patt][element] = (double) val5; element++; - out_pats[patt][element] = (double) val6; element++; - out_pats[patt][element] = (double) val7; element++; - out_pats[patt][element] = (double) val8; element++; -} - -/* printf("\n Closing the input file now. "); */ - -fclose(infile); -return(0); -} - -/********************* -** initialize_net() ** -********************** -** Do all the initialization stuff before beginning -*/ -/* -static int initialize_net() -{ -int err_code; - -randomize_wts(); -zero_changes(); -err_code = read_data_file(); -iteration_count = 1; -return(err_code); -} -*/ - -/********************** -** display_mid_wts() ** -*********************** -** Display the weights on the middle layer neurodes -** NOTE: This routine is not used in the benchmark -** test -- RG -**/ -/* static void display_mid_wts() -{ -int neurode, weight, row, col; - -fprintf(outfile,"\n Weights of Middle Layer neurodes:"); - -for (neurode=0; neurode<MID_SIZE; neurode++) -{ - fprintf(outfile,"\n Mid Neurode # %d",neurode); - for (row=0; row<IN_Y_SIZE; row++) - { - fprintf(outfile,"\n "); - for (col=0; col<IN_X_SIZE; col++) - { - weight = IN_X_SIZE * row + col; - fprintf(outfile," %8.3f ", mid_wts[neurode][weight]); - } - } -} -return; -} -*/ -/********************** -** display_out_wts() ** -*********************** -** Display the weights on the output layer neurodes -** NOTE: This code is not used in the benchmark -** test -- RG -*/ -/* void display_out_wts() -{ -int neurode, weight; - - fprintf(outfile,"\n Weights of Output Layer neurodes:"); - - for (neurode=0; neurode<OUT_SIZE; neurode++) - { - fprintf(outfile,"\n Out Neurode # %d \n",neurode); - for (weight=0; weight<MID_SIZE; weight++) - { - fprintf(outfile," %8.3f ", out_wts[neurode][weight]); - } - } - return; -} -*/ - -/*********************** -** LU DECOMPOSITION ** -** (Linear Equations) ** -************************ -** These routines come from "Numerical Recipes in Pascal". -** Note that, as in the assignment algorithm, though we -** separately define LUARRAYROWS and LUARRAYCOLS, the two -** must be the same value (this routine depends on a square -** matrix). -*/ - -/********* -** DoLU ** -********** -** Perform the LU decomposition benchmark. -*/ -void DoLU(void) -{ -LUStruct *loclustruct; /* Local pointer to global data */ -char *errorcontext; -int systemerror; -fardouble *a; -fardouble *b; -fardouble *abase; -fardouble *bbase; -LUdblptr ptra; -int n; -int i; -ulong accumtime; -double iterations; - -/* -** Link to global data -*/ -loclustruct=&global_lustruct; - -/* -** Set error context. -*/ -errorcontext="FPU:LU"; - -/* -** Our first step is to build a "solvable" problem. This -** will become the "seed" set that all others will be -** derived from. (I.E., we'll simply copy these arrays -** into the others. -*/ -a=(fardouble *)AllocateMemory(sizeof(double) * LUARRAYCOLS * LUARRAYROWS, - &systemerror); -b=(fardouble *)AllocateMemory(sizeof(double) * LUARRAYROWS, - &systemerror); -n=LUARRAYROWS; - -/* -** We need to allocate a temp vector that is used by the LU -** algorithm. This removes the allocation routine from the -** timing. -*/ -LUtempvv=(fardouble *)AllocateMemory(sizeof(double)*LUARRAYROWS, - &systemerror); - -/* -** Build a problem to be solved. -*/ -ptra.ptrs.p=a; /* Gotta coerce linear array to 2D array */ -build_problem(*ptra.ptrs.ap,n,b); - -/* -** Now that we have a problem built, see if we need to do -** auto-adjust. If so, repeatedly call the DoLUIteration routine, -** increasing the number of solutions per iteration as you go. -*/ -if(loclustruct->adjust==0) -{ - loclustruct->numarrays=0; - for(i=1;i<=MAXLUARRAYS;i++) - { - abase=(fardouble *)AllocateMemory(sizeof(double) * - LUARRAYCOLS*LUARRAYROWS*(i+1),&systemerror); - if(systemerror) - { ReportError(errorcontext,systemerror); - LUFreeMem(a,b,(fardouble *)NULL,(fardouble *)NULL); - ErrorExit(); - } - bbase=(fardouble *)AllocateMemory(sizeof(double) * - LUARRAYROWS*(i+1),&systemerror); - if(systemerror) - { ReportError(errorcontext,systemerror); - LUFreeMem(a,b,abase,(fardouble *)NULL); - ErrorExit(); - } - if(DoLUIteration(a,b,abase,bbase,i)>global_min_ticks) - { loclustruct->numarrays=i; - break; - } - /* - ** Not enough arrays...free them all and try again - */ - FreeMemory((farvoid *)abase,&systemerror); - FreeMemory((farvoid *)bbase,&systemerror); - } - /* - ** Were we able to do it? - */ - if(loclustruct->numarrays==0) - { printf("FPU:LU -- Array limit reached\n"); - LUFreeMem(a,b,abase,bbase); - ErrorExit(); - } -} -else -{ /* - ** Don't need to adjust -- just allocate the proper - ** number of arrays and proceed. - */ - abase=(fardouble *)AllocateMemory(sizeof(double) * - LUARRAYCOLS*LUARRAYROWS*loclustruct->numarrays, - &systemerror); - if(systemerror) - { ReportError(errorcontext,systemerror); - LUFreeMem(a,b,(fardouble *)NULL,(fardouble *)NULL); - ErrorExit(); - } - bbase=(fardouble *)AllocateMemory(sizeof(double) * - LUARRAYROWS*loclustruct->numarrays,&systemerror); - if(systemerror) - { - ReportError(errorcontext,systemerror); - LUFreeMem(a,b,abase,(fardouble *)NULL); - ErrorExit(); - } -} -/* -** All's well if we get here. Do the test. -*/ -accumtime=0L; -iterations=(double)0.0; - -do { - accumtime+=DoLUIteration(a,b,abase,bbase, - loclustruct->numarrays); - iterations+=(double)loclustruct->numarrays; -} while(TicksToSecs(accumtime)<loclustruct->request_secs); - -/* -** Clean up, calculate results, and go home. Be sure to -** show that we don't have to rerun adjustment code. -*/ -loclustruct->iterspersec=iterations / TicksToFracSecs(accumtime); - -if(loclustruct->adjust==0) - loclustruct->adjust=1; - -LUFreeMem(a,b,abase,bbase); -return; -} - -/************** -** LUFreeMem ** -*************** -** Release memory associated with LU benchmark. -*/ -static void LUFreeMem(fardouble *a, fardouble *b, - fardouble *abase,fardouble *bbase) -{ -int systemerror; - -FreeMemory((farvoid *)a,&systemerror); -FreeMemory((farvoid *)b,&systemerror); -FreeMemory((farvoid *)LUtempvv,&systemerror); - -if(abase!=(fardouble *)NULL) FreeMemory((farvoid *)abase,&systemerror); -if(bbase!=(fardouble *)NULL) FreeMemory((farvoid *)bbase,&systemerror); -return; -} - -/****************** -** DoLUIteration ** -******************* -** Perform an iteration of the LU decomposition benchmark. -** An iteration refers to the repeated solution of several -** identical matrices. -*/ -static ulong DoLUIteration(fardouble *a,fardouble *b, - fardouble *abase, fardouble *bbase, - ulong numarrays) -{ -fardouble *locabase; -fardouble *locbbase; -LUdblptr ptra; /* For converting ptr to 2D array */ -ulong elapsed; -ulong j,i; /* Indexes */ - - -/* -** Move the seed arrays (a & b) into the destination -** arrays; -*/ -for(j=0;j<numarrays;j++) -{ locabase=abase+j*LUARRAYROWS*LUARRAYCOLS; - locbbase=bbase+j*LUARRAYROWS; - for(i=0;i<LUARRAYROWS*LUARRAYCOLS;i++) - *(locabase+i)=*(a+i); - for(i=0;i<LUARRAYROWS;i++) - *(locbbase+i)=*(b+i); -} - -/* -** Do test...begin timing. -*/ -elapsed=StartStopwatch(); -for(i=0;i<numarrays;i++) -{ locabase=abase+i*LUARRAYROWS*LUARRAYCOLS; - locbbase=bbase+i*LUARRAYROWS; - ptra.ptrs.p=locabase; - lusolve(*ptra.ptrs.ap,LUARRAYROWS,locbbase); -} - -return(StopStopwatch(elapsed)); -} - -/****************** -** build_problem ** -******************* -** Constructs a solvable set of linear equations. It does this by -** creating an identity matrix, then loading the solution vector -** with random numbers. After that, the identity matrix and -** solution vector are randomly "scrambled". Scrambling is -** done by (a) randomly selecting a row and multiplying that -** row by a random number and (b) adding one randomly-selected -** row to another. -*/ -static void build_problem(double a[][LUARRAYCOLS], - int n, - double b[LUARRAYROWS]) -{ -long i,j,k,k1; /* Indexes */ -double rcon; /* Random constant */ - -/* -** Reset random number generator -*/ -/* randnum(13L); */ -randnum((int32)13); - -/* -** Build an identity matrix. -** We'll also use this as a chance to load the solution -** vector. -*/ -for(i=0;i<n;i++) -{ /* b[i]=(double)(abs_randwc(100L)+1L); */ - b[i]=(double)(abs_randwc((int32)100)+(int32)1); - for(j=0;j<n;j++) - if(i==j) - /* a[i][j]=(double)(abs_randwc(1000L)+1L); */ - a[i][j]=(double)(abs_randwc((int32)1000)+(int32)1); - else - a[i][j]=(double)0.0; -} - -#ifdef DEBUG -printf("Problem:\n"); -for(i=0;i<n;i++) -{ -/* - for(j=0;j<n;j++) - printf("%6.2f ",a[i][j]); -*/ - printf("%.0f/%.0f=%.2f\t",b[i],a[i][i],b[i]/a[i][i]); -/* - printf("\n"); -*/ -} -#endif - -/* -** Scramble. Do this 8n times. See comment above for -** a description of the scrambling process. -*/ - -for(i=0;i<8*n;i++) -{ - /* - ** Pick a row and a random constant. Multiply - ** all elements in the row by the constant. - */ - /* k=abs_randwc((long)n); - rcon=(double)(abs_randwc(20L)+1L); - for(j=0;j<n;j++) - a[k][j]=a[k][j]*rcon; - b[k]=b[k]*rcon; -*/ - /* - ** Pick two random rows and add second to - ** first. Note that we also occasionally multiply - ** by minus 1 so that we get a subtraction operation. - */ - /* k=abs_randwc((long)n); */ - /* k1=abs_randwc((long)n); */ - k=abs_randwc((int32)n); - k1=abs_randwc((int32)n); - if(k!=k1) - { - if(k<k1) rcon=(double)1.0; - else rcon=(double)-1.0; - for(j=0;j<n;j++) - a[k][j]+=a[k1][j]*rcon;; - b[k]+=b[k1]*rcon; - } -} - -return; -} - - -/*********** -** ludcmp ** -************ -** From the procedure of the same name in "Numerical Recipes in Pascal", -** by Press, Flannery, Tukolsky, and Vetterling. -** Given an nxn matrix a[], this routine replaces it by the LU -** decomposition of a rowwise permutation of itself. a[] and n -** are input. a[] is output, modified as follows: -** -- -- -** | b(1,1) b(1,2) b(1,3)... | -** | a(2,1) b(2,2) b(2,3)... | -** | a(3,1) a(3,2) b(3,3)... | -** | a(4,1) a(4,2) a(4,3)... | -** | ... | -** -- -- -** -** Where the b(i,j) elements form the upper triangular matrix of the -** LU decomposition, and the a(i,j) elements form the lower triangular -** elements. The LU decomposition is calculated so that we don't -** need to store the a(i,i) elements (which would have laid along the -** diagonal and would have all been 1). -** -** indx[] is an output vector that records the row permutation -** effected by the partial pivoting; d is output as +/-1 depending -** on whether the number of row interchanges was even or odd, -** respectively. -** Returns 0 if matrix singular, else returns 1. -*/ -static int ludcmp(double a[][LUARRAYCOLS], - int n, - int indx[], - int *d) -{ - -double big; /* Holds largest element value */ -double sum; -double dum; /* Holds dummy value */ -int i,j,k; /* Indexes */ -int imax=0; /* Holds max index value */ -double tiny; /* A really small number */ - -tiny=(double)1.0e-20; - -*d=1; /* No interchanges yet */ - -for(i=0;i<n;i++) -{ big=(double)0.0; - for(j=0;j<n;j++) - if((double)fabs(a[i][j]) > big) - big=fabs(a[i][j]); - /* Bail out on singular matrix */ - if(big==(double)0.0) return(0); - LUtempvv[i]=1.0/big; -} - -/* -** Crout's algorithm...loop over columns. -*/ -for(j=0;j<n;j++) -{ if(j!=0) - for(i=0;i<j;i++) - { sum=a[i][j]; - if(i!=0) - for(k=0;k<i;k++) - sum-=(a[i][k]*a[k][j]); - a[i][j]=sum; - } - big=(double)0.0; - for(i=j;i<n;i++) - { sum=a[i][j]; - if(j!=0) - for(k=0;k<j;k++) - sum-=a[i][k]*a[k][j]; - a[i][j]=sum; - dum=LUtempvv[i]*fabs(sum); - if(dum>=big) - { big=dum; - imax=i; - } - } - if(j!=imax) /* Interchange rows if necessary */ - { for(k=0;k<n;k++) - { dum=a[imax][k]; - a[imax][k]=a[j][k]; - a[j][k]=dum; - } - *d=-*d; /* Change parity of d */ - dum=LUtempvv[imax]; - LUtempvv[imax]=LUtempvv[j]; /* Don't forget scale factor */ - LUtempvv[j]=dum; - } - indx[j]=imax; - /* - ** If the pivot element is zero, the matrix is singular - ** (at least as far as the precision of the machine - ** is concerned.) We'll take the original author's - ** recommendation and replace 0.0 with "tiny". - */ - if(a[j][j]==(double)0.0) - a[j][j]=tiny; - - if(j!=(n-1)) - { dum=1.0/a[j][j]; - for(i=j+1;i<n;i++) - a[i][j]=a[i][j]*dum; - } -} - -return(1); -} - -/*********** -** lubksb ** -************ -** Also from "Numerical Recipes in Pascal". -** This routine solves the set of n linear equations A X = B. -** Here, a[][] is input, not as the matrix A, but as its -** LU decomposition, created by the routine ludcmp(). -** Indx[] is input as the permutation vector returned by ludcmp(). -** b[] is input as the right-hand side an returns the -** solution vector X. -** a[], n, and indx are not modified by this routine and -** can be left in place for different values of b[]. -** This routine takes into account the possibility that b will -** begin with many zero elements, so it is efficient for use in -** matrix inversion. -*/ -static void lubksb( double a[][LUARRAYCOLS], - int n, - int indx[LUARRAYROWS], - double b[LUARRAYROWS]) -{ - -int i,j; /* Indexes */ -int ip; /* "pointer" into indx */ -int ii; -double sum; - -/* -** When ii is set to a positive value, it will become -** the index of the first nonvanishing element of b[]. -** We now do the forward substitution. The only wrinkle -** is to unscramble the permutation as we go. -*/ -ii=-1; -for(i=0;i<n;i++) -{ ip=indx[i]; - sum=b[ip]; - b[ip]=b[i]; - if(ii!=-1) - for(j=ii;j<i;j++) - sum=sum-a[i][j]*b[j]; - else - /* - ** If a nonzero element is encountered, we have - ** to do the sums in the loop above. - */ - if(sum!=(double)0.0) - ii=i; - b[i]=sum; -} -/* -** Do backsubstitution -*/ -for(i=(n-1);i>=0;i--) -{ - sum=b[i]; - if(i!=(n-1)) - for(j=(i+1);j<n;j++) - sum=sum-a[i][j]*b[j]; - b[i]=sum/a[i][i]; -} -return; -} - -/************ -** lusolve ** -************* -** Solve a linear set of equations: A x = b -** Original matrix A will be destroyed by this operation. -** Returns 0 if matrix is singular, 1 otherwise. -*/ -static int lusolve(double a[][LUARRAYCOLS], - int n, - double b[LUARRAYROWS]) -{ -int indx[LUARRAYROWS]; -int d; -#ifdef DEBUG -int i,j; -#endif - -if(ludcmp(a,n,indx,&d)==0) return(0); - -/* Matrix not singular -- proceed */ -lubksb(a,n,indx,b); - -#ifdef DEBUG -printf("Solution:\n"); -for(i=0;i<n;i++) -{ - for(j=0;j<n;j++){ - /* - printf("%6.2f ",a[i][j]); - */ - } - printf("%6.2f\t",b[i]); - /* - printf("\n"); - */ -} -printf("\n"); -#endif - -return(1); -} diff --git a/nbench1.h b/nbench1.h deleted file mode 100644 index 13a5907..0000000 --- a/nbench1.h +++ /dev/null @@ -1,428 +0,0 @@ -/* -** nbench1.h -** Header for nbench1.c -** BYTEmark (tm) -** BYTE's Native Mode Benchmarks -** Rick Grehan, BYTE Magazine -** -** Creation: -** Revision: 3/95;10/95 -** -** DISCLAIMER -** The source, executable, and documentation files that comprise -** the BYTEmark benchmarks are made available on an "as is" basis. -** This means that we at BYTE Magazine have made every reasonable -** effort to verify that the there are no errors in the source and -** executable code. We cannot, however, guarantee that the programs -** are error-free. Consequently, McGraw-HIll and BYTE Magazine make -** no claims in regard to the fitness of the source code, executable -** code, and documentation of the BYTEmark. -** Furthermore, BYTE Magazine, McGraw-Hill, and all employees -** of McGraw-Hill cannot be held responsible for any damages resulting -** from the use of this code or the results obtained from using -** this code. -*/ - -/* -** DEFINES -*/ -/* #define DEBUG */ - -/* -** EXTERNALS -*/ -extern ulong global_min_ticks; - -extern SortStruct global_numsortstruct; -extern SortStruct global_strsortstruct; -extern BitOpStruct global_bitopstruct; -extern EmFloatStruct global_emfloatstruct; -extern FourierStruct global_fourierstruct; -extern AssignStruct global_assignstruct; -extern IDEAStruct global_ideastruct; -extern HuffStruct global_huffstruct; -extern NNetStruct global_nnetstruct; -extern LUStruct global_lustruct; - -/* External PROTOTYPES */ -/*extern unsigned long abs_randwc(unsigned long num);*/ /* From MISC */ -/*extern long randnum(long lngval);*/ -extern int32 randwc(int32 num); -extern u32 abs_randwc(u32 num); -extern int32 randnum(int32 lngval); - -extern farvoid *AllocateMemory(unsigned long nbytes, /* From SYSSPEC */ - int *errorcode); -extern void FreeMemory(farvoid *mempointer, - int *errorcode); -extern void MoveMemory(farvoid *destination, - farvoid *source, unsigned long nbytes); -extern void ReportError(char *context, int errorcode); -extern void ErrorExit(); -extern unsigned long StartStopwatch(); -extern unsigned long StopStopwatch(unsigned long startticks); -extern unsigned long TicksToSecs(unsigned long tickamount); -extern double TicksToFracSecs(unsigned long tickamount); - -/***************** -** NUMERIC SORT ** -*****************/ - -/* -** PROTOTYPES -*/ -void DoNumSort(void); -static ulong DoNumSortIteration(farlong *arraybase, - ulong arraysize, - uint numarrays); -static void LoadNumArrayWithRand(farlong *array, - ulong arraysize, - uint numarrays); -static void NumHeapSort(farlong *array, - ulong bottom, - ulong top); -static void NumSift(farlong *array, - ulong i, - ulong j); - - -/**************** -** STRING SORT ** -***************** -*/ - - -/* -** PROTOTYPES -*/ -void DoStringSort(void); -static ulong DoStringSortIteration(faruchar *arraybase, - uint numarrays, - ulong arraysize); -static farulong *LoadStringArray(faruchar *strarray, - uint numarrays, - ulong *strings, - ulong arraysize); -static void stradjust(farulong *optrarray, - faruchar *strarray, - ulong nstrings, - ulong i, - uchar l); -static void StrHeapSort(farulong *optrarray, - faruchar *strarray, - ulong numstrings, - ulong bottom, - ulong top); -static int str_is_less(farulong *optrarray, - faruchar *strarray, - ulong numstrings, - ulong a, - ulong b); -static void strsift(farulong *optrarray, - faruchar *strarray, - ulong numstrings, - ulong i, - ulong j); - -/************************ -** BITFIELD OPERATIONS ** -************************* -*/ - -/* -** PROTOTYPES -*/ -void DoBitops(void); -static ulong DoBitfieldIteration(farulong *bitarraybase, - farulong *bitoparraybase, - long bitoparraysize, - ulong *nbitops); -static void ToggleBitRun(farulong *bitmap, - ulong bit_addr, - ulong nbits, - uint val); -static void FlipBitRun(farulong *bitmap, - ulong bit_addr, - ulong nbits); - -/**************************** -** EMULATED FLOATING POINT ** -****************************/ -typedef struct -{ - u8 type; /* Indicates, NORMAL, SUBNORMAL, etc. */ - u8 sign; /* Mantissa sign */ - short exp; /* Signed exponent...no bias */ - u16 mantissa[INTERNAL_FPF_PRECISION]; -} InternalFPF; - -/* -** PROTOTYPES -*/ -void DoEmFloat(void); - -/* -** EXTERNALS -*/ -extern void SetupCPUEmFloatArrays(InternalFPF *abase, - InternalFPF *bbase, InternalFPF *cbase, - ulong arraysize); -extern ulong DoEmFloatIteration(InternalFPF *abase, - InternalFPF *bbase, InternalFPF *cbase, - ulong arraysize, ulong loops); - -/************************* -** FOURIER COEFFICIENTS ** -*************************/ - -/* -** PROTOTYPES -*/ -void DoFourier(void); -static ulong DoFPUTransIteration(fardouble *abase, - fardouble *bbase, - ulong arraysize); -static double TrapezoidIntegrate(double x0, - double x1, - int nsteps, - double omegan, - int select); -static double thefunction(double x, - double omegan, - int select); - -/************************* -** ASSIGNMENT ALGORITHM ** -*************************/ - -/* -** DEFINES -*/ - -#define ASSIGNROWS 101L -#define ASSIGNCOLS 101L - -/* -** TYPEDEFS -*/ -typedef struct { - union { - long *p; - long (*ap)[ASSIGNROWS][ASSIGNCOLS]; - } ptrs; -} longptr; - -/* -** PROTOTYPES -*/ -void DoAssign(void); -static ulong DoAssignIteration(farlong *arraybase, - ulong numarrays); -static void LoadAssignArrayWithRand(farlong *arraybase, - ulong numarrays); -static void LoadAssign(farlong arraybase[][ASSIGNCOLS]); -static void CopyToAssign(farlong arrayfrom[][ASSIGNCOLS], - long arrayto[][ASSIGNCOLS]); -static void Assignment(farlong arraybase[][ASSIGNCOLS]); -static void calc_minimum_costs(long tableau[][ASSIGNCOLS]); -static int first_assignments(long tableau[][ASSIGNCOLS], - short assignedtableau[][ASSIGNCOLS]); -static void second_assignments(long tableau[][ASSIGNCOLS], - short assignedtableau[][ASSIGNCOLS]); - -/******************** -** IDEA ENCRYPTION ** -********************/ - -/* -** DEFINES -*/ -#define IDEAKEYSIZE 16 -#define IDEABLOCKSIZE 8 -#define ROUNDS 8 -#define KEYLEN (6*ROUNDS+4) - -/* -** MACROS -*/ -#define low16(x) ((x) & 0x0FFFF) -#define MUL(x,y) (x=mul(low16(x),y)) - - -typedef u16 IDEAkey[KEYLEN]; - -/* -** PROTOTYPES -*/ -void DoIDEA(void); -static ulong DoIDEAIteration(faruchar *plain1, - faruchar *crypt1, faruchar *plain2, - ulong arraysize, ulong nloops, - IDEAkey Z, IDEAkey DK); -static u16 mul(register u16 a, register u16 b); -static u16 inv(u16 x); -static void en_key_idea(u16 userkey[8], IDEAkey Z); -static void de_key_idea(IDEAkey Z, IDEAkey DK); -static void cipher_idea(u16 in[4], u16 out[4], IDEAkey Z); - -/************************ -** HUFFMAN COMPRESSION ** -************************/ - -/* -** DEFINES -*/ -#define EXCLUDED 32000L /* Big positive value */ - -/* -** TYPEDEFS -*/ -typedef struct { - uchar c; /* Byte value */ - float freq; /* Frequency */ - int parent; /* Parent node */ - int left; /* Left pointer = 0 */ - int right; /* Right pointer = 1 */ -} huff_node; - -/* -** GLOBALS -*/ -static huff_node *hufftree; /* The huffman tree */ -static long plaintextlen; /* Length of plaintext */ - -/* -** PROTOTYPES -*/ -void DoHuffman(); -static void create_text_line(farchar *dt,long nchars); -static void create_text_block(farchar *tb, ulong tblen, - ushort maxlinlen); -static ulong DoHuffIteration(farchar *plaintext, - farchar *comparray, farchar *decomparray, - ulong arraysize, ulong nloops, huff_node *hufftree); -static void SetCompBit(u8 *comparray, u32 bitoffset, char bitchar); -static int GetCompBit(u8 *comparray, u32 bitoffset); - -/******************************** -** BACK PROPAGATION NEURAL NET ** -********************************/ - -/* -** DEFINES -*/ -#define T 1 /* TRUE */ -#define F 0 /* FALSE */ -#define ERR -1 -#define MAXPATS 10 /* max number of patterns in data file */ -#define IN_X_SIZE 5 /* number of neurodes/row of input layer */ -#define IN_Y_SIZE 7 /* number of neurodes/col of input layer */ -#define IN_SIZE 35 /* equals IN_X_SIZE*IN_Y_SIZE */ -#define MID_SIZE 8 /* number of neurodes in middle layer */ -#define OUT_SIZE 8 /* number of neurodes in output layer */ -#define MARGIN 0.1 /* how near to 1,0 do we have to come to stop? */ -#define BETA 0.09 /* beta learning constant */ -#define ALPHA 0.09 /* momentum term constant */ -#define STOP 0.1 /* when worst_error less than STOP, training is done */ - -/* -** GLOBALS -*/ -double mid_wts[MID_SIZE][IN_SIZE]; /* middle layer weights */ -double out_wts[OUT_SIZE][MID_SIZE]; /* output layer weights */ -double mid_out[MID_SIZE]; /* middle layer output */ -double out_out[OUT_SIZE]; /* output layer output */ -double mid_error[MID_SIZE]; /* middle layer errors */ -double out_error[OUT_SIZE]; /* output layer errors */ -double mid_wt_change[MID_SIZE][IN_SIZE]; /* storage for last wt change */ -double out_wt_change[OUT_SIZE][MID_SIZE]; /* storage for last wt change */ -double in_pats[MAXPATS][IN_SIZE]; /* input patterns */ -double out_pats[MAXPATS][OUT_SIZE]; /* desired output patterns */ -double tot_out_error[MAXPATS]; /* measure of whether net is done */ -double out_wt_cum_change[OUT_SIZE][MID_SIZE]; /* accumulated wt changes */ -double mid_wt_cum_change[MID_SIZE][IN_SIZE]; /* accumulated wt changes */ - -double worst_error; /* worst error each pass through the data */ -double average_error; /* average error each pass through the data */ -double avg_out_error[MAXPATS]; /* average error each pattern */ - -int iteration_count; /* number of passes thru network so far */ -int numpats; /* number of patterns in data file */ -int numpasses; /* number of training passes through data file */ -int learned; /* flag--if TRUE, network has learned all patterns */ - -/* -** The Neural Net test requires an input data file. -** The name is specified here. -*/ -char *inpath="NNET.DAT"; - -/* -** PROTOTYPES -*/ -void DoNNET(void); -static ulong DoNNetIteration(ulong nloops); -static void do_mid_forward(int patt); -static void do_out_forward(); -void display_output(int patt); -static void do_forward_pass(int patt); -static void do_out_error(int patt); -static void worst_pass_error(); -static void do_mid_error(); -static void adjust_out_wts(); -static void adjust_mid_wts(); -static void do_back_pass(int patt); -static void move_wt_changes(); -static int check_out_error(); -static void zero_changes(); -static void randomize_wts(); -static int read_data_file(); -/* static int initialize_net(); */ - -/*********************** -** LU DECOMPOSITION ** -** (Linear Equations) ** -***********************/ - -/* -** DEFINES -*/ - -#define LUARRAYROWS 101L -#define LUARRAYCOLS 101L - -/* -** TYPEDEFS -*/ -typedef struct -{ union - { fardouble *p; - fardouble (*ap)[][LUARRAYCOLS]; - } ptrs; -} LUdblptr; - -/* -** GLOBALS -*/ -fardouble *LUtempvv; - -/* -** PROTOTYPES -*/ -void DoLU(void); -static void LUFreeMem(fardouble *a, fardouble *b, - fardouble *abase, fardouble *bbase); -static ulong DoLUIteration(fardouble *a, fardouble *b, - fardouble *abase, fardouble *bbase, - ulong numarrays); -static void build_problem( double a[][LUARRAYCOLS], - int n, double b[LUARRAYROWS]); -static int ludcmp(double a[][LUARRAYCOLS], - int n, int indx[], int *d); -static void lubksb(double a[][LUARRAYCOLS], - int n, int indx[LUARRAYROWS], - double b[LUARRAYROWS]); -static int lusolve(double a[][LUARRAYCOLS], - int n, double b[LUARRAYROWS]); - - diff --git a/neural.c b/neural.c new file mode 100644 index 0000000..0a1bced --- /dev/null +++ b/neural.c @@ -0,0 +1,815 @@ +#include <stdio.h> /* +#include <stdlib.h> +#include <string.h> +#include <strings.h>*/ +#include <math.h> +#include "nmglobal.h" +#include "nbench1.h" + +/* +** The Neural Net test requires an input data file. +** The name is specified here. +*/ +char *inpath="NNET.DAT"; + +/******************************** +** BACK PROPAGATION NEURAL NET ** +********************************* +** This code is a modified version of the code +** that was submitted to BYTE Magazine by +** Maureen Caudill. It accomanied an article +** that I CANNOT NOW RECALL. +** The author's original heading/comment was +** as follows: +** +** Backpropagation Network +** Written by Maureen Caudill +** in Think C 4.0 on a Macintosh +** +** (c) Maureen Caudill 1988-1991 +** This network will accept 5x7 input patterns +** and produce 8 bit output patterns. +** The source code may be copied or modified without restriction, +** but no fee may be charged for its use. +** +** ++++++++++++++ +** I have modified the code so that it will work +** on systems other than a Macintosh -- RG +*/ + +/*********** +** DoNNet ** +************ +** Perform the neural net benchmark. +** Note that this benchmark is one of the few that +** requires an input file. That file is "NNET.DAT" and +** should be on the local directory (from which the +** benchmark program in launched). +*/ +void DoNNET(void) +{ +NNetStruct *locnnetstruct; /* Local ptr to global data */ +char *errorcontext; +ulong accumtime; +double iterations; + +/* +** Link to global data +*/ +locnnetstruct=&global_nnetstruct; + +/* +** Set error context +*/ +errorcontext="CPU:NNET"; + +/* +** Init random number generator. +** NOTE: It is important that the random number generator +** be re-initialized for every pass through this test. +** The NNET algorithm uses the random number generator +** to initialize the net. Results are sensitive to +** the initial neural net state. +*/ +/* randnum(3L); */ +randnum((int32)3); + +/* +** Read in the input and output patterns. We'll do this +** only once here at the beginning. These values don't +** change once loaded. +*/ +if(read_data_file()!=0) + ErrorExit(); + + +/* +** See if we need to perform self adjustment loop. +*/ +if(locnnetstruct->adjust==0) +{ + /* + ** Do self-adjustment. This involves initializing the + ** # of loops and increasing the loop count until we + ** get a number of loops that we can use. + */ + for(locnnetstruct->loops=1L; + locnnetstruct->loops<MAXNNETLOOPS; + locnnetstruct->loops++) + { /*randnum(3L); */ + randnum((int32)3); + if(DoNNetIteration(locnnetstruct->loops) + >global_min_ticks) break; + } +} + +/* +** All's well if we get here. Do the test. +*/ +accumtime=0L; +iterations=(double)0.0; + +do { + /* randnum(3L); */ /* Gotta do this for Neural Net */ + randnum((int32)3); /* Gotta do this for Neural Net */ + accumtime+=DoNNetIteration(locnnetstruct->loops); + iterations+=(double)locnnetstruct->loops; +} while(TicksToSecs(accumtime)<locnnetstruct->request_secs); + +/* +** Clean up, calculate results, and go home. Be sure to +** show that we don't have to rerun adjustment code. +*/ +locnnetstruct->iterspersec=iterations / TicksToFracSecs(accumtime); + +if(locnnetstruct->adjust==0) + locnnetstruct->adjust=1; + + +return; +} + +/******************** +** DoNNetIteration ** +********************* +** Do a single iteration of the neural net benchmark. +** By iteration, we mean a "learning" pass. +*/ +static ulong DoNNetIteration(ulong nloops) +{ +ulong elapsed; /* Elapsed time */ +int patt; + +/* +** Run nloops learning cycles. Notice that, counted with +** the learning cycle is the weight randomization and +** zeroing of changes. This should reduce clock jitter, +** since we don't have to stop and start the clock for +** each iteration. +*/ +elapsed=StartStopwatch(); +while(nloops--) +{ + randomize_wts(); + zero_changes(); + iteration_count=1; + learned = F; + numpasses = 0; + while (learned == F) + { + for (patt=0; patt<numpats; patt++) + { + worst_error = 0.0; /* reset this every pass through data */ + move_wt_changes(); /* move last pass's wt changes to momentum array */ + do_forward_pass(patt); + do_back_pass(patt); + iteration_count++; + } + numpasses ++; + learned = check_out_error(); + } +#ifdef DEBUG +printf("Learned in %d passes\n",numpasses); +#endif +} +return(StopStopwatch(elapsed)); +} + +/************************* +** do_mid_forward(patt) ** +************************** +** Process the middle layer's forward pass +** The activation of middle layer's neurode is the weighted +** sum of the inputs from the input pattern, with sigmoid +** function applied to the inputs. +**/ +static void do_mid_forward(int patt) +{ +double sum; +int neurode, i; + +for (neurode=0;neurode<MID_SIZE; neurode++) +{ + sum = 0.0; + for (i=0; i<IN_SIZE; i++) + { /* compute weighted sum of input signals */ + sum += mid_wts[neurode][i]*in_pats[patt][i]; + } + /* + ** apply sigmoid function f(x) = 1/(1+exp(-x)) to weighted sum + */ + sum = 1.0/(1.0+exp(-sum)); + mid_out[neurode] = sum; +} +return; +} + +/********************* +** do_out_forward() ** +********************** +** process the forward pass through the output layer +** The activation of the output layer is the weighted sum of +** the inputs (outputs from middle layer), modified by the +** sigmoid function. +**/ +static void do_out_forward() +{ +double sum; +int neurode, i; + +for (neurode=0; neurode<OUT_SIZE; neurode++) +{ + sum = 0.0; + for (i=0; i<MID_SIZE; i++) + { /* + ** compute weighted sum of input signals + ** from middle layer + */ + sum += out_wts[neurode][i]*mid_out[i]; + } + /* + ** Apply f(x) = 1/(1+exp(-x)) to weighted input + */ + sum = 1.0/(1.0+exp(-sum)); + out_out[neurode] = sum; +} +return; +} + +/************************* +** display_output(patt) ** +************************** +** Display the actual output vs. the desired output of the +** network. +** Once the training is complete, and the "learned" flag set +** to TRUE, then display_output sends its output to both +** the screen and to a text output file. +** +** NOTE: This routine has been disabled in the benchmark +** version. -- RG +**/ +/* +void display_output(int patt) +{ +int i; + + fprintf(outfile,"\n Iteration # %d",iteration_count); + fprintf(outfile,"\n Desired Output: "); + + for (i=0; i<OUT_SIZE; i++) + { + fprintf(outfile,"%6.3f ",out_pats[patt][i]); + } + fprintf(outfile,"\n Actual Output: "); + + for (i=0; i<OUT_SIZE; i++) + { + fprintf(outfile,"%6.3f ",out_out[i]); + } + fprintf(outfile,"\n"); + return; +} +*/ + +/********************** +** do_forward_pass() ** +*********************** +** control function for the forward pass through the network +** NOTE: I have disabled the call to display_output() in +** the benchmark version -- RG. +**/ +static void do_forward_pass(int patt) +{ +do_mid_forward(patt); /* process forward pass, middle layer */ +do_out_forward(); /* process forward pass, output layer */ +/* display_output(patt); ** display results of forward pass */ +return; +} + +/*********************** +** do_out_error(patt) ** +************************ +** Compute the error for the output layer neurodes. +** This is simply Desired - Actual. +**/ +static void do_out_error(int patt) +{ +int neurode; +double error,tot_error, sum; + +tot_error = 0.0; +sum = 0.0; +for (neurode=0; neurode<OUT_SIZE; neurode++) +{ + out_error[neurode] = out_pats[patt][neurode] - out_out[neurode]; + /* + ** while we're here, also compute magnitude + ** of total error and worst error in this pass. + ** We use these to decide if we are done yet. + */ + error = out_error[neurode]; + if (error <0.0) + { + sum += -error; + if (-error > tot_error) + tot_error = -error; /* worst error this pattern */ + } + else + { + sum += error; + if (error > tot_error) + tot_error = error; /* worst error this pattern */ + } +} +avg_out_error[patt] = sum/OUT_SIZE; +tot_out_error[patt] = tot_error; +return; +} + +/*********************** +** worst_pass_error() ** +************************ +** Find the worst and average error in the pass and save it +**/ +static void worst_pass_error() +{ +double error,sum; + +int i; + +error = 0.0; +sum = 0.0; +for (i=0; i<numpats; i++) +{ + if (tot_out_error[i] > error) error = tot_out_error[i]; + sum += avg_out_error[i]; +} +worst_error = error; +average_error = sum/numpats; +return; +} + +/******************* +** do_mid_error() ** +******************** +** Compute the error for the middle layer neurodes +** This is based on the output errors computed above. +** Note that the derivative of the sigmoid f(x) is +** f'(x) = f(x)(1 - f(x)) +** Recall that f(x) is merely the output of the middle +** layer neurode on the forward pass. +**/ +static void do_mid_error() +{ +double sum; +int neurode, i; + +for (neurode=0; neurode<MID_SIZE; neurode++) +{ + sum = 0.0; + for (i=0; i<OUT_SIZE; i++) + sum += out_wts[i][neurode]*out_error[i]; + + /* + ** apply the derivative of the sigmoid here + ** Because of the choice of sigmoid f(I), the derivative + ** of the sigmoid is f'(I) = f(I)(1 - f(I)) + */ + mid_error[neurode] = mid_out[neurode]*(1-mid_out[neurode])*sum; +} +return; +} + +/********************* +** adjust_out_wts() ** +********************** +** Adjust the weights of the output layer. The error for +** the output layer has been previously propagated back to +** the middle layer. +** Use the Delta Rule with momentum term to adjust the weights. +**/ +static void adjust_out_wts() +{ +int weight, neurode; +double learn,delta,alph; + +learn = BETA; +alph = ALPHA; +for (neurode=0; neurode<OUT_SIZE; neurode++) +{ + for (weight=0; weight<MID_SIZE; weight++) + { + /* standard delta rule */ + delta = learn * out_error[neurode] * mid_out[weight]; + + /* now the momentum term */ + delta += alph * out_wt_change[neurode][weight]; + out_wts[neurode][weight] += delta; + + /* keep track of this pass's cum wt changes for next pass's momentum */ + out_wt_cum_change[neurode][weight] += delta; + } +} +return; +} + +/************************* +** adjust_mid_wts(patt) ** +************************** +** Adjust the middle layer weights using the previously computed +** errors. +** We use the Generalized Delta Rule with momentum term +**/ +static void adjust_mid_wts(int patt) +{ +int weight, neurode; +double learn,alph,delta; + +learn = BETA; +alph = ALPHA; +for (neurode=0; neurode<MID_SIZE; neurode++) +{ + for (weight=0; weight<IN_SIZE; weight++) + { + /* first the basic delta rule */ + delta = learn * mid_error[neurode] * in_pats[patt][weight]; + + /* with the momentum term */ + delta += alph * mid_wt_change[neurode][weight]; + mid_wts[neurode][weight] += delta; + + /* keep track of this pass's cum wt changes for next pass's momentum */ + mid_wt_cum_change[neurode][weight] += delta; + } +} +return; +} + +/******************* +** do_back_pass() ** +******************** +** Process the backward propagation of error through network. +**/ +void do_back_pass(int patt) +{ + +do_out_error(patt); +do_mid_error(); +adjust_out_wts(); +adjust_mid_wts(patt); + +return; +} + + +/********************** +** move_wt_changes() ** +*********************** +** Move the weight changes accumulated last pass into the wt-change +** array for use by the momentum term in this pass. Also zero out +** the accumulating arrays after the move. +**/ +static void move_wt_changes() +{ +int i,j; + +for (i = 0; i<MID_SIZE; i++) + for (j = 0; j<IN_SIZE; j++) + { + mid_wt_change[i][j] = mid_wt_cum_change[i][j]; + /* + ** Zero it out for next pass accumulation. + */ + mid_wt_cum_change[i][j] = 0.0; + } + +for (i = 0; i<OUT_SIZE; i++) + for (j=0; j<MID_SIZE; j++) + { + out_wt_change[i][j] = out_wt_cum_change[i][j]; + out_wt_cum_change[i][j] = 0.0; + } + +return; +} + +/********************** +** check_out_error() ** +*********************** +** Check to see if the error in the output layer is below +** MARGIN*OUT_SIZE for all output patterns. If so, then +** assume the network has learned acceptably well. This +** is simply an arbitrary measure of how well the network +** has learned -- many other standards are possible. +**/ +static int check_out_error() +{ +int result,i,error; + +result = T; +error = F; +worst_pass_error(); /* identify the worst error in this pass */ + +/* +#ifdef DEBUG +printf("\n Iteration # %d",iteration_count); +#endif +*/ +for (i=0; i<numpats; i++) +{ +/* printf("\n Error pattern %d: Worst: %8.3f; Average: %8.3f", + i+1,tot_out_error[i], avg_out_error[i]); + fprintf(outfile, + "\n Error pattern %d: Worst: %8.3f; Average: %8.3f", + i+1,tot_out_error[i]); +*/ + + if (worst_error >= STOP) result = F; + if (tot_out_error[i] >= 16.0) error = T; +} + +if (error == T) result = ERR; + + +#ifdef DEBUG +/* printf("\n Error this pass thru data: Worst: %8.3f; Average: %8.3f", + worst_error,average_error); +*/ +/* fprintf(outfile, + "\n Error this pass thru data: Worst: %8.3f; Average: %8.3f", + worst_error, average_error); */ +#endif + +return(result); +} + + +/******************* +** zero_changes() ** +******************** +** Zero out all the wt change arrays +**/ +static void zero_changes() +{ +int i,j; + +for (i = 0; i<MID_SIZE; i++) +{ + for (j=0; j<IN_SIZE; j++) + { + mid_wt_change[i][j] = 0.0; + mid_wt_cum_change[i][j] = 0.0; + } +} + +for (i = 0; i< OUT_SIZE; i++) +{ + for (j=0; j<MID_SIZE; j++) + { + out_wt_change[i][j] = 0.0; + out_wt_cum_change[i][j] = 0.0; + } +} +return; +} + + +/******************** +** randomize_wts() ** +********************* +** Intialize the weights in the middle and output layers to +** random values between -0.25..+0.25 +** Function rand() returns a value between 0 and 32767. +** +** NOTE: Had to make alterations to how the random numbers were +** created. -- RG. +**/ +static void randomize_wts() +{ +int neurode,i; +double value; + +/* +** Following not used int benchmark version -- RG +** +** printf("\n Please enter a random number seed (1..32767): "); +** scanf("%d", &i); +** srand(i); +*/ + +for (neurode = 0; neurode<MID_SIZE; neurode++) +{ + for(i=0; i<IN_SIZE; i++) + { + /* value=(double)abs_randwc(100000L); */ + value=(double)abs_randwc((int32)100000); + value=value/(double)100000.0 - (double) 0.5; + mid_wts[neurode][i] = value/2; + } +} +for (neurode=0; neurode<OUT_SIZE; neurode++) +{ + for(i=0; i<MID_SIZE; i++) + { + /* value=(double)abs_randwc(100000L); */ + value=(double)abs_randwc((int32)100000); + value=value/(double)10000.0 - (double) 0.5; + out_wts[neurode][i] = value/2; + } +} + +return; +} + + +/********************* +** read_data_file() ** +********************** +** Read in the input data file and store the patterns in +** in_pats and out_pats. +** The format for the data file is as follows: +** +** line# data expected +** ----- ------------------------------ +** 1 In-X-size,in-y-size,out-size +** 2 number of patterns in file +** 3 1st X row of 1st input pattern +** 4.. following rows of 1st input pattern pattern +** in-x+2 y-out pattern +** 1st X row of 2nd pattern +** etc. +** +** Each row of data is separated by commas or spaces. +** The data is expected to be ascii text corresponding to +** either a +1 or a 0. +** +** Sample input for a 1-pattern file (The comments to the +** right may NOT be in the file unless more sophisticated +** parsing of the input is done.): +** +** 5,7,8 input is 5x7 grid, output is 8 bits +** 1 one pattern in file +** 0,1,1,1,0 beginning of pattern for "O" +** 1,0,0,0,1 +** 1,0,0,0,1 +** 1,0,0,0,1 +** 1,0,0,0,1 +** 1,0,0,0,0 +** 0,1,1,1,0 +** 0,1,0,0,1,1,1,1 ASCII code for "O" -- 0100 1111 +** +** Clearly, this simple scheme can be expanded or enhanced +** any way you like. +** +** Returns -1 if any file error occurred, otherwise 0. +**/ +static int read_data_file() +{ +FILE *infile; + +int xinsize,yinsize,youtsize; +int patt, element, i, row; +int vals_read; +int val1,val2,val3,val4,val5,val6,val7,val8; + +/* printf("\n Opening and retrieving data from file."); */ + +infile = fopen(inpath, "r"); +if (infile == NULL) +{ + printf("\n CPU:NNET--error in opening file!"); + return -1 ; +} +vals_read =fscanf(infile,"%d %d %d",&xinsize,&yinsize,&youtsize); +if (vals_read != 3) +{ + printf("\n CPU:NNET -- Should read 3 items in line one; did read %d",vals_read); + return -1; +} +vals_read=fscanf(infile,"%d",&numpats); +if (vals_read !=1) +{ + printf("\n CPU:NNET -- Should read 1 item in line 2; did read %d",vals_read); + return -1; +} +if (numpats > MAXPATS) + numpats = MAXPATS; + +for (patt=0; patt<numpats; patt++) +{ + element = 0; + for (row = 0; row<yinsize; row++) + { + vals_read = fscanf(infile,"%d %d %d %d %d", + &val1, &val2, &val3, &val4, &val5); + if (vals_read != 5) + { + printf ("\n CPU:NNET -- failure in reading input!"); + return -1; + } + element=row*xinsize; + + in_pats[patt][element] = (double) val1; element++; + in_pats[patt][element] = (double) val2; element++; + in_pats[patt][element] = (double) val3; element++; + in_pats[patt][element] = (double) val4; element++; + in_pats[patt][element] = (double) val5; element++; + } + for (i=0;i<IN_SIZE; i++) + { + if (in_pats[patt][i] >= 0.9) + in_pats[patt][i] = 0.9; + if (in_pats[patt][i] <= 0.1) + in_pats[patt][i] = 0.1; + } + element = 0; + vals_read = fscanf(infile,"%d %d %d %d %d %d %d %d", + &val1, &val2, &val3, &val4, &val5, &val6, &val7, &val8); + + out_pats[patt][element] = (double) val1; element++; + out_pats[patt][element] = (double) val2; element++; + out_pats[patt][element] = (double) val3; element++; + out_pats[patt][element] = (double) val4; element++; + out_pats[patt][element] = (double) val5; element++; + out_pats[patt][element] = (double) val6; element++; + out_pats[patt][element] = (double) val7; element++; + out_pats[patt][element] = (double) val8; element++; +} + +/* printf("\n Closing the input file now. "); */ + +fclose(infile); +return(0); +} + +/********************* +** initialize_net() ** +********************** +** Do all the initialization stuff before beginning +*/ +/* +static int initialize_net() +{ +int err_code; + +randomize_wts(); +zero_changes(); +err_code = read_data_file(); +iteration_count = 1; +return(err_code); +} +*/ + +/********************** +** display_mid_wts() ** +*********************** +** Display the weights on the middle layer neurodes +** NOTE: This routine is not used in the benchmark +** test -- RG +**/ +/* static void display_mid_wts() +{ +int neurode, weight, row, col; + +fprintf(outfile,"\n Weights of Middle Layer neurodes:"); + +for (neurode=0; neurode<MID_SIZE; neurode++) +{ + fprintf(outfile,"\n Mid Neurode # %d",neurode); + for (row=0; row<IN_Y_SIZE; row++) + { + fprintf(outfile,"\n "); + for (col=0; col<IN_X_SIZE; col++) + { + weight = IN_X_SIZE * row + col; + fprintf(outfile," %8.3f ", mid_wts[neurode][weight]); + } + } +} +return; +} +*/ +/********************** +** display_out_wts() ** +*********************** +** Display the weights on the output layer neurodes +** NOTE: This code is not used in the benchmark +** test -- RG +*/ +/* void display_out_wts() +{ +int neurode, weight; + + fprintf(outfile,"\n Weights of Output Layer neurodes:"); + + for (neurode=0; neurode<OUT_SIZE; neurode++) + { + fprintf(outfile,"\n Out Neurode # %d \n",neurode); + for (weight=0; weight<MID_SIZE; weight++) + { + fprintf(outfile," %8.3f ", out_wts[neurode][weight]); + } + } + return; +} +*/ @@ -25,9 +25,6 @@ ** this code. */ -/* is this a 64 bit architecture? If so, this will define LONG64 */ -#include "pointer.h" - /* ** SYSTEM DEFINES */ diff --git a/numsort.c b/numsort.c new file mode 100644 index 0000000..936b390 --- /dev/null +++ b/numsort.c @@ -0,0 +1,292 @@ +#include <stdio.h> +#include <stdint.h> +/* +#include <stdlib.h> +#include <string.h> +#include <strings.h> +#include <math.h> +*/ +#include "nmglobal.h" +#include "nbench1.h" + +/********************* +** NUMERIC HEAPSORT ** +********************** +** This test implements a heapsort algorithm, performed on an +** array of longs. +*/ + +/************** +** DoNumSort ** +*************** +** This routine performs the CPU numeric sort test. +** NOTE: Last version incorrectly stated that the routine +** returned result in # of longword sorted per second. +** Not so; the routine returns # of iterations per sec. +*/ + +void DoNumSort(void) +{ +SortStruct *numsortstruct; /* Local pointer to global struct */ +long *arraybase; /* Base pointers of array */ +long accumtime; /* Accumulated time */ +double iterations; /* Iteration counter */ +char *errorcontext; /* Error context string pointer */ +int systemerror; /* For holding error codes */ + +/* +** Link to global structure +*/ +numsortstruct=&global_numsortstruct; + +/* +** Set the error context string. +*/ +errorcontext="CPU:Numeric Sort"; + +/* +** See if we need to do self adjustment code. +*/ +if(numsortstruct->adjust==0) +{ + /* + ** Self-adjustment code. The system begins by sorting 1 + ** array. If it does that in no time, then two arrays + ** are built and sorted. This process continues until + ** enough arrays are built to handle the tolerance. + */ + numsortstruct->numarrays=1; + while(1) + { + /* + ** Allocate space for arrays + */ + arraybase=(long *)AllocateMemory(sizeof(long) * + numsortstruct->numarrays * numsortstruct->arraysize, + &systemerror); + if(systemerror) + { ReportError(errorcontext,systemerror); + FreeMemory((void *)arraybase, + &systemerror); + } + + /* + ** Do an iteration of the numeric sort. If the + ** elapsed time is less than or equal to the permitted + ** minimum, then allocate for more arrays and + ** try again. + */ + if(DoNumSortIteration(arraybase, + numsortstruct->arraysize, + numsortstruct->numarrays)>global_min_ticks) + break; /* We're ok...exit */ + + FreeMemory((void *)arraybase,&systemerror); + if(numsortstruct->numarrays++>NUMNUMARRAYS) + { printf("CPU:NSORT -- NUMNUMARRAYS hit.\n"); + } + } +} +else +{ /* + ** Allocate space for arrays + */ + arraybase=(long *)AllocateMemory(sizeof(long) * + numsortstruct->numarrays * numsortstruct->arraysize, + &systemerror); + if(systemerror) + { ReportError(errorcontext,systemerror); + FreeMemory((void *)arraybase, + &systemerror); + } + +} +/* +** All's well if we get here. Repeatedly perform sorts until the +** accumulated elapsed time is greater than # of seconds requested. +*/ +accumtime=0L; +iterations=(double)0.0; + +do { + accumtime+=DoNumSortIteration(arraybase, + numsortstruct->arraysize, + numsortstruct->numarrays); + iterations+=(double)1.0; +} while(TicksToSecs(accumtime)<numsortstruct->request_secs); + +/* +** Clean up, calculate results, and go home. Be sure to +** show that we don't have to rerun adjustment code. +*/ +FreeMemory((void *)arraybase,&systemerror); + +numsortstruct->sortspersec=iterations * + (double)numsortstruct->numarrays / TicksToFracSecs(accumtime); + +if(numsortstruct->adjust==0) + numsortstruct->adjust=1; + +#ifdef DEBUG +if (numsort_status==0) printf("Numeric sort: OK\n"); +numsort_status=0; +#endif +return; +} + +/*********************** +** DoNumSortIteration ** +************************ +** This routine executes one iteration of the numeric +** sort benchmark. It returns the number of ticks +** elapsed for the iteration. +*/ +static unsigned long DoNumSortIteration(long *arraybase, + unsigned long arraysize, + unsigned int numarrays) +{ +unsigned long elapsed; /* Elapsed ticks */ +unsigned long i; +/* +** Load up the array with random numbers +*/ +LoadNumArrayWithRand(arraybase,arraysize,numarrays); + +/* +** Start the stopwatch +*/ +elapsed=StartStopwatch(); + +/* +** Execute a heap of heapsorts +*/ +for(i=0;i<numarrays;i++) + NumHeapSort(arraybase+i*arraysize,0L,arraysize-1L); + +/* +** Get elapsed time +*/ +elapsed=StopStopwatch(elapsed); +#ifdef DEBUG +{ + for(i=0;i<arraysize-1;i++) + { /* + ** Compare to check for proper + ** sort. + */ + if(arraybase[i+1]<arraybase[i]) + { printf("Sort Error\n"); + numsort_status=1; + break; + } + } +} +#endif + +return(elapsed); +} + +/************************* +** LoadNumArrayWithRand ** +************************** +** Load up an array with random longs. +*/ +static void LoadNumArrayWithRand(long *array, /* Pointer to arrays */ + unsigned long arraysize, + unsigned int numarrays) /* # of elements in array */ +{ +long i; /* Used for index */ +long *darray; /* Destination array pointer */ +/* +** Initialize the random number generator +*/ +/* randnum(13L); */ +randnum(13); + +/* +** Load up first array with randoms +*/ +for(i=0L;i<arraysize;i++) + /* array[i]=randnum(0L); */ + array[i]=randnum((int32_t)0); + +/* +** Now, if there's more than one array to load, copy the +** first into each of the others. +*/ +darray=array; +while(--numarrays) +{ darray+=arraysize; + for(i=0L;i<arraysize;i++) + darray[i]=array[i]; +} + +return; +} + +/**************** +** NumHeapSort ** +***************** +** Pass this routine a pointer to an array of long +** integers. Also pass in minimum and maximum offsets. +** This routine performs a heap sort on that array. +*/ +static void NumHeapSort(long *array, + unsigned long bottom, /* Lower bound */ + unsigned long top) /* Upper bound */ +{ +unsigned long temp; /* Used to exchange elements */ +unsigned long i; /* Loop index */ + +/* +** First, build a heap in the array +*/ +for(i=(top/2L); i>0; --i) + NumSift(array,i,top); + +/* +** Repeatedly extract maximum from heap and place it at the +** end of the array. When we get done, we'll have a sorted +** array. +*/ +for(i=top; i>0; --i) +{ NumSift(array,bottom,i); + temp=*array; /* Perform exchange */ + *array=*(array+i); + *(array+i)=temp; +} +return; +} + +/************ +** NumSift ** +************* +** Peforms the sift operation on a numeric array, +** constructing a heap in the array. +*/ +static void NumSift(long *array, /* Array of numbers */ + unsigned long i, /* Minimum of array */ + unsigned long j) /* Maximum of array */ +{ +unsigned long k; +long temp; /* Used for exchange */ + +while((i+i)<=j) +{ + k=i+i; + if(k<j) + if(array[k]<array[k+1L]) + ++k; + if(array[i]<array[k]) + { + temp=array[k]; + array[k]=array[i]; + array[i]=temp; + i=k; + } + else + i=j+1; +} +return; +} + diff --git a/stringsort.c b/stringsort.c new file mode 100644 index 0000000..5d7c67d --- /dev/null +++ b/stringsort.c @@ -0,0 +1,575 @@ +#include <stdint.h> +/*#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <strings.h> +#include <math.h>*/ +#include "nmglobal.h" +#include "nbench1.h" + +#ifdef DEBUG +static int numsort_status=0; +static int stringsort_status=0; +#endif + +/******************** +** STRING HEAPSORT ** +********************/ + +/***************** +** DoStringSort ** +****************** +** This routine performs the CPU string sort test. +** Arguments: +** requested_secs = # of seconds to execute test +** stringspersec = # of strings per second sorted (RETURNED) +*/ +void DoStringSort(void) +{ + +SortStruct *strsortstruct; /* Local for sort structure */ +unsigned char *arraybase; /* Base pointer of char array */ +long accumtime; /* Accumulated time */ +double iterations; /* # of iterations */ +char *errorcontext; /* Error context string pointer */ +int systemerror; /* For holding error code */ + +/* +** Link to global structure +*/ +strsortstruct=&global_strsortstruct; + +/* +** Set the error context +*/ +errorcontext="CPU:String Sort"; + +/* +** See if we have to perform self-adjustment code +*/ +if(strsortstruct->adjust==0) +{ + /* + ** Initialize the number of arrays. + */ + strsortstruct->numarrays=1; + while(1) + { + /* + ** Allocate space for array. We'll add an extra 100 + ** bytes to protect memory as strings move around + ** (this can happen during string adjustment) + */ + arraybase=(unsigned char *)AllocateMemory((strsortstruct->arraysize+100L) * + (long)strsortstruct->numarrays,&systemerror); + if(systemerror) + { ReportError(errorcontext,systemerror); + } + + /* + ** Do an iteration of the string sort. If the + ** elapsed time is less than or equal to the permitted + ** minimum, then de-allocate the array, reallocate a + ** an additional array, and try again. + */ + if(DoStringSortIteration(arraybase, + strsortstruct->numarrays, + strsortstruct->arraysize)>global_min_ticks) + break; /* We're ok...exit */ + + FreeMemory((void *)arraybase,&systemerror); + strsortstruct->numarrays+=1; + } +} +else +{ + /* + ** We don't have to perform self adjustment code. + ** Simply allocate the space for the array. + */ + arraybase=(unsigned char *)AllocateMemory((strsortstruct->arraysize+100L) * + (long)strsortstruct->numarrays,&systemerror); + if(systemerror) + { ReportError(errorcontext,systemerror); + } +} +/* +** All's well if we get here. Repeatedly perform sorts until the +** accumulated elapsed time is greater than # of seconds requested. +*/ +accumtime=0L; +iterations=(double)0.0; + +do { + accumtime+=DoStringSortIteration(arraybase, + strsortstruct->numarrays, + strsortstruct->arraysize); + iterations+=(double)strsortstruct->numarrays; +} while(TicksToSecs(accumtime)<strsortstruct->request_secs); + +/* +** Clean up, calculate results, and go home. +** Set flag to show we don't need to rerun adjustment code. +*/ +FreeMemory((void *)arraybase,&systemerror); +strsortstruct->sortspersec=iterations / (double)TicksToFracSecs(accumtime); +if(strsortstruct->adjust==0) + strsortstruct->adjust=1; +#ifdef DEBUG +if (stringsort_status==0) printf("String sort: OK\n"); +stringsort_status=0; +#endif +return; +} + +/************************** +** DoStringSortIteration ** +*************************** +** This routine executes one iteration of the string +** sort benchmark. It returns the number of ticks +** Note that this routine also builds the offset pointer +** array. +*/ +static unsigned long DoStringSortIteration(unsigned char *arraybase, + unsigned int numarrays,unsigned long arraysize) +{ +unsigned long *optrarray; /* Offset pointer array */ +unsigned long elapsed; /* Elapsed ticks */ +unsigned long nstrings; /* # of strings in array */ +int syserror; /* System error code */ +unsigned int i; /* Index */ +unsigned long *tempobase; /* Temporary offset pointer base */ +unsigned char *tempsbase; /* Temporary string base pointer */ + +/* +** Load up the array(s) with random numbers +*/ +optrarray=LoadStringArray(arraybase,numarrays,&nstrings,arraysize); + +/* +** Set temp base pointers...they will be modified as the +** benchmark proceeds. +*/ +tempobase=optrarray; +tempsbase=arraybase; + +/* +** Start the stopwatch +*/ +elapsed=StartStopwatch(); + +/* +** Execute heapsorts +*/ +for(i=0;i<numarrays;i++) +{ StrHeapSort(tempobase,tempsbase,nstrings,0L,nstrings-1); + tempobase+=nstrings; /* Advance base pointers */ + tempsbase+=arraysize+100; +} + +/* +** Record elapsed time +*/ +elapsed=StopStopwatch(elapsed); + +#ifdef DEBUG +{ + unsigned long i; + for(i=0;i<nstrings-1;i++) + { /* + ** Compare strings to check for proper + ** sort. + */ + if(str_is_less(optrarray,arraybase,nstrings,i+1,i)) + { printf("Sort Error\n"); + stringsort_status=1; + break; + } + } +} +#endif + +/* +** Release the offset pointer array built by +** LoadStringArray() +*/ +FreeMemory((void *)optrarray,&syserror); + +/* +** Return elapsed ticks. +*/ +return(elapsed); +} + +/******************** +** LoadStringArray ** +********************* +** Initialize the string array with random strings of +** varying sizes. +** Returns the pointer to the offset pointer array. +** Note that since we're creating a number of arrays, this +** routine builds one array, then copies it into the others. +*/ +static unsigned long *LoadStringArray(unsigned char *strarray, /* String array */ + unsigned int numarrays, /* # of arrays */ + unsigned long *nstrings, /* # of strings */ + unsigned long arraysize) /* Size of array */ +{ +unsigned char *tempsbase; /* Temporary string base pointer */ +unsigned long *optrarray; /* Local for pointer */ +unsigned long *tempobase; /* Temporary offset pointer base pointer */ +unsigned long curroffset; /* Current offset */ +int fullflag; /* Indicates full array */ +unsigned char stringlength; /* Length of string */ +unsigned char i; /* Index */ +unsigned long j; /* Another index */ +unsigned int k; /* Yet another index */ +unsigned int l; /* Ans still one more index */ +int systemerror; /* For holding error code */ + +/* +** Initialize random number generator. +*/ +/* randnum(13L); */ +randnum((int32_t)13); + +/* +** Start with no strings. Initialize our current offset pointer +** to 0. +*/ +*nstrings=0L; +curroffset=0L; +fullflag=0; + +do +{ + /* + ** Allocate a string with a random length no + ** shorter than 4 bytes and no longer than + ** 80 bytes. Note we have to also make sure + ** there's room in the array. + */ + /* stringlength=(unsigned char)((1+abs_randwc(76L)) & 0xFFL);*/ + stringlength=(unsigned char)((1+abs_randwc((int32_t)76)) & 0xFFL); + if((unsigned long)stringlength+curroffset+1L>=arraysize) + { stringlength=(unsigned char)((arraysize-curroffset-1L) & + 0xFF); + fullflag=1; /* Indicates a full */ + } + + /* + ** Store length at curroffset and advance current offset. + */ + *(strarray+curroffset)=stringlength; + curroffset++; + + /* + ** Fill up the rest of the string with random bytes. + */ + for(i=0;i<stringlength;i++) + { *(strarray+curroffset)= + /* (unsigned char)(abs_randwc((long)0xFE)); */ + (unsigned char)(abs_randwc((int32_t)0xFE)); + curroffset++; + } + + /* + ** Increment the # of strings counter. + */ + *nstrings+=1L; + +} while(fullflag==0); + +/* +** We now have initialized a single full array. If there +** is more than one array, copy the original into the +** others. +*/ +k=1; +tempsbase=strarray; +while(k<numarrays) +{ tempsbase+=arraysize+100; /* Set base */ + for(l=0;l<arraysize;l++) + tempsbase[l]=strarray[l]; + k++; +} + +/* +** Now the array is full, allocate enough space for an +** offset pointer array. +*/ +optrarray=(unsigned long *)AllocateMemory(*nstrings * sizeof(unsigned long) * + numarrays, + &systemerror); +if(systemerror) +{ ReportError("CPU:Stringsort",systemerror); + FreeMemory((void *)strarray,&systemerror); +} + +/* +** Go through the newly-built string array, building +** offsets and putting them into the offset pointer +** array. +*/ +curroffset=0; +for(j=0;j<*nstrings;j++) +{ *(optrarray+j)=curroffset; + curroffset+=(unsigned long)(*(strarray+curroffset))+1L; +} + +/* +** As above, we've made one copy of the offset pointers, +** so duplicate this array in the remaining ones. +*/ +k=1; +tempobase=optrarray; +while(k<numarrays) +{ tempobase+=*nstrings; + for(l=0;l<*nstrings;l++) + tempobase[l]=optrarray[l]; + k++; +} + +/* +** All done...go home. Pass local pointer back. +*/ +return(optrarray); +} + +/************** +** stradjust ** +*************** +** Used by the string heap sort. Call this routine to adjust the +** string at offset i to length l. The members of the string array +** are moved accordingly and the length of the string at offset i +** is set to l. +*/ +static void stradjust(unsigned long *optrarray, /* Offset pointer array */ + unsigned char *strarray, /* String array */ + unsigned long nstrings, /* # of strings */ + unsigned long i, /* Offset to adjust */ + unsigned char l) /* New length */ +{ +unsigned long nbytes; /* # of bytes to move */ +unsigned long j; /* Index */ +int direction; /* Direction indicator */ +unsigned char adjamount; /* Adjustment amount */ + +/* +** If new length is less than old length, the direction is +** down. If new length is greater than old length, the +** direction is up. +*/ +direction=(int)l - (int)*(strarray+*(optrarray+i)); +adjamount=(unsigned char)abs(direction); + +/* +** See if the adjustment is being made to the last +** string in the string array. If so, we don't have to +** do anything more than adjust the length field. +*/ +if(i==(nstrings-1L)) +{ *(strarray+*(optrarray+i))=l; + return; +} + +/* +** Calculate the total # of bytes in string array from +** location i+1 to end of array. Whether we're moving "up" or +** down, this is how many bytes we'll have to move. +*/ +nbytes=*(optrarray+nstrings-1L) + + (unsigned long)*(strarray+*(optrarray+nstrings-1L)) + 1L - + *(optrarray+i+1L); + +/* +** Calculate the source and the destination. Source is +** string position i+1. Destination is string position i+l +** (i+"ell"...don't confuse 1 and l). +** Hand this straight to memmove and let it handle the +** "overlap" problem. +*/ +MoveMemory((void *)(strarray+*(optrarray+i)+l+1), + (void *)(strarray+*(optrarray+i+1)), + (unsigned long)nbytes); + +/* +** We have to adjust the offset pointer array. +** This covers string i+1 to numstrings-1. +*/ +for(j=i+1;j<nstrings;j++) + if(direction<0) + *(optrarray+j)=*(optrarray+j)-adjamount; + else + *(optrarray+j)=*(optrarray+j)+adjamount; + +/* +** Store the new length and go home. +*/ +*(strarray+*(optrarray+i))=l; +return; +} + +/**************** +** strheapsort ** +***************** +** Pass this routine a pointer to an array of unsigned char. +** The array is presumed to hold strings occupying at most +** 80 bytes (counts a byte count). +** This routine also needs a pointer to an array of offsets +** which represent string locations in the array, and +** an unsigned long indicating the number of strings +** in the array. +*/ +static void StrHeapSort(unsigned long *optrarray, /* Offset pointers */ + unsigned char *strarray, /* Strings array */ + unsigned long numstrings, /* # of strings in array */ + unsigned long bottom, /* Region to sort...bottom */ + unsigned long top) /* Region to sort...top */ +{ +unsigned char temp[80]; /* Used to exchange elements */ +unsigned char tlen; /* Temp to hold length */ +unsigned long i; /* Loop index */ + + +/* +** Build a heap in the array +*/ +for(i=(top/2L); i>0; --i) + strsift(optrarray,strarray,numstrings,i,top); + +/* +** Repeatedly extract maximum from heap and place it at the +** end of the array. When we get done, we'll have a sorted +** array. +*/ +for(i=top; i>0; --i) +{ + strsift(optrarray,strarray,numstrings,0,i); + + /* temp = string[0] */ + tlen=*strarray; + MoveMemory((void *)&temp[0], /* Perform exchange */ + (void *)strarray, + (unsigned long)(tlen+1)); + + + /* string[0]=string[i] */ + tlen=*(strarray+*(optrarray+i)); + stradjust(optrarray,strarray,numstrings,0,tlen); + MoveMemory((void *)strarray, + (void *)(strarray+*(optrarray+i)), + (unsigned long)(tlen+1)); + + /* string[i]=temp */ + tlen=temp[0]; + stradjust(optrarray,strarray,numstrings,i,tlen); + MoveMemory((void *)(strarray+*(optrarray+i)), + (void *)&temp[0], + (unsigned long)(tlen+1)); + +} +return; +} + +/**************** +** str_is_less ** +***************** +** Pass this function: +** 1) A pointer to an array of offset pointers +** 2) A pointer to a string array +** 3) The number of elements in the string array +** 4) Offsets to two strings (a & b) +** This function returns TRUE if string a is < string b. +*/ +static int str_is_less(unsigned long *optrarray, /* Offset pointers */ + unsigned char *strarray, /* String array */ + unsigned long numstrings, /* # of strings */ + unsigned long a, unsigned long b) /* Offsets */ +{ +int slen; /* String length */ + +/* +** Determine which string has the minimum length. Use that +** to call strncmp(). If they match up to that point, the +** string with the longer length wins. +*/ +slen=(int)*(strarray+*(optrarray+a)); +if(slen > (int)*(strarray+*(optrarray+b))) + slen=(int)*(strarray+*(optrarray+b)); + +slen=strncmp((char *)(strarray+*(optrarray+a)), + (char *)(strarray+*(optrarray+b)),slen); + +if(slen==0) +{ + /* + ** They match. Return true if the length of a + ** is greater than the length of b. + */ + if(*(strarray+*(optrarray+a)) > + *(strarray+*(optrarray+b))) + return(TRUE); + return(FALSE); +} + +if(slen<0) return(TRUE); /* a is strictly less than b */ + +return(FALSE); /* Only other possibility */ +} + +/************ +** strsift ** +************* +** Pass this function: +** 1) A pointer to an array of offset pointers +** 2) A pointer to a string array +** 3) The number of elements in the string array +** 4) Offset within which to sort. +** Sift the array within the bounds of those offsets (thus +** building a heap). +*/ +static void strsift(unsigned long *optrarray, /* Offset pointers */ + unsigned char *strarray, /* String array */ + unsigned long numstrings, /* # of strings */ + unsigned long i, unsigned long j) /* Offsets */ +{ +unsigned long k; /* Temporaries */ +unsigned char temp[80]; +unsigned char tlen; /* For string lengths */ + + +while((i+i)<=j) +{ + k=i+i; + if(k<j) + if(str_is_less(optrarray,strarray,numstrings,k,k+1L)) + ++k; + if(str_is_less(optrarray,strarray,numstrings,i,k)) + { + /* temp=string[k] */ + tlen=*(strarray+*(optrarray+k)); + MoveMemory((void *)&temp[0], + (void *)(strarray+*(optrarray+k)), + (unsigned long)(tlen+1)); + + /* string[k]=string[i] */ + tlen=*(strarray+*(optrarray+i)); + stradjust(optrarray,strarray,numstrings,k,tlen); + MoveMemory((void *)(strarray+*(optrarray+k)), + (void *)(strarray+*(optrarray+i)), + (unsigned long)(tlen+1)); + + /* string[i]=temp */ + tlen=temp[0]; + stradjust(optrarray,strarray,numstrings,i,tlen); + MoveMemory((void *)(strarray+*(optrarray+i)), + (void *)&temp[0], + (unsigned long)(tlen+1)); + i=k; + } + else + i=j+1; +} +return; +} diff --git a/wordcat.h b/wordcat.h deleted file mode 100644 index 9f18b42..0000000 --- a/wordcat.h +++ /dev/null @@ -1,81 +0,0 @@ -/* -** wordcat.h -** Word catalog -** BYTEmark (tm) -** BYTE's Native Mode Benchmarks -** Rick Grehan, BYTE Magazine -** -** Creation: -** Revision: 3/95 -** -** DISCLAIMER -** The source, executable, and documentation files that comprise -** the BYTEmark benchmarks are made available on an "as is" basis. -** This means that we at BYTE Magazine have made every reasonable -** effort to verify that the there are no errors in the source and -** executable code. We cannot, however, guarantee that the programs -** are error-free. Consequently, McGraw-HIll and BYTE Magazine make -** no claims in regard to the fitness of the source code, executable -** code, and documentation of the BYTEmark. -** Furthermore, BYTE Magazine, McGraw-Hill, and all employees -** of McGraw-Hill cannot be held responsible for any damages resulting -** from the use of this code or the results obtained from using -** this code. -*/ - -/* -** Word catalog -*/ -#define WORDCATSIZE 50 - -char *wordcatarray[WORDCATSIZE] = -{ "Hello", - "He", - "Him", - "the", - "this", - "that", - "though", - "rough", - "cough", - "obviously", - "But", - "but", - "bye", - "begin", - "beginning", - "beginnings", - "of", - "our", - "ourselves", - "yourselves", - "to", - "together", - "togetherness", - "from", - "either", - "I", - "A", - "return", - "However", - "that", - "example", - "yet", - "quickly", - "all", - "if", - "were", - "includes", - "always", - "never", - "not", - "small", - "returns", - "set", - "basic", - "Entered", - "with", - "used", - "shown", - "you", - "know" }; |