summaryrefslogtreecommitdiff
path: root/nbench1.c
diff options
context:
space:
mode:
Diffstat (limited to 'nbench1.c')
-rw-r--r--nbench1.c4445
1 files changed, 4445 insertions, 0 deletions
diff --git a/nbench1.c b/nbench1.c
new file mode 100644
index 0000000..05c35df
--- /dev/null
+++ b/nbench1.c
@@ -0,0 +1,4445 @@
+
+/*
+** nbench1.c
+*/
+
+/********************************
+** BYTEmark (tm) **
+** BYTE NATIVE MODE BENCHMARKS **
+** VERSION 2 **
+** **
+** Included in this source **
+** file: **
+** Numeric Heapsort **
+** String Heapsort **
+** Bitfield test **
+** Floating point emulation **
+** Fourier coefficients **
+** Assignment algorithm **
+** IDEA Encyption **
+** Huffman compression **
+** Back prop. neural net **
+** LU Decomposition **
+** (linear equations) **
+** ---------- **
+** Rick Grehan, BYTE Magazine **
+*********************************
+**
+** BYTEmark (tm)
+** BYTE's Native Mode Benchmarks
+** Rick Grehan, BYTE Magazine
+**
+** Creation:
+** Revision: 3/95;10/95
+** 10/95 - Removed allocation that was taking place inside
+** the LU Decomposition benchmark. Though it didn't seem to
+** make a difference on systems we ran it on, it nonetheless
+** removes an operating system dependency that probably should
+** not have been there.
+**
+** DISCLAIMER
+** The source, executable, and documentation files that comprise
+** the BYTEmark benchmarks are made available on an "as is" basis.
+** This means that we at BYTE Magazine have made every reasonable
+** effort to verify that the there are no errors in the source and
+** executable code. We cannot, however, guarantee that the programs
+** are error-free. Consequently, McGraw-HIll and BYTE Magazine make
+** no claims in regard to the fitness of the source code, executable
+** code, and documentation of the BYTEmark.
+** Furthermore, BYTE Magazine, McGraw-Hill, and all employees
+** of McGraw-Hill cannot be held responsible for any damages resulting
+** from the use of this code or the results obtained from using
+** this code.
+*/
+
+/*
+** INCLUDES
+*/
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <strings.h>
+#include <math.h>
+#include "nmglobal.h"
+#include "nbench1.h"
+#include "wordcat.h"
+
+#ifdef DEBUG
+static int numsort_status=0;
+static int stringsort_status=0;
+#endif
+
+/*********************
+** NUMERIC HEAPSORT **
+**********************
+** This test implements a heapsort algorithm, performed on an
+** array of longs.
+*/
+
+/**************
+** DoNumSort **
+***************
+** This routine performs the CPU numeric sort test.
+** NOTE: Last version incorrectly stated that the routine
+** returned result in # of longword sorted per second.
+** Not so; the routine returns # of iterations per sec.
+*/
+
+void DoNumSort(void)
+{
+SortStruct *numsortstruct; /* Local pointer to global struct */
+farlong *arraybase; /* Base pointers of array */
+long accumtime; /* Accumulated time */
+double iterations; /* Iteration counter */
+char *errorcontext; /* Error context string pointer */
+int systemerror; /* For holding error codes */
+
+/*
+** Link to global structure
+*/
+numsortstruct=&global_numsortstruct;
+
+/*
+** Set the error context string.
+*/
+errorcontext="CPU:Numeric Sort";
+
+/*
+** See if we need to do self adjustment code.
+*/
+if(numsortstruct->adjust==0)
+{
+ /*
+ ** Self-adjustment code. The system begins by sorting 1
+ ** array. If it does that in no time, then two arrays
+ ** are built and sorted. This process continues until
+ ** enough arrays are built to handle the tolerance.
+ */
+ numsortstruct->numarrays=1;
+ while(1)
+ {
+ /*
+ ** Allocate space for arrays
+ */
+ arraybase=(farlong *)AllocateMemory(sizeof(long) *
+ numsortstruct->numarrays * numsortstruct->arraysize,
+ &systemerror);
+ if(systemerror)
+ { ReportError(errorcontext,systemerror);
+ FreeMemory((farvoid *)arraybase,
+ &systemerror);
+ ErrorExit();
+ }
+
+ /*
+ ** Do an iteration of the numeric sort. If the
+ ** elapsed time is less than or equal to the permitted
+ ** minimum, then allocate for more arrays and
+ ** try again.
+ */
+ if(DoNumSortIteration(arraybase,
+ numsortstruct->arraysize,
+ numsortstruct->numarrays)>global_min_ticks)
+ break; /* We're ok...exit */
+
+ FreeMemory((farvoid *)arraybase,&systemerror);
+ if(numsortstruct->numarrays++>NUMNUMARRAYS)
+ { printf("CPU:NSORT -- NUMNUMARRAYS hit.\n");
+ ErrorExit();
+ }
+ }
+}
+else
+{ /*
+ ** Allocate space for arrays
+ */
+ arraybase=(farlong *)AllocateMemory(sizeof(long) *
+ numsortstruct->numarrays * numsortstruct->arraysize,
+ &systemerror);
+ if(systemerror)
+ { ReportError(errorcontext,systemerror);
+ FreeMemory((farvoid *)arraybase,
+ &systemerror);
+ ErrorExit();
+ }
+
+}
+/*
+** All's well if we get here. Repeatedly perform sorts until the
+** accumulated elapsed time is greater than # of seconds requested.
+*/
+accumtime=0L;
+iterations=(double)0.0;
+
+do {
+ accumtime+=DoNumSortIteration(arraybase,
+ numsortstruct->arraysize,
+ numsortstruct->numarrays);
+ iterations+=(double)1.0;
+} while(TicksToSecs(accumtime)<numsortstruct->request_secs);
+
+/*
+** Clean up, calculate results, and go home. Be sure to
+** show that we don't have to rerun adjustment code.
+*/
+FreeMemory((farvoid *)arraybase,&systemerror);
+
+numsortstruct->sortspersec=iterations *
+ (double)numsortstruct->numarrays / TicksToFracSecs(accumtime);
+
+if(numsortstruct->adjust==0)
+ numsortstruct->adjust=1;
+
+#ifdef DEBUG
+if (numsort_status==0) printf("Numeric sort: OK\n");
+numsort_status=0;
+#endif
+return;
+}
+
+/***********************
+** DoNumSortIteration **
+************************
+** This routine executes one iteration of the numeric
+** sort benchmark. It returns the number of ticks
+** elapsed for the iteration.
+*/
+static ulong DoNumSortIteration(farlong *arraybase,
+ ulong arraysize,
+ uint numarrays)
+{
+ulong elapsed; /* Elapsed ticks */
+ulong i;
+/*
+** Load up the array with random numbers
+*/
+LoadNumArrayWithRand(arraybase,arraysize,numarrays);
+
+/*
+** Start the stopwatch
+*/
+elapsed=StartStopwatch();
+
+/*
+** Execute a heap of heapsorts
+*/
+for(i=0;i<numarrays;i++)
+ NumHeapSort(arraybase+i*arraysize,0L,arraysize-1L);
+
+/*
+** Get elapsed time
+*/
+elapsed=StopStopwatch(elapsed);
+#ifdef DEBUG
+{
+ for(i=0;i<arraysize-1;i++)
+ { /*
+ ** Compare to check for proper
+ ** sort.
+ */
+ if(arraybase[i+1]<arraybase[i])
+ { printf("Sort Error\n");
+ numsort_status=1;
+ break;
+ }
+ }
+}
+#endif
+
+return(elapsed);
+}
+
+/*************************
+** LoadNumArrayWithRand **
+**************************
+** Load up an array with random longs.
+*/
+static void LoadNumArrayWithRand(farlong *array, /* Pointer to arrays */
+ ulong arraysize,
+ uint numarrays) /* # of elements in array */
+{
+long i; /* Used for index */
+farlong *darray; /* Destination array pointer */
+/*
+** Initialize the random number generator
+*/
+/* randnum(13L); */
+randnum((int32)13);
+
+/*
+** Load up first array with randoms
+*/
+for(i=0L;i<arraysize;i++)
+ /* array[i]=randnum(0L); */
+ array[i]=randnum((int32)0);
+
+/*
+** Now, if there's more than one array to load, copy the
+** first into each of the others.
+*/
+darray=array;
+while(--numarrays)
+{ darray+=arraysize;
+ for(i=0L;i<arraysize;i++)
+ darray[i]=array[i];
+}
+
+return;
+}
+
+/****************
+** NumHeapSort **
+*****************
+** Pass this routine a pointer to an array of long
+** integers. Also pass in minimum and maximum offsets.
+** This routine performs a heap sort on that array.
+*/
+static void NumHeapSort(farlong *array,
+ ulong bottom, /* Lower bound */
+ ulong top) /* Upper bound */
+{
+ulong temp; /* Used to exchange elements */
+ulong i; /* Loop index */
+
+/*
+** First, build a heap in the array
+*/
+for(i=(top/2L); i>0; --i)
+ NumSift(array,i,top);
+
+/*
+** Repeatedly extract maximum from heap and place it at the
+** end of the array. When we get done, we'll have a sorted
+** array.
+*/
+for(i=top; i>0; --i)
+{ NumSift(array,bottom,i);
+ temp=*array; /* Perform exchange */
+ *array=*(array+i);
+ *(array+i)=temp;
+}
+return;
+}
+
+/************
+** NumSift **
+*************
+** Peforms the sift operation on a numeric array,
+** constructing a heap in the array.
+*/
+static void NumSift(farlong *array, /* Array of numbers */
+ ulong i, /* Minimum of array */
+ ulong j) /* Maximum of array */
+{
+unsigned long k;
+long temp; /* Used for exchange */
+
+while((i+i)<=j)
+{
+ k=i+i;
+ if(k<j)
+ if(array[k]<array[k+1L])
+ ++k;
+ if(array[i]<array[k])
+ {
+ temp=array[k];
+ array[k]=array[i];
+ array[i]=temp;
+ i=k;
+ }
+ else
+ i=j+1;
+}
+return;
+}
+
+/********************
+** STRING HEAPSORT **
+********************/
+
+/*****************
+** DoStringSort **
+******************
+** This routine performs the CPU string sort test.
+** Arguments:
+** requested_secs = # of seconds to execute test
+** stringspersec = # of strings per second sorted (RETURNED)
+*/
+void DoStringSort(void)
+{
+
+SortStruct *strsortstruct; /* Local for sort structure */
+faruchar *arraybase; /* Base pointer of char array */
+long accumtime; /* Accumulated time */
+double iterations; /* # of iterations */
+char *errorcontext; /* Error context string pointer */
+int systemerror; /* For holding error code */
+
+/*
+** Link to global structure
+*/
+strsortstruct=&global_strsortstruct;
+
+/*
+** Set the error context
+*/
+errorcontext="CPU:String Sort";
+
+/*
+** See if we have to perform self-adjustment code
+*/
+if(strsortstruct->adjust==0)
+{
+ /*
+ ** Initialize the number of arrays.
+ */
+ strsortstruct->numarrays=1;
+ while(1)
+ {
+ /*
+ ** Allocate space for array. We'll add an extra 100
+ ** bytes to protect memory as strings move around
+ ** (this can happen during string adjustment)
+ */
+ arraybase=(faruchar *)AllocateMemory((strsortstruct->arraysize+100L) *
+ (long)strsortstruct->numarrays,&systemerror);
+ if(systemerror)
+ { ReportError(errorcontext,systemerror);
+ ErrorExit();
+ }
+
+ /*
+ ** Do an iteration of the string sort. If the
+ ** elapsed time is less than or equal to the permitted
+ ** minimum, then de-allocate the array, reallocate a
+ ** an additional array, and try again.
+ */
+ if(DoStringSortIteration(arraybase,
+ strsortstruct->numarrays,
+ strsortstruct->arraysize)>global_min_ticks)
+ break; /* We're ok...exit */
+
+ FreeMemory((farvoid *)arraybase,&systemerror);
+ strsortstruct->numarrays+=1;
+ }
+}
+else
+{
+ /*
+ ** We don't have to perform self adjustment code.
+ ** Simply allocate the space for the array.
+ */
+ arraybase=(faruchar *)AllocateMemory((strsortstruct->arraysize+100L) *
+ (long)strsortstruct->numarrays,&systemerror);
+ if(systemerror)
+ { ReportError(errorcontext,systemerror);
+ ErrorExit();
+ }
+}
+/*
+** All's well if we get here. Repeatedly perform sorts until the
+** accumulated elapsed time is greater than # of seconds requested.
+*/
+accumtime=0L;
+iterations=(double)0.0;
+
+do {
+ accumtime+=DoStringSortIteration(arraybase,
+ strsortstruct->numarrays,
+ strsortstruct->arraysize);
+ iterations+=(double)strsortstruct->numarrays;
+} while(TicksToSecs(accumtime)<strsortstruct->request_secs);
+
+/*
+** Clean up, calculate results, and go home.
+** Set flag to show we don't need to rerun adjustment code.
+*/
+FreeMemory((farvoid *)arraybase,&systemerror);
+strsortstruct->sortspersec=iterations / (double)TicksToFracSecs(accumtime);
+if(strsortstruct->adjust==0)
+ strsortstruct->adjust=1;
+#ifdef DEBUG
+if (stringsort_status==0) printf("String sort: OK\n");
+stringsort_status=0;
+#endif
+return;
+}
+
+/**************************
+** DoStringSortIteration **
+***************************
+** This routine executes one iteration of the string
+** sort benchmark. It returns the number of ticks
+** Note that this routine also builds the offset pointer
+** array.
+*/
+static ulong DoStringSortIteration(faruchar *arraybase,
+ uint numarrays,ulong arraysize)
+{
+farulong *optrarray; /* Offset pointer array */
+unsigned long elapsed; /* Elapsed ticks */
+unsigned long nstrings; /* # of strings in array */
+int syserror; /* System error code */
+unsigned int i; /* Index */
+farulong *tempobase; /* Temporary offset pointer base */
+faruchar *tempsbase; /* Temporary string base pointer */
+
+/*
+** Load up the array(s) with random numbers
+*/
+optrarray=LoadStringArray(arraybase,numarrays,&nstrings,arraysize);
+
+/*
+** Set temp base pointers...they will be modified as the
+** benchmark proceeds.
+*/
+tempobase=optrarray;
+tempsbase=arraybase;
+
+/*
+** Start the stopwatch
+*/
+elapsed=StartStopwatch();
+
+/*
+** Execute heapsorts
+*/
+for(i=0;i<numarrays;i++)
+{ StrHeapSort(tempobase,tempsbase,nstrings,0L,nstrings-1);
+ tempobase+=nstrings; /* Advance base pointers */
+ tempsbase+=arraysize+100;
+}
+
+/*
+** Record elapsed time
+*/
+elapsed=StopStopwatch(elapsed);
+
+#ifdef DEBUG
+{
+ unsigned long i;
+ for(i=0;i<nstrings-1;i++)
+ { /*
+ ** Compare strings to check for proper
+ ** sort.
+ */
+ if(str_is_less(optrarray,arraybase,nstrings,i+1,i))
+ { printf("Sort Error\n");
+ stringsort_status=1;
+ break;
+ }
+ }
+}
+#endif
+
+/*
+** Release the offset pointer array built by
+** LoadStringArray()
+*/
+FreeMemory((farvoid *)optrarray,&syserror);
+
+/*
+** Return elapsed ticks.
+*/
+return(elapsed);
+}
+
+/********************
+** LoadStringArray **
+*********************
+** Initialize the string array with random strings of
+** varying sizes.
+** Returns the pointer to the offset pointer array.
+** Note that since we're creating a number of arrays, this
+** routine builds one array, then copies it into the others.
+*/
+static farulong *LoadStringArray(faruchar *strarray, /* String array */
+ uint numarrays, /* # of arrays */
+ ulong *nstrings, /* # of strings */
+ ulong arraysize) /* Size of array */
+{
+faruchar *tempsbase; /* Temporary string base pointer */
+farulong *optrarray; /* Local for pointer */
+farulong *tempobase; /* Temporary offset pointer base pointer */
+unsigned long curroffset; /* Current offset */
+int fullflag; /* Indicates full array */
+unsigned char stringlength; /* Length of string */
+unsigned char i; /* Index */
+unsigned long j; /* Another index */
+unsigned int k; /* Yet another index */
+unsigned int l; /* Ans still one more index */
+int systemerror; /* For holding error code */
+
+/*
+** Initialize random number generator.
+*/
+/* randnum(13L); */
+randnum((int32)13);
+
+/*
+** Start with no strings. Initialize our current offset pointer
+** to 0.
+*/
+*nstrings=0L;
+curroffset=0L;
+fullflag=0;
+
+do
+{
+ /*
+ ** Allocate a string with a random length no
+ ** shorter than 4 bytes and no longer than
+ ** 80 bytes. Note we have to also make sure
+ ** there's room in the array.
+ */
+ /* stringlength=(unsigned char)((1+abs_randwc(76L)) & 0xFFL);*/
+ stringlength=(unsigned char)((1+abs_randwc((int32)76)) & 0xFFL);
+ if((unsigned long)stringlength+curroffset+1L>=arraysize)
+ { stringlength=(unsigned char)((arraysize-curroffset-1L) &
+ 0xFF);
+ fullflag=1; /* Indicates a full */
+ }
+
+ /*
+ ** Store length at curroffset and advance current offset.
+ */
+ *(strarray+curroffset)=stringlength;
+ curroffset++;
+
+ /*
+ ** Fill up the rest of the string with random bytes.
+ */
+ for(i=0;i<stringlength;i++)
+ { *(strarray+curroffset)=
+ /* (unsigned char)(abs_randwc((long)0xFE)); */
+ (unsigned char)(abs_randwc((int32)0xFE));
+ curroffset++;
+ }
+
+ /*
+ ** Increment the # of strings counter.
+ */
+ *nstrings+=1L;
+
+} while(fullflag==0);
+
+/*
+** We now have initialized a single full array. If there
+** is more than one array, copy the original into the
+** others.
+*/
+k=1;
+tempsbase=strarray;
+while(k<numarrays)
+{ tempsbase+=arraysize+100; /* Set base */
+ for(l=0;l<arraysize;l++)
+ tempsbase[l]=strarray[l];
+ k++;
+}
+
+/*
+** Now the array is full, allocate enough space for an
+** offset pointer array.
+*/
+optrarray=(farulong *)AllocateMemory(*nstrings * sizeof(unsigned long) *
+ numarrays,
+ &systemerror);
+if(systemerror)
+{ ReportError("CPU:Stringsort",systemerror);
+ FreeMemory((void *)strarray,&systemerror);
+ ErrorExit();
+}
+
+/*
+** Go through the newly-built string array, building
+** offsets and putting them into the offset pointer
+** array.
+*/
+curroffset=0;
+for(j=0;j<*nstrings;j++)
+{ *(optrarray+j)=curroffset;
+ curroffset+=(unsigned long)(*(strarray+curroffset))+1L;
+}
+
+/*
+** As above, we've made one copy of the offset pointers,
+** so duplicate this array in the remaining ones.
+*/
+k=1;
+tempobase=optrarray;
+while(k<numarrays)
+{ tempobase+=*nstrings;
+ for(l=0;l<*nstrings;l++)
+ tempobase[l]=optrarray[l];
+ k++;
+}
+
+/*
+** All done...go home. Pass local pointer back.
+*/
+return(optrarray);
+}
+
+/**************
+** stradjust **
+***************
+** Used by the string heap sort. Call this routine to adjust the
+** string at offset i to length l. The members of the string array
+** are moved accordingly and the length of the string at offset i
+** is set to l.
+*/
+static void stradjust(farulong *optrarray, /* Offset pointer array */
+ faruchar *strarray, /* String array */
+ ulong nstrings, /* # of strings */
+ ulong i, /* Offset to adjust */
+ uchar l) /* New length */
+{
+unsigned long nbytes; /* # of bytes to move */
+unsigned long j; /* Index */
+int direction; /* Direction indicator */
+unsigned char adjamount; /* Adjustment amount */
+
+/*
+** If new length is less than old length, the direction is
+** down. If new length is greater than old length, the
+** direction is up.
+*/
+direction=(int)l - (int)*(strarray+*(optrarray+i));
+adjamount=(unsigned char)abs(direction);
+
+/*
+** See if the adjustment is being made to the last
+** string in the string array. If so, we don't have to
+** do anything more than adjust the length field.
+*/
+if(i==(nstrings-1L))
+{ *(strarray+*(optrarray+i))=l;
+ return;
+}
+
+/*
+** Calculate the total # of bytes in string array from
+** location i+1 to end of array. Whether we're moving "up" or
+** down, this is how many bytes we'll have to move.
+*/
+nbytes=*(optrarray+nstrings-1L) +
+ (unsigned long)*(strarray+*(optrarray+nstrings-1L)) + 1L -
+ *(optrarray+i+1L);
+
+/*
+** Calculate the source and the destination. Source is
+** string position i+1. Destination is string position i+l
+** (i+"ell"...don't confuse 1 and l).
+** Hand this straight to memmove and let it handle the
+** "overlap" problem.
+*/
+MoveMemory((farvoid *)(strarray+*(optrarray+i)+l+1),
+ (farvoid *)(strarray+*(optrarray+i+1)),
+ (unsigned long)nbytes);
+
+/*
+** We have to adjust the offset pointer array.
+** This covers string i+1 to numstrings-1.
+*/
+for(j=i+1;j<nstrings;j++)
+ if(direction<0)
+ *(optrarray+j)=*(optrarray+j)-adjamount;
+ else
+ *(optrarray+j)=*(optrarray+j)+adjamount;
+
+/*
+** Store the new length and go home.
+*/
+*(strarray+*(optrarray+i))=l;
+return;
+}
+
+/****************
+** strheapsort **
+*****************
+** Pass this routine a pointer to an array of unsigned char.
+** The array is presumed to hold strings occupying at most
+** 80 bytes (counts a byte count).
+** This routine also needs a pointer to an array of offsets
+** which represent string locations in the array, and
+** an unsigned long indicating the number of strings
+** in the array.
+*/
+static void StrHeapSort(farulong *optrarray, /* Offset pointers */
+ faruchar *strarray, /* Strings array */
+ ulong numstrings, /* # of strings in array */
+ ulong bottom, /* Region to sort...bottom */
+ ulong top) /* Region to sort...top */
+{
+unsigned char temp[80]; /* Used to exchange elements */
+unsigned char tlen; /* Temp to hold length */
+unsigned long i; /* Loop index */
+
+
+/*
+** Build a heap in the array
+*/
+for(i=(top/2L); i>0; --i)
+ strsift(optrarray,strarray,numstrings,i,top);
+
+/*
+** Repeatedly extract maximum from heap and place it at the
+** end of the array. When we get done, we'll have a sorted
+** array.
+*/
+for(i=top; i>0; --i)
+{
+ strsift(optrarray,strarray,numstrings,0,i);
+
+ /* temp = string[0] */
+ tlen=*strarray;
+ MoveMemory((farvoid *)&temp[0], /* Perform exchange */
+ (farvoid *)strarray,
+ (unsigned long)(tlen+1));
+
+
+ /* string[0]=string[i] */
+ tlen=*(strarray+*(optrarray+i));
+ stradjust(optrarray,strarray,numstrings,0,tlen);
+ MoveMemory((farvoid *)strarray,
+ (farvoid *)(strarray+*(optrarray+i)),
+ (unsigned long)(tlen+1));
+
+ /* string[i]=temp */
+ tlen=temp[0];
+ stradjust(optrarray,strarray,numstrings,i,tlen);
+ MoveMemory((farvoid *)(strarray+*(optrarray+i)),
+ (farvoid *)&temp[0],
+ (unsigned long)(tlen+1));
+
+}
+return;
+}
+
+/****************
+** str_is_less **
+*****************
+** Pass this function:
+** 1) A pointer to an array of offset pointers
+** 2) A pointer to a string array
+** 3) The number of elements in the string array
+** 4) Offsets to two strings (a & b)
+** This function returns TRUE if string a is < string b.
+*/
+static int str_is_less(farulong *optrarray, /* Offset pointers */
+ faruchar *strarray, /* String array */
+ ulong numstrings, /* # of strings */
+ ulong a, ulong b) /* Offsets */
+{
+int slen; /* String length */
+
+/*
+** Determine which string has the minimum length. Use that
+** to call strncmp(). If they match up to that point, the
+** string with the longer length wins.
+*/
+slen=(int)*(strarray+*(optrarray+a));
+if(slen > (int)*(strarray+*(optrarray+b)))
+ slen=(int)*(strarray+*(optrarray+b));
+
+slen=strncmp((char *)(strarray+*(optrarray+a)),
+ (char *)(strarray+*(optrarray+b)),slen);
+
+if(slen==0)
+{
+ /*
+ ** They match. Return true if the length of a
+ ** is greater than the length of b.
+ */
+ if(*(strarray+*(optrarray+a)) >
+ *(strarray+*(optrarray+b)))
+ return(TRUE);
+ return(FALSE);
+}
+
+if(slen<0) return(TRUE); /* a is strictly less than b */
+
+return(FALSE); /* Only other possibility */
+}
+
+/************
+** strsift **
+*************
+** Pass this function:
+** 1) A pointer to an array of offset pointers
+** 2) A pointer to a string array
+** 3) The number of elements in the string array
+** 4) Offset within which to sort.
+** Sift the array within the bounds of those offsets (thus
+** building a heap).
+*/
+static void strsift(farulong *optrarray, /* Offset pointers */
+ faruchar *strarray, /* String array */
+ ulong numstrings, /* # of strings */
+ ulong i, ulong j) /* Offsets */
+{
+unsigned long k; /* Temporaries */
+unsigned char temp[80];
+unsigned char tlen; /* For string lengths */
+
+
+while((i+i)<=j)
+{
+ k=i+i;
+ if(k<j)
+ if(str_is_less(optrarray,strarray,numstrings,k,k+1L))
+ ++k;
+ if(str_is_less(optrarray,strarray,numstrings,i,k))
+ {
+ /* temp=string[k] */
+ tlen=*(strarray+*(optrarray+k));
+ MoveMemory((farvoid *)&temp[0],
+ (farvoid *)(strarray+*(optrarray+k)),
+ (unsigned long)(tlen+1));
+
+ /* string[k]=string[i] */
+ tlen=*(strarray+*(optrarray+i));
+ stradjust(optrarray,strarray,numstrings,k,tlen);
+ MoveMemory((farvoid *)(strarray+*(optrarray+k)),
+ (farvoid *)(strarray+*(optrarray+i)),
+ (unsigned long)(tlen+1));
+
+ /* string[i]=temp */
+ tlen=temp[0];
+ stradjust(optrarray,strarray,numstrings,i,tlen);
+ MoveMemory((farvoid *)(strarray+*(optrarray+i)),
+ (farvoid *)&temp[0],
+ (unsigned long)(tlen+1));
+ i=k;
+ }
+ else
+ i=j+1;
+}
+return;
+}
+
+/************************
+** BITFIELD OPERATIONS **
+*************************/
+
+/*************
+** DoBitops **
+**************
+** Perform the bit operations test portion of the CPU
+** benchmark. Returns the iterations per second.
+*/
+void DoBitops(void)
+{
+BitOpStruct *locbitopstruct; /* Local bitop structure */
+farulong *bitarraybase; /* Base of bitmap array */
+farulong *bitoparraybase; /* Base of bitmap operations array */
+ulong nbitops; /* # of bitfield operations */
+ulong accumtime; /* Accumulated time in ticks */
+double iterations; /* # of iterations */
+char *errorcontext; /* Error context string */
+int systemerror; /* For holding error codes */
+int ticks;
+
+/*
+** Link to global structure.
+*/
+locbitopstruct=&global_bitopstruct;
+
+/*
+** Set the error context.
+*/
+errorcontext="CPU:Bitfields";
+
+/*
+** See if we need to run adjustment code.
+*/
+if(locbitopstruct->adjust==0)
+{
+ bitarraybase=(farulong *)AllocateMemory(locbitopstruct->bitfieldarraysize *
+ sizeof(ulong),&systemerror);
+ if(systemerror)
+ { ReportError(errorcontext,systemerror);
+ ErrorExit();
+ }
+
+ /*
+ ** Initialize bitfield operations array to [2,30] elements
+ */
+ locbitopstruct->bitoparraysize=30L;
+
+ while(1)
+ {
+ /*
+ ** Allocate space for operations array
+ */
+ bitoparraybase=(farulong *)AllocateMemory(locbitopstruct->bitoparraysize*2L*
+ sizeof(ulong),
+ &systemerror);
+ if(systemerror)
+ { ReportError(errorcontext,systemerror);
+ FreeMemory((farvoid *)bitarraybase,&systemerror);
+ ErrorExit();
+ }
+ /*
+ ** Do an iteration of the bitmap test. If the
+ ** elapsed time is less than or equal to the permitted
+ ** minimum, then de-allocate the array, reallocate a
+ ** larger version, and try again.
+ */
+ ticks=DoBitfieldIteration(bitarraybase,
+ bitoparraybase,
+ locbitopstruct->bitoparraysize,
+ &nbitops);
+#ifdef DEBUG
+#ifdef LINUX
+ if (locbitopstruct->bitoparraysize==30L){
+ /* this is the first loop, write a debug file */
+ FILE *file;
+ unsigned long *running_base; /* same as farulong */
+ long counter;
+ file=fopen("debugbit.dat","w");
+ running_base=bitarraybase;
+ for (counter=0;counter<(long)(locbitopstruct->bitfieldarraysize);counter++){
+#ifdef LONG64
+ fprintf(file,"%08X",(unsigned int)(*running_base&0xFFFFFFFFL));
+ fprintf(file,"%08X",(unsigned int)((*running_base>>32)&0xFFFFFFFFL));
+ if ((counter+1)%4==0) fprintf(file,"\n");
+#else
+ fprintf(file,"%08lX",*running_base);
+ if ((counter+1)%8==0) fprintf(file,"\n");
+#endif
+ running_base=running_base+1;
+ }
+ fclose(file);
+ printf("\nWrote the file debugbit.dat, you may want to compare it to debugbit.good\n");
+ }
+#endif
+#endif
+
+ if (ticks>global_min_ticks) break; /* We're ok...exit */
+
+ FreeMemory((farvoid *)bitoparraybase,&systemerror);
+ locbitopstruct->bitoparraysize+=100L;
+ }
+}
+else
+{
+ /*
+ ** Don't need to do self adjustment, just allocate
+ ** the array space.
+ */
+ bitarraybase=(farulong *)AllocateMemory(locbitopstruct->bitfieldarraysize *
+ sizeof(ulong),&systemerror);
+ if(systemerror)
+ { ReportError(errorcontext,systemerror);
+ ErrorExit();
+ }
+ bitoparraybase=(farulong *)AllocateMemory(locbitopstruct->bitoparraysize*2L*
+ sizeof(ulong),
+ &systemerror);
+ if(systemerror)
+ { ReportError(errorcontext,systemerror);
+ FreeMemory((farvoid *)bitarraybase,&systemerror);
+ ErrorExit();
+ }
+}
+
+/*
+** All's well if we get here. Repeatedly perform bitops until the
+** accumulated elapsed time is greater than # of seconds requested.
+*/
+accumtime=0L;
+iterations=(double)0.0;
+do {
+ accumtime+=DoBitfieldIteration(bitarraybase,
+ bitoparraybase,
+ locbitopstruct->bitoparraysize,&nbitops);
+ iterations+=(double)nbitops;
+} while(TicksToSecs(accumtime)<locbitopstruct->request_secs);
+
+/*
+** Clean up, calculate results, and go home.
+** Also, set adjustment flag to show that we don't have
+** to do self adjusting in the future.
+*/
+FreeMemory((farvoid *)bitarraybase,&systemerror);
+FreeMemory((farvoid *)bitoparraybase,&systemerror);
+locbitopstruct->bitopspersec=iterations /TicksToFracSecs(accumtime);
+if(locbitopstruct->adjust==0)
+ locbitopstruct->adjust=1;
+
+return;
+}
+
+/************************
+** DoBitfieldIteration **
+*************************
+** Perform a single iteration of the bitfield benchmark.
+** Return the # of ticks accumulated by the operation.
+*/
+static ulong DoBitfieldIteration(farulong *bitarraybase,
+ farulong *bitoparraybase,
+ long bitoparraysize,
+ ulong *nbitops)
+{
+long i; /* Index */
+ulong bitoffset; /* Offset into bitmap */
+ulong elapsed; /* Time to execute */
+/*
+** Clear # bitops counter
+*/
+*nbitops=0L;
+
+/*
+** Construct a set of bitmap offsets and run lengths.
+** The offset can be any random number from 0 to the
+** size of the bitmap (in bits). The run length can
+** be any random number from 1 to the number of bits
+** between the offset and the end of the bitmap.
+** Note that the bitmap has 8192 * 32 bits in it.
+** (262,144 bits)
+*/
+/*
+** Reset random number generator so things repeat.
+** Also reset the bit array we work on.
+** added by Uwe F. Mayer
+*/
+randnum((int32)13);
+for (i=0;i<global_bitopstruct.bitfieldarraysize;i++)
+{
+#ifdef LONG64
+ *(bitarraybase+i)=(ulong)0x5555555555555555;
+#else
+ *(bitarraybase+i)=(ulong)0x55555555;
+#endif
+}
+randnum((int32)13);
+/* end of addition of code */
+
+for (i=0;i<bitoparraysize;i++)
+{
+ /* First item is offset */
+ /* *(bitoparraybase+i+i)=bitoffset=abs_randwc(262140L); */
+ *(bitoparraybase+i+i)=bitoffset=abs_randwc((int32)262140);
+
+ /* Next item is run length */
+ /* *nbitops+=*(bitoparraybase+i+i+1L)=abs_randwc(262140L-bitoffset);*/
+ *nbitops+=*(bitoparraybase+i+i+1L)=abs_randwc((int32)262140-bitoffset);
+}
+
+/*
+** Array of offset and lengths built...do an iteration of
+** the test.
+** Start the stopwatch.
+*/
+elapsed=StartStopwatch();
+
+/*
+** Loop through array off offset/run length pairs.
+** Execute operation based on modulus of index.
+*/
+for(i=0;i<bitoparraysize;i++)
+{
+ switch(i % 3)
+ {
+
+ case 0: /* Set run of bits */
+ ToggleBitRun(bitarraybase,
+ *(bitoparraybase+i+i),
+ *(bitoparraybase+i+i+1),
+ 1);
+ break;
+
+ case 1: /* Clear run of bits */
+ ToggleBitRun(bitarraybase,
+ *(bitoparraybase+i+i),
+ *(bitoparraybase+i+i+1),
+ 0);
+ break;
+
+ case 2: /* Complement run of bits */
+ FlipBitRun(bitarraybase,
+ *(bitoparraybase+i+i),
+ *(bitoparraybase+i+i+1));
+ break;
+ }
+}
+
+/*
+** Return elapsed time
+*/
+return(StopStopwatch(elapsed));
+}
+
+
+/*****************************
+** ToggleBitRun *
+******************************
+** Set or clear a run of nbits starting at
+** bit_addr in bitmap.
+*/
+static void ToggleBitRun(farulong *bitmap, /* Bitmap */
+ ulong bit_addr, /* Address of bits to set */
+ ulong nbits, /* # of bits to set/clr */
+ uint val) /* 1 or 0 */
+{
+unsigned long bindex; /* Index into array */
+unsigned long bitnumb; /* Bit number */
+
+while(nbits--)
+{
+#ifdef LONG64
+ bindex=bit_addr>>6; /* Index is number /64 */
+ bitnumb=bit_addr % 64; /* Bit number in word */
+#else
+ bindex=bit_addr>>5; /* Index is number /32 */
+ bitnumb=bit_addr % 32; /* bit number in word */
+#endif
+ if(val)
+ bitmap[bindex]|=(1L<<bitnumb);
+ else
+ bitmap[bindex]&=~(1L<<bitnumb);
+ bit_addr++;
+}
+return;
+}
+
+/***************
+** FlipBitRun **
+****************
+** Complements a run of bits.
+*/
+static void FlipBitRun(farulong *bitmap, /* Bit map */
+ ulong bit_addr, /* Bit address */
+ ulong nbits) /* # of bits to flip */
+{
+unsigned long bindex; /* Index into array */
+unsigned long bitnumb; /* Bit number */
+
+while(nbits--)
+{
+#ifdef LONG64
+ bindex=bit_addr>>6; /* Index is number /64 */
+ bitnumb=bit_addr % 64; /* Bit number in longword */
+#else
+ bindex=bit_addr>>5; /* Index is number /32 */
+ bitnumb=bit_addr % 32; /* Bit number in longword */
+#endif
+ bitmap[bindex]^=(1L<<bitnumb);
+ bit_addr++;
+}
+
+return;
+}
+
+/*****************************
+** FLOATING-POINT EMULATION **
+*****************************/
+
+/**************
+** DoEmFloat **
+***************
+** Perform the floating-point emulation routines portion of the
+** CPU benchmark. Returns the operations per second.
+*/
+void DoEmFloat(void)
+{
+EmFloatStruct *locemfloatstruct; /* Local structure */
+InternalFPF *abase; /* Base of A array */
+InternalFPF *bbase; /* Base of B array */
+InternalFPF *cbase; /* Base of C array */
+ulong accumtime; /* Accumulated time in ticks */
+double iterations; /* # of iterations */
+ulong tickcount; /* # of ticks */
+char *errorcontext; /* Error context string pointer */
+int systemerror; /* For holding error code */
+ulong loops; /* # of loops */
+
+/*
+** Link to global structure
+*/
+locemfloatstruct=&global_emfloatstruct;
+
+/*
+** Set the error context
+*/
+errorcontext="CPU:Floating Emulation";
+
+
+/*
+** Test the emulation routines.
+*/
+#ifdef DEBUG
+#endif
+
+abase=(InternalFPF *)AllocateMemory(locemfloatstruct->arraysize*sizeof(InternalFPF),
+ &systemerror);
+if(systemerror)
+{ ReportError(errorcontext,systemerror);
+ ErrorExit();
+}
+
+bbase=(InternalFPF *)AllocateMemory(locemfloatstruct->arraysize*sizeof(InternalFPF),
+ &systemerror);
+if(systemerror)
+{ ReportError(errorcontext,systemerror);
+ FreeMemory((farvoid *)abase,&systemerror);
+ ErrorExit();
+}
+
+cbase=(InternalFPF *)AllocateMemory(locemfloatstruct->arraysize*sizeof(InternalFPF),
+ &systemerror);
+if(systemerror)
+{ ReportError(errorcontext,systemerror);
+ FreeMemory((farvoid *)abase,&systemerror);
+ FreeMemory((farvoid *)bbase,&systemerror);
+ ErrorExit();
+}
+
+/*
+** Set up the arrays
+*/
+SetupCPUEmFloatArrays(abase,bbase,cbase,locemfloatstruct->arraysize);
+
+/*
+** See if we need to do self-adjusting code.
+*/
+if(locemfloatstruct->adjust==0)
+{
+ locemfloatstruct->loops=0;
+
+ /*
+ ** Do an iteration of the tests. If the elapsed time is
+ ** less than minimum, increase the loop count and try
+ ** again.
+ */
+ for(loops=1;loops<CPUEMFLOATLOOPMAX;loops+=loops)
+ { tickcount=DoEmFloatIteration(abase,bbase,cbase,
+ locemfloatstruct->arraysize,
+ loops);
+ if(tickcount>global_min_ticks)
+ { locemfloatstruct->loops=loops;
+ break;
+ }
+ }
+}
+
+/*
+** Verify that selft adjustment code worked.
+*/
+if(locemfloatstruct->loops==0)
+{ printf("CPU:EMFPU -- CMPUEMFLOATLOOPMAX limit hit\n");
+ FreeMemory((farvoid *)abase,&systemerror);
+ FreeMemory((farvoid *)bbase,&systemerror);
+ FreeMemory((farvoid *)cbase,&systemerror);
+ ErrorExit();
+}
+
+/*
+** All's well if we get here. Repeatedly perform floating
+** tests until the accumulated time is greater than the
+** # of seconds requested.
+** Each iteration performs arraysize * 3 operations.
+*/
+accumtime=0L;
+iterations=(double)0.0;
+do {
+ accumtime+=DoEmFloatIteration(abase,bbase,cbase,
+ locemfloatstruct->arraysize,
+ locemfloatstruct->loops);
+ iterations+=(double)1.0;
+} while(TicksToSecs(accumtime)<locemfloatstruct->request_secs);
+
+
+/*
+** Clean up, calculate results, and go home.
+** Also, indicate that adjustment is done.
+*/
+FreeMemory((farvoid *)abase,&systemerror);
+FreeMemory((farvoid *)bbase,&systemerror);
+FreeMemory((farvoid *)cbase,&systemerror);
+
+locemfloatstruct->emflops=(iterations*(double)locemfloatstruct->loops)/
+ (double)TicksToFracSecs(accumtime);
+if(locemfloatstruct->adjust==0)
+ locemfloatstruct->adjust=1;
+
+#ifdef DEBUG
+printf("----------------------------------------------------------------------------\n");
+#endif
+return;
+}
+
+/*************************
+** FOURIER COEFFICIENTS **
+*************************/
+
+/**************
+** DoFourier **
+***************
+** Perform the transcendental/trigonometric portion of the
+** benchmark. This benchmark calculates the first n
+** fourier coefficients of the function (x+1)^x defined
+** on the interval 0,2.
+*/
+void DoFourier(void)
+{
+FourierStruct *locfourierstruct; /* Local fourier struct */
+fardouble *abase; /* Base of A[] coefficients array */
+fardouble *bbase; /* Base of B[] coefficients array */
+unsigned long accumtime; /* Accumulated time in ticks */
+double iterations; /* # of iterations */
+char *errorcontext; /* Error context string pointer */
+int systemerror; /* For error code */
+
+/*
+** Link to global structure
+*/
+locfourierstruct=&global_fourierstruct;
+
+/*
+** Set error context string
+*/
+errorcontext="FPU:Transcendental";
+
+/*
+** See if we need to do self-adjustment code.
+*/
+if(locfourierstruct->adjust==0)
+{
+ locfourierstruct->arraysize=100L; /* Start at 100 elements */
+ while(1)
+ {
+
+ abase=(fardouble *)AllocateMemory(locfourierstruct->arraysize*sizeof(double),
+ &systemerror);
+ if(systemerror)
+ { ReportError(errorcontext,systemerror);
+ ErrorExit();
+ }
+
+ bbase=(fardouble *)AllocateMemory(locfourierstruct->arraysize*sizeof(double),
+ &systemerror);
+ if(systemerror)
+ { ReportError(errorcontext,systemerror);
+ FreeMemory((void *)abase,&systemerror);
+ ErrorExit();
+ }
+ /*
+ ** Do an iteration of the tests. If the elapsed time is
+ ** less than or equal to the permitted minimum, re-allocate
+ ** larger arrays and try again.
+ */
+ if(DoFPUTransIteration(abase,bbase,
+ locfourierstruct->arraysize)>global_min_ticks)
+ break; /* We're ok...exit */
+
+ /*
+ ** Make bigger arrays and try again.
+ */
+ FreeMemory((farvoid *)abase,&systemerror);
+ FreeMemory((farvoid *)bbase,&systemerror);
+ locfourierstruct->arraysize+=50L;
+ }
+}
+else
+{ /*
+ ** Don't need self-adjustment. Just allocate the
+ ** arrays, and go.
+ */
+ abase=(fardouble *)AllocateMemory(locfourierstruct->arraysize*sizeof(double),
+ &systemerror);
+ if(systemerror)
+ { ReportError(errorcontext,systemerror);
+ ErrorExit();
+ }
+
+ bbase=(fardouble *)AllocateMemory(locfourierstruct->arraysize*sizeof(double),
+ &systemerror);
+ if(systemerror)
+ { ReportError(errorcontext,systemerror);
+ FreeMemory((void *)abase,&systemerror);
+ ErrorExit();
+ }
+}
+/*
+** All's well if we get here. Repeatedly perform integration
+** tests until the accumulated time is greater than the
+** # of seconds requested.
+*/
+accumtime=0L;
+iterations=(double)0.0;
+do {
+ accumtime+=DoFPUTransIteration(abase,bbase,locfourierstruct->arraysize);
+ iterations+=(double)locfourierstruct->arraysize*(double)2.0-(double)1.0;
+} while(TicksToSecs(accumtime)<locfourierstruct->request_secs);
+
+
+/*
+** Clean up, calculate results, and go home.
+** Also set adjustment flag to indicate no adjust code needed.
+*/
+FreeMemory((farvoid *)abase,&systemerror);
+FreeMemory((farvoid *)bbase,&systemerror);
+
+locfourierstruct->fflops=iterations/(double)TicksToFracSecs(accumtime);
+
+if(locfourierstruct->adjust==0)
+ locfourierstruct->adjust=1;
+
+return;
+}
+
+/************************
+** DoFPUTransIteration **
+*************************
+** Perform an iteration of the FPU Transcendental/trigonometric
+** benchmark. Here, an iteration consists of calculating the
+** first n fourier coefficients of the function (x+1)^x on
+** the interval 0,2. n is given by arraysize.
+** NOTE: The # of integration steps is fixed at
+** 200.
+*/
+static ulong DoFPUTransIteration(fardouble *abase, /* A coeffs. */
+ fardouble *bbase, /* B coeffs. */
+ ulong arraysize) /* # of coeffs */
+{
+double omega; /* Fundamental frequency */
+unsigned long i; /* Index */
+unsigned long elapsed; /* Elapsed time */
+
+/*
+** Start the stopwatch
+*/
+elapsed=StartStopwatch();
+
+/*
+** Calculate the fourier series. Begin by
+** calculating A[0].
+*/
+
+*abase=TrapezoidIntegrate((double)0.0,
+ (double)2.0,
+ 200,
+ (double)0.0, /* No omega * n needed */
+ 0 )/(double)2.0;
+
+/*
+** Calculate the fundamental frequency.
+** ( 2 * pi ) / period...and since the period
+** is 2, omega is simply pi.
+*/
+omega=(double)3.1415926535897932;
+
+for(i=1;i<arraysize;i++)
+{
+
+ /*
+ ** Calculate A[i] terms. Note, once again, that we
+ ** can ignore the 2/period term outside the integral
+ ** since the period is 2 and the term cancels itself
+ ** out.
+ */
+ *(abase+i)=TrapezoidIntegrate((double)0.0,
+ (double)2.0,
+ 200,
+ omega * (double)i,
+ 1);
+
+ /*
+ ** Calculate the B[i] terms.
+ */
+ *(bbase+i)=TrapezoidIntegrate((double)0.0,
+ (double)2.0,
+ 200,
+ omega * (double)i,
+ 2);
+
+}
+#ifdef DEBUG
+{
+ int i;
+ printf("\nA[i]=\n");
+ for (i=0;i<arraysize;i++) printf("%7.3g ",abase[i]);
+ printf("\nB[i]=\n(undefined) ");
+ for (i=1;i<arraysize;i++) printf("%7.3g ",bbase[i]);
+}
+#endif
+/*
+** All done, stop the stopwatch
+*/
+return(StopStopwatch(elapsed));
+}
+
+/***********************
+** TrapezoidIntegrate **
+************************
+** Perform a simple trapezoid integration on the
+** function (x+1)**x.
+** x0,x1 set the lower and upper bounds of the
+** integration.
+** nsteps indicates # of trapezoidal sections
+** omegan is the fundamental frequency times
+** the series member #
+** select = 0 for the A[0] term, 1 for cosine terms, and
+** 2 for sine terms.
+** Returns the value.
+*/
+static double TrapezoidIntegrate( double x0, /* Lower bound */
+ double x1, /* Upper bound */
+ int nsteps, /* # of steps */
+ double omegan, /* omega * n */
+ int select)
+{
+double x; /* Independent variable */
+double dx; /* Stepsize */
+double rvalue; /* Return value */
+
+
+/*
+** Initialize independent variable
+*/
+x=x0;
+
+/*
+** Calculate stepsize
+*/
+dx=(x1 - x0) / (double)nsteps;
+
+/*
+** Initialize the return value.
+*/
+rvalue=thefunction(x0,omegan,select)/(double)2.0;
+
+/*
+** Compute the other terms of the integral.
+*/
+if(nsteps!=1)
+{ --nsteps; /* Already done 1 step */
+ while(--nsteps )
+ {
+ x+=dx;
+ rvalue+=thefunction(x,omegan,select);
+ }
+}
+/*
+** Finish computation
+*/
+rvalue=(rvalue+thefunction(x1,omegan,select)/(double)2.0)*dx;
+
+return(rvalue);
+}
+
+/****************
+** thefunction **
+*****************
+** This routine selects the function to be used
+** in the Trapezoid integration.
+** x is the independent variable
+** omegan is omega * n
+** select chooses which of the sine/cosine functions
+** are used. note the special case for select=0.
+*/
+static double thefunction(double x, /* Independent variable */
+ double omegan, /* Omega * term */
+ int select) /* Choose term */
+{
+
+/*
+** Use select to pick which function we call.
+*/
+switch(select)
+{
+ case 0: return(pow(x+(double)1.0,x));
+
+ case 1: return(pow(x+(double)1.0,x) * cos(omegan * x));
+
+ case 2: return(pow(x+(double)1.0,x) * sin(omegan * x));
+}
+
+/*
+** We should never reach this point, but the following
+** keeps compilers from issuing a warning message.
+*/
+return(0.0);
+}
+
+/*************************
+** ASSIGNMENT ALGORITHM **
+*************************/
+
+/*************
+** DoAssign **
+**************
+** Perform an assignment algorithm.
+** The algorithm was adapted from the step by step guide found
+** in "Quantitative Decision Making for Business" (Gordon,
+** Pressman, and Cohn; Prentice-Hall)
+**
+**
+** NOTES:
+** 1. Even though the algorithm distinguishes between
+** ASSIGNROWS and ASSIGNCOLS, as though the two might
+** be different, it does presume a square matrix.
+** I.E., ASSIGNROWS and ASSIGNCOLS must be the same.
+** This makes for some algorithmically-correct but
+** probably non-optimal constructs.
+**
+*/
+void DoAssign(void)
+{
+AssignStruct *locassignstruct; /* Local structure ptr */
+farlong *arraybase;
+char *errorcontext;
+int systemerror;
+ulong accumtime;
+double iterations;
+
+/*
+** Link to global structure
+*/
+locassignstruct=&global_assignstruct;
+
+/*
+** Set the error context string.
+*/
+errorcontext="CPU:Assignment";
+
+/*
+** See if we need to do self adjustment code.
+*/
+if(locassignstruct->adjust==0)
+{
+ /*
+ ** Self-adjustment code. The system begins by working on 1
+ ** array. If it does that in no time, then two arrays
+ ** are built. This process continues until
+ ** enough arrays are built to handle the tolerance.
+ */
+ locassignstruct->numarrays=1;
+ while(1)
+ {
+ /*
+ ** Allocate space for arrays
+ */
+ arraybase=(farlong *) AllocateMemory(sizeof(long)*
+ ASSIGNROWS*ASSIGNCOLS*locassignstruct->numarrays,
+ &systemerror);
+ if(systemerror)
+ { ReportError(errorcontext,systemerror);
+ FreeMemory((farvoid *)arraybase,
+ &systemerror);
+ ErrorExit();
+ }
+
+ /*
+ ** Do an iteration of the assignment alg. If the
+ ** elapsed time is less than or equal to the permitted
+ ** minimum, then allocate for more arrays and
+ ** try again.
+ */
+ if(DoAssignIteration(arraybase,
+ locassignstruct->numarrays)>global_min_ticks)
+ break; /* We're ok...exit */
+
+ FreeMemory((farvoid *)arraybase, &systemerror);
+ locassignstruct->numarrays++;
+ }
+}
+else
+{ /*
+ ** Allocate space for arrays
+ */
+ arraybase=(farlong *)AllocateMemory(sizeof(long)*
+ ASSIGNROWS*ASSIGNCOLS*locassignstruct->numarrays,
+ &systemerror);
+ if(systemerror)
+ { ReportError(errorcontext,systemerror);
+ FreeMemory((farvoid *)arraybase,
+ &systemerror);
+ ErrorExit();
+ }
+}
+
+/*
+** All's well if we get here. Do the tests.
+*/
+accumtime=0L;
+iterations=(double)0.0;
+
+do {
+ accumtime+=DoAssignIteration(arraybase,
+ locassignstruct->numarrays);
+ iterations+=(double)1.0;
+} while(TicksToSecs(accumtime)<locassignstruct->request_secs);
+
+/*
+** Clean up, calculate results, and go home. Be sure to
+** show that we don't have to rerun adjustment code.
+*/
+FreeMemory((farvoid *)arraybase,&systemerror);
+
+locassignstruct->iterspersec=iterations *
+ (double)locassignstruct->numarrays / TicksToFracSecs(accumtime);
+
+if(locassignstruct->adjust==0)
+ locassignstruct->adjust=1;
+
+return;
+
+}
+
+/**********************
+** DoAssignIteration **
+***********************
+** This routine executes one iteration of the assignment test.
+** It returns the number of ticks elapsed in the iteration.
+*/
+static ulong DoAssignIteration(farlong *arraybase,
+ ulong numarrays)
+{
+longptr abase; /* local pointer */
+ulong elapsed; /* Elapsed ticks */
+ulong i;
+
+/*
+** Set up local pointer
+*/
+abase.ptrs.p=arraybase;
+
+/*
+** Load up the arrays with a random table.
+*/
+LoadAssignArrayWithRand(arraybase,numarrays);
+
+/*
+** Start the stopwatch
+*/
+elapsed=StartStopwatch();
+
+/*
+** Execute assignment algorithms
+*/
+for(i=0;i<numarrays;i++)
+{ /* abase.ptrs.p+=i*ASSIGNROWS*ASSIGNCOLS; */
+ /* Fixed by Eike Dierks */
+ Assignment(*abase.ptrs.ap);
+ abase.ptrs.p+=ASSIGNROWS*ASSIGNCOLS;
+}
+
+/*
+** Get elapsed time
+*/
+return(StopStopwatch(elapsed));
+}
+
+/****************************
+** LoadAssignArrayWithRand **
+*****************************
+** Load the assignment arrays with random numbers. All positive.
+** These numbers represent costs.
+*/
+static void LoadAssignArrayWithRand(farlong *arraybase,
+ ulong numarrays)
+{
+longptr abase,abase1; /* Local for array pointer */
+ulong i;
+
+/*
+** Set local array pointer
+*/
+abase.ptrs.p=arraybase;
+abase1.ptrs.p=arraybase;
+
+/*
+** Set up the first array. Then just copy it into the
+** others.
+*/
+LoadAssign(*(abase.ptrs.ap));
+if(numarrays>1)
+ for(i=1;i<numarrays;i++)
+ { /* abase1.ptrs.p+=i*ASSIGNROWS*ASSIGNCOLS; */
+ /* Fixed by Eike Dierks */
+ abase1.ptrs.p+=ASSIGNROWS*ASSIGNCOLS;
+ CopyToAssign(*(abase.ptrs.ap),*(abase1.ptrs.ap));
+ }
+
+return;
+}
+
+/***************
+** LoadAssign **
+****************
+** The array given by arraybase is loaded with positive random
+** numbers. Elements in the array are capped at 5,000,000.
+*/
+static void LoadAssign(farlong arraybase[][ASSIGNCOLS])
+{
+ushort i,j;
+
+/*
+** Reset random number generator so things repeat.
+*/
+/* randnum(13L); */
+randnum((int32)13);
+
+for(i=0;i<ASSIGNROWS;i++)
+ for(j=0;j<ASSIGNROWS;j++){
+ /* arraybase[i][j]=abs_randwc(5000000L);*/
+ arraybase[i][j]=abs_randwc((int32)5000000);
+ }
+
+return;
+}
+
+/*****************
+** CopyToAssign **
+******************
+** Copy the contents of one array to another. This is called by
+** the routine that builds the initial array, and is used to copy
+** the contents of the intial array into all following arrays.
+*/
+static void CopyToAssign(farlong arrayfrom[ASSIGNROWS][ASSIGNCOLS],
+ farlong arrayto[ASSIGNROWS][ASSIGNCOLS])
+{
+ushort i,j;
+
+for(i=0;i<ASSIGNROWS;i++)
+ for(j=0;j<ASSIGNCOLS;j++)
+ arrayto[i][j]=arrayfrom[i][j];
+
+return;
+}
+
+/***************
+** Assignment **
+***************/
+static void Assignment(farlong arraybase[][ASSIGNCOLS])
+{
+short assignedtableau[ASSIGNROWS][ASSIGNCOLS];
+
+/*
+** First, calculate minimum costs
+*/
+calc_minimum_costs(arraybase);
+
+/*
+** Repeat following until the number of rows selected
+** equals the number of rows in the tableau.
+*/
+while(first_assignments(arraybase,assignedtableau)!=ASSIGNROWS)
+{ second_assignments(arraybase,assignedtableau);
+}
+
+#ifdef DEBUG
+{
+ int i,j;
+ printf("\nColumn choices for each row\n");
+ for(i=0;i<ASSIGNROWS;i++)
+ {
+ printf("R%03d: ",i);
+ for(j=0;j<ASSIGNCOLS;j++)
+ if(assignedtableau[i][j]==1)
+ printf("%03d ",j);
+ }
+}
+#endif
+
+return;
+}
+
+/***********************
+** calc_minimum_costs **
+************************
+** Revise the tableau by calculating the minimum costs on a
+** row and column basis. These minima are subtracted from
+** their rows and columns, creating a new tableau.
+*/
+static void calc_minimum_costs(long tableau[][ASSIGNCOLS])
+{
+ushort i,j; /* Index variables */
+long currentmin; /* Current minimum */
+/*
+** Determine minimum costs on row basis. This is done by
+** subtracting -- on a row-per-row basis -- the minum value
+** for that row.
+*/
+for(i=0;i<ASSIGNROWS;i++)
+{
+ currentmin=MAXPOSLONG; /* Initialize minimum */
+ for(j=0;j<ASSIGNCOLS;j++)
+ if(tableau[i][j]<currentmin)
+ currentmin=tableau[i][j];
+
+ for(j=0;j<ASSIGNCOLS;j++)
+ tableau[i][j]-=currentmin;
+}
+
+/*
+** Determine minimum cost on a column basis. This works
+** just as above, only now we step through the array
+** column-wise
+*/
+for(j=0;j<ASSIGNCOLS;j++)
+{
+ currentmin=MAXPOSLONG; /* Initialize minimum */
+ for(i=0;i<ASSIGNROWS;i++)
+ if(tableau[i][j]<currentmin)
+ currentmin=tableau[i][j];
+
+ /*
+ ** Here, we'll take the trouble to see if the current
+ ** minimum is zero. This is likely worth it, since the
+ ** preceding loop will have created at least one zero in
+ ** each row. We can save ourselves a few iterations.
+ */
+ if(currentmin!=0)
+ for(i=0;i<ASSIGNROWS;i++)
+ tableau[i][j]-=currentmin;
+}
+
+return;
+}
+
+/**********************
+** first_assignments **
+***********************
+** Do first assignments.
+** The assignedtableau[] array holds a set of values that
+** indicate the assignment of a value, or its elimination.
+** The values are:
+** 0 = Item is neither assigned nor eliminated.
+** 1 = Item is assigned
+** 2 = Item is eliminated
+** Returns the number of selections made. If this equals
+** the number of rows, then an optimum has been determined.
+*/
+static int first_assignments(long tableau[][ASSIGNCOLS],
+ short assignedtableau[][ASSIGNCOLS])
+{
+ushort i,j,k; /* Index variables */
+ushort numassigns; /* # of assignments */
+ushort totnumassigns; /* Total # of assignments */
+ushort numzeros; /* # of zeros in row */
+int selected=0; /* Flag used to indicate selection */
+
+/*
+** Clear the assignedtableau, setting all members to show that
+** no one is yet assigned, eliminated, or anything.
+*/
+for(i=0;i<ASSIGNROWS;i++)
+ for(j=0;j<ASSIGNCOLS;j++)
+ assignedtableau[i][j]=0;
+
+totnumassigns=0;
+do {
+ numassigns=0;
+ /*
+ ** Step through rows. For each one that is not currently
+ ** assigned, see if the row has only one zero in it. If so,
+ ** mark that as an assigned row/col. Eliminate other zeros
+ ** in the same column.
+ */
+ for(i=0;i<ASSIGNROWS;i++)
+ { numzeros=0;
+ for(j=0;j<ASSIGNCOLS;j++)
+ if(tableau[i][j]==0L)
+ if(assignedtableau[i][j]==0)
+ { numzeros++;
+ selected=j;
+ }
+ if(numzeros==1)
+ { numassigns++;
+ totnumassigns++;
+ assignedtableau[i][selected]=1;
+ for(k=0;k<ASSIGNROWS;k++)
+ if((k!=i) &&
+ (tableau[k][selected]==0))
+ assignedtableau[k][selected]=2;
+ }
+ }
+ /*
+ ** Step through columns, doing same as above. Now, be careful
+ ** of items in the other rows of a selected column.
+ */
+ for(j=0;j<ASSIGNCOLS;j++)
+ { numzeros=0;
+ for(i=0;i<ASSIGNROWS;i++)
+ if(tableau[i][j]==0L)
+ if(assignedtableau[i][j]==0)
+ { numzeros++;
+ selected=i;
+ }
+ if(numzeros==1)
+ { numassigns++;
+ totnumassigns++;
+ assignedtableau[selected][j]=1;
+ for(k=0;k<ASSIGNCOLS;k++)
+ if((k!=j) &&
+ (tableau[selected][k]==0))
+ assignedtableau[selected][k]=2;
+ }
+ }
+ /*
+ ** Repeat until no more assignments to be made.
+ */
+} while(numassigns!=0);
+
+/*
+** See if we can leave at this point.
+*/
+if(totnumassigns==ASSIGNROWS) return(totnumassigns);
+
+/*
+** Now step through the array by row. If you find any unassigned
+** zeros, pick the first in the row. Eliminate all zeros from
+** that same row & column. This occurs if there are multiple optima...
+** possibly.
+*/
+for(i=0;i<ASSIGNROWS;i++)
+{ selected=-1;
+ for(j=0;j<ASSIGNCOLS;j++)
+ if((tableau[i][j]==0L) &&
+ (assignedtableau[i][j]==0))
+ { selected=j;
+ break;
+ }
+ if(selected!=-1)
+ { assignedtableau[i][selected]=1;
+ totnumassigns++;
+ for(k=0;k<ASSIGNCOLS;k++)
+ if((k!=selected) &&
+ (tableau[i][k]==0L))
+ assignedtableau[i][k]=2;
+ for(k=0;k<ASSIGNROWS;k++)
+ if((k!=i) &&
+ (tableau[k][selected]==0L))
+ assignedtableau[k][selected]=2;
+ }
+}
+
+return(totnumassigns);
+}
+
+/***********************
+** second_assignments **
+************************
+** This section of the algorithm creates the revised
+** tableau, and is difficult to explain. I suggest you
+** refer to the algorithm's source, mentioned in comments
+** toward the beginning of the program.
+*/
+static void second_assignments(long tableau[][ASSIGNCOLS],
+ short assignedtableau[][ASSIGNCOLS])
+{
+int i,j; /* Indexes */
+short linesrow[ASSIGNROWS];
+short linescol[ASSIGNCOLS];
+long smallest; /* Holds smallest value */
+ushort numassigns; /* Number of assignments */
+ushort newrows; /* New rows to be considered */
+/*
+** Clear the linesrow and linescol arrays.
+*/
+for(i=0;i<ASSIGNROWS;i++)
+ linesrow[i]=0;
+for(i=0;i<ASSIGNCOLS;i++)
+ linescol[i]=0;
+
+/*
+** Scan rows, flag each row that has no assignment in it.
+*/
+for(i=0;i<ASSIGNROWS;i++)
+{ numassigns=0;
+ for(j=0;j<ASSIGNCOLS;j++)
+ if(assignedtableau[i][j]==1)
+ { numassigns++;
+ break;
+ }
+ if(numassigns==0) linesrow[i]=1;
+}
+
+do {
+
+ newrows=0;
+ /*
+ ** For each row checked above, scan for any zeros. If found,
+ ** check the associated column.
+ */
+ for(i=0;i<ASSIGNROWS;i++)
+ { if(linesrow[i]==1)
+ for(j=0;j<ASSIGNCOLS;j++)
+ if(tableau[i][j]==0)
+ linescol[j]=1;
+ }
+
+ /*
+ ** Now scan checked columns. If any contain assigned zeros, check
+ ** the associated row.
+ */
+ for(j=0;j<ASSIGNCOLS;j++)
+ if(linescol[j]==1)
+ for(i=0;i<ASSIGNROWS;i++)
+ if((assignedtableau[i][j]==1) &&
+ (linesrow[i]!=1))
+ {
+ linesrow[i]=1;
+ newrows++;
+ }
+} while(newrows!=0);
+
+/*
+** linesrow[n]==0 indicate rows covered by imaginary line
+** linescol[n]==1 indicate cols covered by imaginary line
+** For all cells not covered by imaginary lines, determine smallest
+** value.
+*/
+smallest=MAXPOSLONG;
+for(i=0;i<ASSIGNROWS;i++)
+ if(linesrow[i]!=0)
+ for(j=0;j<ASSIGNCOLS;j++)
+ if(linescol[j]!=1)
+ if(tableau[i][j]<smallest)
+ smallest=tableau[i][j];
+
+/*
+** Subtract smallest from all cells in the above set.
+*/
+for(i=0;i<ASSIGNROWS;i++)
+ if(linesrow[i]!=0)
+ for(j=0;j<ASSIGNCOLS;j++)
+ if(linescol[j]!=1)
+ tableau[i][j]-=smallest;
+
+/*
+** Add smallest to all cells covered by two lines.
+*/
+for(i=0;i<ASSIGNROWS;i++)
+ if(linesrow[i]==0)
+ for(j=0;j<ASSIGNCOLS;j++)
+ if(linescol[j]==1)
+ tableau[i][j]+=smallest;
+
+return;
+}
+
+/********************
+** IDEA Encryption **
+*********************
+** IDEA - International Data Encryption Algorithm.
+** Based on code presented in Applied Cryptography by Bruce Schneier.
+** Which was based on code developed by Xuejia Lai and James L. Massey.
+** Other modifications made by Colin Plumb.
+**
+*/
+
+/***********
+** DoIDEA **
+************
+** Perform IDEA encryption. Note that we time encryption & decryption
+** time as being a single loop.
+*/
+void DoIDEA(void)
+{
+IDEAStruct *locideastruct; /* Loc pointer to global structure */
+int i;
+IDEAkey Z,DK;
+u16 userkey[8];
+ulong accumtime;
+double iterations;
+char *errorcontext;
+int systemerror;
+faruchar *plain1; /* First plaintext buffer */
+faruchar *crypt1; /* Encryption buffer */
+faruchar *plain2; /* Second plaintext buffer */
+
+/*
+** Link to global data
+*/
+locideastruct=&global_ideastruct;
+
+/*
+** Set error context
+*/
+errorcontext="CPU:IDEA";
+
+/*
+** Re-init random-number generator.
+*/
+/* randnum(3L); */
+randnum((int32)3);
+
+/*
+** Build an encryption/decryption key
+*/
+for (i=0;i<8;i++)
+ /* userkey[i]=(u16)(abs_randwc(60000L) & 0xFFFF); */
+ userkey[i]=(u16)(abs_randwc((int32)60000) & 0xFFFF);
+for(i=0;i<KEYLEN;i++)
+ Z[i]=0;
+
+/*
+** Compute encryption/decryption subkeys
+*/
+en_key_idea(userkey,Z);
+de_key_idea(Z,DK);
+
+/*
+** Allocate memory for buffers. We'll make 3, called plain1,
+** crypt1, and plain2. It works like this:
+** plain1 >>encrypt>> crypt1 >>decrypt>> plain2.
+** So, plain1 and plain2 should match.
+** Also, fill up plain1 with sample text.
+*/
+plain1=(faruchar *)AllocateMemory(locideastruct->arraysize,&systemerror);
+if(systemerror)
+{
+ ReportError(errorcontext,systemerror);
+ ErrorExit();
+}
+
+crypt1=(faruchar *)AllocateMemory(locideastruct->arraysize,&systemerror);
+if(systemerror)
+{
+ ReportError(errorcontext,systemerror);
+ FreeMemory((farvoid *)plain1,&systemerror);
+ ErrorExit();
+}
+
+plain2=(faruchar *)AllocateMemory(locideastruct->arraysize,&systemerror);
+if(systemerror)
+{
+ ReportError(errorcontext,systemerror);
+ FreeMemory((farvoid *)plain1,&systemerror);
+ FreeMemory((farvoid *)crypt1,&systemerror);
+ ErrorExit();
+}
+/*
+** Note that we build the "plaintext" by simply loading
+** the array up with random numbers.
+*/
+for(i=0;i<locideastruct->arraysize;i++)
+ plain1[i]=(uchar)(abs_randwc(255) & 0xFF);
+
+/*
+** See if we need to perform self adjustment loop.
+*/
+if(locideastruct->adjust==0)
+{
+ /*
+ ** Do self-adjustment. This involves initializing the
+ ** # of loops and increasing the loop count until we
+ ** get a number of loops that we can use.
+ */
+ for(locideastruct->loops=100L;
+ locideastruct->loops<MAXIDEALOOPS;
+ locideastruct->loops+=10L)
+ if(DoIDEAIteration(plain1,crypt1,plain2,
+ locideastruct->arraysize,
+ locideastruct->loops,
+ Z,DK)>global_min_ticks) break;
+}
+
+/*
+** All's well if we get here. Do the test.
+*/
+accumtime=0L;
+iterations=(double)0.0;
+
+do {
+ accumtime+=DoIDEAIteration(plain1,crypt1,plain2,
+ locideastruct->arraysize,
+ locideastruct->loops,Z,DK);
+ iterations+=(double)locideastruct->loops;
+} while(TicksToSecs(accumtime)<locideastruct->request_secs);
+
+/*
+** Clean up, calculate results, and go home. Be sure to
+** show that we don't have to rerun adjustment code.
+*/
+FreeMemory((farvoid *)plain1,&systemerror);
+FreeMemory((farvoid *)crypt1,&systemerror);
+FreeMemory((farvoid *)plain2,&systemerror);
+locideastruct->iterspersec=iterations / TicksToFracSecs(accumtime);
+
+if(locideastruct->adjust==0)
+ locideastruct->adjust=1;
+
+return;
+
+}
+
+/********************
+** DoIDEAIteration **
+*********************
+** Execute a single iteration of the IDEA encryption algorithm.
+** Actually, a single iteration is one encryption and one
+** decryption.
+*/
+static ulong DoIDEAIteration(faruchar *plain1,
+ faruchar *crypt1,
+ faruchar *plain2,
+ ulong arraysize,
+ ulong nloops,
+ IDEAkey Z,
+ IDEAkey DK)
+{
+register ulong i;
+register ulong j;
+ulong elapsed;
+#ifdef DEBUG
+int status=0;
+#endif
+
+/*
+** Start the stopwatch.
+*/
+elapsed=StartStopwatch();
+
+/*
+** Do everything for nloops.
+*/
+for(i=0;i<nloops;i++)
+{
+ for(j=0;j<arraysize;j+=(sizeof(u16)*4))
+ cipher_idea((u16 *)(plain1+j),(u16 *)(crypt1+j),Z); /* Encrypt */
+
+ for(j=0;j<arraysize;j+=(sizeof(u16)*4))
+ cipher_idea((u16 *)(crypt1+j),(u16 *)(plain2+j),DK); /* Decrypt */
+}
+
+#ifdef DEBUG
+for(j=0;j<arraysize;j++)
+ if(*(plain1+j)!=*(plain2+j)){
+ printf("IDEA Error! \n");
+ status=1;
+ }
+if (status==0) printf("IDEA: OK\n");
+#endif
+
+/*
+** Get elapsed time.
+*/
+return(StopStopwatch(elapsed));
+}
+
+/********
+** mul **
+*********
+** Performs multiplication, modulo (2**16)+1. This code is structured
+** on the assumption that untaken branches are cheaper than taken
+** branches, and that the compiler doesn't schedule branches.
+*/
+static u16 mul(register u16 a, register u16 b)
+{
+register u32 p;
+if(a)
+{ if(b)
+ { p=(u32)(a*b);
+ b=low16(p);
+ a=(u16)(p>>16);
+ return(b-a+(b<a));
+ }
+ else
+ return(1-a);
+}
+else
+ return(1-b);
+}
+
+/********
+** inv **
+*********
+** Compute multiplicative inverse of x, modulo (2**16)+1
+** using Euclid's GCD algorithm. It is unrolled twice
+** to avoid swapping the meaning of the registers. And
+** some subtracts are changed to adds.
+*/
+static u16 inv(u16 x)
+{
+u16 t0, t1;
+u16 q, y;
+
+if(x<=1)
+ return(x); /* 0 and 1 are self-inverse */
+t1=0x10001 / x;
+y=0x10001 % x;
+if(y==1)
+ return(low16(1-t1));
+t0=1;
+do {
+ q=x/y;
+ x=x%y;
+ t0+=q*t1;
+ if(x==1) return(t0);
+ q=y/x;
+ y=y%x;
+ t1+=q*t0;
+} while(y!=1);
+return(low16(1-t1));
+}
+
+/****************
+** en_key_idea **
+*****************
+** Compute IDEA encryption subkeys Z
+*/
+static void en_key_idea(u16 *userkey, u16 *Z)
+{
+int i,j;
+
+/*
+** shifts
+*/
+for(j=0;j<8;j++)
+ Z[j]=*userkey++;
+for(i=0;j<KEYLEN;j++)
+{ i++;
+ Z[i+7]=(Z[i&7]<<9)| (Z[(i+1) & 7] >> 7);
+ Z+=i&8;
+ i&=7;
+}
+return;
+}
+
+/****************
+** de_key_idea **
+*****************
+** Compute IDEA decryption subkeys DK from encryption
+** subkeys Z.
+*/
+static void de_key_idea(IDEAkey Z, IDEAkey DK)
+{
+IDEAkey TT;
+int j;
+u16 t1, t2, t3;
+u16 *p;
+p=(u16 *)(TT+KEYLEN);
+
+t1=inv(*Z++);
+t2=-*Z++;
+t3=-*Z++;
+*--p=inv(*Z++);
+*--p=t3;
+*--p=t2;
+*--p=t1;
+
+for(j=1;j<ROUNDS;j++)
+{ t1=*Z++;
+ *--p=*Z++;
+ *--p=t1;
+ t1=inv(*Z++);
+ t2=-*Z++;
+ t3=-*Z++;
+ *--p=inv(*Z++);
+ *--p=t2;
+ *--p=t3;
+ *--p=t1;
+}
+t1=*Z++;
+*--p=*Z++;
+*--p=t1;
+t1=inv(*Z++);
+t2=-*Z++;
+t3=-*Z++;
+*--p=inv(*Z++);
+*--p=t3;
+*--p=t2;
+*--p=t1;
+/*
+** Copy and destroy temp copy
+*/
+for(j=0,p=TT;j<KEYLEN;j++)
+{ *DK++=*p;
+ *p++=0;
+}
+
+return;
+}
+
+/*
+** MUL(x,y)
+** This #define creates a macro that computes x=x*y modulo 0x10001.
+** Requires temps t16 and t32. Also requires y to be strictly 16
+** bits. Here, I am using the simplest form. May not be the
+** fastest. -- RG
+*/
+/* #define MUL(x,y) (x=mul(low16(x),y)) */
+
+/****************
+** cipher_idea **
+*****************
+** IDEA encryption/decryption algorithm.
+*/
+static void cipher_idea(u16 in[4],
+ u16 out[4],
+ register IDEAkey Z)
+{
+register u16 x1, x2, x3, x4, t1, t2;
+/* register u16 t16;
+register u16 t32; */
+int r=ROUNDS;
+
+x1=*in++;
+x2=*in++;
+x3=*in++;
+x4=*in;
+
+do {
+ MUL(x1,*Z++);
+ x2+=*Z++;
+ x3+=*Z++;
+ MUL(x4,*Z++);
+
+ t2=x1^x3;
+ MUL(t2,*Z++);
+ t1=t2+(x2^x4);
+ MUL(t1,*Z++);
+ t2=t1+t2;
+
+ x1^=t1;
+ x4^=t2;
+
+ t2^=x2;
+ x2=x3^t1;
+ x3=t2;
+} while(--r);
+MUL(x1,*Z++);
+*out++=x1;
+*out++=x3+*Z++;
+*out++=x2+*Z++;
+MUL(x4,*Z);
+*out=x4;
+return;
+}
+
+/************************
+** HUFFMAN COMPRESSION **
+************************/
+
+/**************
+** DoHuffman **
+***************
+** Execute a huffman compression on a block of plaintext.
+** Note that (as with IDEA encryption) an iteration of the
+** Huffman test includes a compression AND a decompression.
+** Also, the compression cycle includes building the
+** Huffman tree.
+*/
+void DoHuffman(void)
+{
+HuffStruct *lochuffstruct; /* Loc pointer to global data */
+char *errorcontext;
+int systemerror;
+ulong accumtime;
+double iterations;
+farchar *comparray;
+farchar *decomparray;
+farchar *plaintext;
+
+/*
+** Link to global data
+*/
+lochuffstruct=&global_huffstruct;
+
+/*
+** Set error context.
+*/
+errorcontext="CPU:Huffman";
+
+/*
+** Allocate memory for the plaintext and the compressed text.
+** We'll be really pessimistic here, and allocate equal amounts
+** for both (though we know...well, we PRESUME) the compressed
+** stuff will take less than the plain stuff.
+** Also note that we'll build a 3rd buffer to decompress
+** into, and we preallocate space for the huffman tree.
+** (We presume that the Huffman tree will grow no larger
+** than 512 bytes. This is actually a super-conservative
+** estimate...but, who cares?)
+*/
+plaintext=(farchar *)AllocateMemory(lochuffstruct->arraysize,&systemerror);
+if(systemerror)
+{ ReportError(errorcontext,systemerror);
+ ErrorExit();
+}
+comparray=(farchar *)AllocateMemory(lochuffstruct->arraysize,&systemerror);
+if(systemerror)
+{ ReportError(errorcontext,systemerror);
+ FreeMemory(plaintext,&systemerror);
+ ErrorExit();
+}
+decomparray=(farchar *)AllocateMemory(lochuffstruct->arraysize,&systemerror);
+if(systemerror)
+{ ReportError(errorcontext,systemerror);
+ FreeMemory(plaintext,&systemerror);
+ FreeMemory(comparray,&systemerror);
+ ErrorExit();
+}
+
+hufftree=(huff_node *)AllocateMemory(sizeof(huff_node) * 512,
+ &systemerror);
+if(systemerror)
+{ ReportError(errorcontext,systemerror);
+ FreeMemory(plaintext,&systemerror);
+ FreeMemory(comparray,&systemerror);
+ FreeMemory(decomparray,&systemerror);
+ ErrorExit();
+}
+
+/*
+** Build the plaintext buffer. Since we want this to
+** actually be able to compress, we'll use the
+** wordcatalog to build the plaintext stuff.
+*/
+/*
+** Reset random number generator so things repeat.
+** added by Uwe F. Mayer
+*/
+randnum((int32)13);
+create_text_block(plaintext,lochuffstruct->arraysize-1,(ushort)500);
+plaintext[lochuffstruct->arraysize-1L]='\0';
+plaintextlen=lochuffstruct->arraysize;
+
+/*
+** See if we need to perform self adjustment loop.
+*/
+if(lochuffstruct->adjust==0)
+{
+ /*
+ ** Do self-adjustment. This involves initializing the
+ ** # of loops and increasing the loop count until we
+ ** get a number of loops that we can use.
+ */
+ for(lochuffstruct->loops=100L;
+ lochuffstruct->loops<MAXHUFFLOOPS;
+ lochuffstruct->loops+=10L)
+ if(DoHuffIteration(plaintext,
+ comparray,
+ decomparray,
+ lochuffstruct->arraysize,
+ lochuffstruct->loops,
+ hufftree)>global_min_ticks) break;
+}
+
+/*
+** All's well if we get here. Do the test.
+*/
+accumtime=0L;
+iterations=(double)0.0;
+
+do {
+ accumtime+=DoHuffIteration(plaintext,
+ comparray,
+ decomparray,
+ lochuffstruct->arraysize,
+ lochuffstruct->loops,
+ hufftree);
+ iterations+=(double)lochuffstruct->loops;
+} while(TicksToSecs(accumtime)<lochuffstruct->request_secs);
+
+/*
+** Clean up, calculate results, and go home. Be sure to
+** show that we don't have to rerun adjustment code.
+*/
+FreeMemory((farvoid *)plaintext,&systemerror);
+FreeMemory((farvoid *)comparray,&systemerror);
+FreeMemory((farvoid *)decomparray,&systemerror);
+FreeMemory((farvoid *)hufftree,&systemerror);
+lochuffstruct->iterspersec=iterations / TicksToFracSecs(accumtime);
+
+if(lochuffstruct->adjust==0)
+ lochuffstruct->adjust=1;
+
+}
+
+/*********************
+** create_text_line **
+**********************
+** Create a random line of text, stored at *dt. The line may be
+** no more than nchars long.
+*/
+static void create_text_line(farchar *dt,
+ long nchars)
+{
+long charssofar; /* # of characters so far */
+long tomove; /* # of characters to move */
+char myword[40]; /* Local buffer for words */
+farchar *wordptr; /* Pointer to word from catalog */
+
+charssofar=0;
+
+do {
+/*
+** Grab a random word from the wordcatalog
+*/
+/* wordptr=wordcatarray[abs_randwc((long)WORDCATSIZE)];*/
+wordptr=wordcatarray[abs_randwc((int32)WORDCATSIZE)];
+MoveMemory((farvoid *)myword,
+ (farvoid *)wordptr,
+ (unsigned long)strlen(wordptr)+1);
+
+/*
+** Append a blank.
+*/
+tomove=strlen(myword)+1;
+myword[tomove-1]=' ';
+
+/*
+** See how long it is. If its length+charssofar > nchars, we have
+** to trim it.
+*/
+if((tomove+charssofar)>nchars)
+ tomove=nchars-charssofar;
+/*
+** Attach the word to the current line. Increment counter.
+*/
+MoveMemory((farvoid *)dt,(farvoid *)myword,(unsigned long)tomove);
+charssofar+=tomove;
+dt+=tomove;
+
+/*
+** If we're done, bail out. Otherwise, go get another word.
+*/
+} while(charssofar<nchars);
+
+return;
+}
+
+/**********************
+** create_text_block **
+***********************
+** Build a block of text randomly loaded with words. The words
+** come from the wordcatalog (which must be loaded before you
+** call this).
+** *tb points to the memory where the text is to be built.
+** tblen is the # of bytes to put into the text block
+** maxlinlen is the maximum length of any line (line end indicated
+** by a carriage return).
+*/
+static void create_text_block(farchar *tb,
+ ulong tblen,
+ ushort maxlinlen)
+{
+ulong bytessofar; /* # of bytes so far */
+ulong linelen; /* Line length */
+
+bytessofar=0L;
+do {
+
+/*
+** Pick a random length for a line and fill the line.
+** Make sure the line can fit (haven't exceeded tablen) and also
+** make sure you leave room to append a carriage return.
+*/
+linelen=abs_randwc(maxlinlen-6)+6;
+if((linelen+bytessofar)>tblen)
+ linelen=tblen-bytessofar;
+
+if(linelen>1)
+{
+ create_text_line(tb,linelen);
+}
+tb+=linelen-1; /* Add the carriage return */
+*tb++='\n';
+
+bytessofar+=linelen;
+
+} while(bytessofar<tblen);
+
+}
+
+/********************
+** DoHuffIteration **
+*********************
+** Perform the huffman benchmark. This routine
+** (a) Builds the huffman tree
+** (b) Compresses the text
+** (c) Decompresses the text and verifies correct decompression
+*/
+static ulong DoHuffIteration(farchar *plaintext,
+ farchar *comparray,
+ farchar *decomparray,
+ ulong arraysize,
+ ulong nloops,
+ huff_node *hufftree)
+{
+int i; /* Index */
+long j; /* Bigger index */
+int root; /* Pointer to huffman tree root */
+float lowfreq1, lowfreq2; /* Low frequency counters */
+int lowidx1, lowidx2; /* Indexes of low freq. elements */
+long bitoffset; /* Bit offset into text */
+long textoffset; /* Char offset into text */
+long maxbitoffset; /* Holds limit of bit offset */
+long bitstringlen; /* Length of bitstring */
+int c; /* Character from plaintext */
+char bitstring[30]; /* Holds bitstring */
+ulong elapsed; /* For stopwatch */
+#ifdef DEBUG
+int status=0;
+#endif
+
+/*
+** Start the stopwatch
+*/
+elapsed=StartStopwatch();
+
+/*
+** Do everything for nloops
+*/
+while(nloops--)
+{
+
+/*
+** Calculate the frequency of each byte value. Store the
+** results in what will become the "leaves" of the
+** Huffman tree. Interior nodes will be built in those
+** nodes greater than node #255.
+*/
+for(i=0;i<256;i++)
+{
+ hufftree[i].freq=(float)0.0;
+ hufftree[i].c=(unsigned char)i;
+}
+
+for(j=0;j<arraysize;j++)
+ hufftree[(int)plaintext[j]].freq+=(float)1.0;
+
+for(i=0;i<256;i++)
+ if(hufftree[i].freq != (float)0.0)
+ hufftree[i].freq/=(float)arraysize;
+
+/* Reset the second half of the tree. Otherwise the loop below that
+** compares the frequencies up to index 512 makes no sense. Some
+** systems automatically zero out memory upon allocation, others (like
+** for example DEC Unix) do not. Depending on this the loop below gets
+** different data and different run times. On our alpha the data that
+** was arbitrarily assigned led to an underflow error at runtime. We
+** use that zeroed-out bits are in fact 0 as a float.
+** Uwe F. Mayer */
+bzero((char *)&(hufftree[256]),sizeof(huff_node)*256);
+/*
+** Build the huffman tree. First clear all the parent
+** pointers and left/right pointers. Also, discard all
+** nodes that have a frequency of true 0. */
+for(i=0;i<512;i++)
+{ if(hufftree[i].freq==(float)0.0)
+ hufftree[i].parent=EXCLUDED;
+ else
+ hufftree[i].parent=hufftree[i].left=hufftree[i].right=-1;
+}
+
+/*
+** Go through the tree. Finding nodes of really low
+** frequency.
+*/
+root=255; /* Starting root node-1 */
+while(1)
+{
+ lowfreq1=(float)2.0; lowfreq2=(float)2.0;
+ lowidx1=-1; lowidx2=-1;
+ /*
+ ** Find first lowest frequency.
+ */
+ for(i=0;i<=root;i++)
+ if(hufftree[i].parent<0)
+ if(hufftree[i].freq<lowfreq1)
+ { lowfreq1=hufftree[i].freq;
+ lowidx1=i;
+ }
+
+ /*
+ ** Did we find a lowest value? If not, the
+ ** tree is done.
+ */
+ if(lowidx1==-1) break;
+
+ /*
+ ** Find next lowest frequency
+ */
+ for(i=0;i<=root;i++)
+ if((hufftree[i].parent<0) && (i!=lowidx1))
+ if(hufftree[i].freq<lowfreq2)
+ { lowfreq2=hufftree[i].freq;
+ lowidx2=i;
+ }
+
+ /*
+ ** If we could only find one item, then that
+ ** item is surely the root, and (as above) the
+ ** tree is done.
+ */
+ if(lowidx2==-1) break;
+
+ /*
+ ** Attach the two new nodes to the current root, and
+ ** advance the current root.
+ */
+ root++; /* New root */
+ hufftree[lowidx1].parent=root;
+ hufftree[lowidx2].parent=root;
+ hufftree[root].freq=lowfreq1+lowfreq2;
+ hufftree[root].left=lowidx1;
+ hufftree[root].right=lowidx2;
+ hufftree[root].parent=-2; /* Show root */
+}
+
+/*
+** Huffman tree built...compress the plaintext
+*/
+bitoffset=0L; /* Initialize bit offset */
+for(i=0;i<arraysize;i++)
+{
+ c=(int)plaintext[i]; /* Fetch character */
+ /*
+ ** Build a bit string for byte c
+ */
+ bitstringlen=0;
+ while(hufftree[c].parent!=-2)
+ { if(hufftree[hufftree[c].parent].left==c)
+ bitstring[bitstringlen]='0';
+ else
+ bitstring[bitstringlen]='1';
+ c=hufftree[c].parent;
+ bitstringlen++;
+ }
+
+ /*
+ ** Step backwards through the bit string, setting
+ ** bits in the compressed array as you go.
+ */
+ while(bitstringlen--)
+ { SetCompBit((u8 *)comparray,(u32)bitoffset,bitstring[bitstringlen]);
+ bitoffset++;
+ }
+}
+
+/*
+** Compression done. Perform de-compression.
+*/
+maxbitoffset=bitoffset;
+bitoffset=0;
+textoffset=0;
+do {
+ i=root;
+ while(hufftree[i].left!=-1)
+ { if(GetCompBit((u8 *)comparray,(u32)bitoffset)==0)
+ i=hufftree[i].left;
+ else
+ i=hufftree[i].right;
+ bitoffset++;
+ }
+ decomparray[textoffset]=hufftree[i].c;
+
+#ifdef DEBUG
+ if(hufftree[i].c != plaintext[textoffset])
+ {
+ /* Show error */
+ printf("Error at textoffset %ld\n",textoffset);
+ status=1;
+ }
+#endif
+ textoffset++;
+} while(bitoffset<maxbitoffset);
+
+} /* End the big while(nloops--) from above */
+
+/*
+** All done
+*/
+#ifdef DEBUG
+ if (status==0) printf("Huffman: OK\n");
+#endif
+return(StopStopwatch(elapsed));
+}
+
+/***************
+** SetCompBit **
+****************
+** Set a bit in the compression array. The value of the
+** bit is set according to char bitchar.
+*/
+static void SetCompBit(u8 *comparray,
+ u32 bitoffset,
+ char bitchar)
+{
+u32 byteoffset;
+int bitnumb;
+
+/*
+** First calculate which element in the comparray to
+** alter. and the bitnumber.
+*/
+byteoffset=bitoffset>>3;
+bitnumb=bitoffset % 8;
+
+/*
+** Set or clear
+*/
+if(bitchar=='1')
+ comparray[byteoffset]|=(1<<bitnumb);
+else
+ comparray[byteoffset]&=~(1<<bitnumb);
+
+return;
+}
+
+/***************
+** GetCompBit **
+****************
+** Return the bit value of a bit in the comparession array.
+** Returns 0 if the bit is clear, nonzero otherwise.
+*/
+static int GetCompBit(u8 *comparray,
+ u32 bitoffset)
+{
+u32 byteoffset;
+int bitnumb;
+
+/*
+** Calculate byte offset and bit number.
+*/
+byteoffset=bitoffset>>3;
+bitnumb=bitoffset % 8;
+
+/*
+** Fetch
+*/
+return((1<<bitnumb) & comparray[byteoffset] );
+}
+
+/********************************
+** BACK PROPAGATION NEURAL NET **
+*********************************
+** This code is a modified version of the code
+** that was submitted to BYTE Magazine by
+** Maureen Caudill. It accomanied an article
+** that I CANNOT NOW RECALL.
+** The author's original heading/comment was
+** as follows:
+**
+** Backpropagation Network
+** Written by Maureen Caudill
+** in Think C 4.0 on a Macintosh
+**
+** (c) Maureen Caudill 1988-1991
+** This network will accept 5x7 input patterns
+** and produce 8 bit output patterns.
+** The source code may be copied or modified without restriction,
+** but no fee may be charged for its use.
+**
+** ++++++++++++++
+** I have modified the code so that it will work
+** on systems other than a Macintosh -- RG
+*/
+
+/***********
+** DoNNet **
+************
+** Perform the neural net benchmark.
+** Note that this benchmark is one of the few that
+** requires an input file. That file is "NNET.DAT" and
+** should be on the local directory (from which the
+** benchmark program in launched).
+*/
+void DoNNET(void)
+{
+NNetStruct *locnnetstruct; /* Local ptr to global data */
+char *errorcontext;
+ulong accumtime;
+double iterations;
+
+/*
+** Link to global data
+*/
+locnnetstruct=&global_nnetstruct;
+
+/*
+** Set error context
+*/
+errorcontext="CPU:NNET";
+
+/*
+** Init random number generator.
+** NOTE: It is important that the random number generator
+** be re-initialized for every pass through this test.
+** The NNET algorithm uses the random number generator
+** to initialize the net. Results are sensitive to
+** the initial neural net state.
+*/
+/* randnum(3L); */
+randnum((int32)3);
+
+/*
+** Read in the input and output patterns. We'll do this
+** only once here at the beginning. These values don't
+** change once loaded.
+*/
+if(read_data_file()!=0)
+ ErrorExit();
+
+
+/*
+** See if we need to perform self adjustment loop.
+*/
+if(locnnetstruct->adjust==0)
+{
+ /*
+ ** Do self-adjustment. This involves initializing the
+ ** # of loops and increasing the loop count until we
+ ** get a number of loops that we can use.
+ */
+ for(locnnetstruct->loops=1L;
+ locnnetstruct->loops<MAXNNETLOOPS;
+ locnnetstruct->loops++)
+ { /*randnum(3L); */
+ randnum((int32)3);
+ if(DoNNetIteration(locnnetstruct->loops)
+ >global_min_ticks) break;
+ }
+}
+
+/*
+** All's well if we get here. Do the test.
+*/
+accumtime=0L;
+iterations=(double)0.0;
+
+do {
+ /* randnum(3L); */ /* Gotta do this for Neural Net */
+ randnum((int32)3); /* Gotta do this for Neural Net */
+ accumtime+=DoNNetIteration(locnnetstruct->loops);
+ iterations+=(double)locnnetstruct->loops;
+} while(TicksToSecs(accumtime)<locnnetstruct->request_secs);
+
+/*
+** Clean up, calculate results, and go home. Be sure to
+** show that we don't have to rerun adjustment code.
+*/
+locnnetstruct->iterspersec=iterations / TicksToFracSecs(accumtime);
+
+if(locnnetstruct->adjust==0)
+ locnnetstruct->adjust=1;
+
+
+return;
+}
+
+/********************
+** DoNNetIteration **
+*********************
+** Do a single iteration of the neural net benchmark.
+** By iteration, we mean a "learning" pass.
+*/
+static ulong DoNNetIteration(ulong nloops)
+{
+ulong elapsed; /* Elapsed time */
+int patt;
+
+/*
+** Run nloops learning cycles. Notice that, counted with
+** the learning cycle is the weight randomization and
+** zeroing of changes. This should reduce clock jitter,
+** since we don't have to stop and start the clock for
+** each iteration.
+*/
+elapsed=StartStopwatch();
+while(nloops--)
+{
+ randomize_wts();
+ zero_changes();
+ iteration_count=1;
+ learned = F;
+ numpasses = 0;
+ while (learned == F)
+ {
+ for (patt=0; patt<numpats; patt++)
+ {
+ worst_error = 0.0; /* reset this every pass through data */
+ move_wt_changes(); /* move last pass's wt changes to momentum array */
+ do_forward_pass(patt);
+ do_back_pass(patt);
+ iteration_count++;
+ }
+ numpasses ++;
+ learned = check_out_error();
+ }
+#ifdef DEBUG
+printf("Learned in %d passes\n",numpasses);
+#endif
+}
+return(StopStopwatch(elapsed));
+}
+
+/*************************
+** do_mid_forward(patt) **
+**************************
+** Process the middle layer's forward pass
+** The activation of middle layer's neurode is the weighted
+** sum of the inputs from the input pattern, with sigmoid
+** function applied to the inputs.
+**/
+static void do_mid_forward(int patt)
+{
+double sum;
+int neurode, i;
+
+for (neurode=0;neurode<MID_SIZE; neurode++)
+{
+ sum = 0.0;
+ for (i=0; i<IN_SIZE; i++)
+ { /* compute weighted sum of input signals */
+ sum += mid_wts[neurode][i]*in_pats[patt][i];
+ }
+ /*
+ ** apply sigmoid function f(x) = 1/(1+exp(-x)) to weighted sum
+ */
+ sum = 1.0/(1.0+exp(-sum));
+ mid_out[neurode] = sum;
+}
+return;
+}
+
+/*********************
+** do_out_forward() **
+**********************
+** process the forward pass through the output layer
+** The activation of the output layer is the weighted sum of
+** the inputs (outputs from middle layer), modified by the
+** sigmoid function.
+**/
+static void do_out_forward()
+{
+double sum;
+int neurode, i;
+
+for (neurode=0; neurode<OUT_SIZE; neurode++)
+{
+ sum = 0.0;
+ for (i=0; i<MID_SIZE; i++)
+ { /*
+ ** compute weighted sum of input signals
+ ** from middle layer
+ */
+ sum += out_wts[neurode][i]*mid_out[i];
+ }
+ /*
+ ** Apply f(x) = 1/(1+exp(-x)) to weighted input
+ */
+ sum = 1.0/(1.0+exp(-sum));
+ out_out[neurode] = sum;
+}
+return;
+}
+
+/*************************
+** display_output(patt) **
+**************************
+** Display the actual output vs. the desired output of the
+** network.
+** Once the training is complete, and the "learned" flag set
+** to TRUE, then display_output sends its output to both
+** the screen and to a text output file.
+**
+** NOTE: This routine has been disabled in the benchmark
+** version. -- RG
+**/
+/*
+void display_output(int patt)
+{
+int i;
+
+ fprintf(outfile,"\n Iteration # %d",iteration_count);
+ fprintf(outfile,"\n Desired Output: ");
+
+ for (i=0; i<OUT_SIZE; i++)
+ {
+ fprintf(outfile,"%6.3f ",out_pats[patt][i]);
+ }
+ fprintf(outfile,"\n Actual Output: ");
+
+ for (i=0; i<OUT_SIZE; i++)
+ {
+ fprintf(outfile,"%6.3f ",out_out[i]);
+ }
+ fprintf(outfile,"\n");
+ return;
+}
+*/
+
+/**********************
+** do_forward_pass() **
+***********************
+** control function for the forward pass through the network
+** NOTE: I have disabled the call to display_output() in
+** the benchmark version -- RG.
+**/
+static void do_forward_pass(int patt)
+{
+do_mid_forward(patt); /* process forward pass, middle layer */
+do_out_forward(); /* process forward pass, output layer */
+/* display_output(patt); ** display results of forward pass */
+return;
+}
+
+/***********************
+** do_out_error(patt) **
+************************
+** Compute the error for the output layer neurodes.
+** This is simply Desired - Actual.
+**/
+static void do_out_error(int patt)
+{
+int neurode;
+double error,tot_error, sum;
+
+tot_error = 0.0;
+sum = 0.0;
+for (neurode=0; neurode<OUT_SIZE; neurode++)
+{
+ out_error[neurode] = out_pats[patt][neurode] - out_out[neurode];
+ /*
+ ** while we're here, also compute magnitude
+ ** of total error and worst error in this pass.
+ ** We use these to decide if we are done yet.
+ */
+ error = out_error[neurode];
+ if (error <0.0)
+ {
+ sum += -error;
+ if (-error > tot_error)
+ tot_error = -error; /* worst error this pattern */
+ }
+ else
+ {
+ sum += error;
+ if (error > tot_error)
+ tot_error = error; /* worst error this pattern */
+ }
+}
+avg_out_error[patt] = sum/OUT_SIZE;
+tot_out_error[patt] = tot_error;
+return;
+}
+
+/***********************
+** worst_pass_error() **
+************************
+** Find the worst and average error in the pass and save it
+**/
+static void worst_pass_error()
+{
+double error,sum;
+
+int i;
+
+error = 0.0;
+sum = 0.0;
+for (i=0; i<numpats; i++)
+{
+ if (tot_out_error[i] > error) error = tot_out_error[i];
+ sum += avg_out_error[i];
+}
+worst_error = error;
+average_error = sum/numpats;
+return;
+}
+
+/*******************
+** do_mid_error() **
+********************
+** Compute the error for the middle layer neurodes
+** This is based on the output errors computed above.
+** Note that the derivative of the sigmoid f(x) is
+** f'(x) = f(x)(1 - f(x))
+** Recall that f(x) is merely the output of the middle
+** layer neurode on the forward pass.
+**/
+static void do_mid_error()
+{
+double sum;
+int neurode, i;
+
+for (neurode=0; neurode<MID_SIZE; neurode++)
+{
+ sum = 0.0;
+ for (i=0; i<OUT_SIZE; i++)
+ sum += out_wts[i][neurode]*out_error[i];
+
+ /*
+ ** apply the derivative of the sigmoid here
+ ** Because of the choice of sigmoid f(I), the derivative
+ ** of the sigmoid is f'(I) = f(I)(1 - f(I))
+ */
+ mid_error[neurode] = mid_out[neurode]*(1-mid_out[neurode])*sum;
+}
+return;
+}
+
+/*********************
+** adjust_out_wts() **
+**********************
+** Adjust the weights of the output layer. The error for
+** the output layer has been previously propagated back to
+** the middle layer.
+** Use the Delta Rule with momentum term to adjust the weights.
+**/
+static void adjust_out_wts()
+{
+int weight, neurode;
+double learn,delta,alph;
+
+learn = BETA;
+alph = ALPHA;
+for (neurode=0; neurode<OUT_SIZE; neurode++)
+{
+ for (weight=0; weight<MID_SIZE; weight++)
+ {
+ /* standard delta rule */
+ delta = learn * out_error[neurode] * mid_out[weight];
+
+ /* now the momentum term */
+ delta += alph * out_wt_change[neurode][weight];
+ out_wts[neurode][weight] += delta;
+
+ /* keep track of this pass's cum wt changes for next pass's momentum */
+ out_wt_cum_change[neurode][weight] += delta;
+ }
+}
+return;
+}
+
+/*************************
+** adjust_mid_wts(patt) **
+**************************
+** Adjust the middle layer weights using the previously computed
+** errors.
+** We use the Generalized Delta Rule with momentum term
+**/
+static void adjust_mid_wts(int patt)
+{
+int weight, neurode;
+double learn,alph,delta;
+
+learn = BETA;
+alph = ALPHA;
+for (neurode=0; neurode<MID_SIZE; neurode++)
+{
+ for (weight=0; weight<IN_SIZE; weight++)
+ {
+ /* first the basic delta rule */
+ delta = learn * mid_error[neurode] * in_pats[patt][weight];
+
+ /* with the momentum term */
+ delta += alph * mid_wt_change[neurode][weight];
+ mid_wts[neurode][weight] += delta;
+
+ /* keep track of this pass's cum wt changes for next pass's momentum */
+ mid_wt_cum_change[neurode][weight] += delta;
+ }
+}
+return;
+}
+
+/*******************
+** do_back_pass() **
+********************
+** Process the backward propagation of error through network.
+**/
+void do_back_pass(int patt)
+{
+
+do_out_error(patt);
+do_mid_error();
+adjust_out_wts();
+adjust_mid_wts(patt);
+
+return;
+}
+
+
+/**********************
+** move_wt_changes() **
+***********************
+** Move the weight changes accumulated last pass into the wt-change
+** array for use by the momentum term in this pass. Also zero out
+** the accumulating arrays after the move.
+**/
+static void move_wt_changes()
+{
+int i,j;
+
+for (i = 0; i<MID_SIZE; i++)
+ for (j = 0; j<IN_SIZE; j++)
+ {
+ mid_wt_change[i][j] = mid_wt_cum_change[i][j];
+ /*
+ ** Zero it out for next pass accumulation.
+ */
+ mid_wt_cum_change[i][j] = 0.0;
+ }
+
+for (i = 0; i<OUT_SIZE; i++)
+ for (j=0; j<MID_SIZE; j++)
+ {
+ out_wt_change[i][j] = out_wt_cum_change[i][j];
+ out_wt_cum_change[i][j] = 0.0;
+ }
+
+return;
+}
+
+/**********************
+** check_out_error() **
+***********************
+** Check to see if the error in the output layer is below
+** MARGIN*OUT_SIZE for all output patterns. If so, then
+** assume the network has learned acceptably well. This
+** is simply an arbitrary measure of how well the network
+** has learned -- many other standards are possible.
+**/
+static int check_out_error()
+{
+int result,i,error;
+
+result = T;
+error = F;
+worst_pass_error(); /* identify the worst error in this pass */
+
+/*
+#ifdef DEBUG
+printf("\n Iteration # %d",iteration_count);
+#endif
+*/
+for (i=0; i<numpats; i++)
+{
+/* printf("\n Error pattern %d: Worst: %8.3f; Average: %8.3f",
+ i+1,tot_out_error[i], avg_out_error[i]);
+ fprintf(outfile,
+ "\n Error pattern %d: Worst: %8.3f; Average: %8.3f",
+ i+1,tot_out_error[i]);
+*/
+
+ if (worst_error >= STOP) result = F;
+ if (tot_out_error[i] >= 16.0) error = T;
+}
+
+if (error == T) result = ERR;
+
+
+#ifdef DEBUG
+/* printf("\n Error this pass thru data: Worst: %8.3f; Average: %8.3f",
+ worst_error,average_error);
+*/
+/* fprintf(outfile,
+ "\n Error this pass thru data: Worst: %8.3f; Average: %8.3f",
+ worst_error, average_error); */
+#endif
+
+return(result);
+}
+
+
+/*******************
+** zero_changes() **
+********************
+** Zero out all the wt change arrays
+**/
+static void zero_changes()
+{
+int i,j;
+
+for (i = 0; i<MID_SIZE; i++)
+{
+ for (j=0; j<IN_SIZE; j++)
+ {
+ mid_wt_change[i][j] = 0.0;
+ mid_wt_cum_change[i][j] = 0.0;
+ }
+}
+
+for (i = 0; i< OUT_SIZE; i++)
+{
+ for (j=0; j<MID_SIZE; j++)
+ {
+ out_wt_change[i][j] = 0.0;
+ out_wt_cum_change[i][j] = 0.0;
+ }
+}
+return;
+}
+
+
+/********************
+** randomize_wts() **
+*********************
+** Intialize the weights in the middle and output layers to
+** random values between -0.25..+0.25
+** Function rand() returns a value between 0 and 32767.
+**
+** NOTE: Had to make alterations to how the random numbers were
+** created. -- RG.
+**/
+static void randomize_wts()
+{
+int neurode,i;
+double value;
+
+/*
+** Following not used int benchmark version -- RG
+**
+** printf("\n Please enter a random number seed (1..32767): ");
+** scanf("%d", &i);
+** srand(i);
+*/
+
+for (neurode = 0; neurode<MID_SIZE; neurode++)
+{
+ for(i=0; i<IN_SIZE; i++)
+ {
+ /* value=(double)abs_randwc(100000L); */
+ value=(double)abs_randwc((int32)100000);
+ value=value/(double)100000.0 - (double) 0.5;
+ mid_wts[neurode][i] = value/2;
+ }
+}
+for (neurode=0; neurode<OUT_SIZE; neurode++)
+{
+ for(i=0; i<MID_SIZE; i++)
+ {
+ /* value=(double)abs_randwc(100000L); */
+ value=(double)abs_randwc((int32)100000);
+ value=value/(double)10000.0 - (double) 0.5;
+ out_wts[neurode][i] = value/2;
+ }
+}
+
+return;
+}
+
+
+/*********************
+** read_data_file() **
+**********************
+** Read in the input data file and store the patterns in
+** in_pats and out_pats.
+** The format for the data file is as follows:
+**
+** line# data expected
+** ----- ------------------------------
+** 1 In-X-size,in-y-size,out-size
+** 2 number of patterns in file
+** 3 1st X row of 1st input pattern
+** 4.. following rows of 1st input pattern pattern
+** in-x+2 y-out pattern
+** 1st X row of 2nd pattern
+** etc.
+**
+** Each row of data is separated by commas or spaces.
+** The data is expected to be ascii text corresponding to
+** either a +1 or a 0.
+**
+** Sample input for a 1-pattern file (The comments to the
+** right may NOT be in the file unless more sophisticated
+** parsing of the input is done.):
+**
+** 5,7,8 input is 5x7 grid, output is 8 bits
+** 1 one pattern in file
+** 0,1,1,1,0 beginning of pattern for "O"
+** 1,0,0,0,1
+** 1,0,0,0,1
+** 1,0,0,0,1
+** 1,0,0,0,1
+** 1,0,0,0,0
+** 0,1,1,1,0
+** 0,1,0,0,1,1,1,1 ASCII code for "O" -- 0100 1111
+**
+** Clearly, this simple scheme can be expanded or enhanced
+** any way you like.
+**
+** Returns -1 if any file error occurred, otherwise 0.
+**/
+static int read_data_file()
+{
+FILE *infile;
+
+int xinsize,yinsize,youtsize;
+int patt, element, i, row;
+int vals_read;
+int val1,val2,val3,val4,val5,val6,val7,val8;
+
+/* printf("\n Opening and retrieving data from file."); */
+
+infile = fopen(inpath, "r");
+if (infile == NULL)
+{
+ printf("\n CPU:NNET--error in opening file!");
+ return -1 ;
+}
+vals_read =fscanf(infile,"%d %d %d",&xinsize,&yinsize,&youtsize);
+if (vals_read != 3)
+{
+ printf("\n CPU:NNET -- Should read 3 items in line one; did read %d",vals_read);
+ return -1;
+}
+vals_read=fscanf(infile,"%d",&numpats);
+if (vals_read !=1)
+{
+ printf("\n CPU:NNET -- Should read 1 item in line 2; did read %d",vals_read);
+ return -1;
+}
+if (numpats > MAXPATS)
+ numpats = MAXPATS;
+
+for (patt=0; patt<numpats; patt++)
+{
+ element = 0;
+ for (row = 0; row<yinsize; row++)
+ {
+ vals_read = fscanf(infile,"%d %d %d %d %d",
+ &val1, &val2, &val3, &val4, &val5);
+ if (vals_read != 5)
+ {
+ printf ("\n CPU:NNET -- failure in reading input!");
+ return -1;
+ }
+ element=row*xinsize;
+
+ in_pats[patt][element] = (double) val1; element++;
+ in_pats[patt][element] = (double) val2; element++;
+ in_pats[patt][element] = (double) val3; element++;
+ in_pats[patt][element] = (double) val4; element++;
+ in_pats[patt][element] = (double) val5; element++;
+ }
+ for (i=0;i<IN_SIZE; i++)
+ {
+ if (in_pats[patt][i] >= 0.9)
+ in_pats[patt][i] = 0.9;
+ if (in_pats[patt][i] <= 0.1)
+ in_pats[patt][i] = 0.1;
+ }
+ element = 0;
+ vals_read = fscanf(infile,"%d %d %d %d %d %d %d %d",
+ &val1, &val2, &val3, &val4, &val5, &val6, &val7, &val8);
+
+ out_pats[patt][element] = (double) val1; element++;
+ out_pats[patt][element] = (double) val2; element++;
+ out_pats[patt][element] = (double) val3; element++;
+ out_pats[patt][element] = (double) val4; element++;
+ out_pats[patt][element] = (double) val5; element++;
+ out_pats[patt][element] = (double) val6; element++;
+ out_pats[patt][element] = (double) val7; element++;
+ out_pats[patt][element] = (double) val8; element++;
+}
+
+/* printf("\n Closing the input file now. "); */
+
+fclose(infile);
+return(0);
+}
+
+/*********************
+** initialize_net() **
+**********************
+** Do all the initialization stuff before beginning
+*/
+/*
+static int initialize_net()
+{
+int err_code;
+
+randomize_wts();
+zero_changes();
+err_code = read_data_file();
+iteration_count = 1;
+return(err_code);
+}
+*/
+
+/**********************
+** display_mid_wts() **
+***********************
+** Display the weights on the middle layer neurodes
+** NOTE: This routine is not used in the benchmark
+** test -- RG
+**/
+/* static void display_mid_wts()
+{
+int neurode, weight, row, col;
+
+fprintf(outfile,"\n Weights of Middle Layer neurodes:");
+
+for (neurode=0; neurode<MID_SIZE; neurode++)
+{
+ fprintf(outfile,"\n Mid Neurode # %d",neurode);
+ for (row=0; row<IN_Y_SIZE; row++)
+ {
+ fprintf(outfile,"\n ");
+ for (col=0; col<IN_X_SIZE; col++)
+ {
+ weight = IN_X_SIZE * row + col;
+ fprintf(outfile," %8.3f ", mid_wts[neurode][weight]);
+ }
+ }
+}
+return;
+}
+*/
+/**********************
+** display_out_wts() **
+***********************
+** Display the weights on the output layer neurodes
+** NOTE: This code is not used in the benchmark
+** test -- RG
+*/
+/* void display_out_wts()
+{
+int neurode, weight;
+
+ fprintf(outfile,"\n Weights of Output Layer neurodes:");
+
+ for (neurode=0; neurode<OUT_SIZE; neurode++)
+ {
+ fprintf(outfile,"\n Out Neurode # %d \n",neurode);
+ for (weight=0; weight<MID_SIZE; weight++)
+ {
+ fprintf(outfile," %8.3f ", out_wts[neurode][weight]);
+ }
+ }
+ return;
+}
+*/
+
+/***********************
+** LU DECOMPOSITION **
+** (Linear Equations) **
+************************
+** These routines come from "Numerical Recipes in Pascal".
+** Note that, as in the assignment algorithm, though we
+** separately define LUARRAYROWS and LUARRAYCOLS, the two
+** must be the same value (this routine depends on a square
+** matrix).
+*/
+
+/*********
+** DoLU **
+**********
+** Perform the LU decomposition benchmark.
+*/
+void DoLU(void)
+{
+LUStruct *loclustruct; /* Local pointer to global data */
+char *errorcontext;
+int systemerror;
+fardouble *a;
+fardouble *b;
+fardouble *abase;
+fardouble *bbase;
+LUdblptr ptra;
+int n;
+int i;
+ulong accumtime;
+double iterations;
+
+/*
+** Link to global data
+*/
+loclustruct=&global_lustruct;
+
+/*
+** Set error context.
+*/
+errorcontext="FPU:LU";
+
+/*
+** Our first step is to build a "solvable" problem. This
+** will become the "seed" set that all others will be
+** derived from. (I.E., we'll simply copy these arrays
+** into the others.
+*/
+a=(fardouble *)AllocateMemory(sizeof(double) * LUARRAYCOLS * LUARRAYROWS,
+ &systemerror);
+b=(fardouble *)AllocateMemory(sizeof(double) * LUARRAYROWS,
+ &systemerror);
+n=LUARRAYROWS;
+
+/*
+** We need to allocate a temp vector that is used by the LU
+** algorithm. This removes the allocation routine from the
+** timing.
+*/
+LUtempvv=(fardouble *)AllocateMemory(sizeof(double)*LUARRAYROWS,
+ &systemerror);
+
+/*
+** Build a problem to be solved.
+*/
+ptra.ptrs.p=a; /* Gotta coerce linear array to 2D array */
+build_problem(*ptra.ptrs.ap,n,b);
+
+/*
+** Now that we have a problem built, see if we need to do
+** auto-adjust. If so, repeatedly call the DoLUIteration routine,
+** increasing the number of solutions per iteration as you go.
+*/
+if(loclustruct->adjust==0)
+{
+ loclustruct->numarrays=0;
+ for(i=1;i<=MAXLUARRAYS;i++)
+ {
+ abase=(fardouble *)AllocateMemory(sizeof(double) *
+ LUARRAYCOLS*LUARRAYROWS*(i+1),&systemerror);
+ if(systemerror)
+ { ReportError(errorcontext,systemerror);
+ LUFreeMem(a,b,(fardouble *)NULL,(fardouble *)NULL);
+ ErrorExit();
+ }
+ bbase=(fardouble *)AllocateMemory(sizeof(double) *
+ LUARRAYROWS*(i+1),&systemerror);
+ if(systemerror)
+ { ReportError(errorcontext,systemerror);
+ LUFreeMem(a,b,abase,(fardouble *)NULL);
+ ErrorExit();
+ }
+ if(DoLUIteration(a,b,abase,bbase,i)>global_min_ticks)
+ { loclustruct->numarrays=i;
+ break;
+ }
+ /*
+ ** Not enough arrays...free them all and try again
+ */
+ FreeMemory((farvoid *)abase,&systemerror);
+ FreeMemory((farvoid *)bbase,&systemerror);
+ }
+ /*
+ ** Were we able to do it?
+ */
+ if(loclustruct->numarrays==0)
+ { printf("FPU:LU -- Array limit reached\n");
+ LUFreeMem(a,b,abase,bbase);
+ ErrorExit();
+ }
+}
+else
+{ /*
+ ** Don't need to adjust -- just allocate the proper
+ ** number of arrays and proceed.
+ */
+ abase=(fardouble *)AllocateMemory(sizeof(double) *
+ LUARRAYCOLS*LUARRAYROWS*loclustruct->numarrays,
+ &systemerror);
+ if(systemerror)
+ { ReportError(errorcontext,systemerror);
+ LUFreeMem(a,b,(fardouble *)NULL,(fardouble *)NULL);
+ ErrorExit();
+ }
+ bbase=(fardouble *)AllocateMemory(sizeof(double) *
+ LUARRAYROWS*loclustruct->numarrays,&systemerror);
+ if(systemerror)
+ {
+ ReportError(errorcontext,systemerror);
+ LUFreeMem(a,b,abase,(fardouble *)NULL);
+ ErrorExit();
+ }
+}
+/*
+** All's well if we get here. Do the test.
+*/
+accumtime=0L;
+iterations=(double)0.0;
+
+do {
+ accumtime+=DoLUIteration(a,b,abase,bbase,
+ loclustruct->numarrays);
+ iterations+=(double)loclustruct->numarrays;
+} while(TicksToSecs(accumtime)<loclustruct->request_secs);
+
+/*
+** Clean up, calculate results, and go home. Be sure to
+** show that we don't have to rerun adjustment code.
+*/
+loclustruct->iterspersec=iterations / TicksToFracSecs(accumtime);
+
+if(loclustruct->adjust==0)
+ loclustruct->adjust=1;
+
+LUFreeMem(a,b,abase,bbase);
+return;
+}
+
+/**************
+** LUFreeMem **
+***************
+** Release memory associated with LU benchmark.
+*/
+static void LUFreeMem(fardouble *a, fardouble *b,
+ fardouble *abase,fardouble *bbase)
+{
+int systemerror;
+
+FreeMemory((farvoid *)a,&systemerror);
+FreeMemory((farvoid *)b,&systemerror);
+FreeMemory((farvoid *)LUtempvv,&systemerror);
+
+if(abase!=(fardouble *)NULL) FreeMemory((farvoid *)abase,&systemerror);
+if(bbase!=(fardouble *)NULL) FreeMemory((farvoid *)bbase,&systemerror);
+return;
+}
+
+/******************
+** DoLUIteration **
+*******************
+** Perform an iteration of the LU decomposition benchmark.
+** An iteration refers to the repeated solution of several
+** identical matrices.
+*/
+static ulong DoLUIteration(fardouble *a,fardouble *b,
+ fardouble *abase, fardouble *bbase,
+ ulong numarrays)
+{
+fardouble *locabase;
+fardouble *locbbase;
+LUdblptr ptra; /* For converting ptr to 2D array */
+ulong elapsed;
+ulong j,i; /* Indexes */
+
+
+/*
+** Move the seed arrays (a & b) into the destination
+** arrays;
+*/
+for(j=0;j<numarrays;j++)
+{ locabase=abase+j*LUARRAYROWS*LUARRAYCOLS;
+ locbbase=bbase+j*LUARRAYROWS;
+ for(i=0;i<LUARRAYROWS*LUARRAYCOLS;i++)
+ *(locabase+i)=*(a+i);
+ for(i=0;i<LUARRAYROWS;i++)
+ *(locbbase+i)=*(b+i);
+}
+
+/*
+** Do test...begin timing.
+*/
+elapsed=StartStopwatch();
+for(i=0;i<numarrays;i++)
+{ locabase=abase+i*LUARRAYROWS*LUARRAYCOLS;
+ locbbase=bbase+i*LUARRAYROWS;
+ ptra.ptrs.p=locabase;
+ lusolve(*ptra.ptrs.ap,LUARRAYROWS,locbbase);
+}
+
+return(StopStopwatch(elapsed));
+}
+
+/******************
+** build_problem **
+*******************
+** Constructs a solvable set of linear equations. It does this by
+** creating an identity matrix, then loading the solution vector
+** with random numbers. After that, the identity matrix and
+** solution vector are randomly "scrambled". Scrambling is
+** done by (a) randomly selecting a row and multiplying that
+** row by a random number and (b) adding one randomly-selected
+** row to another.
+*/
+static void build_problem(double a[][LUARRAYCOLS],
+ int n,
+ double b[LUARRAYROWS])
+{
+long i,j,k,k1; /* Indexes */
+double rcon; /* Random constant */
+
+/*
+** Reset random number generator
+*/
+/* randnum(13L); */
+randnum((int32)13);
+
+/*
+** Build an identity matrix.
+** We'll also use this as a chance to load the solution
+** vector.
+*/
+for(i=0;i<n;i++)
+{ /* b[i]=(double)(abs_randwc(100L)+1L); */
+ b[i]=(double)(abs_randwc((int32)100)+(int32)1);
+ for(j=0;j<n;j++)
+ if(i==j)
+ /* a[i][j]=(double)(abs_randwc(1000L)+1L); */
+ a[i][j]=(double)(abs_randwc((int32)1000)+(int32)1);
+ else
+ a[i][j]=(double)0.0;
+}
+
+#ifdef DEBUG
+printf("Problem:\n");
+for(i=0;i<n;i++)
+{
+/*
+ for(j=0;j<n;j++)
+ printf("%6.2f ",a[i][j]);
+*/
+ printf("%.0f/%.0f=%.2f\t",b[i],a[i][i],b[i]/a[i][i]);
+/*
+ printf("\n");
+*/
+}
+#endif
+
+/*
+** Scramble. Do this 8n times. See comment above for
+** a description of the scrambling process.
+*/
+
+for(i=0;i<8*n;i++)
+{
+ /*
+ ** Pick a row and a random constant. Multiply
+ ** all elements in the row by the constant.
+ */
+ /* k=abs_randwc((long)n);
+ rcon=(double)(abs_randwc(20L)+1L);
+ for(j=0;j<n;j++)
+ a[k][j]=a[k][j]*rcon;
+ b[k]=b[k]*rcon;
+*/
+ /*
+ ** Pick two random rows and add second to
+ ** first. Note that we also occasionally multiply
+ ** by minus 1 so that we get a subtraction operation.
+ */
+ /* k=abs_randwc((long)n); */
+ /* k1=abs_randwc((long)n); */
+ k=abs_randwc((int32)n);
+ k1=abs_randwc((int32)n);
+ if(k!=k1)
+ {
+ if(k<k1) rcon=(double)1.0;
+ else rcon=(double)-1.0;
+ for(j=0;j<n;j++)
+ a[k][j]+=a[k1][j]*rcon;;
+ b[k]+=b[k1]*rcon;
+ }
+}
+
+return;
+}
+
+
+/***********
+** ludcmp **
+************
+** From the procedure of the same name in "Numerical Recipes in Pascal",
+** by Press, Flannery, Tukolsky, and Vetterling.
+** Given an nxn matrix a[], this routine replaces it by the LU
+** decomposition of a rowwise permutation of itself. a[] and n
+** are input. a[] is output, modified as follows:
+** -- --
+** | b(1,1) b(1,2) b(1,3)... |
+** | a(2,1) b(2,2) b(2,3)... |
+** | a(3,1) a(3,2) b(3,3)... |
+** | a(4,1) a(4,2) a(4,3)... |
+** | ... |
+** -- --
+**
+** Where the b(i,j) elements form the upper triangular matrix of the
+** LU decomposition, and the a(i,j) elements form the lower triangular
+** elements. The LU decomposition is calculated so that we don't
+** need to store the a(i,i) elements (which would have laid along the
+** diagonal and would have all been 1).
+**
+** indx[] is an output vector that records the row permutation
+** effected by the partial pivoting; d is output as +/-1 depending
+** on whether the number of row interchanges was even or odd,
+** respectively.
+** Returns 0 if matrix singular, else returns 1.
+*/
+static int ludcmp(double a[][LUARRAYCOLS],
+ int n,
+ int indx[],
+ int *d)
+{
+
+double big; /* Holds largest element value */
+double sum;
+double dum; /* Holds dummy value */
+int i,j,k; /* Indexes */
+int imax=0; /* Holds max index value */
+double tiny; /* A really small number */
+
+tiny=(double)1.0e-20;
+
+*d=1; /* No interchanges yet */
+
+for(i=0;i<n;i++)
+{ big=(double)0.0;
+ for(j=0;j<n;j++)
+ if((double)fabs(a[i][j]) > big)
+ big=fabs(a[i][j]);
+ /* Bail out on singular matrix */
+ if(big==(double)0.0) return(0);
+ LUtempvv[i]=1.0/big;
+}
+
+/*
+** Crout's algorithm...loop over columns.
+*/
+for(j=0;j<n;j++)
+{ if(j!=0)
+ for(i=0;i<j;i++)
+ { sum=a[i][j];
+ if(i!=0)
+ for(k=0;k<i;k++)
+ sum-=(a[i][k]*a[k][j]);
+ a[i][j]=sum;
+ }
+ big=(double)0.0;
+ for(i=j;i<n;i++)
+ { sum=a[i][j];
+ if(j!=0)
+ for(k=0;k<j;k++)
+ sum-=a[i][k]*a[k][j];
+ a[i][j]=sum;
+ dum=LUtempvv[i]*fabs(sum);
+ if(dum>=big)
+ { big=dum;
+ imax=i;
+ }
+ }
+ if(j!=imax) /* Interchange rows if necessary */
+ { for(k=0;k<n;k++)
+ { dum=a[imax][k];
+ a[imax][k]=a[j][k];
+ a[j][k]=dum;
+ }
+ *d=-*d; /* Change parity of d */
+ dum=LUtempvv[imax];
+ LUtempvv[imax]=LUtempvv[j]; /* Don't forget scale factor */
+ LUtempvv[j]=dum;
+ }
+ indx[j]=imax;
+ /*
+ ** If the pivot element is zero, the matrix is singular
+ ** (at least as far as the precision of the machine
+ ** is concerned.) We'll take the original author's
+ ** recommendation and replace 0.0 with "tiny".
+ */
+ if(a[j][j]==(double)0.0)
+ a[j][j]=tiny;
+
+ if(j!=(n-1))
+ { dum=1.0/a[j][j];
+ for(i=j+1;i<n;i++)
+ a[i][j]=a[i][j]*dum;
+ }
+}
+
+return(1);
+}
+
+/***********
+** lubksb **
+************
+** Also from "Numerical Recipes in Pascal".
+** This routine solves the set of n linear equations A X = B.
+** Here, a[][] is input, not as the matrix A, but as its
+** LU decomposition, created by the routine ludcmp().
+** Indx[] is input as the permutation vector returned by ludcmp().
+** b[] is input as the right-hand side an returns the
+** solution vector X.
+** a[], n, and indx are not modified by this routine and
+** can be left in place for different values of b[].
+** This routine takes into account the possibility that b will
+** begin with many zero elements, so it is efficient for use in
+** matrix inversion.
+*/
+static void lubksb( double a[][LUARRAYCOLS],
+ int n,
+ int indx[LUARRAYROWS],
+ double b[LUARRAYROWS])
+{
+
+int i,j; /* Indexes */
+int ip; /* "pointer" into indx */
+int ii;
+double sum;
+
+/*
+** When ii is set to a positive value, it will become
+** the index of the first nonvanishing element of b[].
+** We now do the forward substitution. The only wrinkle
+** is to unscramble the permutation as we go.
+*/
+ii=-1;
+for(i=0;i<n;i++)
+{ ip=indx[i];
+ sum=b[ip];
+ b[ip]=b[i];
+ if(ii!=-1)
+ for(j=ii;j<i;j++)
+ sum=sum-a[i][j]*b[j];
+ else
+ /*
+ ** If a nonzero element is encountered, we have
+ ** to do the sums in the loop above.
+ */
+ if(sum!=(double)0.0)
+ ii=i;
+ b[i]=sum;
+}
+/*
+** Do backsubstitution
+*/
+for(i=(n-1);i>=0;i--)
+{
+ sum=b[i];
+ if(i!=(n-1))
+ for(j=(i+1);j<n;j++)
+ sum=sum-a[i][j]*b[j];
+ b[i]=sum/a[i][i];
+}
+return;
+}
+
+/************
+** lusolve **
+*************
+** Solve a linear set of equations: A x = b
+** Original matrix A will be destroyed by this operation.
+** Returns 0 if matrix is singular, 1 otherwise.
+*/
+static int lusolve(double a[][LUARRAYCOLS],
+ int n,
+ double b[LUARRAYROWS])
+{
+int indx[LUARRAYROWS];
+int d;
+#ifdef DEBUG
+int i,j;
+#endif
+
+if(ludcmp(a,n,indx,&d)==0) return(0);
+
+/* Matrix not singular -- proceed */
+lubksb(a,n,indx,b);
+
+#ifdef DEBUG
+printf("Solution:\n");
+for(i=0;i<n;i++)
+{
+ for(j=0;j<n;j++){
+ /*
+ printf("%6.2f ",a[i][j]);
+ */
+ }
+ printf("%6.2f\t",b[i]);
+ /*
+ printf("\n");
+ */
+}
+printf("\n");
+#endif
+
+return(1);
+}