/*
** nbench1.c
*/

/********************************
**       BYTEmark (tm)         **
** BYTE NATIVE MODE BENCHMARKS **
**       VERSION 2             **
**                             **
** Included in this source     **
** file:                       **
**  Numeric Heapsort           **
**  String Heapsort            **
**  Bitfield test              **
**  Floating point emulation   **
**  Fourier coefficients       **
**  Assignment algorithm       **
**  IDEA Encryption            **
**  Huffman compression        **
**  Back prop. neural net      **
**  LU Decomposition           **
**    (linear equations)       **
** ----------                  **
** Rick Grehan, BYTE Magazine  **
*********************************
**
** BYTEmark (tm)
** BYTE's Native Mode Benchmarks
** Rick Grehan, BYTE Magazine
**
** Creation:
** Revision: 3/95;10/95
**  10/95 - Removed allocation that was taking place inside
**   the LU Decomposition benchmark. Though it didn't seem to
**   make a difference on systems we ran it on, it nonetheless
**   removes an operating system dependency that probably should
**   not have been there.
**
** DISCLAIMER
** The source, executable, and documentation files that comprise
** the BYTEmark benchmarks are made available on an "as is" basis.
** This means that we at BYTE Magazine have made every reasonable
** effort to verify that there are no errors in the source and
** executable code.  We cannot, however, guarantee that the programs
** are error-free.  Consequently, McGraw-Hill and BYTE Magazine make
** no claims in regard to the fitness of the source code, executable
** code, and documentation of the BYTEmark.
**  Furthermore, BYTE Magazine, McGraw-Hill, and all employees
** of McGraw-Hill cannot be held responsible for any damages resulting
** from the use of this code or the results obtained from using
** this code.
*/

/*
** INCLUDES
*/
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <strings.h>
#include <math.h>
#include "nmglobal.h"
#include "nbench1.h"
#include "wordcat.h"

/*
** DEBUG-only result flags for the two sort benchmarks.
** The iteration routines set a flag to 1 when their post-sort
** verification finds elements out of order.  DoNumSort() and
** DoStringSort() print an "OK" line when their flag is still 0
** and then reset it to 0.
*/
#ifdef DEBUG
static int numsort_status=0;
static int stringsort_status=0;
#endif

/*********************
** NUMERIC HEAPSORT **
**********************
** This test implements a heapsort algorithm, performed on an
** array of longs.
*/

/**************
** DoNumSort **
***************
** This routine performs the CPU numeric sort test.
** NOTE: Last version incorrectly stated that the routine
**  returned result in # of longword sorted per second.
**  Not so; the routine returns # of iterations per sec.
*/

void DoNumSort(void)
{
SortStruct *numsortstruct;      /* Local pointer to global struct */
farlong *arraybase;     /* Base pointers of array */
long accumtime;         /* Accumulated time */
double iterations;      /* Iteration counter */
char *errorcontext;     /* Error context string pointer */
int systemerror;        /* For holding error codes */

/*
** Link to global structure
*/
numsortstruct=&global_numsortstruct;

/*
** Set the error context string.
*/
errorcontext="CPU:Numeric Sort";

/*
** See if we need to do self adjustment code.
*/
if(numsortstruct->adjust==0)
{
	/*
	** Self-adjustment code.  The system begins by sorting 1
	** array.  If it does that in no time, then two arrays
	** are built and sorted.  This process continues until
	** enough arrays are built to handle the tolerance.
	*/
	numsortstruct->numarrays=1;
	while(1)
	{
		/*
		** Allocate space for arrays
		*/
		arraybase=(farlong *)AllocateMemory(sizeof(long) *
			numsortstruct->numarrays * numsortstruct->arraysize,
			&systemerror);
		if(systemerror)
		{       ReportError(errorcontext,systemerror);
			FreeMemory((farvoid *)arraybase,
				  &systemerror);
			ErrorExit();
		}

		/*
		** Do an iteration of the numeric sort.  If the
		** elapsed time is less than or equal to the permitted
		** minimum, then allocate for more arrays and
		** try again.
		*/
		if(DoNumSortIteration(arraybase,
			numsortstruct->arraysize,
			numsortstruct->numarrays)>global_min_ticks)
			break;          /* We're ok...exit */

		FreeMemory((farvoid *)arraybase,&systemerror);
		if(numsortstruct->numarrays++>NUMNUMARRAYS)
		{       printf("CPU:NSORT -- NUMNUMARRAYS hit.\n");
			ErrorExit();
		}
	}
}
else
{       /*
	** Allocate space for arrays
	*/
	arraybase=(farlong *)AllocateMemory(sizeof(long) *
		numsortstruct->numarrays * numsortstruct->arraysize,
		&systemerror);
	if(systemerror)
	{       ReportError(errorcontext,systemerror);
		FreeMemory((farvoid *)arraybase,
			  &systemerror);
		ErrorExit();
	}

}
/*
** All's well if we get here.  Repeatedly perform sorts until the
** accumulated elapsed time is greater than # of seconds requested.
*/
accumtime=0L;
iterations=(double)0.0;

do {
	accumtime+=DoNumSortIteration(arraybase,
		numsortstruct->arraysize,
		numsortstruct->numarrays);
	iterations+=(double)1.0;
} while(TicksToSecs(accumtime)<numsortstruct->request_secs);

/*
** Clean up, calculate results, and go home.  Be sure to
** show that we don't have to rerun adjustment code.
*/
FreeMemory((farvoid *)arraybase,&systemerror);

numsortstruct->sortspersec=iterations *
	(double)numsortstruct->numarrays / TicksToFracSecs(accumtime);

if(numsortstruct->adjust==0)
	numsortstruct->adjust=1;

#ifdef DEBUG
if (numsort_status==0) printf("Numeric sort: OK\n");
numsort_status=0;
#endif
return;
}

/***********************
** DoNumSortIteration **
************************
** Executes one pass of the numeric sort benchmark and returns
** the ticks it took.
** The numarrays arrays (arraysize longs each, stored back to
** back starting at arraybase) are refilled first; only the
** heapsorts that follow are inside the stopwatch interval.
*/
static ulong DoNumSortIteration(farlong *arraybase,
		ulong arraysize,
		uint numarrays)
{
	ulong ticks;            /* Stopwatch start value, then elapsed ticks */
	ulong n;                /* Array counter (element counter under DEBUG) */

	/* Fresh random data; done before the clock starts. */
	LoadNumArrayWithRand(arraybase,arraysize,numarrays);

	ticks=StartStopwatch();

	/* Sort every array in turn. */
	for(n=0;n<numarrays;n++)
		NumHeapSort(arraybase+n*arraysize,0L,arraysize-1L);

	ticks=StopStopwatch(ticks);

#ifdef DEBUG
	/*
	** Walk the first array only and flag the first pair found
	** in descending order.
	*/
	n=0;
	while(n<arraysize-1)
	{
		if(arraybase[n+1]<arraybase[n])
		{
			printf("Sort Error\n");
			numsort_status=1;
			break;
		}
		n++;
	}
#endif

	return(ticks);
}

/*************************
** LoadNumArrayWithRand **
**************************
** Fill numarrays consecutive arrays of arraysize longs each.
** The random number generator is reseeded with 13, the first
** array is filled from it, and every further array is a copy
** of the first.  With numarrays of 0 or 1 only the first array
** is written.
*/
static void LoadNumArrayWithRand(farlong *array,     /* Base of the arrays */
		ulong arraysize,        /* # of elements in each array */
		uint numarrays)         /* # of arrays */
{
	ulong i;                /* Element index (unsigned, like arraysize) */
	uint k;                 /* Array counter */
	farlong *darray;        /* Destination array pointer */

	/*
	** Reseed so every call produces the same data.
	*/
	randnum((int32)13);

	/*
	** Load up first array with randoms
	*/
	for(i=0;i<arraysize;i++)
		array[i]=randnum((int32)0);

	/*
	** Copy the first array into each of the others.  Counting up
	** from 1 (rather than pre-decrementing numarrays) keeps a
	** count of 0 from wrapping around.
	*/
	darray=array;
	for(k=1;k<numarrays;k++)
	{
		darray+=arraysize;
		for(i=0;i<arraysize;i++)
			darray[i]=array[i];
	}

	return;
}

/****************
** NumHeapSort **
*****************
** Heap sort an array of long integers in place.
** top is the index of the last element to sort.  bottom is
** only passed through to NumSift(); the exchange step always
** swaps with element 0, and the caller in this file passes 0.
*/
static void NumHeapSort(farlong *array,
	ulong bottom,           /* Lower bound */
	ulong top)              /* Upper bound */
{
	long temp;              /* Exchange temporary; signed like the elements (cf. NumSift) */
	ulong i;                /* Loop index */

	/*
	** First, build a heap in the array
	*/
	for(i=(top/2L); i>0; --i)
		NumSift(array,i,top);

	/*
	** Repeatedly sift from the bottom, then move the element at
	** the root to the end of the shrinking region.  When the
	** region is empty the array is sorted.
	*/
	for(i=top; i>0; --i)
	{
		NumSift(array,bottom,i);
		temp=array[0];
		array[0]=array[i];
		array[i]=temp;
	}
	return;
}

/************
** NumSift **
*************
** Performs the sift operation on a numeric array: the element
** at index i is moved down, swapping with the larger of the
** elements at 2*i and 2*i+1, until neither is larger or the
** bound j is passed.
*/
static void NumSift(farlong *array,     /* Array of numbers */
	ulong i,                /* Index to sift down from */
	ulong j)                /* Last index taking part */
{
	unsigned long child;    /* Index of the candidate child */
	long held;              /* Used for exchange */

	while((i+i)<=j)
	{
		child=i+i;

		/* Prefer the right-hand child when it exists and is larger. */
		if(child<j && array[child]<array[child+1L])
			++child;

		/* Parent not smaller than the child: nothing left to do. */
		if(!(array[i]<array[child]))
			break;

		held=array[child];
		array[child]=array[i];
		array[i]=held;
		i=child;
	}
	return;
}

/********************
** STRING HEAPSORT **
********************/

/*****************
** DoStringSort **
******************
** This routine performs the CPU string sort test.
** Arguments:
**      requested_secs = # of seconds to execute test
**      stringspersec = # of strings per second sorted (RETURNED)
*/
void DoStringSort(void)
{

SortStruct *strsortstruct;      /* Local for sort structure */
faruchar *arraybase;            /* Base pointer of char array */
long accumtime;                 /* Accumulated time */
double iterations;              /* # of iterations */
char *errorcontext;             /* Error context string pointer */
int systemerror;                /* For holding error code */

/*
** Link to global structure
*/
strsortstruct=&global_strsortstruct;

/*
** Set the error context
*/
errorcontext="CPU:String Sort";

/*
** See if we have to perform self-adjustment code
*/
if(strsortstruct->adjust==0)
{
	/*
	** Initialize the number of arrays.
	*/
	strsortstruct->numarrays=1;
	while(1)
	{
		/*
		** Allocate space for array.  We'll add an extra 100
		** bytes to protect memory as strings move around
		** (this can happen during string adjustment)
		*/
		arraybase=(faruchar *)AllocateMemory((strsortstruct->arraysize+100L) *
			(long)strsortstruct->numarrays,&systemerror);
		if(systemerror)
		{       ReportError(errorcontext,systemerror);
			ErrorExit();
		}

		/*
		** Do an iteration of the string sort.  If the
		** elapsed time is less than or equal to the permitted
		** minimum, then de-allocate the array, reallocate a
		** an additional array, and try again.
		*/
		if(DoStringSortIteration(arraybase,
			strsortstruct->numarrays,
			strsortstruct->arraysize)>global_min_ticks)
			break;          /* We're ok...exit */

		FreeMemory((farvoid *)arraybase,&systemerror);
		strsortstruct->numarrays+=1;
	}
}
else
{
	/*
	** We don't have to perform self adjustment code.
	** Simply allocate the space for the array.
	*/
	arraybase=(faruchar *)AllocateMemory((strsortstruct->arraysize+100L) *
		(long)strsortstruct->numarrays,&systemerror);
	if(systemerror)
	{       ReportError(errorcontext,systemerror);
		ErrorExit();
	}
}
/*
** All's well if we get here.  Repeatedly perform sorts until the
** accumulated elapsed time is greater than # of seconds requested.
*/
accumtime=0L;
iterations=(double)0.0;

do {
	accumtime+=DoStringSortIteration(arraybase,
				strsortstruct->numarrays,
				strsortstruct->arraysize);
	iterations+=(double)strsortstruct->numarrays;
} while(TicksToSecs(accumtime)<strsortstruct->request_secs);

/*
** Clean up, calculate results, and go home.
** Set flag to show we don't need to rerun adjustment code.
*/
FreeMemory((farvoid *)arraybase,&systemerror);
strsortstruct->sortspersec=iterations / (double)TicksToFracSecs(accumtime);
if(strsortstruct->adjust==0)
	strsortstruct->adjust=1;
#ifdef DEBUG
if (stringsort_status==0) printf("String sort: OK\n");
stringsort_status=0;
#endif
return;
}

/**************************
** DoStringSortIteration **
***************************
** Executes one pass of the string sort benchmark and returns
** the ticks it took.
** LoadStringArray() fills the string arrays and builds the
** offset pointer array before the clock starts; that offset
** array is released again before returning.
*/
static ulong DoStringSortIteration(faruchar *arraybase,
		uint numarrays,ulong arraysize)
{
	farulong *optrarray;            /* Offset pointer array (owned here) */
	farulong *obase;                /* Offsets of the array being sorted */
	faruchar *sbase;                /* Strings of the array being sorted */
	unsigned long nstrings;         /* # of strings in each array */
	unsigned long ticks;            /* Stopwatch start value, then elapsed ticks */
	unsigned int a;                 /* Array counter */
	int syserror;                   /* System error code */

	/* Build the strings and their offsets (untimed). */
	optrarray=LoadStringArray(arraybase,numarrays,&nstrings,arraysize);

	obase=optrarray;
	sbase=arraybase;

	ticks=StartStopwatch();

	/*
	** Sort each array.  Offsets advance by nstrings entries,
	** strings by arraysize plus the 100 guard bytes.
	*/
	for(a=0;a<numarrays;a++)
	{
		StrHeapSort(obase,sbase,nstrings,0L,nstrings-1);
		obase+=nstrings;
		sbase+=arraysize+100;
	}

	ticks=StopStopwatch(ticks);

#ifdef DEBUG
	{
		/*
		** Check the first array only: no string may compare
		** less than its predecessor.
		*/
		unsigned long s;
		s=0;
		while(s<nstrings-1)
		{
			if(str_is_less(optrarray,arraybase,nstrings,s+1,s))
			{
				printf("Sort Error\n");
				stringsort_status=1;
				break;
			}
			s++;
		}
	}
#endif

	/* Release the offset array obtained from LoadStringArray(). */
	FreeMemory((farvoid *)optrarray,&syserror);

	return(ticks);
}

/********************
** LoadStringArray **
*********************
** Initialize the string array(s) with random strings of
** varying sizes and build the matching offset pointer array.
**
** Each string is stored as one length byte followed by that
** many random bytes.  Strings are appended until the next one
** would reach arraysize; that last string is shortened so the
** data ends exactly at arraysize.  The first array is then
** copied into the remaining numarrays-1 arrays, which are
** spaced arraysize+100 bytes apart.
**
** *nstrings receives the number of strings in one array.
** Returns the newly allocated offset pointer array (numarrays
** runs of *nstrings offsets); the caller must free it.
*/
static farulong *LoadStringArray(faruchar *strarray, /* String array */
	uint numarrays,                 /* # of arrays */
	ulong *nstrings,                /* # of strings */
	ulong arraysize)                /* Size of array */
{
	faruchar *tempsbase;            /* Base of the string array being filled */
	farulong *optrarray;            /* Offset pointer array to return */
	farulong *tempobase;            /* Base of the offset run being filled */
	unsigned long curroffset;       /* Current offset */
	int fullflag;                   /* Indicates full array */
	unsigned char stringlength;     /* Length of string */
	unsigned char i;                /* Byte index within a string */
	unsigned long j;                /* Byte / string index (as wide as its bounds) */
	unsigned int k;                 /* Array counter */
	int systemerror;                /* For holding error code */

	/*
	** Reseed so every call produces the same strings.
	*/
	randnum((int32)13);

	/*
	** Start with no strings and offset 0.
	*/
	*nstrings=0L;
	curroffset=0L;
	fullflag=0;

	do
	{
		/*
		** Pick a length of 1 + abs_randwc(76), kept to one byte.
		** If the string would reach the end of the array, cut it
		** to the space that is left and stop afterwards.
		*/
		stringlength=(unsigned char)((1+abs_randwc((int32)76)) & 0xFFL);
		if((unsigned long)stringlength+curroffset+1L>=arraysize)
		{
			stringlength=(unsigned char)((arraysize-curroffset-1L) &
					0xFF);
			fullflag=1;     /* Indicates a full */
		}

		/*
		** Store length at curroffset and advance current offset.
		*/
		*(strarray+curroffset)=stringlength;
		curroffset++;

		/*
		** Fill up the rest of the string with random bytes.
		*/
		for(i=0;i<stringlength;i++)
		{
			*(strarray+curroffset)=
				(unsigned char)(abs_randwc((int32)0xFE));
			curroffset++;
		}

		*nstrings+=1L;

	} while(fullflag==0);

	/*
	** Copy the finished first array into the others.
	*/
	tempsbase=strarray;
	for(k=1;k<numarrays;k++)
	{
		tempsbase+=arraysize+100;         /* Set base */
		for(j=0;j<arraysize;j++)
			tempsbase[j]=strarray[j];
	}

	/*
	** Now the array is full, allocate enough space for an
	** offset pointer array.
	*/
	optrarray=(farulong *)AllocateMemory(*nstrings * sizeof(unsigned long) *
			numarrays,
			&systemerror);
	if(systemerror)
	{
		ReportError("CPU:Stringsort",systemerror);
		FreeMemory((void *)strarray,&systemerror);
		ErrorExit();
	}

	/*
	** Walk the first string array; every string starts one
	** length byte plus its length after the previous one.
	*/
	curroffset=0;
	for(j=0;j<*nstrings;j++)
	{
		*(optrarray+j)=curroffset;
		curroffset+=(unsigned long)(*(strarray+curroffset))+1L;
	}

	/*
	** Duplicate the offsets for the remaining arrays.
	*/
	tempobase=optrarray;
	for(k=1;k<numarrays;k++)
	{
		tempobase+=*nstrings;
		for(j=0;j<*nstrings;j++)
			tempobase[j]=optrarray[j];
	}

	/*
	** All done...go home.  Pass local pointer back.
	*/
	return(optrarray);
}

/**************
** stradjust **
***************
** Used by the string heap sort.  Resizes the slot of string i
** to hold l data bytes: every string after it is moved up or
** down in strarray, the offsets of those strings are shifted
** by the same amount, and the length byte of string i is set
** to l.  The data bytes of string i itself are not written.
*/
static void stradjust(farulong *optrarray,      /* Offset pointer array */
	faruchar *strarray,                     /* String array */
	ulong nstrings,                         /* # of strings */
	ulong i,                                /* Offset to adjust */
	uchar l)                                /* New length */
{
	unsigned long slot;             /* Offset of string i's length byte */
	unsigned long last;             /* Index of the final string */
	unsigned long tailbytes;        /* # of bytes following string i */
	unsigned long s;                /* String index */
	int delta;                      /* New length minus old length */
	unsigned char shift;            /* Magnitude of delta */

	slot=optrarray[i];
	last=nstrings-1L;

	/*
	** Negative delta: the string shrinks and the tail moves
	** down.  Positive delta: it grows and the tail moves up.
	*/
	delta=(int)l - (int)strarray[slot];
	shift=(unsigned char)abs(delta);

	/*
	** Nothing follows the last string, so only its length
	** byte needs to change.
	*/
	if(i==last)
	{
		strarray[slot]=l;
		return;
	}

	/*
	** Bytes from the start of string i+1 through the end of
	** the last string.
	*/
	tailbytes=optrarray[last] +
		(unsigned long)strarray[optrarray[last]] + 1L -
		optrarray[i+1L];

	/*
	** Move the tail so that it starts right after the resized
	** string i (length byte + l data bytes).  Source and
	** destination may overlap; MoveMemory is relied on to cope
	** with that.
	*/
	MoveMemory((farvoid *)(strarray+slot+l+1),
		(farvoid *)(strarray+optrarray[i+1]),
		(unsigned long)tailbytes);

	/*
	** Shift the offsets of strings i+1 .. nstrings-1.
	*/
	if(delta<0)
	{
		for(s=i+1;s<nstrings;s++)
			optrarray[s]-=shift;
	}
	else
	{
		for(s=i+1;s<nstrings;s++)
			optrarray[s]+=shift;
	}

	/*
	** Store the new length and go home.
	*/
	strarray[slot]=l;
	return;
}

/****************
** StrHeapSort **
*****************
** Heap sort the strings of one string array in place.
** strarray holds the strings (length byte followed by data),
** optrarray holds the offset of each string, and numstrings
** is the number of strings.  Each string, including its length
** byte, is presumed to fit in the 80-byte exchange buffer.
** top is the index of the last string to sort; bottom is not
** used, sifting always starts from string 0.
*/
static void StrHeapSort(farulong *optrarray, /* Offset pointers */
	faruchar *strarray,             /* Strings array */
	ulong numstrings,               /* # of strings in array */
	ulong bottom,                   /* Region to sort...bottom */
	ulong top)                      /* Region to sort...top */
{
	unsigned char saved[80];        /* Holds string 0 during an exchange */
	unsigned char len;              /* Length of the string being moved */
	unsigned long n;                /* Loop index */

	/*
	** Build a heap in the array
	*/
	for(n=(top/2L); n>0; --n)
		strsift(optrarray,strarray,numstrings,n,top);

	/*
	** Repeatedly sift, then exchange string 0 with string n at
	** the end of the shrinking region.  When the region is empty
	** the array is sorted.
	*/
	for(n=top; n>0; --n)
	{
		strsift(optrarray,strarray,numstrings,0,n);

		/* saved = string[0]; string 0 sits at the array base */
		len=strarray[0];
		MoveMemory((farvoid *)saved,
			(farvoid *)strarray,
			(unsigned long)(len+1));

		/* string[0] = string[n]: resize slot 0, then copy */
		len=strarray[optrarray[n]];
		stradjust(optrarray,strarray,numstrings,0,len);
		MoveMemory((farvoid *)strarray,
			(farvoid *)(strarray+optrarray[n]),
			(unsigned long)(len+1));

		/* string[n] = saved: resize slot n, then copy */
		len=saved[0];
		stradjust(optrarray,strarray,numstrings,n,len);
		MoveMemory((farvoid *)(strarray+optrarray[n]),
			(farvoid *)saved,
			(unsigned long)(len+1));
	}
	return;
}

/****************
** str_is_less **
*****************
** Pass this function:
**      1) A pointer to an array of offset pointers
**      2) A pointer to a string array
**      3) The number of elements in the string array (unused)
**      4) Indices of two strings (a & b)
** Returns TRUE if string a orders before string b, else FALSE.
**
** The two strings are compared with strncmp() over the shorter
** of the two lengths.  If that compares equal, a orders before
** b only when a's length byte is the larger one.
**
** NOTE(review): the pointers handed to strncmp() address the
**  length byte, so that byte takes part in the comparison, and
**  strncmp() stops at a zero byte.  Left unchanged because it
**  defines the benchmark's ordering -- confirm this is intended.
*/
static int str_is_less(farulong *optrarray, /* Offset pointers */
	faruchar *strarray,                     /* String array */
	ulong numstrings,                       /* # of strings */
	ulong a, ulong b)                       /* Offsets */
{
	faruchar *stra;         /* Start (length byte) of string a */
	faruchar *strb;         /* Start (length byte) of string b */
	int span;               /* # of bytes handed to strncmp() */
	int order;              /* strncmp() result */

	stra=strarray+optrarray[a];
	strb=strarray+optrarray[b];

	/* Compare over the smaller of the two lengths. */
	span=(int)*stra;
	if(span > (int)*strb)
		span=(int)*strb;

	order=strncmp((char *)stra,(char *)strb,span);

	if(order<0)
		return(TRUE);           /* a is strictly less than b */
	if(order>0)
		return(FALSE);

	/* Equal so far: the string with the longer length wins. */
	return((*stra > *strb) ? TRUE : FALSE);
}

/************
** strsift **
*************
** Pass this function:
**      1) A pointer to an array of offset pointers
**      2) A pointer to a string array
**      3) The number of elements in the string array
**      4) The index i to sift down from and the last index j
**         taking part.
** String i is moved down, exchanging with whichever of strings
** 2*i and 2*i+1 orders later, until neither orders after it or
** the bound j is passed (thus building a heap).
*/
static void strsift(farulong *optrarray,        /* Offset pointers */
	faruchar *strarray,                     /* String array */
	ulong numstrings,                       /* # of strings */
	ulong i, ulong j)                       /* Offsets */
{
	unsigned char saved[80];        /* Holds the child during an exchange */
	unsigned char len;              /* Length of the string being moved */
	unsigned long child;            /* Index of the candidate child */

	while((i+i)<=j)
	{
		child=i+i;

		/* Prefer the right-hand child when it exists and orders later. */
		if(child<j &&
			str_is_less(optrarray,strarray,numstrings,child,child+1L))
			++child;

		/* Parent does not order before the child: done. */
		if(!str_is_less(optrarray,strarray,numstrings,i,child))
			break;

		/* saved = string[child] */
		len=strarray[optrarray[child]];
		MoveMemory((farvoid *)saved,
			(farvoid *)(strarray+optrarray[child]),
			(unsigned long)(len+1));

		/* string[child] = string[i]: resize the slot, then copy */
		len=strarray[optrarray[i]];
		stradjust(optrarray,strarray,numstrings,child,len);
		MoveMemory((farvoid *)(strarray+optrarray[child]),
			(farvoid *)(strarray+optrarray[i]),
			(unsigned long)(len+1));

		/* string[i] = saved: resize the slot, then copy */
		len=saved[0];
		stradjust(optrarray,strarray,numstrings,i,len);
		MoveMemory((farvoid *)(strarray+optrarray[i]),
			(farvoid *)saved,
			(unsigned long)(len+1));

		i=child;
	}
	return;
}

/************************
** BITFIELD OPERATIONS **
*************************/

/*************
** DoBitops **
**************
** Runs the bit operations portion of the CPU benchmark and
** stores the score in global_bitopstruct.bitopspersec.
**
** If the structure's adjust flag is 0 the routine calibrates
** itself: the operations array starts at 30 offset/run-length
** pairs and grows by 100 pairs until one iteration takes more
** than global_min_ticks.  Iterations are then repeated until
** request_secs seconds have accumulated, and adjust is set to 1.
**
** With both DEBUG and LINUX defined, the bitmap left by the
** first calibration iteration is dumped to debugbit.dat.
*/
void DoBitops(void)
{
	BitOpStruct *locbitopstruct=&global_bitopstruct; /* Test parameters/results */
	char *errorcontext="CPU:Bitfields";     /* Tag used in error reports */
	farulong *bitarraybase;         /* Base of bitmap array */
	farulong *bitoparraybase;       /* Base of bitmap operations array */
	ulong nbitops;                  /* # of bitfield operations */
	ulong accumtime;                /* Accumulated time in ticks */
	ulong ticks;                    /* Ticks of one calibration iteration */
	double iterations;              /* # of bit operations performed */
	int systemerror;                /* For holding error codes */

	if(locbitopstruct->adjust==0)
	{
		/* The bitmap itself is allocated once for the whole run. */
		bitarraybase=(farulong *)AllocateMemory(locbitopstruct->bitfieldarraysize *
			sizeof(ulong),&systemerror);
		if(systemerror)
		{
			ReportError(errorcontext,systemerror);
			ErrorExit();
		}

		/*
		** Start with 30 operations; each one takes two array
		** entries (offset and run length).
		*/
		locbitopstruct->bitoparraysize=30L;

		while(1)
		{
			/*
			** Allocate space for operations array
			*/
			bitoparraybase=(farulong *)AllocateMemory(locbitopstruct->bitoparraysize*2L*
				sizeof(ulong),
				&systemerror);
			if(systemerror)
			{
				ReportError(errorcontext,systemerror);
				FreeMemory((farvoid *)bitarraybase,&systemerror);
				ErrorExit();
			}

			/*
			** Do an iteration of the bitmap test.  The tick count
			** is kept as ulong so it is not truncated before the
			** comparison below.
			*/
			ticks=DoBitfieldIteration(bitarraybase,
						   bitoparraybase,
						   locbitopstruct->bitoparraysize,
						   &nbitops);
#ifdef DEBUG
#ifdef LINUX
			if (locbitopstruct->bitoparraysize==30L)
			{
				/* this is the first loop, write a debug file */
				FILE *file;
				unsigned long *running_base; /* same as farulong */
				long counter;

				file=fopen("debugbit.dat","w");
				if(file==NULL)
				{
					/* Dump is optional; report and carry on. */
					printf("\nCould not open debugbit.dat for writing\n");
				}
				else
				{
					running_base=bitarraybase;
					for (counter=0;counter<(long)(locbitopstruct->bitfieldarraysize);counter++)
					{
#ifdef LONG64
						/* Low 32 bits first, then the high 32 bits */
						fprintf(file,"%08X",(unsigned int)(*running_base&0xFFFFFFFFL));
						fprintf(file,"%08X",(unsigned int)((*running_base>>32)&0xFFFFFFFFL));
						if ((counter+1)%4==0) fprintf(file,"\n");
#else
						fprintf(file,"%08lX",*running_base);
						if ((counter+1)%8==0) fprintf(file,"\n");
#endif
						running_base=running_base+1;
					}
					fclose(file);
					printf("\nWrote the file debugbit.dat, you may want to compare it to debugbit.good\n");
				}
			}
#endif
#endif

			if (ticks>global_min_ticks) break;      /* We're ok...exit */

			/* Too fast: release the array and try a larger one. */
			FreeMemory((farvoid *)bitoparraybase,&systemerror);
			locbitopstruct->bitoparraysize+=100L;
		}
	}
	else
	{
		/*
		** Don't need to do self adjustment, just allocate
		** the array space.
		*/
		bitarraybase=(farulong *)AllocateMemory(locbitopstruct->bitfieldarraysize *
			sizeof(ulong),&systemerror);
		if(systemerror)
		{
			ReportError(errorcontext,systemerror);
			ErrorExit();
		}
		bitoparraybase=(farulong *)AllocateMemory(locbitopstruct->bitoparraysize*2L*
			sizeof(ulong),
			&systemerror);
		if(systemerror)
		{
			ReportError(errorcontext,systemerror);
			FreeMemory((farvoid *)bitarraybase,&systemerror);
			ErrorExit();
		}
	}

	/*
	** Timed phase: repeat iterations until the accumulated time
	** reaches the requested number of seconds.  Each iteration
	** contributes the number of bit operations it performed.
	*/
	accumtime=0L;
	iterations=(double)0.0;
	do {
		accumtime+=DoBitfieldIteration(bitarraybase,
				bitoparraybase,
				locbitopstruct->bitoparraysize,&nbitops);
		iterations+=(double)nbitops;
	} while(TicksToSecs(accumtime)<locbitopstruct->request_secs);

	/*
	** Clean up, calculate results, and go home.
	** Also, set adjustment flag to show that we don't have
	** to do self adjusting in the future.
	*/
	FreeMemory((farvoid *)bitarraybase,&systemerror);
	FreeMemory((farvoid *)bitoparraybase,&systemerror);
	locbitopstruct->bitopspersec=iterations /TicksToFracSecs(accumtime);
	if(locbitopstruct->adjust==0)
		locbitopstruct->adjust=1;

	return;
}

/************************
** DoBitfieldIteration **
*************************
** Perform a single iteration of the bitfield benchmark and
** return the # of ticks the bit operations took.
** Before the clock starts the bitmap is reset to an
** alternating bit pattern and bitoparraybase is filled with
** bitoparraysize (offset, run length) pairs.  *nbitops
** receives the sum of all run lengths.
*/
static ulong DoBitfieldIteration(farulong *bitarraybase,
		farulong *bitoparraybase,
		long bitoparraysize,
		ulong *nbitops)
{
	long n;                         /* Word / operation counter */
	long op;                        /* Operation selector (n mod 3) */
	ulong fill;                     /* Initial value of every bitmap word */
	ulong bitoffset;                /* Offset into bitmap */
	ulong runlength;                /* Run length of one operation */
	ulong elapsed;                  /* Time to execute */

	*nbitops=0L;

#ifdef LONG64
	fill=(ulong)0x5555555555555555;
#else
	fill=(ulong)0x55555555;
#endif

	/*
	** Reset the random number generator and the bit array so
	** that every iteration repeats the same work.  The size of
	** the bitmap is taken from global_bitopstruct.
	** (added by Uwe F. Mayer)
	*/
	randnum((int32)13);
	for(n=0;n<global_bitopstruct.bitfieldarraysize;n++)
		bitarraybase[n]=fill;
	randnum((int32)13);

	/*
	** Build the operations: a random offset below 262140 and a
	** random run length bounded by what remains up to 262140.
	** NOTE(review): 262140 presumably relates to a bitmap of
	** 8192 * 32 bits (262,144) -- confirm against the setup.
	*/
	for(n=0;n<bitoparraysize;n++)
	{
		/* First item is offset */
		bitoffset=abs_randwc((int32)262140);
		bitoparraybase[n+n]=bitoffset;

		/* Next item is run length */
		runlength=abs_randwc((int32)262140-bitoffset);
		bitoparraybase[n+n+1L]=runlength;
		*nbitops+=runlength;
	}

	/*
	** Array of offsets and lengths built...start the stopwatch.
	*/
	elapsed=StartStopwatch();

	/*
	** Loop through the array of offset/run length pairs.  The
	** operation cycles with the index: set, clear, complement.
	*/
	for(n=0;n<bitoparraysize;n++)
	{
		op=n % 3;
		if(op==0)
		{
			/* Set run of bits */
			ToggleBitRun(bitarraybase,
				bitoparraybase[n+n],
				bitoparraybase[n+n+1],
				1);
		}
		else if(op==1)
		{
			/* Clear run of bits */
			ToggleBitRun(bitarraybase,
				bitoparraybase[n+n],
				bitoparraybase[n+n+1],
				0);
		}
		else
		{
			/* Complement run of bits */
			FlipBitRun(bitarraybase,
				bitoparraybase[n+n],
				bitoparraybase[n+n+1]);
		}
	}

	/*
	** Return elapsed time
	*/
	return(StopStopwatch(elapsed));
}


/*****************************
**     ToggleBitRun          *
******************************
** Set (val non-zero) or clear (val zero) a run of nbits bits
** starting at bit number bit_addr in bitmap.  Bits are
** numbered from the least significant bit of word 0; a word
** holds 64 bits when LONG64 is defined and 32 otherwise.
*/
static void ToggleBitRun(farulong *bitmap, /* Bitmap */
		ulong bit_addr,         /* Address of bits to set */
		ulong nbits,            /* # of bits to set/clr */
		uint val)               /* 1 or 0 */
{
	unsigned long bindex;   /* Index into array */
	unsigned long bitnumb;  /* Bit number */

	while(nbits--)
	{
#ifdef LONG64
		bindex=bit_addr>>6;     /* Index is number /64 */
		bitnumb=bit_addr % 64;  /* Bit number in word */
#else
		bindex=bit_addr>>5;     /* Index is number /32 */
		bitnumb=bit_addr % 32;  /* Bit number in word */
#endif
		/*
		** The mask is built from an unsigned 1: shifting a
		** signed 1L into the sign bit (top bit of the word)
		** is undefined behavior.
		*/
		if(val)
			bitmap[bindex]|=(1UL<<bitnumb);
		else
			bitmap[bindex]&=~(1UL<<bitnumb);
		bit_addr++;
	}
	return;
}

/***************
** FlipBitRun **
****************
** Complements a run of nbits bits starting at bit number
** bit_addr in bitmap.  Bits are numbered from the least
** significant bit of word 0; a word holds 64 bits when LONG64
** is defined and 32 otherwise.
*/
static void FlipBitRun(farulong *bitmap,        /* Bit map */
		ulong bit_addr,                 /* Bit address */
		ulong nbits)                    /* # of bits to flip */
{
	unsigned long bindex;   /* Index into array */
	unsigned long bitnumb;  /* Bit number */

	while(nbits--)
	{
#ifdef LONG64
		bindex=bit_addr>>6;     /* Index is number /64 */
		bitnumb=bit_addr % 64;  /* Bit number in longword */
#else
		bindex=bit_addr>>5;     /* Index is number /32 */
		bitnumb=bit_addr % 32;  /* Bit number in longword */
#endif
		/*
		** Unsigned 1 for the mask: shifting a signed 1L into
		** the sign bit is undefined behavior.
		*/
		bitmap[bindex]^=(1UL<<bitnumb);
		bit_addr++;
	}

	return;
}

/*****************************
** FLOATING-POINT EMULATION **
*****************************/

/**************
** DoEmFloat **
***************
** Perform the floating-point emulation routines portion of the
** CPU benchmark.  Returns the operations per second.
*/
void DoEmFloat(void)
{
EmFloatStruct *locemfloatstruct;        /* Local structure */
InternalFPF *abase;             /* Base of A array */
InternalFPF *bbase;             /* Base of B array */
InternalFPF *cbase;             /* Base of C array */
ulong accumtime;                /* Accumulated time in ticks */
double iterations;              /* # of iterations */
ulong tickcount;                /* # of ticks */
char *errorcontext;             /* Error context string pointer */
int systemerror;                /* For holding error code */
ulong loops;                    /* # of loops */

/*
** Link to global structure
*/
locemfloatstruct=&global_emfloatstruct;

/*
** Set the error context
*/
errorcontext="CPU:Floating Emulation";


/*
** Test the emulation routines.
*/
#ifdef DEBUG
#endif

abase=(InternalFPF *)AllocateMemory(locemfloatstruct->arraysize*sizeof(InternalFPF),
		&systemerror);
if(systemerror)
{       ReportError(errorcontext,systemerror);
	ErrorExit();
}

bbase=(InternalFPF *)AllocateMemory(locemfloatstruct->arraysize*sizeof(InternalFPF),
		&systemerror);
if(systemerror)
{       ReportError(errorcontext,systemerror);
	FreeMemory((farvoid *)abase,&systemerror);
	ErrorExit();
}

cbase=(InternalFPF *)AllocateMemory(locemfloatstruct->arraysize*sizeof(InternalFPF),
		&systemerror);
if(systemerror)
{       ReportError(errorcontext,systemerror);
	FreeMemory((farvoid *)abase,&systemerror);
	FreeMemory((farvoid *)bbase,&systemerror);
	ErrorExit();
}

/*
** Set up the arrays
*/
SetupCPUEmFloatArrays(abase,bbase,cbase,locemfloatstruct->arraysize);

/*
** See if we need to do self-adjusting code.
*/
if(locemfloatstruct->adjust==0)
{
	locemfloatstruct->loops=0;

	/*
	** Do an iteration of the tests.  If the elapsed time is
	** less than minimum, increase the loop count and try
	** again.
	*/
	for(loops=1;loops<CPUEMFLOATLOOPMAX;loops+=loops)
	{       tickcount=DoEmFloatIteration(abase,bbase,cbase,
			locemfloatstruct->arraysize,
			loops);
		if(tickcount>global_min_ticks)
		{       locemfloatstruct->loops=loops;
			break;
		}
	}
}

/*
** Verify that selft adjustment code worked.
*/
if(locemfloatstruct->loops==0)
{       printf("CPU:EMFPU -- CMPUEMFLOATLOOPMAX limit hit\n");
	FreeMemory((farvoid *)abase,&systemerror);
	FreeMemory((farvoid *)bbase,&systemerror);
	FreeMemory((farvoid *)cbase,&systemerror);
	ErrorExit();
}

/*
** All's well if we get here.  Repeatedly perform floating
** tests until the accumulated time is greater than the
** # of seconds requested.
** Each iteration performs arraysize * 3 operations.
*/
accumtime=0L;
iterations=(double)0.0;
do {
	accumtime+=DoEmFloatIteration(abase,bbase,cbase,
			locemfloatstruct->arraysize,
			locemfloatstruct->loops);
	iterations+=(double)1.0;
} while(TicksToSecs(accumtime)<locemfloatstruct->request_secs);


/*
** Clean up, calculate results, and go home.
** Also, indicate that adjustment is done.
*/
FreeMemory((farvoid *)abase,&systemerror);
FreeMemory((farvoid *)bbase,&systemerror);
FreeMemory((farvoid *)cbase,&systemerror);

locemfloatstruct->emflops=(iterations*(double)locemfloatstruct->loops)/
		(double)TicksToFracSecs(accumtime);
if(locemfloatstruct->adjust==0)
	locemfloatstruct->adjust=1;

#ifdef DEBUG
printf("----------------------------------------------------------------------------\n");
#endif
return;
}

/*************************
** FOURIER COEFFICIENTS **
*************************/

/**************
** DoFourier **
***************
** Perform the transcendental/trigonometric portion of the
** benchmark.  This benchmark calculates the first n
** fourier coefficients of the function (x+1)^x defined
** on the interval 0,2.
*/
void DoFourier(void)
{
FourierStruct *locfourierstruct;        /* Local fourier struct */
fardouble *abase;               /* Base of A[] coefficients array */
fardouble *bbase;               /* Base of B[] coefficients array */
unsigned long accumtime;        /* Accumulated time in ticks */
double iterations;              /* # of iterations */
char *errorcontext;             /* Error context string pointer */
int systemerror;                /* For error code */

/*
** Link to global structure
*/
locfourierstruct=&global_fourierstruct;

/*
** Set error context string
*/
errorcontext="FPU:Transcendental";

/*
** See if we need to do self-adjustment code.
*/
if(locfourierstruct->adjust==0)
{
	locfourierstruct->arraysize=100L;       /* Start at 100 elements */
	while(1)
	{

		abase=(fardouble *)AllocateMemory(locfourierstruct->arraysize*sizeof(double),
				&systemerror);
		if(systemerror)
		{       ReportError(errorcontext,systemerror);
			ErrorExit();
		}

		bbase=(fardouble *)AllocateMemory(locfourierstruct->arraysize*sizeof(double),
				&systemerror);
		if(systemerror)
		{       ReportError(errorcontext,systemerror);
			FreeMemory((void *)abase,&systemerror);
			ErrorExit();
		}
		/*
		** Do an iteration of the tests.  If the elapsed time is
		** less than or equal to the permitted minimum, re-allocate
		** larger arrays and try again.
		*/
		if(DoFPUTransIteration(abase,bbase,
			locfourierstruct->arraysize)>global_min_ticks)
			break;          /* We're ok...exit */

		/*
		** Make bigger arrays and try again.
		*/
		FreeMemory((farvoid *)abase,&systemerror);
		FreeMemory((farvoid *)bbase,&systemerror);
		locfourierstruct->arraysize+=50L;
	}
}
else
{       /*
	** Don't need self-adjustment.  Just allocate the
	** arrays, and go.
	*/
	abase=(fardouble *)AllocateMemory(locfourierstruct->arraysize*sizeof(double),
			&systemerror);
	if(systemerror)
	{       ReportError(errorcontext,systemerror);
		ErrorExit();
	}

	bbase=(fardouble *)AllocateMemory(locfourierstruct->arraysize*sizeof(double),
			&systemerror);
	if(systemerror)
	{       ReportError(errorcontext,systemerror);
		FreeMemory((void *)abase,&systemerror);
		ErrorExit();
	}
}
/*
** All's well if we get here.  Repeatedly perform integration
** tests until the accumulated time is greater than the
** # of seconds requested.
*/
accumtime=0L;
iterations=(double)0.0;
do {
	accumtime+=DoFPUTransIteration(abase,bbase,locfourierstruct->arraysize);
	iterations+=(double)locfourierstruct->arraysize*(double)2.0-(double)1.0;
} while(TicksToSecs(accumtime)<locfourierstruct->request_secs);


/*
** Clean up, calculate results, and go home.
** Also set adjustment flag to indicate no adjust code needed.
*/
FreeMemory((farvoid *)abase,&systemerror);
FreeMemory((farvoid *)bbase,&systemerror);

locfourierstruct->fflops=iterations/(double)TicksToFracSecs(accumtime);

if(locfourierstruct->adjust==0)
	locfourierstruct->adjust=1;

return;
}

/************************
** DoFPUTransIteration **
*************************
** One timed iteration of the FPU transcendental/trigonometric
** test: fills abase[0..arraysize-1] and bbase[1..arraysize-1]
** with the Fourier coefficients of (x+1)^x on the interval 0,2.
** bbase[0] is never written.
** Every integral uses a fixed 200 integration steps.
** Returns the elapsed time as reported by StopStopwatch.
*/
static ulong DoFPUTransIteration(fardouble *abase,      /* A coeffs. */
			fardouble *bbase,               /* B coeffs. */
			ulong arraysize)                /* # of coeffs */
{
/*
** Fundamental frequency: ( 2 * pi ) / period, and since the
** period is 2 this is simply pi.
*/
const double omega=(double)3.1415926535897932;
double omegan;          /* omega times the term number */
unsigned long n;        /* Term number */
unsigned long stamp;    /* Stopwatch start value */

stamp=StartStopwatch();

/*
** A[0] is the plain integral halved; no sine/cosine factor.
*/
abase[0]=TrapezoidIntegrate((double)0.0,
			(double)2.0,
			200,
			(double)0.0,
			0 )/(double)2.0;

/*
** Remaining terms.  The 2/period factor in front of each integral
** is 1 for a period of 2, so it is left out.
*/
for(n=1;n<arraysize;n++)
{
	omegan=omega * (double)n;

	/* Cosine (A) term */
	abase[n]=TrapezoidIntegrate((double)0.0,
			(double)2.0,
			200,
			omegan,
			1);

	/* Sine (B) term */
	bbase[n]=TrapezoidIntegrate((double)0.0,
			(double)2.0,
			200,
			omegan,
			2);
}
#ifdef DEBUG
{
  int i;
  printf("\nA[i]=\n");
  for (i=0;i<arraysize;i++) printf("%7.3g ",abase[i]);
  printf("\nB[i]=\n(undefined) ");
  for (i=1;i<arraysize;i++) printf("%7.3g ",bbase[i]);
}
#endif

return(StopStopwatch(stamp));
}

/***********************
** TrapezoidIntegrate **
************************
** Perform a simple trapezoid integration on the
** function (x+1)**x.
** x0,x1 set the lower and upper bounds of the
** integration.
** nsteps indicates # of trapezoidal sections
** omegan is the fundamental frequency times
**  the series member #
** select = 0 for the A[0] term, 1 for cosine terms, and
**   2 for sine terms.
** Returns the value.
*/
static double TrapezoidIntegrate( double x0,            /* Lower bound */
			double x1,              /* Upper bound */
			int nsteps,             /* # of steps */
			double omegan,          /* omega * n */
			int select)
{
double x;               /* Independent variable */
double dx;              /* Stepsize */
double rvalue;          /* Return value */


/*
** Initialize independent variable
*/
x=x0;

/*
** Calculate stepsize
*/
dx=(x1 - x0) / (double)nsteps;

/*
** Initialize the return value.
*/
rvalue=thefunction(x0,omegan,select)/(double)2.0;

/*
** Compute the other terms of the integral.
*/
if(nsteps!=1)
{       --nsteps;               /* Already done 1 step */
	while(--nsteps )
	{
		x+=dx;
		rvalue+=thefunction(x,omegan,select);
	}
}
/*
** Finish computation
*/
rvalue=(rvalue+thefunction(x1,omegan,select)/(double)2.0)*dx;

return(rvalue);
}

/****************
** thefunction **
*****************
** This routine selects the function to be used
** in the Trapezoid integration.
** x is the independent variable
** omegan is omega * n
** select chooses which of the sine/cosine functions
**  are used.  note the special case for select=0.
*/
static double thefunction(double x,             /* Independent variable */
		double omegan,          /* Omega * term */
		int select)             /* Choose term */
{

/*
** Use select to pick which function we call.
*/
switch(select)
{
	case 0: return(pow(x+(double)1.0,x));

	case 1: return(pow(x+(double)1.0,x) * cos(omegan * x));

	case 2: return(pow(x+(double)1.0,x) * sin(omegan * x));
}

/*
** We should never reach this point, but the following
** keeps compilers from issuing a warning message.
*/
return(0.0);
}

/*************************
** ASSIGNMENT ALGORITHM **
*************************/

/*************
** DoAssign **
**************
** Perform an assignment algorithm.
** The algorithm was adapted from the step by step guide found
** in "Quantitative Decision Making for Business" (Gordon,
**  Pressman, and Cohn; Prentice-Hall)
**
** This driver allocates numarrays cost arrays of
** ASSIGNROWS x ASSIGNCOLS longs, calibrates numarrays when
** adjust==0, and repeats DoAssignIteration until request_secs
** have accumulated.  The rate (iterations * numarrays per second)
** is stored in global_assignstruct.iterspersec; nothing is
** returned.
**
** NOTES:
** 1. Even though the algorithm distinguishes between
**    ASSIGNROWS and ASSIGNCOLS, as though the two might
**    be different, it does presume a square matrix.
**    I.E., ASSIGNROWS and ASSIGNCOLS must be the same.
**    This makes for some algorithmically-correct but
**    probably non-optimal constructs.
**
*/
void DoAssign(void)
{
AssignStruct *locassignstruct;  /* Local structure ptr */
farlong *arraybase;             /* Base of the block of arrays */
char *errorcontext;             /* Error context string pointer */
int systemerror;                /* For holding error code */
ulong accumtime;                /* Accumulated time in ticks */
double iterations;              /* # of iterations */

/*
** Link to global structure
*/
locassignstruct=&global_assignstruct;

/*
** Set the error context string.
*/
errorcontext="CPU:Assignment";

/*
** See if we need to do self adjustment code.
*/
if(locassignstruct->adjust==0)
{
	/*
	** Self-adjustment code.  The system begins by working on 1
	** array.  If it does that in no time, then two arrays
	** are built.  This process continues until
	** enough arrays are built to handle the tolerance.
	*/
	locassignstruct->numarrays=1;
	while(1)
	{
		/*
		** Allocate space for arrays.  If the allocation fails
		** there is nothing to release, so just report and exit.
		*/
		arraybase=(farlong *) AllocateMemory(sizeof(long)*
			ASSIGNROWS*ASSIGNCOLS*locassignstruct->numarrays,
			 &systemerror);
		if(systemerror)
		{       ReportError(errorcontext,systemerror);
			ErrorExit();
		}

		/*
		** Do an iteration of the assignment alg.  If the
		** elapsed time is less than or equal to the permitted
		** minimum, then allocate for more arrays and
		** try again.
		*/
		if(DoAssignIteration(arraybase,
			locassignstruct->numarrays)>global_min_ticks)
			break;          /* We're ok...exit */

		FreeMemory((farvoid *)arraybase, &systemerror);
		locassignstruct->numarrays++;
	}
}
else
{       /*
	** Allocate space for arrays.  As above, a failed
	** allocation leaves nothing to free.
	*/
	arraybase=(farlong *)AllocateMemory(sizeof(long)*
		ASSIGNROWS*ASSIGNCOLS*locassignstruct->numarrays,
		 &systemerror);
	if(systemerror)
	{       ReportError(errorcontext,systemerror);
		ErrorExit();
	}
}

/*
** All's well if we get here.  Do the tests.
*/
accumtime=0L;
iterations=(double)0.0;

do {
	accumtime+=DoAssignIteration(arraybase,
		locassignstruct->numarrays);
	iterations+=(double)1.0;
} while(TicksToSecs(accumtime)<locassignstruct->request_secs);

/*
** Clean up, calculate results, and go home.  Be sure to
** show that we don't have to rerun adjustment code.
*/
FreeMemory((farvoid *)arraybase,&systemerror);

locassignstruct->iterspersec=iterations *
	(double)locassignstruct->numarrays / TicksToFracSecs(accumtime);

if(locassignstruct->adjust==0)
	locassignstruct->adjust=1;

return;

}

/**********************
** DoAssignIteration **
***********************
** Runs one iteration of the assignment test: the numarrays
** arrays starting at arraybase are reloaded with the cost
** table (untimed), then Assignment is run on each of them in turn
** (timed).  Returns the elapsed time as reported by StopStopwatch.
*/
static ulong DoAssignIteration(farlong *arraybase,
	ulong numarrays)
{
longptr cursor;         /* Walks from one array to the next */
ulong stamp;            /* Stopwatch start value */
ulong remaining;        /* Arrays still to be processed */

/*
** Fresh data first; this part is not timed.
*/
LoadAssignArrayWithRand(arraybase,numarrays);

cursor.ptrs.p=arraybase;

stamp=StartStopwatch();

/*
** Solve every array.  The cursor moves on by exactly one array
** per pass (stepping fix by Eike Dierks; it used to advance by
** i*ASSIGNROWS*ASSIGNCOLS).
*/
for(remaining=numarrays;remaining!=0;remaining--)
{
	Assignment(*cursor.ptrs.ap);
	cursor.ptrs.p+=ASSIGNROWS*ASSIGNCOLS;
}

return(StopStopwatch(stamp));
}

/****************************
** LoadAssignArrayWithRand **
*****************************
** Fill numarrays consecutive assignment arrays, starting at
** arraybase, with identical cost tables.  The first array is
** loaded by LoadAssign; every further array is a copy of it.
*/
static void LoadAssignArrayWithRand(farlong *arraybase,
	ulong numarrays)
{
longptr first;          /* The array that gets the random data */
longptr target;         /* The array currently being filled by copy */
ulong n;

first.ptrs.p=arraybase;
target.ptrs.p=arraybase;

/*
** Build the master copy.
*/
LoadAssign(*(first.ptrs.ap));

/*
** Replicate it into arrays 1..numarrays-1.  The target moves on by
** one array per pass (stepping fix by Eike Dierks; it used to
** advance by i*ASSIGNROWS*ASSIGNCOLS).
*/
for(n=1;n<numarrays;n++)
{
	target.ptrs.p+=ASSIGNROWS*ASSIGNCOLS;
	CopyToAssign(*(first.ptrs.ap),*(target.ptrs.ap));
}

return;
}

/***************
** LoadAssign **
****************
** The array given by arraybase is loaded with positive random
** numbers obtained from abs_randwc((int32)5000000).
** The random number generator is re-seeded with 13 first, so every
** call produces the same table.
*/
static void LoadAssign(farlong arraybase[][ASSIGNCOLS])
{
ushort i,j;

/*
** Reset random number generator so things repeat.
*/
randnum((int32)13);

/*
** Rows are bounded by ASSIGNROWS and columns by ASSIGNCOLS.  (The
** column loop used to test against ASSIGNROWS, which only worked
** because the matrix is square.)
*/
for(i=0;i<ASSIGNROWS;i++)
  for(j=0;j<ASSIGNCOLS;j++){
    arraybase[i][j]=abs_randwc((int32)5000000);
  }

return;
}

/*****************
** CopyToAssign **
******************
** Copy every element of arrayfrom into the same position of
** arrayto.  The routine that builds the initial array calls this
** to replicate that initial array into all following arrays.
*/
static void CopyToAssign(farlong arrayfrom[ASSIGNROWS][ASSIGNCOLS],
		farlong arrayto[ASSIGNROWS][ASSIGNCOLS])
{
ushort row;
ushort col;

for(row=0;row<ASSIGNROWS;row++)
{
	farlong *src=arrayfrom[row];    /* Current source row */
	farlong *dst=arrayto[row];      /* Matching destination row */

	for(col=0;col<ASSIGNCOLS;col++)
		dst[col]=src[col];
}

return;
}

/***************
** Assignment **
****************
** Solve one assignment problem in place.  arraybase is the cost
** tableau; it is modified by the reduction steps.  The assignment
** markers are kept in a local tableau and are only printed when
** DEBUG is defined.
*/
static void Assignment(farlong arraybase[][ASSIGNCOLS])
{
short marks[ASSIGNROWS][ASSIGNCOLS];    /* Per-cell assignment markers */

/*
** Reduce the tableau by its row and column minima.
*/
calc_minimum_costs(arraybase);

/*
** Alternate between trying to assign and revising the tableau.
** Finished once the number of selections equals the number of
** rows.
*/
for(;;)
{
	if(first_assignments(arraybase,marks)==ASSIGNROWS)
		break;
	second_assignments(arraybase,marks);
}

#ifdef DEBUG
{
	int i,j;
	printf("\nColumn choices for each row\n");
	for(i=0;i<ASSIGNROWS;i++)
	{
	        printf("R%03d: ",i);
		for(j=0;j<ASSIGNCOLS;j++)
			if(marks[i][j]==1)
				printf("%03d ",j);
	}
}
#endif

return;
}

/***********************
** calc_minimum_costs **
************************
** Reduce the tableau in place: first every row has its smallest
** element subtracted from all of its cells, then every column
** gets the same treatment.
*/
static void calc_minimum_costs(long tableau[][ASSIGNCOLS])
{
ushort row,col;         /* Index variables */
long lowest;            /* Smallest value seen so far */
long *cells;            /* Cells of the row being reduced */

/*
** Row pass.
*/
for(row=0;row<ASSIGNROWS;row++)
{
	cells=tableau[row];

	lowest=MAXPOSLONG;
	for(col=0;col<ASSIGNCOLS;col++)
	{
		if(cells[col]<lowest)
			lowest=cells[col];
	}

	for(col=0;col<ASSIGNCOLS;col++)
		cells[col]-=lowest;
}

/*
** Column pass.  Same idea, stepping through the array
** column-wise.
*/
for(col=0;col<ASSIGNCOLS;col++)
{
	lowest=MAXPOSLONG;
	for(row=0;row<ASSIGNROWS;row++)
	{
		if(tableau[row][col]<lowest)
			lowest=tableau[row][col];
	}

	/*
	** The row pass left at least one zero in every row, so a
	** zero column minimum is likely; skip the pointless
	** subtraction in that case.
	*/
	if(lowest==0)
		continue;

	for(row=0;row<ASSIGNROWS;row++)
		tableau[row][col]-=lowest;
}

return;
}

/**********************
** first_assignments **
***********************
** Do first assignments.
**   tableau         - cost tableau; only tested for zero cells
**                     here, never modified
**   assignedtableau - output; cleared on entry, then filled in
** The assignedtableau[] array holds a set of values that
** indicate the assignment of a value, or its elimination.
** The values are:
**      0 = Item is neither assigned nor eliminated.
**      1 = Item is assigned
**      2 = Item is eliminated
** Returns the number of selections made.  If this equals
** the number of rows, then an optimum has been determined.
*/
static int first_assignments(long tableau[][ASSIGNCOLS],
		short assignedtableau[][ASSIGNCOLS])
{
ushort i,j,k;                   /* Index variables */
ushort numassigns;              /* # of assignments */
ushort totnumassigns;           /* Total # of assignments */
ushort numzeros;                /* # of zeros in row */
int selected=0;                 /* Flag used to indicate selection */

/*
** Clear the assignedtableau, setting all members to show that
** no one is yet assigned, eliminated, or anything.
*/
for(i=0;i<ASSIGNROWS;i++)
	for(j=0;j<ASSIGNCOLS;j++)
		assignedtableau[i][j]=0;

totnumassigns=0;
do {
	/* numassigns counts only the assignments made in this pass */
	numassigns=0;
	/*
	** Step through rows.  For each one that is not currently
	** assigned, see if the row has only one zero in it.  If so,
	** mark that as an assigned row/col.  Eliminate other zeros
	** in the same column.
	*/
	for(i=0;i<ASSIGNROWS;i++)
	{       numzeros=0;
		/* Count unmarked zeros; selected remembers the last one seen */
		for(j=0;j<ASSIGNCOLS;j++)
			if(tableau[i][j]==0L)
				if(assignedtableau[i][j]==0)
				{       numzeros++;
					selected=j;
				}
		if(numzeros==1)
		{       numassigns++;
			totnumassigns++;
			assignedtableau[i][selected]=1;
			for(k=0;k<ASSIGNROWS;k++)
				if((k!=i) &&
				   (tableau[k][selected]==0))
					assignedtableau[k][selected]=2;
		}
	}
	/*
	** Step through columns, doing same as above.  Now, be careful
	** of items in the other rows of a selected column.
	*/
	for(j=0;j<ASSIGNCOLS;j++)
	{       numzeros=0;
		/* Here selected holds a row index rather than a column */
		for(i=0;i<ASSIGNROWS;i++)
			if(tableau[i][j]==0L)
				if(assignedtableau[i][j]==0)
				{       numzeros++;
					selected=i;
				}
		if(numzeros==1)
		{       numassigns++;
			totnumassigns++;
			assignedtableau[selected][j]=1;
			for(k=0;k<ASSIGNCOLS;k++)
				if((k!=j) &&
				   (tableau[selected][k]==0))
					assignedtableau[selected][k]=2;
		}
	}
	/*
	** Repeat until no more assignments to be made.
	*/
} while(numassigns!=0);

/*
** See if we can leave at this point.
*/
if(totnumassigns==ASSIGNROWS) return(totnumassigns);

/*
** Now step through the array by row.  If you find any unassigned
** zeros, pick the first in the row.  Eliminate all zeros from
** that same row & column.  This occurs if there are multiple optima...
** possibly.
*/
for(i=0;i<ASSIGNROWS;i++)
{       selected=-1;    /* -1 means no unmarked zero found in this row */
	for(j=0;j<ASSIGNCOLS;j++)
		if((tableau[i][j]==0L) &&
		   (assignedtableau[i][j]==0))
		{       selected=j;
			break;
		}
	if(selected!=-1)
	{       assignedtableau[i][selected]=1;
		totnumassigns++;
		/* Eliminate the other zeros of this row... */
		for(k=0;k<ASSIGNCOLS;k++)
			if((k!=selected) &&
			   (tableau[i][k]==0L))
				assignedtableau[i][k]=2;
		/* ...and the other zeros of the chosen column */
		for(k=0;k<ASSIGNROWS;k++)
			if((k!=i) &&
			   (tableau[k][selected]==0L))
				assignedtableau[k][selected]=2;
	}
}

return(totnumassigns);
}

/***********************
** second_assignments **
************************
** This section of the algorithm creates the revised
** tableau, and is difficult to explain.  I suggest you
** refer to the algorithm's source, mentioned in comments
** toward the beginning of the program.
**
**   tableau         - cost tableau, revised in place
**   assignedtableau - markers left by first_assignments; only
**                     read here (cells equal to 1 are "assigned")
** In outline, as the code below does it:
**   1. flag every row that holds no assignment;
**   2. flag every column that has a zero in a flagged row, then
**      flag every row that has an assignment in a flagged column;
**      repeat until no new row gets flagged;
**   3. find the smallest cell lying in a flagged row and an
**      unflagged column, subtract it from all such cells, and add
**      it to all cells in an unflagged row and a flagged column.
*/
static void second_assignments(long tableau[][ASSIGNCOLS],
		short assignedtableau[][ASSIGNCOLS])
{
int i,j;                                /* Indexes */
short linesrow[ASSIGNROWS];             /* 1 = row is flagged (checked) */
short linescol[ASSIGNCOLS];             /* 1 = column is flagged (checked) */
long smallest;                          /* Holds smallest value */
ushort numassigns;                      /* Number of assignments */
ushort newrows;                         /* New rows to be considered */
/*
** Clear the linesrow and linescol arrays.
*/
for(i=0;i<ASSIGNROWS;i++)
	linesrow[i]=0;
for(i=0;i<ASSIGNCOLS;i++)
	linescol[i]=0;

/*
** Scan rows, flag each row that has no assignment in it.
*/
for(i=0;i<ASSIGNROWS;i++)
{       numassigns=0;
	for(j=0;j<ASSIGNCOLS;j++)
		if(assignedtableau[i][j]==1)
		{       numassigns++;
			break;  /* One assignment is enough to know */
		}
	if(numassigns==0) linesrow[i]=1;
}

do {

	/* newrows counts the rows newly flagged in this pass */
	newrows=0;
	/*
	** For each row checked above, scan for any zeros.  If found,
	** check the associated column.
	*/
	for(i=0;i<ASSIGNROWS;i++)
	{       if(linesrow[i]==1)
			for(j=0;j<ASSIGNCOLS;j++)
				if(tableau[i][j]==0)
					linescol[j]=1;
	}

	/*
	** Now scan checked columns.  If any contain assigned zeros, check
	** the associated row.
	*/
	for(j=0;j<ASSIGNCOLS;j++)
		if(linescol[j]==1)
			for(i=0;i<ASSIGNROWS;i++)
				if((assignedtableau[i][j]==1) &&
					(linesrow[i]!=1))
				{
					linesrow[i]=1;
					newrows++;
				}
} while(newrows!=0);

/*
** linesrow[n]==0 indicate rows covered by imaginary line
** linescol[n]==1 indicate cols covered by imaginary line
** For all cells not covered by imaginary lines, determine smallest
** value.
*/
smallest=MAXPOSLONG;
for(i=0;i<ASSIGNROWS;i++)
	if(linesrow[i]!=0)
		for(j=0;j<ASSIGNCOLS;j++)
			if(linescol[j]!=1)
				if(tableau[i][j]<smallest)
					smallest=tableau[i][j];

/*
** Subtract smallest from all cells in the above set.
*/
for(i=0;i<ASSIGNROWS;i++)
	if(linesrow[i]!=0)
		for(j=0;j<ASSIGNCOLS;j++)
			if(linescol[j]!=1)
				tableau[i][j]-=smallest;

/*
** Add smallest to all cells covered by two lines.
*/
for(i=0;i<ASSIGNROWS;i++)
	if(linesrow[i]==0)
		for(j=0;j<ASSIGNCOLS;j++)
			if(linescol[j]==1)
				tableau[i][j]+=smallest;

return;
}

/********************
** IDEA Encryption **
*********************
** IDEA - International Data Encryption Algorithm.
** Based on code presented in Applied Cryptography by Bruce Schneier.
** Which was based on code developed by Xuejia Lai and James L. Massey.
** Other modifications made by Colin Plumb.
**
*/

/***********
** DoIDEA **
************
** Perform IDEA encryption.  Note that we time encryption & decryption
** time as being a single loop.
*/
void DoIDEA(void)
{
IDEAStruct *locideastruct;      /* Loc pointer to global structure */
int i;
IDEAkey Z,DK;
u16 userkey[8];
ulong accumtime;
double iterations;
char *errorcontext;
int systemerror;
faruchar *plain1;               /* First plaintext buffer */
faruchar *crypt1;               /* Encryption buffer */
faruchar *plain2;               /* Second plaintext buffer */

/*
** Link to global data
*/
locideastruct=&global_ideastruct;

/*
** Set error context
*/
errorcontext="CPU:IDEA";

/*
** Re-init random-number generator.
*/
/* randnum(3L); */
randnum((int32)3);

/*
** Build an encryption/decryption key
*/
for (i=0;i<8;i++)
        /* userkey[i]=(u16)(abs_randwc(60000L) & 0xFFFF); */
	userkey[i]=(u16)(abs_randwc((int32)60000) & 0xFFFF);
for(i=0;i<KEYLEN;i++)
	Z[i]=0;

/*
** Compute encryption/decryption subkeys
*/
en_key_idea(userkey,Z);
de_key_idea(Z,DK);

/*
** Allocate memory for buffers.  We'll make 3, called plain1,
** crypt1, and plain2.  It works like this:
**   plain1 >>encrypt>> crypt1 >>decrypt>> plain2.
** So, plain1 and plain2 should match.
** Also, fill up plain1 with sample text.
*/
plain1=(faruchar *)AllocateMemory(locideastruct->arraysize,&systemerror);
if(systemerror)
{
	ReportError(errorcontext,systemerror);
	ErrorExit();
}

crypt1=(faruchar *)AllocateMemory(locideastruct->arraysize,&systemerror);
if(systemerror)
{
	ReportError(errorcontext,systemerror);
	FreeMemory((farvoid *)plain1,&systemerror);
	ErrorExit();
}

plain2=(faruchar *)AllocateMemory(locideastruct->arraysize,&systemerror);
if(systemerror)
{
	ReportError(errorcontext,systemerror);
	FreeMemory((farvoid *)plain1,&systemerror);
	FreeMemory((farvoid *)crypt1,&systemerror);
	ErrorExit();
}
/*
** Note that we build the "plaintext" by simply loading
** the array up with random numbers.
*/
for(i=0;i<locideastruct->arraysize;i++)
	plain1[i]=(uchar)(abs_randwc(255) & 0xFF);

/*
** See if we need to perform self adjustment loop.
*/
if(locideastruct->adjust==0)
{
	/*
	** Do self-adjustment.  This involves initializing the
	** # of loops and increasing the loop count until we
	** get a number of loops that we can use.
	*/
	for(locideastruct->loops=100L;
	  locideastruct->loops<MAXIDEALOOPS;
	  locideastruct->loops+=10L)
		if(DoIDEAIteration(plain1,crypt1,plain2,
		  locideastruct->arraysize,
		  locideastruct->loops,
		  Z,DK)>global_min_ticks) break;
}

/*
** All's well if we get here.  Do the test.
*/
accumtime=0L;
iterations=(double)0.0;

do {
	accumtime+=DoIDEAIteration(plain1,crypt1,plain2,
		locideastruct->arraysize,
		locideastruct->loops,Z,DK);
	iterations+=(double)locideastruct->loops;
} while(TicksToSecs(accumtime)<locideastruct->request_secs);

/*
** Clean up, calculate results, and go home.  Be sure to
** show that we don't have to rerun adjustment code.
*/
FreeMemory((farvoid *)plain1,&systemerror);
FreeMemory((farvoid *)crypt1,&systemerror);
FreeMemory((farvoid *)plain2,&systemerror);
locideastruct->iterspersec=iterations / TicksToFracSecs(accumtime);

if(locideastruct->adjust==0)
	locideastruct->adjust=1;

return;

}

/********************
** DoIDEAIteration **
*********************
** One timed iteration of the IDEA test.  For each of nloops
** loops, plain1 is encrypted into crypt1 with key Z and crypt1
** is decrypted into plain2 with key DK, working through the
** buffers in steps of four u16 words.
** Returns the elapsed time as reported by StopStopwatch.
*/
static ulong DoIDEAIteration(faruchar *plain1,
			faruchar *crypt1,
			faruchar *plain2,
			ulong arraysize,
			ulong nloops,
			IDEAkey Z,
			IDEAkey DK)
{
register ulong pass;    /* Loop counter */
register ulong off;     /* Byte offset into the buffers */
ulong stamp;            /* Stopwatch start value */
#ifdef DEBUG
int status=0;
#endif

stamp=StartStopwatch();

for(pass=0;pass<nloops;pass++)
{
	/* Encrypt, one block at a time */
	off=0;
	while(off<arraysize)
	{
		cipher_idea((u16 *)(plain1+off),(u16 *)(crypt1+off),Z);
		off+=(sizeof(u16)*4);
	}

	/* Decrypt, one block at a time */
	off=0;
	while(off<arraysize)
	{
		cipher_idea((u16 *)(crypt1+off),(u16 *)(plain2+off),DK);
		off+=(sizeof(u16)*4);
	}
}

#ifdef DEBUG
/* The round trip must give back the original plaintext */
for(off=0;off<arraysize;off++)
	if(*(plain1+off)!=*(plain2+off)){
		printf("IDEA Error! \n");
                status=1;
                }
if (status==0) printf("IDEA: OK\n");
#endif

return(StopStopwatch(stamp));
}

/********
** mul **
*********
** Performs multiplication, modulo (2**16)+1.  This code is structured
** on the assumption that untaken branches are cheaper than taken
** branches, and that the compiler doesn't schedule branches.
*/
static u16 mul(register u16 a, register u16 b)
{
register u32 p;
if(a)
{       if(b)
	{       p=(u32)(a*b);
		b=low16(p);
		a=(u16)(p>>16);
		return(b-a+(b<a));
	}
	else
		return(1-a);
}
else
	return(1-b);
}

/********
** inv **
*********
** Compute multiplicative inverse of x, modulo (2**16)+1
** using Euclid's GCD algorithm.  It is unrolled twice
** to avoid swapping the meaning of the registers.  And
** some subtracts are changed to adds.
**   x - value to invert; 0 and 1 are returned unchanged
** Returns the inverse as a u16.
*/
static u16 inv(u16 x)
{
u16 t0, t1;
u16 q, y;

if(x<=1)
	return(x);      /* 0 and 1 are self-inverse */
/* First division step: 0x10001 = t1*x + y */
t1=0x10001 / x;
y=0x10001 % x;
if(y==1)
	return(low16(1-t1));
t0=1;
/*
** Each pass performs two division steps, with x and y trading
** the roles of dividend and divisor.  The result comes from t0
** when x reaches 1 and from 1-t1 when y reaches 1.
*/
do {
	q=x/y;
	x=x%y;
	t0+=q*t1;
	if(x==1) return(t0);
	q=y/x;
	y=y%x;
	t1+=q*t0;
} while(y!=1);
return(low16(1-t1));
}

/****************
** en_key_idea **
*****************
** Compute IDEA encryption subkeys Z
**   userkey - the 8 u16 words of the user key (read only)
**   Z       - receives KEYLEN subkeys; the first 8 are a copy of
**             the user key, the rest are derived from earlier
**             subkeys by the shift/or step below
*/
static void en_key_idea(u16 *userkey, u16 *Z)
{
int i,j;

/*
** shifts
*/
for(j=0;j<8;j++)
	Z[j]=*userkey++;
/*
** j keeps counting from 8 up to KEYLEN.  i cycles through 1..8
** and indexes relative to Z, which slides forward by 8 entries
** each time i reaches 8.
*/
for(i=0;j<KEYLEN;j++)
{       i++;
	/*
	** New subkey = one earlier subkey shifted left by 9, or'ed
	** with the following one shifted right by 7 (the &7 makes the
	** indexes wrap within the current group of 8).
	*/
	Z[i+7]=(Z[i&7]<<9)| (Z[(i+1) & 7] >> 7);
	Z+=i&8;         /* Adds 8 when i==8, otherwise 0 */
	i&=7;           /* Wraps i from 8 back to 0 */
}
return;
}

/****************
** de_key_idea **
*****************
** Compute IDEA decryption subkeys DK from encryption
** subkeys Z.
**   Z  - encryption subkeys, read front to back
**   DK - receives KEYLEN decryption subkeys
** The new key is assembled back to front in a temporary table:
** Z is consumed in order while p walks down from the end of TT.
** Subkeys are passed through inv(), negated, or copied as is,
** depending on their position.
*/
static void de_key_idea(IDEAkey Z, IDEAkey DK)
{
IDEAkey TT;             /* Temporary table, filled from the end */
int j;
u16 t1, t2, t3;
u16 *p;
p=(u16 *)(TT+KEYLEN);   /* Starts one past the last entry */

/* First four subkeys of Z; they become the last four of TT */
t1=inv(*Z++);
t2=-*Z++;
t3=-*Z++;
*--p=inv(*Z++);
*--p=t3;
*--p=t2;
*--p=t1;

/*
** ROUNDS-1 groups of six subkeys each.  Note that within these
** groups t2 is stored before t3, the other way round from the
** first and last groups.
*/
for(j=1;j<ROUNDS;j++)
{       t1=*Z++;
	*--p=*Z++;
	*--p=t1;
	t1=inv(*Z++);
	t2=-*Z++;
	t3=-*Z++;
	*--p=inv(*Z++);
	*--p=t2;
	*--p=t3;
	*--p=t1;
}
/* Final group of six; stored in the same order as the first group */
t1=*Z++;
*--p=*Z++;
*--p=t1;
t1=inv(*Z++);
t2=-*Z++;
t3=-*Z++;
*--p=inv(*Z++);
*--p=t3;
*--p=t2;
*--p=t1;
/*
** Copy and destroy temp copy
*/
for(j=0,p=TT;j<KEYLEN;j++)
{       *DK++=*p;
	*p++=0;
}

return;
}

/*
** MUL(x,y)
** This #define creates a macro that computes x=x*y modulo 0x10001.
** Requires temps t16 and t32.  Also requires y to be strictly 16
** bits.  Here, I am using the simplest form.  May not be the
** fastest. -- RG
*/
/* #define MUL(x,y) (x=mul(low16(x),y)) */

/****************
** cipher_idea **
*****************
** IDEA encryption/decryption algorithm.
**   in  - four u16 input words
**   out - receives four u16 output words
**   Z   - subkey table, consumed in order: six subkeys in each
**         of the ROUNDS rounds, then four more for the output
**         step.  Whether the call encrypts or decrypts depends
**         only on which subkey table the caller passes in.
** MUL(x,y) is a macro defined outside this function; the comment
** preceding this function describes it as computing
** x=x*y modulo 0x10001.
*/
static void cipher_idea(u16 in[4],
		u16 out[4],
		register IDEAkey Z)
{
register u16 x1, x2, x3, x4, t1, t2;
/* register u16 t16;
register u16 t32; */
int r=ROUNDS;

/* Load the four input words */
x1=*in++;
x2=*in++;
x3=*in++;
x4=*in;

do {
	/* Mix in four subkeys: multiply, add, add, multiply */
	MUL(x1,*Z++);
	x2+=*Z++;
	x3+=*Z++;
	MUL(x4,*Z++);

	/* Derive t1 and t2 from the four words and two more subkeys */
	t2=x1^x3;
	MUL(t2,*Z++);
	t1=t2+(x2^x4);
	MUL(t1,*Z++);
	t2=t1+t2;

	x1^=t1;
	x4^=t2;

	/* x2 and x3 trade places while t1/t2 are xor'ed in */
	t2^=x2;
	x2=x3^t1;
	x3=t2;
} while(--r);
/*
** Output step with the last four subkeys.  x3 is written before
** x2 here, i.e. the two middle words are emitted in swapped order.
*/
MUL(x1,*Z++);
*out++=x1;
*out++=x3+*Z++;
*out++=x2+*Z++;
MUL(x4,*Z);
*out=x4;
return;
}

/************************
** HUFFMAN COMPRESSION **
************************/

/**************
** DoHuffman **
***************
** Execute a huffman compression on a block of plaintext.
** Note that (as with IDEA encryption) an iteration of the
** Huffman test includes a compression AND a decompression.
** Also, the compression cycle includes building the
** Huffman tree.
*/
void DoHuffman(void)
{
HuffStruct *lochuffstruct;      /* Loc pointer to global data */
char *errorcontext;
int systemerror;
ulong accumtime;
double iterations;
farchar *comparray;
farchar *decomparray;
farchar *plaintext;

/*
** Link to global data
*/
lochuffstruct=&global_huffstruct;

/*
** Set error context.
*/
errorcontext="CPU:Huffman";

/*
** Allocate memory for the plaintext and the compressed text.
** We'll be really pessimistic here, and allocate equal amounts
** for both (though we know...well, we PRESUME) the compressed
** stuff will take less than the plain stuff.
** Also note that we'll build a 3rd buffer to decompress
** into, and we preallocate space for the huffman tree.
** (We presume that the Huffman tree will grow no larger
** than 512 bytes.  This is actually a super-conservative
** estimate...but, who cares?)
*/
plaintext=(farchar *)AllocateMemory(lochuffstruct->arraysize,&systemerror);
if(systemerror)
{       ReportError(errorcontext,systemerror);
	ErrorExit();
}
comparray=(farchar *)AllocateMemory(lochuffstruct->arraysize,&systemerror);
if(systemerror)
{       ReportError(errorcontext,systemerror);
	FreeMemory(plaintext,&systemerror);
	ErrorExit();
}
decomparray=(farchar *)AllocateMemory(lochuffstruct->arraysize,&systemerror);
if(systemerror)
{       ReportError(errorcontext,systemerror);
	FreeMemory(plaintext,&systemerror);
	FreeMemory(comparray,&systemerror);
	ErrorExit();
}

hufftree=(huff_node *)AllocateMemory(sizeof(huff_node) * 512,
	&systemerror);
if(systemerror)
{       ReportError(errorcontext,systemerror);
	FreeMemory(plaintext,&systemerror);
	FreeMemory(comparray,&systemerror);
	FreeMemory(decomparray,&systemerror);
	ErrorExit();
}

/*
** Build the plaintext buffer.  Since we want this to
** actually be able to compress, we'll use the
** wordcatalog to build the plaintext stuff.
*/
/*
** Reset random number generator so things repeat.
** added by Uwe F. Mayer
*/
randnum((int32)13);
create_text_block(plaintext,lochuffstruct->arraysize-1,(ushort)500);
plaintext[lochuffstruct->arraysize-1L]='\0';
plaintextlen=lochuffstruct->arraysize;

/*
** See if we need to perform self adjustment loop.
*/
if(lochuffstruct->adjust==0)
{
	/*
	** Do self-adjustment.  This involves initializing the
	** # of loops and increasing the loop count until we
	** get a number of loops that we can use.
	*/
	for(lochuffstruct->loops=100L;
	  lochuffstruct->loops<MAXHUFFLOOPS;
	  lochuffstruct->loops+=10L)
		if(DoHuffIteration(plaintext,
			comparray,
			decomparray,
		  lochuffstruct->arraysize,
		  lochuffstruct->loops,
		  hufftree)>global_min_ticks) break;
}

/*
** All's well if we get here.  Do the test.
*/
accumtime=0L;
iterations=(double)0.0;

do {
	accumtime+=DoHuffIteration(plaintext,
		comparray,
		decomparray,
		lochuffstruct->arraysize,
		lochuffstruct->loops,
		hufftree);
	iterations+=(double)lochuffstruct->loops;
} while(TicksToSecs(accumtime)<lochuffstruct->request_secs);

/*
** Clean up, calculate results, and go home.  Be sure to
** show that we don't have to rerun adjustment code.
*/
FreeMemory((farvoid *)plaintext,&systemerror);
FreeMemory((farvoid *)comparray,&systemerror);
FreeMemory((farvoid *)decomparray,&systemerror);
FreeMemory((farvoid *)hufftree,&systemerror);
lochuffstruct->iterspersec=iterations / TicksToFracSecs(accumtime);

if(lochuffstruct->adjust==0)
	lochuffstruct->adjust=1;

}

/*********************
** create_text_line **
**********************
** Create a random line of text, stored at *dt.  The line may be
** no more than nchars long.
*/
static void create_text_line(farchar *dt,
			long nchars)
{
char myword[40];        /* Scratch copy of the current word */
farchar *wordptr;       /* Word picked from the catalog */
long wordlen;           /* Length of that word, terminator excluded */
long chunk;             /* Bytes to append on this pass */
long filled;            /* Bytes written to the line so far */

filled=0;

do {
	/*
	** Pick a random catalog word and copy it, terminator
	** included, into the scratch buffer.
	*/
	wordptr=wordcatarray[abs_randwc((int32)WORDCATSIZE)];
	wordlen=(long)strlen(wordptr);
	MoveMemory((farvoid *)myword,
		(farvoid *)wordptr,
		(unsigned long)wordlen+1);

	/*
	** Turn the terminator into a blank; the chunk is the word
	** plus that trailing blank.
	*/
	myword[wordlen]=' ';
	chunk=wordlen+1;

	/*
	** Trim the chunk if it would run past the end of the line.
	*/
	if((chunk+filled)>nchars)
		chunk=nchars-filled;

	/*
	** Append the chunk and advance.
	*/
	MoveMemory((farvoid *)dt,(farvoid *)myword,(unsigned long)chunk);
	dt+=chunk;
	filled+=chunk;

	/*
	** Keep adding words until the line is full.
	*/
} while(filled<nchars);
}

/**********************
** create_text_block **
***********************
** Build a block of text randomly loaded with words.  The words
** come from the wordcatalog (which must be loaded before you
** call this).
** *tb points to the memory where the text is to be built.
** tblen is the # of bytes to put into the text block
** maxlinlen is the maximum length of any line (line end indicated
**  by a newline character).
*/
static void create_text_block(farchar *tb,
			ulong tblen,
			ushort maxlinlen)
{
ulong bytessofar;       /* # of bytes so far */
ulong linelen;          /* Line length, terminating newline included */

/*
** A pre-tested loop is used so that a request for zero bytes
** writes nothing.  (A post-tested loop would run once with
** linelen==0, step tb back by one and store a newline in front
** of the caller's buffer.)
*/
bytessofar=0L;
while(bytessofar<tblen)
{
	/*
	** Pick a random length for a line and fill the line.
	** Clip the length so the block never exceeds tblen; inside
	** this loop the clipped length is always at least 1.
	*/
	linelen=abs_randwc(maxlinlen-6)+6;
	if((linelen+bytessofar)>tblen)
		linelen=tblen-bytessofar;

	/*
	** A 1-byte line has room for the newline only.
	*/
	if(linelen>1)
	{
		create_text_line(tb,linelen);
	}

	/*
	** The last byte of the line becomes the newline.
	*/
	tb+=linelen-1;
	*tb++='\n';

	bytessofar+=linelen;
}

}

/********************
** DoHuffIteration **
*********************
** Perform the huffman benchmark.  This routine
**  (a) Builds the huffman tree
**  (b) Compresses the text
**  (c) Decompresses the text and verifies correct decompression
*/
static ulong DoHuffIteration(farchar *plaintext,
	farchar *comparray,
	farchar *decomparray,
	ulong arraysize,
	ulong nloops,
	huff_node *hufftree)
{
int i;                          /* Index into the tree */
long j;                         /* Index into the text */
int root;                       /* Pointer to huffman tree root */
float lowfreq1, lowfreq2;       /* Low frequency counters */
int lowidx1, lowidx2;           /* Indexes of low freq. elements */
long bitoffset;                 /* Bit offset into text */
long textoffset;                /* Char offset into text */
long maxbitoffset;              /* Holds limit of bit offset */
long bitstringlen;              /* Length of bitstring */
int c;                          /* Character from plaintext */
char bitstring[30];             /* Holds bitstring */
ulong elapsed;                  /* For stopwatch */
#ifdef DEBUG
int status=0;
#endif

/*
** Start the stopwatch
*/
elapsed=StartStopwatch();

/*
** Do everything for nloops
*/
while(nloops--)
{

/*
** Calculate the frequency of each byte value. Store the
** results in what will become the "leaves" of the
** Huffman tree.  Interior nodes will be built in those
** nodes greater than node #255.
*/
for(i=0;i<256;i++)
{
	hufftree[i].freq=(float)0.0;
	hufftree[i].c=(unsigned char)i;
}

/*
** Each text byte is converted to unsigned char before it is used
** as a leaf index, so a byte >= 0x80 can never become a negative
** index on platforms where plain char is signed.
*/
for(j=0;j<arraysize;j++)
	hufftree[(unsigned char)plaintext[j]].freq+=(float)1.0;

for(i=0;i<256;i++)
	if(hufftree[i].freq != (float)0.0)
		hufftree[i].freq/=(float)arraysize;

/* Reset the second half of the tree. Otherwise the loop below that
** compares the frequencies up to index 512 makes no sense. Some
** systems automatically zero out memory upon allocation, others (like
** for example DEC Unix) do not. Depending on this the loop below gets
** different data and different run times. On our alpha the data that
** was arbitrarily assigned led to an underflow error at runtime. We
** use that zeroed-out bits are in fact 0 as a float.
** Uwe F. Mayer */
memset(&(hufftree[256]),0,sizeof(huff_node)*256);
/*
** Build the huffman tree.  First clear all the parent
** pointers and left/right pointers.  Also, discard all
** nodes that have a frequency of true 0.  */
for(i=0;i<512;i++)
{       if(hufftree[i].freq==(float)0.0)
		hufftree[i].parent=EXCLUDED;
	else
		hufftree[i].parent=hufftree[i].left=hufftree[i].right=-1;
}

/*
** Repeatedly pick the two parentless nodes of lowest frequency
** and join them under a new interior node.
*/
root=255;                       /* Starting root node-1 */
while(1)
{
	lowfreq1=(float)2.0; lowfreq2=(float)2.0;
	lowidx1=-1; lowidx2=-1;
	/*
	** Find first lowest frequency.
	*/
	for(i=0;i<=root;i++)
		if(hufftree[i].parent<0)
			if(hufftree[i].freq<lowfreq1)
			{       lowfreq1=hufftree[i].freq;
				lowidx1=i;
			}

	/*
	** Did we find a lowest value?  If not, the
	** tree is done.
	*/
	if(lowidx1==-1) break;

	/*
	** Find next lowest frequency
	*/
	for(i=0;i<=root;i++)
		if((hufftree[i].parent<0) && (i!=lowidx1))
			if(hufftree[i].freq<lowfreq2)
			{       lowfreq2=hufftree[i].freq;
				lowidx2=i;
			}

	/*
	** If we could only find one item, then that
	** item is surely the root, and (as above) the
	** tree is done.
	*/
	if(lowidx2==-1) break;

	/*
	** Attach the two new nodes to the current root, and
	** advance the current root.
	*/
	root++;                 /* New root */
	hufftree[lowidx1].parent=root;
	hufftree[lowidx2].parent=root;
	hufftree[root].freq=lowfreq1+lowfreq2;
	hufftree[root].left=lowidx1;
	hufftree[root].right=lowidx2;
	hufftree[root].parent=-2;       /* Show root */
}

/*
** Huffman tree built...compress the plaintext.  The text is
** walked with the long index j (the same width used by the
** frequency count above) rather than with an int.
*/
bitoffset=0L;                           /* Initialize bit offset */
for(j=0;j<arraysize;j++)
{
	c=(int)(unsigned char)plaintext[j];  /* Fetch character as 0..255 */
	/*
	** Build a bit string for byte c by climbing from the leaf
	** to the root ('0' = left child, '1' = right child).
	*/
	bitstringlen=0;
	while(hufftree[c].parent!=-2)
	{       if(hufftree[hufftree[c].parent].left==c)
			bitstring[bitstringlen]='0';
		else
			bitstring[bitstringlen]='1';
		c=hufftree[c].parent;
		bitstringlen++;
	}

	/*
	** Step backwards through the bit string, setting
	** bits in the compressed array as you go.
	*/
	while(bitstringlen--)
	{       SetCompBit((u8 *)comparray,(u32)bitoffset,bitstring[bitstringlen]);
		bitoffset++;
	}
}

/*
** Compression done.  Perform de-compression: walk down from the
** root one bit at a time until a leaf (left==-1) is reached.
*/
maxbitoffset=bitoffset;
bitoffset=0;
textoffset=0;
do {
	i=root;
	while(hufftree[i].left!=-1)
	{       if(GetCompBit((u8 *)comparray,(u32)bitoffset)==0)
			i=hufftree[i].left;
		else
			i=hufftree[i].right;
		bitoffset++;
	}
	decomparray[textoffset]=hufftree[i].c;

#ifdef DEBUG
	/* Compare as unsigned bytes so char signedness cannot cause a false alarm */
	if((unsigned char)hufftree[i].c != (unsigned char)plaintext[textoffset])
	{
		/* Show error */
		printf("Error at textoffset %ld\n",textoffset);
		status=1;
	}
#endif
	textoffset++;
} while(bitoffset<maxbitoffset);

}       /* End the big while(nloops--) from above */

/*
** All done
*/
#ifdef DEBUG
  if (status==0) printf("Huffman: OK\n");
#endif
return(StopStopwatch(elapsed));
}

/***************
** SetCompBit **
****************
** Set a bit in the compression array.  The value of the
** bit is set according to char bitchar.
*/
static void SetCompBit(u8 *comparray,
		u32 bitoffset,
		char bitchar)
{
/*
** bitoffset/8 selects the byte; bitoffset%8 selects the bit
** inside it, bit 0 being the least significant.
*/
u8 *target=comparray+(bitoffset>>3);
int mask=1<<(int)(bitoffset % 8);

/*
** The character '1' sets the bit; any other character clears it.
*/
if(bitchar!='1')
	*target&=~mask;
else
	*target|=mask;
}

/***************
** GetCompBit **
****************
** Return the bit value of a bit in the compression array.
** Returns 0 if the bit is clear, nonzero otherwise.
*/
static int GetCompBit(u8 *comparray,
		u32 bitoffset)
{
/*
** Same addressing as SetCompBit: byte bitoffset/8, bit bitoffset%8.
*/
int mask=1<<(int)(bitoffset % 8);

/*
** The result is the masked byte itself: 0 when the bit is clear,
** otherwise the mask value (not normalized to 1).
*/
return(mask & comparray[bitoffset>>3]);
}

/********************************
** BACK PROPAGATION NEURAL NET **
*********************************
** This code is a modified version of the code
** that was submitted to BYTE Magazine by
** Maureen Caudill.  It accompanied an article
** that I CANNOT NOW RECALL.
** The author's original heading/comment was
** as follows:
**
**  Backpropagation Network
**  Written by Maureen Caudill
**  in Think C 4.0 on a Macintosh
**
**  (c) Maureen Caudill 1988-1991
**  This network will accept 5x7 input patterns
**  and produce 8 bit output patterns.
**  The source code may be copied or modified without restriction,
**  but no fee may be charged for its use.
**
** ++++++++++++++
** I have modified the code so that it will work
** on systems other than a Macintosh -- RG
*/

/***********
** DoNNET **
************
** Perform the neural net benchmark.
** Note that this benchmark is one of the few that
** requires an input file.  That file is "NNET.DAT" and
** should be on the local directory (from which the
** benchmark program is launched).
*/
void DoNNET(void)
{
NNetStruct *locnnetstruct;      /* Local ptr to global data */
char *errorcontext;
ulong accumtime;
double iterations;

/*
** Link to global data
*/
locnnetstruct=&global_nnetstruct;

/*
** Set error context
*/
errorcontext="CPU:NNET";

/*
** Init random number generator.
** NOTE: It is important that the random number generator
**  be re-initialized for every pass through this test.
**  The NNET algorithm uses the random number generator
**  to initialize the net.  Results are sensitive to
**  the initial neural net state.
*/
/* randnum(3L); */
randnum((int32)3);

/*
** Read in the input and output patterns.  We'll do this
** only once here at the beginning.  These values don't
** change once loaded.
*/
if(read_data_file()!=0)
   ErrorExit();


/*
** See if we need to perform self adjustment loop.
*/
if(locnnetstruct->adjust==0)
{
	/*
	** Do self-adjustment.  This involves initializing the
	** # of loops and increasing the loop count until we
	** get a number of loops that we can use.
	*/
	for(locnnetstruct->loops=1L;
	  locnnetstruct->loops<MAXNNETLOOPS;
	  locnnetstruct->loops++)
	  {     /*randnum(3L); */
		randnum((int32)3);
		if(DoNNetIteration(locnnetstruct->loops)
			>global_min_ticks) break;
	  }
}

/*
** All's well if we get here.  Do the test.
*/
accumtime=0L;
iterations=(double)0.0;

do {
	/* randnum(3L); */    /* Gotta do this for Neural Net */
	randnum((int32)3);    /* Gotta do this for Neural Net */
	accumtime+=DoNNetIteration(locnnetstruct->loops);
	iterations+=(double)locnnetstruct->loops;
} while(TicksToSecs(accumtime)<locnnetstruct->request_secs);

/*
** Clean up, calculate results, and go home.  Be sure to
** show that we don't have to rerun adjustment code.
*/
locnnetstruct->iterspersec=iterations / TicksToFracSecs(accumtime);

if(locnnetstruct->adjust==0)
	locnnetstruct->adjust=1;


return;
}

/********************
** DoNNetIteration **
*********************
** Do a single iteration of the neural net benchmark.
** By iteration, we mean a "learning" pass.
*/
static ulong DoNNetIteration(ulong nloops)
{
ulong start;            /* Stopwatch reading taken at entry */
ulong cycle;            /* Counts completed learning cycles */
int p;                  /* Pattern index */

/*
** The clock runs across all nloops cycles, so weight randomization
** and change zeroing are part of what gets timed.  Not stopping
** and restarting the clock per cycle should reduce clock jitter.
*/
start=StartStopwatch();
for(cycle=0;cycle<nloops;cycle++)
{
	randomize_wts();
	zero_changes();
	iteration_count=1;
	numpasses=0;
	learned=F;

	/*
	** Present every pattern, then ask check_out_error() whether
	** another pass over the data is needed.  Any answer other
	** than F ends the cycle.
	*/
	do {
		for(p=0;p<numpats;p++)
		{
			worst_error=0.0;        /* cleared before each pattern */
			move_wt_changes();      /* last changes become the momentum input */
			do_forward_pass(p);
			do_back_pass(p);
			iteration_count++;
		}
		numpasses++;
		learned=check_out_error();
	} while(learned==F);
#ifdef DEBUG
printf("Learned in %d passes\n",numpasses);
#endif
}
return(StopStopwatch(start));
}

/*************************
** do_mid_forward(patt) **
**************************
** Process the middle layer's forward pass
** The activation of middle layer's neurode is the weighted
** sum of the inputs from the input pattern, with sigmoid
** function applied to the inputs.
**/
static void  do_mid_forward(int patt)
{
int node;               /* Middle-layer neurode being evaluated */
int k;                  /* Input element index */
double acc;             /* Weighted sum for the current neurode */

for(node=0;node<MID_SIZE;node++)
{
	/* Weighted sum of the pattern's inputs */
	acc=0.0;
	for(k=0;k<IN_SIZE;k++)
		acc+=mid_wts[node][k]*in_pats[patt][k];

	/* Squash with the sigmoid f(x) = 1/(1+exp(-x)) */
	acc=1.0/(1.0+exp(-acc));
	mid_out[node]=acc;
}
}

/*********************
** do_out_forward() **
**********************
** process the forward pass through the output layer
** The activation of the output layer is the weighted sum of
** the inputs (outputs from middle layer), modified by the
** sigmoid function.
**/
static void  do_out_forward()
{
int node;               /* Output-layer neurode being evaluated */
int k;                  /* Middle-layer neurode index */
double acc;             /* Weighted sum for the current neurode */

for(node=0;node<OUT_SIZE;node++)
{
	/* Weighted sum of the middle layer's outputs */
	acc=0.0;
	for(k=0;k<MID_SIZE;k++)
		acc+=out_wts[node][k]*mid_out[k];

	/* Squash with the sigmoid f(x) = 1/(1+exp(-x)) */
	acc=1.0/(1.0+exp(-acc));
	out_out[node]=acc;
}
}

/*************************
** display_output(patt) **
**************************
** Display the actual output vs. the desired output of the
** network.
** Once the training is complete, and the "learned" flag set
** to TRUE, then display_output sends its output to both
** the screen and to a text output file.
**
** NOTE: This routine has been disabled in the benchmark
** version. -- RG
**/
/*
void  display_output(int patt)
{
int             i;

	fprintf(outfile,"\n Iteration # %d",iteration_count);
	fprintf(outfile,"\n Desired Output:  ");

	for (i=0; i<OUT_SIZE; i++)
	{
		fprintf(outfile,"%6.3f  ",out_pats[patt][i]);
	}
	fprintf(outfile,"\n Actual Output:   ");

	for (i=0; i<OUT_SIZE; i++)
	{
		fprintf(outfile,"%6.3f  ",out_out[i]);
	}
	fprintf(outfile,"\n");
	return;
}
*/

/**********************
** do_forward_pass() **
***********************
** control function for the forward pass through the network
** NOTE: I have disabled the call to display_output() in
**  the benchmark version -- RG.
**/
static void  do_forward_pass(int patt)
{
/*
** Middle layer first, since the output layer consumes its results.
** (The display of results is disabled in the benchmark version.)
*/
do_mid_forward(patt);
do_out_forward();
}

/***********************
** do_out_error(patt) **
************************
** Compute the error for the output layer neurodes.
** This is simply Desired - Actual.
**/
static void do_out_error(int patt)
{
int neurode;
double error,tot_error, sum;

tot_error = 0.0;
sum = 0.0;
for (neurode=0; neurode<OUT_SIZE; neurode++)
{
	out_error[neurode] = out_pats[patt][neurode] - out_out[neurode];
	/*
	** while we're here, also compute magnitude
	** of total error and worst error in this pass.
	** We use these to decide if we are done yet.
	*/
	error = out_error[neurode];
	if (error <0.0)
	{
		sum += -error;
		if (-error > tot_error)
			tot_error = -error; /* worst error this pattern */
	}
	else
	{
		sum += error;
		if (error > tot_error)
			tot_error = error; /* worst error this pattern */
	}
}
avg_out_error[patt] = sum/OUT_SIZE;
tot_out_error[patt] = tot_error;
return;
}

/***********************
** worst_pass_error() **
************************
** Find the worst and average error in the pass and save it
**/
static void  worst_pass_error()
{
double peak=0.0;        /* Largest per-pattern worst error */
double total=0.0;       /* Sum of the per-pattern average errors */
int p;

for(p=0;p<numpats;p++)
{
	total+=avg_out_error[p];
	if(tot_out_error[p]>peak)
		peak=tot_out_error[p];
}

/* Publish the figures for the whole pass */
worst_error=peak;
average_error=total/numpats;
}

/*******************
** do_mid_error() **
********************
** Compute the error for the middle layer neurodes
** This is based on the output errors computed above.
** Note that the derivative of the sigmoid f(x) is
**        f'(x) = f(x)(1 - f(x))
** Recall that f(x) is merely the output of the middle
** layer neurode on the forward pass.
**/
static void do_mid_error()
{
int node;               /* Middle-layer neurode index */
int k;                  /* Output-layer neurode index */
double backsum;         /* Output errors weighted back to this neurode */

for(node=0;node<MID_SIZE;node++)
{
	backsum=0.0;
	for(k=0;k<OUT_SIZE;k++)
		backsum+=out_wts[k][node]*out_error[k];

	/*
	** Scale by the sigmoid's derivative f'(I) = f(I)(1 - f(I)),
	** where f(I) is this neurode's forward-pass output.
	*/
	mid_error[node]=mid_out[node]*(1-mid_out[node])*backsum;
}
}

/*********************
** adjust_out_wts() **
**********************
** Adjust the weights of the output layer.  The error for
** the output layer has been previously propagated back to
** the middle layer.
** Use the Delta Rule with momentum term to adjust the weights.
**/
static void adjust_out_wts()
{
double rate=BETA;       /* Factor applied to the delta-rule term */
double momentum=ALPHA;  /* Factor applied to the previous weight change */
double step;            /* Total change applied to one weight */
int node;               /* Output-layer neurode index */
int w;                  /* Weight index (one per middle-layer neurode) */

for(node=0;node<OUT_SIZE;node++)
{
	for(w=0;w<MID_SIZE;w++)
	{
		/* Delta rule term, then the momentum term */
		step=rate * out_error[node] * mid_out[w];
		step+=momentum * out_wt_change[node][w];

		/*
		** Apply the change and add it to the running total
		** that feeds the next pass's momentum.
		*/
		out_wts[node][w]+=step;
		out_wt_cum_change[node][w]+=step;
	}
}
}

/*************************
** adjust_mid_wts(patt) **
**************************
** Adjust the middle layer weights using the previously computed
** errors.
** We use the Generalized Delta Rule with momentum term
**/
static void adjust_mid_wts(int patt)
{
double rate=BETA;       /* Factor applied to the delta-rule term */
double momentum=ALPHA;  /* Factor applied to the previous weight change */
double step;            /* Total change applied to one weight */
int node;               /* Middle-layer neurode index */
int w;                  /* Weight index (one per input element) */

for(node=0;node<MID_SIZE;node++)
{
	for(w=0;w<IN_SIZE;w++)
	{
		/* Delta rule term, then the momentum term */
		step=rate * mid_error[node] * in_pats[patt][w];
		step+=momentum * mid_wt_change[node][w];

		/*
		** Apply the change and add it to the running total
		** that feeds the next pass's momentum.
		*/
		mid_wts[node][w]+=step;
		mid_wt_cum_change[node][w]+=step;
	}
}
}

/*******************
** do_back_pass() **
********************
** Process the backward propagation of error through network.
**/
void  do_back_pass(int patt)
{
/*
** Both error stages run before any weight is adjusted, so the
** middle-layer error is computed from the output weights as they
** stood during the forward pass.
*/
do_out_error(patt);
do_mid_error();

adjust_out_wts();
adjust_mid_wts(patt);
}


/**********************
** move_wt_changes() **
***********************
** Move the weight changes accumulated last pass into the wt-change
** array for use by the momentum term in this pass. Also zero out
** the accumulating arrays after the move.
**/
static void move_wt_changes()
{
int node;               /* Neurode (row) index */
int w;                  /* Weight (column) index */

/*
** Middle layer: the accumulated changes become the momentum
** input, and the accumulator starts over at zero.
*/
for(node=0;node<MID_SIZE;node++)
{
	for(w=0;w<IN_SIZE;w++)
	{
		mid_wt_change[node][w]=mid_wt_cum_change[node][w];
		mid_wt_cum_change[node][w]=0.0;
	}
}

/*
** Output layer: same treatment.
*/
for(node=0;node<OUT_SIZE;node++)
{
	for(w=0;w<MID_SIZE;w++)
	{
		out_wt_change[node][w]=out_wt_cum_change[node][w];
		out_wt_cum_change[node][w]=0.0;
	}
}
}

/**********************
** check_out_error() **
***********************
** Check to see if the error in the output layer is below
** MARGIN*OUT_SIZE for all output patterns.  If so, then
** assume the network has learned acceptably well.  This
** is simply an arbitrary measure of how well the network
** has learned -- many other standards are possible.
**/
static int check_out_error()
{
int verdict;            /* Value handed back: T, F or ERR */
int blown;              /* Becomes T if any pattern's worst error is >= 16.0 */
int p;                  /* Pattern index */

verdict=T;
blown=F;
worst_pass_error();     /* refreshes worst_error for this pass */

/*
** With at least one pattern loaded, the pass counts as not yet
** learned (F) while worst_error is still at or above STOP.
** Separately, note any pattern whose worst error has reached 16.0.
*/
for(p=0;p<numpats;p++)
{
	if(worst_error>=STOP)
		verdict=F;
	if(tot_out_error[p]>=16.0)
		blown=T;
}

/*
** An error of that size overrides the verdict with ERR.
*/
if(blown==T)
	verdict=ERR;

return(verdict);
}


/*******************
** zero_changes() **
********************
** Zero out all the wt change arrays
**/
static void zero_changes()
{
int node;               /* Neurode (row) index */
int w;                  /* Weight (column) index */

/* Middle layer: both the momentum input and the accumulator */
for(node=0;node<MID_SIZE;node++)
	for(w=0;w<IN_SIZE;w++)
	{
		mid_wt_cum_change[node][w]=0.0;
		mid_wt_change[node][w]=0.0;
	}

/* Output layer: same */
for(node=0;node<OUT_SIZE;node++)
	for(w=0;w<MID_SIZE;w++)
	{
		out_wt_cum_change[node][w]=0.0;
		out_wt_change[node][w]=0.0;
	}
}


/********************
** randomize_wts() **
*********************
** Initialize the weights in the middle and output layers to
** random values between -0.25..+0.25
** Function rand() returns a value between 0 and 32767.
**
** NOTE: Had to make alterations to how the random numbers were
** created.  -- RG.
**/
static void randomize_wts()
{
int neurode,i;
double value;

/*
** Following not used in benchmark version -- RG
**
**        printf("\n Please enter a random number seed (1..32767):  ");
**        scanf("%d", &i);
**        srand(i);
*/

/*
** Every weight is derived the same way: a draw requested with a
** limit of 100000 is divided by 100000.0, shifted down by 0.5 and
** halved.  Assuming abs_randwc(n) yields values in 0..n-1 (TODO
** confirm against its definition), that puts each weight in
** -0.25..+0.25.
**
** NOTE(review): the output-layer loop previously divided by 10000.0
** (one zero short), which spread those weights over roughly
** -0.25..+4.75.  Correcting the divisor changes the net's initial
** state and therefore how many passes it needs to learn, so NNET
** scores are not directly comparable with runs of the old code.
*/
for (neurode = 0; neurode<MID_SIZE; neurode++)
{
	for(i=0; i<IN_SIZE; i++)
	{
		value=(double)abs_randwc((int32)100000);
		value=value/(double)100000.0 - (double) 0.5;
		mid_wts[neurode][i] = value/2;
	}
}
for (neurode=0; neurode<OUT_SIZE; neurode++)
{
	for(i=0; i<MID_SIZE; i++)
	{
		value=(double)abs_randwc((int32)100000);
		value=value/(double)100000.0 - (double) 0.5;
		out_wts[neurode][i] = value/2;
	}
}

return;
}


/*********************
** read_data_file() **
**********************
** Read in the input data file and store the patterns in
** in_pats and out_pats.
** The format for the data file is as follows:
**
** line#   data expected
** -----   ------------------------------
** 1               In-X-size,in-y-size,out-size
** 2               number of patterns in file
** 3               1st X row of 1st input pattern
** 4..             following rows of 1st input pattern
**                 in-x+2  y-out pattern
**                                 1st X row of 2nd pattern
**                 etc.
**
** Each row of data is separated by commas or spaces.
** The data is expected to be ascii text corresponding to
** either a +1 or a 0.
**
** Sample input for a 1-pattern file (The comments to the
** right may NOT be in the file unless more sophisticated
** parsing of the input is done.):
**
** 5,7,8                      input is 5x7 grid, output is 8 bits
** 1                          one pattern in file
** 0,1,1,1,0                  beginning of pattern for "O"
** 1,0,0,0,1
** 1,0,0,0,1
** 1,0,0,0,1
** 1,0,0,0,1
** 1,0,0,0,0
** 0,1,1,1,0
** 0,1,0,0,1,1,1,1            ASCII code for "O" -- 0100 1111
**
** Clearly, this simple scheme can be expanded or enhanced
** any way you like.
**
** Returns -1 if any file error occurred, otherwise 0.
**/
static int read_data_file()
{
FILE *infile;

int xinsize,yinsize,youtsize;
int patt, element, i, row;
int vals_read;
int val1,val2,val3,val4,val5,val6,val7,val8;
long max_start;         /* Highest row start that still leaves room for 5 values */
int status;             /* Return value: -1 until the whole file has parsed */

status = -1;

infile = fopen(inpath, "r");
if (infile == NULL)
{
	printf("\n CPU:NNET--error in opening file!");
	return -1 ;
}

/*
** From here on, every exit goes through the "done" label so that
** the file is closed on failure as well as on success.
*/
vals_read =fscanf(infile,"%d  %d  %d",&xinsize,&yinsize,&youtsize);
if (vals_read != 3)
{
	printf("\n CPU:NNET -- Should read 3 items in line one; did read %d",vals_read);
	goto done;
}

/*
** Each input row stores exactly 5 values starting at element
** row*xinsize.  Reject sizes that would place any of those writes
** outside elements 0..IN_SIZE-1 of a pattern.  The division form
** keeps the check itself from overflowing on absurd sizes.
*/
max_start = (long)IN_SIZE - 5L;
if (xinsize < 0 || yinsize < 0 ||
    (yinsize > 0 && max_start < 0) ||
    (yinsize > 1 && (long)xinsize > max_start/(long)(yinsize-1)))
{
	printf("\n CPU:NNET -- input dimensions do not fit the network!");
	goto done;
}

vals_read=fscanf(infile,"%d",&numpats);
if (vals_read !=1)
{
	printf("\n CPU:NNET -- Should read 1 item in line 2; did read %d",vals_read);
	goto done;
}
if (numpats > MAXPATS)
	numpats = MAXPATS;

for (patt=0; patt<numpats; patt++)
{
	/*
	** Input pattern: yinsize rows of 5 values each.
	*/
	for (row = 0; row<yinsize; row++)
	{
		vals_read = fscanf(infile,"%d  %d  %d  %d  %d",
			&val1, &val2, &val3, &val4, &val5);
		if (vals_read != 5)
		{
			printf ("\n CPU:NNET -- failure in reading input!");
			goto done;
		}
		element=row*xinsize;

		in_pats[patt][element] = (double) val1; element++;
		in_pats[patt][element] = (double) val2; element++;
		in_pats[patt][element] = (double) val3; element++;
		in_pats[patt][element] = (double) val4; element++;
		in_pats[patt][element] = (double) val5; element++;
	}

	/*
	** Pull the inputs in from the extremes: anything at or above
	** 0.9 becomes 0.9, anything at or below 0.1 becomes 0.1.
	*/
	for (i=0;i<IN_SIZE; i++)
	{
		if (in_pats[patt][i] >= 0.9)
			in_pats[patt][i] = 0.9;
		if (in_pats[patt][i] <= 0.1)
			in_pats[patt][i] = 0.1;
	}

	/*
	** Output pattern: one row of 8 values.  A short read is a
	** failure; otherwise stale or unset values would be stored.
	*/
	vals_read = fscanf(infile,"%d  %d  %d  %d  %d  %d  %d  %d",
		&val1, &val2, &val3, &val4, &val5, &val6, &val7, &val8);
	if (vals_read != 8)
	{
		printf ("\n CPU:NNET -- failure in reading input!");
		goto done;
	}

	element = 0;
	out_pats[patt][element] = (double) val1; element++;
	out_pats[patt][element] = (double) val2; element++;
	out_pats[patt][element] = (double) val3; element++;
	out_pats[patt][element] = (double) val4; element++;
	out_pats[patt][element] = (double) val5; element++;
	out_pats[patt][element] = (double) val6; element++;
	out_pats[patt][element] = (double) val7; element++;
	out_pats[patt][element] = (double) val8; element++;
}

status = 0;

done:
fclose(infile);
return(status);
}

/*********************
** initialize_net() **
**********************
** Do all the initialization stuff before beginning
*/
/*
static int initialize_net()
{
int err_code;

randomize_wts();
zero_changes();
err_code = read_data_file();
iteration_count = 1;
return(err_code);
}
*/

/**********************
** display_mid_wts() **
***********************
** Display the weights on the middle layer neurodes
** NOTE: This routine is not used in the benchmark
**  test -- RG
**/
/* static void display_mid_wts()
{
int             neurode, weight, row, col;

fprintf(outfile,"\n Weights of Middle Layer neurodes:");

for (neurode=0; neurode<MID_SIZE; neurode++)
{
	fprintf(outfile,"\n  Mid Neurode # %d",neurode);
	for (row=0; row<IN_Y_SIZE; row++)
	{
		fprintf(outfile,"\n ");
		for (col=0; col<IN_X_SIZE; col++)
		{
			weight = IN_X_SIZE * row + col;
			fprintf(outfile," %8.3f ", mid_wts[neurode][weight]);
		}
	}
}
return;
}
*/
/**********************
** display_out_wts() **
***********************
** Display the weights on the output layer neurodes
** NOTE: This code is not used in the benchmark
**  test -- RG
*/
/* void  display_out_wts()
{
int             neurode, weight;

	fprintf(outfile,"\n Weights of Output Layer neurodes:");

	for (neurode=0; neurode<OUT_SIZE; neurode++)
	{
		fprintf(outfile,"\n  Out Neurode # %d \n",neurode);
		for (weight=0; weight<MID_SIZE; weight++)
		{
			fprintf(outfile," %8.3f ", out_wts[neurode][weight]);
		}
	}
	return;
}
*/

/***********************
**  LU DECOMPOSITION  **
** (Linear Equations) **
************************
** These routines come from "Numerical Recipes in Pascal".
** Note that, as in the assignment algorithm, though we
** separately define LUARRAYROWS and LUARRAYCOLS, the two
** must be the same value (this routine depends on a square
** matrix).
*/

/*********
** DoLU **
**********
** Perform the LU decomposition benchmark.
**
** Builds one "seed" system (matrix a, right-hand side b), then
** repeatedly copies it into a bank of work arrays and solves
** every copy, timing only the solves.
**
** Reads from global_lustruct: adjust, numarrays, request_secs.
** Writes to global_lustruct:  numarrays (only when adjust==0),
**                             iterspersec, adjust (set to 1).
**
** When adjust==0 the number of work arrays is grown from 1 up
** to MAXLUARRAYS until a single call to DoLUIteration() takes
** more than global_min_ticks.
**
** Every allocation failure is reported with ReportError() and
** ends in ErrorExit() after the buffers obtained so far have
** been released.
*/
void DoLU(void)
{
LUStruct *loclustruct;  /* Local pointer to global data */
char *errorcontext;     /* Tag passed to ReportError() */
int systemerror;        /* Status from Allocate/FreeMemory */
fardouble *a;           /* Seed matrix (linear storage) */
fardouble *b;           /* Seed right-hand side */
fardouble *abase;       /* Bank of working matrices */
fardouble *bbase;       /* Bank of working right-hand sides */
LUdblptr ptra;          /* Views the linear matrix as 2D */
int n;
int i;
ulong accumtime;        /* Ticks accumulated over all runs */
double iterations;      /* Number of systems solved */

/*
** Link to global data
*/
loclustruct=&global_lustruct;

/*
** Set error context.
*/
errorcontext="FPU:LU";

/*
** Nothing is held in the work banks yet.  Keeping these NULL
** whenever they do not own memory lets LUFreeMem() skip them.
*/
abase=(fardouble *)NULL;
bbase=(fardouble *)NULL;

/*
** Our first step is to build a "solvable" problem.  This
** will become the "seed" set that all others will be
** derived from (i.e., we'll simply copy these arrays
** into the others).  Each allocation is checked on its own,
** because the next call overwrites systemerror.
*/
a=(fardouble *)AllocateMemory(sizeof(double) * LUARRAYCOLS * LUARRAYROWS,
		&systemerror);
if(systemerror)
{
	ReportError(errorcontext,systemerror);
	ErrorExit();
}
b=(fardouble *)AllocateMemory(sizeof(double) * LUARRAYROWS,
		&systemerror);
if(systemerror)
{
	ReportError(errorcontext,systemerror);
	FreeMemory((farvoid *)a,&systemerror);
	ErrorExit();
}
n=LUARRAYROWS;

/*
** We need to allocate a temp vector that is used by the LU
** algorithm.  This removes the allocation routine from the
** timing.
*/
LUtempvv=(fardouble *)AllocateMemory(sizeof(double)*LUARRAYROWS,
	&systemerror);
if(systemerror)
{
	ReportError(errorcontext,systemerror);
	FreeMemory((farvoid *)a,&systemerror);
	FreeMemory((farvoid *)b,&systemerror);
	ErrorExit();
}

/*
** Build a problem to be solved.
*/
ptra.ptrs.p=a;                  /* Gotta coerce linear array to 2D array */
build_problem(*ptra.ptrs.ap,n,b);

/*
** Now that we have a problem built, see if we need to do
** auto-adjust.  If so, repeatedly call the DoLUIteration routine,
** increasing the number of solutions per iteration as you go.
*/
if(loclustruct->adjust==0)
{
	loclustruct->numarrays=0;
	for(i=1;i<=MAXLUARRAYS;i++)
	{
		abase=(fardouble *)AllocateMemory(sizeof(double) *
			LUARRAYCOLS*LUARRAYROWS*(i+1),&systemerror);
		if(systemerror)
		{
			ReportError(errorcontext,systemerror);
			LUFreeMem(a,b,(fardouble *)NULL,(fardouble *)NULL);
			ErrorExit();
		}
		bbase=(fardouble *)AllocateMemory(sizeof(double) *
			LUARRAYROWS*(i+1),&systemerror);
		if(systemerror)
		{
			ReportError(errorcontext,systemerror);
			LUFreeMem(a,b,abase,(fardouble *)NULL);
			ErrorExit();
		}
		if(DoLUIteration(a,b,abase,bbase,i)>global_min_ticks)
		{
			loclustruct->numarrays=i;
			break;
		}
		/*
		** Not enough arrays...free them all and try again.
		** The pointers are cleared so that nothing later
		** can release the same blocks a second time.
		*/
		FreeMemory((farvoid *)abase,&systemerror);
		FreeMemory((farvoid *)bbase,&systemerror);
		abase=(fardouble *)NULL;
		bbase=(fardouble *)NULL;
	}
	/*
	** Were we able to do it?  If not, abase and bbase are
	** NULL here, so only the seed arrays and the temp
	** vector are released.
	*/
	if(loclustruct->numarrays==0)
	{
		printf("FPU:LU -- Array limit reached\n");
		LUFreeMem(a,b,abase,bbase);
		ErrorExit();
	}
}
else
{
	/*
	** Don't need to adjust -- just allocate the proper
	** number of arrays and proceed.
	*/
	abase=(fardouble *)AllocateMemory(sizeof(double) *
		LUARRAYCOLS*LUARRAYROWS*loclustruct->numarrays,
		&systemerror);
	if(systemerror)
	{
		ReportError(errorcontext,systemerror);
		LUFreeMem(a,b,(fardouble *)NULL,(fardouble *)NULL);
		ErrorExit();
	}
	bbase=(fardouble *)AllocateMemory(sizeof(double) *
		LUARRAYROWS*loclustruct->numarrays,&systemerror);
	if(systemerror)
	{
		ReportError(errorcontext,systemerror);
		LUFreeMem(a,b,abase,(fardouble *)NULL);
		ErrorExit();
	}
}
/*
** All's well if we get here.  Do the test.
*/
accumtime=0L;
iterations=(double)0.0;

do {
	accumtime+=DoLUIteration(a,b,abase,bbase,
		loclustruct->numarrays);
	iterations+=(double)loclustruct->numarrays;
} while(TicksToSecs(accumtime)<loclustruct->request_secs);

/*
** Clean up, calculate results, and go home.  Be sure to
** show that we don't have to rerun adjustment code.
*/
loclustruct->iterspersec=iterations / TicksToFracSecs(accumtime);

if(loclustruct->adjust==0)
	loclustruct->adjust=1;

LUFreeMem(a,b,abase,bbase);
return;
}

/**************
** LUFreeMem **
***************
** Hand back the buffers owned by the LU benchmark.
**
**  a, b         - seed matrix and seed vector; always released
**  abase, bbase - banks of working copies; released only when
**                 the pointer is not NULL
**
** The global scratch vector LUtempvv is always released too.
** Status codes written by FreeMemory() are not examined.
*/
static void LUFreeMem(fardouble *a, fardouble *b,
			fardouble *abase,fardouble *bbase)
{
int freestatus;         /* Receives the (ignored) status code */

/* Seed arrays and the scratch vector go unconditionally */
FreeMemory((farvoid *)a,&freestatus);
FreeMemory((farvoid *)b,&freestatus);
FreeMemory((farvoid *)LUtempvv,&freestatus);

/* Work banks are optional: callers pass NULL for any they lack */
if(abase!=(fardouble *)NULL)
{
	FreeMemory((farvoid *)abase,&freestatus);
}
if(bbase!=(fardouble *)NULL)
{
	FreeMemory((farvoid *)bbase,&freestatus);
}
}

/******************
** DoLUIteration **
*******************
** Run one timed pass of the LU benchmark.
**
**  a, b         - seed matrix (LUARRAYROWS*LUARRAYCOLS doubles)
**                 and seed right-hand side (LUARRAYROWS doubles)
**  abase, bbase - banks holding room for numarrays copies of
**                 the matrix and of the right-hand side
**  numarrays    - how many copies to set up and solve
**
** The seeds are first copied into every slot of the banks
** (untimed).  The stopwatch then runs while lusolve() is called
** once per slot.  Returns the value of StopStopwatch() for
** that timed section.
*/
static ulong DoLUIteration(fardouble *a,fardouble *b,
		fardouble *abase, fardouble *bbase,
		ulong numarrays)
{
fardouble *matslot;     /* Current matrix inside abase */
fardouble *vecslot;     /* Current vector inside bbase */
LUdblptr matview;       /* Presents a linear matrix as 2D */
ulong ticks;            /* Stopwatch value */
ulong slot;             /* Which copy we are working on */
ulong elem;             /* Element index inside one copy */

/*
** Replicate the seed system into each slot.  lusolve()
** overwrites its inputs, so this is redone on every call.
*/
for(slot=0;slot<numarrays;slot++)
{
	matslot=abase+slot*LUARRAYROWS*LUARRAYCOLS;
	vecslot=bbase+slot*LUARRAYROWS;

	for(elem=0;elem<LUARRAYROWS*LUARRAYCOLS;elem++)
	{
		matslot[elem]=a[elem];
	}
	for(elem=0;elem<LUARRAYROWS;elem++)
	{
		vecslot[elem]=b[elem];
	}
}

/*
** Timed section: solve every copy.
*/
ticks=StartStopwatch();
for(slot=0;slot<numarrays;slot++)
{
	matslot=abase+slot*LUARRAYROWS*LUARRAYCOLS;
	vecslot=bbase+slot*LUARRAYROWS;
	matview.ptrs.p=matslot;
	lusolve(*matview.ptrs.ap,LUARRAYROWS,vecslot);
}
ticks=StopStopwatch(ticks);

return(ticks);
}

/******************
** build_problem **
*******************
** Construct a solvable set of linear equations in a[][] / b[].
**
**  a - n x n matrix to fill (row length LUARRAYCOLS)
**  n - order of the system
**  b - right-hand-side vector to fill
**
** Steps:
**  1. Reseed the random number generator with 13.
**  2. Make a[][] a diagonal matrix whose diagonal entries are
**     random values in 1..1000 and load b[] with random values
**     in 1..100.
**  3. "Scramble" 8*n times: pick two random rows; if they
**     differ, add the second to the first (or subtract it,
**     depending on which index is larger), applying the same
**     operation to b[].
** The row-scaling step of the original recipe is not performed.
*/
static void build_problem(double a[][LUARRAYCOLS],
		int n,
		double b[LUARRAYROWS])
{
long row,col;           /* Matrix indexes */
long pass;              /* Scramble pass counter */
long dst,src;           /* Rows combined by one scramble pass */
double sign;            /* +1.0 to add, -1.0 to subtract */

/*
** Start from a fixed seed so every run builds the same problem.
*/
randnum((int32)13);

/*
** Fill row by row.  For each row the draw for b[] comes
** first, followed by the draw for the diagonal element.
*/
for(row=0;row<n;row++)
{
	b[row]=(double)(abs_randwc((int32)100)+(int32)1);
	for(col=0;col<n;col++)
	{
		a[row][col]=(col==row)
			? (double)(abs_randwc((int32)1000)+(int32)1)
			: (double)0.0;
	}
}

#ifdef DEBUG
/* Show b/diagonal pairs, i.e. the expected solution values */
printf("Problem:\n");
for(row=0;row<n;row++)
{
	printf("%.0f/%.0f=%.2f\t",b[row],a[row][row],b[row]/a[row][row]);
}
#endif

/*
** Scramble with elementary row operations.
*/
for(pass=0;pass<8*n;pass++)
{
	dst=abs_randwc((int32)n);
	src=abs_randwc((int32)n);

	/* Combining a row with itself is skipped */
	if(dst==src)
		continue;

	/* Lower destination index adds, higher one subtracts */
	sign=(dst<src) ? (double)1.0 : (double)-1.0;

	for(col=0;col<n;col++)
	{
		a[dst][col]+=a[src][col]*sign;
	}
	b[dst]+=b[src]*sign;
}
}


/***********
** ludcmp **
************
** After the procedure of the same name in "Numerical Recipes in
** Pascal", by Press, Flannery, Teukolsky, and Vetterling.
**
** Replaces the n x n matrix a[][] by the LU decomposition of a
** rowwise permutation of itself.  On return a[][] holds:
**   --                       --
**  |  b(1,1) b(1,2) b(1,3)...  |
**  |  a(2,1) b(2,2) b(2,3)...  |
**  |  a(3,1) a(3,2) b(3,3)...  |
**  |  a(4,1) a(4,2) a(4,3)...  |
**  |  ...                      |
**   --                        --
** where the b(i,j) form the upper triangular factor and the
** a(i,j) the lower triangular factor.  The unit diagonal of the
** lower factor is implied and not stored.
**
**  a    - matrix, decomposed in place
**  n    - order of the matrix
**  indx - output: indx[j] is the row swapped into position j
**  d    - output: starts at +1 and changes sign on every row
**         interchange
**
** Uses the global vector LUtempvv for the per-row scale factors.
** Returns 0 if some row consists entirely of zeros (singular
** matrix), else returns 1.
*/
static int ludcmp(double a[][LUARRAYCOLS],
		int n,
		int indx[],
		int *d)
{
const double tiny=(double)1.0e-20;     /* Stand-in for a zero pivot */
double largest;         /* Largest magnitude / figure of merit */
double acc;             /* Running sum for one element */
double tmp;             /* Scratch value */
int row,col,k;          /* Indexes */
int pivrow=0;           /* Row chosen as pivot */

*d=1;                   /* No interchanges yet */

/*
** Record the implicit scaling of each row: the reciprocal
** of its largest absolute element.
*/
for(row=0;row<n;row++)
{
	largest=(double)0.0;
	for(col=0;col<n;col++)
	{
		tmp=(double)fabs(a[row][col]);
		if(tmp>largest)
			largest=tmp;
	}
	/* Bail out on singular matrix */
	if(largest==(double)0.0)
		return(0);
	LUtempvv[row]=1.0/largest;
}

/*
** Crout's algorithm...loop over columns.
*/
for(col=0;col<n;col++)
{
	/* Elements above the diagonal in this column */
	for(row=0;row<col;row++)
	{
		acc=a[row][col];
		for(k=0;k<row;k++)
			acc-=a[row][k]*a[k][col];
		a[row][col]=acc;
	}

	/*
	** Diagonal and below: finish the sums and look for the
	** best pivot.  A later row wins a tie because of ">=".
	*/
	largest=(double)0.0;
	for(row=col;row<n;row++)
	{
		acc=a[row][col];
		for(k=0;k<col;k++)
			acc-=a[row][k]*a[k][col];
		a[row][col]=acc;

		tmp=LUtempvv[row]*fabs(acc);
		if(tmp>=largest)
		{
			largest=tmp;
			pivrow=row;
		}
	}

	/* Interchange rows if necessary */
	if(pivrow!=col)
	{
		for(k=0;k<n;k++)
		{
			tmp=a[pivrow][k];
			a[pivrow][k]=a[col][k];
			a[col][k]=tmp;
		}
		*d=-*d;         /* Change parity of d */

		/* The scale factors travel with their rows */
		tmp=LUtempvv[pivrow];
		LUtempvv[pivrow]=LUtempvv[col];
		LUtempvv[col]=tmp;
	}
	indx[col]=pivrow;

	/*
	** A zero pivot means the matrix is singular as far as
	** machine precision is concerned.  Following the
	** original author's recommendation, substitute "tiny".
	*/
	if(a[col][col]==(double)0.0)
		a[col][col]=tiny;

	/* Divide the rest of the column by the pivot */
	if(col!=(n-1))
	{
		tmp=1.0/a[col][col];
		for(row=col+1;row<n;row++)
			a[row][col]*=tmp;
	}
}

return(1);
}

/***********
** lubksb **
************
** Also from "Numerical Recipes in Pascal".
** Solves the set of n linear equations A X = B.
**
**  a    - input: not the matrix A itself but its LU
**         decomposition as produced by ludcmp()
**  n    - order of the system
**  indx - input: permutation vector returned by ludcmp()
**  b    - input: right-hand side B; output: solution vector X
**
** a[], n, and indx are not modified by this routine and can be
** left in place for different values of b[].
** Leading zero elements of b[] are skipped during forward
** substitution, which makes the routine efficient for use in
** matrix inversion.
*/
static void lubksb( double a[][LUARRAYCOLS],
		int n,
		int indx[LUARRAYROWS],
		double b[LUARRAYROWS])
{
int row,col;            /* Indexes */
int swaprow;            /* Entry of indx[] for the current row */
int firstnz;            /* Index of first nonzero b[], or -1 */
double acc;             /* Running sum */

/*
** Forward substitution, unscrambling the permutation as we
** go.  firstnz stays at -1 until a nonzero element of b[] is
** met; from then on it holds that element's (non-negative)
** index and the inner sums start there.
*/
firstnz=-1;
for(row=0;row<n;row++)
{
	swaprow=indx[row];
	acc=b[swaprow];
	b[swaprow]=b[row];

	if(firstnz==-1)
	{
		/* Still in the run of leading zeros? */
		if(acc!=(double)0.0)
			firstnz=row;
	}
	else
	{
		for(col=firstnz;col<row;col++)
			acc-=a[row][col]*b[col];
	}
	b[row]=acc;
}

/*
** Backsubstitution, from the last row upward.
*/
for(row=n-1;row>=0;row--)
{
	acc=b[row];
	for(col=row+1;col<n;col++)
		acc-=a[row][col]*b[col];
	b[row]=acc/a[row][row];
}
}

/************
** lusolve **
*************
** Solve a linear set of equations: A x = b
**
**  a - matrix A; overwritten by its LU decomposition
**  n - order of the system
**  b - right-hand side on entry, solution x on return
**
** Returns 0 if ludcmp() reports the matrix as singular (b[] is
** then left untouched), 1 otherwise.
** When DEBUG is defined the solution vector is printed.
*/
static int lusolve(double a[][LUARRAYCOLS],
		int n,
		double b[LUARRAYROWS])
{
int perm[LUARRAYROWS];  /* Row permutation from ludcmp() */
int parity;             /* Interchange parity; not used here */
#ifdef DEBUG
int row;
#endif

/* Factor first; give up if the matrix is singular */
if(!ludcmp(a,n,perm,&parity))
	return(0);

/* Matrix not singular -- back-substitute to get the solution */
lubksb(a,n,perm,b);

#ifdef DEBUG
printf("Solution:\n");
for(row=0;row<n;row++)
{
	printf("%6.2f\t",b[row]);
}
printf("\n");
#endif

return(1);
}