18 files changed, 4813 insertions, 5007 deletions
diff --git a/Makefile b/Makefile
index 5045c77..311e55d 100644
--- a/Makefile
+++ b/Makefile
@@ -19,10 +19,16 @@ default: nbench
 # You should leave -static in the CFLAGS so that your sysinfo can be
 # compiled into the executable.
 
-CC = gcc
+CCC=gcc
+GCC=gcc
 
 # generic options for gcc
-CFLAGS = -s -static -Wall -O3
+#CFLAGS=-O3 -mcpu=ev67 -mieee -funroll-loops -ftree-vectorize -fprefetch-loop-arrays -pipe -static -msmall-data -msmall-text
+#CFLAGS=-O3 -mcpu=ev67 -mieee -pipe -msmall-data -msmall-text -funroll-loops -ftree-vectorize -static
+GCCFLAGS=-O3 -march=k8 -msse3 -ftree-vectorize -funroll-loops -pipe
+#CCCFLAGS=-fast -unroll 0 -inline speed
+CCCFLAGS=${GCCFLAGS}
+LINKFLAGS=
 
 # if your gcc lets you do it, then try this one
 #CFLAGS = -s -static -Wall -O3 -fomit-frame-pointer -funroll-loops
@@ -91,15 +97,15 @@ MACHINE=
 # For any Unix flavor you need -DLINUX
 # You also need -DLINUX to get the new indices
 
-DEFINES= -DLINUX $(NO_UNAME)
+DEFINES=$(NO_UNAME) -DLINUX
 
 ##########################################################################
 # For LINUX-like systems with gcc
 sysinfoc.c: Makefile
-	./sysinfo.sh $(CC) $(MACHINE) $(DEFINES) $(CFLAGS)
+	./sysinfo.sh $(GCC) $(DEFINES) $(GCCFLAGS)
 
 sysinfo.c: Makefile
-	./sysinfo.sh $(CC) $(MACHINE) $(DEFINES) $(CFLAGS)
+	./sysinfo.sh $(GCC) $(DEFINES) $(GCCFLAGS)
 
 ##########################################################################
 # For non-LINUX systems
@@ -107,47 +113,82 @@ sysinfo.c: Makefile
 # and take sysinfo.c and sysinfoc.c out of the dependencies for nbench0.o
 
 hardware.o: hardware.c hardware.h Makefile
-	$(CC) $(MACHINE) $(DEFINES) $(CFLAGS)\
-		-c hardware.c
-
-nbench0.o: nbench0.h nbench0.c nmglobal.h pointer.h hardware.h\
-	   Makefile sysinfo.c sysinfoc.c
-	$(CC) $(MACHINE) $(DEFINES) $(CFLAGS)\
-		-c nbench0.c
-
-emfloat.o: emfloat.h emfloat.c nmglobal.h pointer.h Makefile
-	$(CC) $(MACHINE) $(DEFINES) $(CFLAGS)\
-		-c emfloat.c
-
-pointer.h: pointer Makefile
-	$(CC) $(MACHINE) $(DEFINES) $(CFLAGS)\
-		-o pointer pointer.c
-	rm -f pointer.h
-	if [ "4" = `./pointer` ] ; then touch pointer.h ;\
-	else echo "#define LONG64" >pointer.h ; fi
-
-misc.o: misc.h misc.c Makefile
-	$(CC) $(MACHINE) $(DEFINES) $(CFLAGS)\
-		-c misc.c
-
-nbench1.o: nbench1.h nbench1.c wordcat.h nmglobal.h pointer.h Makefile
-	$(CC) $(MACHINE) $(DEFINES) $(CFLAGS)\
-		-c nbench1.c
-
-sysspec.o: sysspec.h sysspec.c nmglobal.h pointer.h Makefile
-	$(CC) $(MACHINE) $(DEFINES) $(CFLAGS)\
-		-c sysspec.c
-
-nbench: emfloat.o misc.o nbench0.o nbench1.o sysspec.o hardware.o
-	$(CC) $(MACHINE) $(DEFINES) $(CFLAGS) $(LINKFLAGS)\
-		emfloat.o misc.o nbench0.o nbench1.o sysspec.o hardware.o\
-		-o nbench -lm
+	$(GCC) $(DEFINES) $(GCCFLAGS) -c hardware.c
+
+nbench0.o: nbench0.h nbench0.c nmglobal.h hardware.h Makefile sysinfo.c sysinfoc.c
+	$(GCC) $(DEFINES) $(GCCFLAGS) -c nbench0.c
+# Segfault before first test
+
+emfloat.o: emfloat.h emfloat.c nmglobal.h Makefile
+	$(CCC) $(DEFINES) $(CCCFLAGS) -c emfloat.c
+
+misc.o: misc.c Makefile
+	$(CCC) $(DEFINES) $(CCCFLAGS) -c misc.c
+
+numsort.o: numsort.c
+	$(CCC) $(DEFINES) $(CCCFLAGS) -c numsort.c
+# gcc - 2.95
+# ccc - 5.08
+
+stringsort.o: stringsort.c
+	$(CCC) $(DEFINES) $(CCCFLAGS) -c stringsort.c
+# gcc - 6.77
+# ccc - 7.24
+
+bitfield.o: bitfield.c
+	$(CCC) $(DEFINES) $(CCCFLAGS) -c bitfield.c
+# gcc - 4.93, 4.90, 4.93, 4.93
+# ccc - 4.79, 4.85, 4.81,
+
+fpemulation.o: fpemulation.c
+	$(CCC) $(DEFINES) $(CCCFLAGS) -c fpemulation.c
+# gcc - 13.90
+# ccc - 13.90
+
+fourier.o: fourier.c
+	$(CCC) $(DEFINES) $(CCCFLAGS) -c fourier.c
+# gcc - 15.96 
+# ccc - 19.30
+
+assignment.o: assignment.c
+	$(GCC) $(DEFINES) $(GCCFLAGS) -c assignment.c
+# gcc - 16.51
+# ccc - 7.98
+
+idea.o: idea.c
+	$(CCC) $(DEFINES) $(CCCFLAGS) -c idea.c
+# gcc - 8.79
+# ccc - 9.10
+
+huffman.o: huffman.c
+	$(GCC) $(DEFINES) $(GCCFLAGS) -c huffman.c
+#	$(CCC) $(DEFINES) -O1 -host -unroll 0 -inline speed -c huffman.c -g3
+#	$(CCC) $(DEFINES) $(CCCFLAGS) -c huffman.c -g3
+# gcc - 6.94
+# ccc - segfault
+# ccc -O1 -host - 5.14
+# ccc -O2 -host - segfault
+# ccc -O1 -host -unroll 0 -inline speed - 5.95
+
+neural.o: neural.c
+	$(CCC) $(DEFINES) $(CCCFLAGS) -c neural.c
+# gcc - 12.00
+# ccc - 14.20
+
+linear.o: linear.c
+	$(CCC) $(DEFINES) $(CCCFLAGS) -c linear.c
+# gcc - 16.38
+# ccc - 23.29
+
+sysspec.o: sysspec.h sysspec.c nmglobal.h Makefile
+	$(GCC) $(DEFINES) $(GCCFLAGS) -c sysspec.c
+
+nbench: emfloat.o misc.o nbench0.o numsort.o sysspec.o hardware.o stringsort.o bitfield.o fpemulation.o fourier.o assignment.o idea.o huffman.o neural.o linear.o
+	$(GCC) emfloat.o misc.o nbench0.o numsort.o sysspec.o hardware.o stringsort.o bitfield.o fpemulation.o fourier.o assignment.o idea.o huffman.o neural.o linear.o -o nbench -lm
 
 ##########################################################################
 
 clean:
-	- /bin/rm -f *.o *~ \#* core a.out hello sysinfo.c sysinfoc.c \
-		 bug pointer pointer.h debugbit.dat
+	- /bin/rm -f *.o nbench sysinfo.c sysinfoc.c
 
-mrproper: clean
-	- /bin/rm -f nbench
+remake: clean nbench
diff --git a/NNET.DAT b/NNET.DAT
new file mode 100644
index 0000000..5711730
--- /dev/null
+++ b/NNET.DAT
@@ -0,0 +1,210 @@
+5  7  8 
+26
+0  0  1  0  0
+0  1  0  1  0
+1  0  0  0  1
+1  0  0  0  1
+1  1  1  1  1
+1  0  0  0  1
+1  0  0  0  1
+0  1  0  0  0  0  0  1
+1  1  1  1  0
+1  0  0  0  1
+1  0  0  0  1
+1  1  1  1  0
+1  0  0  0  1
+1  0  0  0  1
+1  1  1  1  0
+0  1  0  0  0  0  1  0
+0  1  1  1  0
+1  0  0  0  1
+1  0  0  0  0
+1  0  0  0  0
+1  0  0  0  0
+1  0  0  0  1
+0  1  1  1  0
+0  1  0  0  0  0  1  1
+1  1  1  1  0
+1  0  0  0  1
+1  0  0  0  1
+1  0  0  0  1
+1  0  0  0  1
+1  0  0  0  1
+1  1  1  1  0
+0  1  0  0  0  1  0  0
+1  1  1  1  1
+1  0  0  0  0
+1  0  0  0  0
+1  1  1  0  0
+1  0  0  0  0
+1  0  0  0  0
+1  1  1  1  1
+0  1  0  0  0  1  0  1
+1  1  1  1  1
+1  0  0  0  0
+1  0  0  0  0
+1  1  1  0  0
+1  0  0  0  0
+1  0  0  0  0
+1  0  0  0  0
+0  1  0  0  0  1  1  0
+0  1  1  1  0
+1  0  0  0  1
+1  0  0  0  0
+1  0  0  0  0
+1  0  0  1  1
+1  0  0  0  1
+0  1  1  1  0
+0  1  0  0  0  1  1  1
+1  0  0  0  1
+1  0  0  0  1
+1  0  0  0  1
+1  1  1  1  1
+1  0  0  0  1
+1  0  0  0  1
+1  0  0  0  1
+0  1  0  0  1  0  0  0
+0  1  1  1  0
+0  0  1  0  0
+0  0  1  0  0
+0  0  1  0  0
+0  0  1  0  0
+0  0  1  0  0
+0  1  1  1  0
+0  1  0  0  1  0  0  1
+0  0  0  0  1
+0  0  0  0  1
+0  0  0  0  1
+0  0  0  0  1
+1  0  0  0  1
+1  0  0  0  1
+0  1  1  1  0
+0  1  0  0  1  0  1  0
+1  0  0  0  1
+1  0  0  1  0
+1  0  1  0  0
+1  1  0  0  0
+1  0  1  0  0
+1  0  0  1  0
+1  0  0  0  1
+0  1  0  0  1  0  1  1
+1  0  0  0  0
+1  0  0  0  0
+1  0  0  0  0
+1  0  0  0  0
+1  0  0  0  0
+1  0  0  0  0
+1  1  1  1  1
+0  1  0  0  1  1  0  0
+1  0  0  0  1
+1  1  0  1  1
+1  0  1  0  1
+1  0  1  0  1
+1  0  0  0  1
+1  0  0  0  1
+1  0  0  0  1
+0  1  0  0  1  1  0  1
+1  0  0  0  1
+1  1  0  0  1
+1  0  1  0  1
+1  0  1  0  1
+1  0  1  0  1
+1  0  0  1  1
+1  0  0  0  1
+0  1  0  0  1  1  1  0
+0  1  1  1  0
+1  0  0  0  1
+1  0  0  0  1
+1  0  0  0  1
+1  0  0  0  1
+1  0  0  0  1
+0  1  1  1  0
+0  1  0  0  1  1  1  1
+1  1  1  1  0
+1  0  0  0  1
+1  0  0  0  1
+1  1  1  1  0
+1  0  0  0  0
+1  0  0  0  0
+1  0  0  0  0
+0  1  0  1  0  0  0  0
+0  1  1  1  0
+1  0  0  0  1
+1  0  0  0  1
+1  0  0  0  1
+1  0  1  0  1
+1  0  0  1  1
+0  1  1  1  1
+0  1  0  1  0  0  0  1
+1  1  1  1  0  
+1  0  0  0  1
+1  0  0  0  1
+1  1  1  1  0
+1  0  1  0  0
+1  0  0  1  0
+1  0  0  0  1
+0  1  0  1  0  0  1  0
+0  1  1  1  1
+1  0  0  0  0
+1  0  0  0  0
+0  1  1  1  0
+0  0  0  0  1
+0  0  0  0  1
+1  1  1  1  0
+0  1  0  1  0  0  1  1
+1  1  1  1  1
+0  0  1  0  0
+0  0  1  0  0
+0  0  1  0  0
+0  0  1  0  0
+0  0  1  0  0
+0  0  1  0  0
+0  1  0  1  0  1  0  0
+1  0  0  0  1
+1  0  0  0  1
+1  0  0  0  1
+1  0  0  0  1
+1  0  0  0  1
+1  0  0  0  1
+0  1  1  1  0
+0  1  0  1  0  1  0  1
+1  0  0  0  1
+1  0  0  0  1
+0  1  0  1  0
+0  1  0  1  0
+0  1  0  1  0
+0  1  0  1  0
+0  0  1  0  0
+0  1  0  1  0  1  1  0
+1  0  0  0  1
+1  0  0  0  1
+1  0  0  0  1
+1  0  1  0  1
+1  0  1  0  1
+1  0  1  0  1
+0  1  0  1  0
+0  1  0  1  0  1  1  1
+1  0  0  0  1
+0  1  0  1  0
+0  1  0  1  0
+0  0  1  0  0
+0  1  0  1  0
+0  1  0  1  0
+1  0  0  0  1
+0  1  0  1  1  0  0  0
+1  0  0  0  1
+0  1  0  1  0
+0  1  0  1  0
+0  0  1  0  0
+0  0  1  0  0
+0  0  1  0  0
+0  0  1  0  0
+0  1  0  1  1  0  0  1
+1  1  1  1  1
+0  0  0  1  0
+0  0  0  1  0
+0  0  1  0  0
+0  1  0  0  0
+0  1  0  0  0
+1  1  1  1  1
+0  1  0  1  1  0  1  0
diff --git a/assignment.c b/assignment.c
new file mode 100644
index 0000000..7313d78
--- /dev/null
+++ b/assignment.c
@@ -0,0 +1,562 @@
+#include <string.h>
+#include "nmglobal.h"
+#include "nbench1.h"
+
+/*************************
+** ASSIGNMENT ALGORITHM **
+*************************/
+
+/*************
+** DoAssign **
+**************
+** Perform an assignment algorithm.
+** The algorithm was adapted from the step by step guide found
+** in "Quantitative Decision Making for Business" (Gordon,
+**  Pressman, and Cohn; Prentice-Hall)
+**
+**
+** NOTES:
+** 1. Even though the algorithm distinguishes between
+**    ASSIGNROWS and ASSIGNCOLS, as though the two might
+**    be different, it does presume a square matrix.
+**    I.E., ASSIGNROWS and ASSIGNCOLS must be the same.
+**    This makes for some algorithmically-correct but
+**    probably non-optimal constructs.
+**
+*/
+void DoAssign(void)
+{
+AssignStruct *locassignstruct;  /* Local structure ptr */
+farlong *arraybase;
+char *errorcontext;
+int systemerror;
+ulong accumtime;
+double iterations;
+
+/*
+** Link to global structure
+*/
+locassignstruct=&global_assignstruct;
+
+/*
+** Set the error context string.
+*/
+errorcontext="CPU:Assignment";
+
+/*
+** See if we need to do self adjustment code.
+*/
+if(locassignstruct->adjust==0)
+{
+	/*
+	** Self-adjustment code.  The system begins by working on 1
+	** array.  If it does that in no time, then two arrays
+	** are built.  This process continues until
+	** enough arrays are built to handle the tolerance.
+	*/
+	locassignstruct->numarrays=1;
+	while(1)
+	{
+		/*
+		** Allocate space for arrays
+		*/
+		arraybase=(farlong *) AllocateMemory(sizeof(long)*
+			ASSIGNROWS*ASSIGNCOLS*locassignstruct->numarrays,
+			 &systemerror);
+		if(systemerror)
+		{       ReportError(errorcontext,systemerror);
+			FreeMemory((farvoid *)arraybase,
+			  &systemerror);
+			ErrorExit();
+		}
+
+		/*
+		** Do an iteration of the assignment alg.  If the
+		** elapsed time is less than or equal to the permitted
+		** minimum, then allocate for more arrays and
+		** try again.
+		*/
+		if(DoAssignIteration(arraybase,
+			locassignstruct->numarrays)>global_min_ticks)
+			break;          /* We're ok...exit */
+
+		FreeMemory((farvoid *)arraybase, &systemerror);
+		locassignstruct->numarrays++;
+	}
+}
+else
+{       /*
+	** Allocate space for arrays
+	*/
+	arraybase=(farlong *)AllocateMemory(sizeof(long)*
+		ASSIGNROWS*ASSIGNCOLS*locassignstruct->numarrays,
+		 &systemerror);
+	if(systemerror)
+	{       ReportError(errorcontext,systemerror);
+		FreeMemory((farvoid *)arraybase,
+		  &systemerror);
+		ErrorExit();
+	}
+}
+
+/*
+** All's well if we get here.  Do the tests.
+*/
+accumtime=0L;
+iterations=(double)0.0;
+
+do {
+	accumtime+=DoAssignIteration(arraybase,
+		locassignstruct->numarrays);
+	iterations+=(double)1.0;
+} while(TicksToSecs(accumtime)<locassignstruct->request_secs);
+
+/*
+** Clean up, calculate results, and go home.  Be sure to
+** show that we don't have to rerun adjustment code.
+*/
+FreeMemory((farvoid *)arraybase,&systemerror);
+
+locassignstruct->iterspersec=iterations *
+	(double)locassignstruct->numarrays / TicksToFracSecs(accumtime);
+
+if(locassignstruct->adjust==0)
+	locassignstruct->adjust=1;
+
+return;
+
+}
+
+/**********************
+** DoAssignIteration **
+***********************
+** This routine executes one iteration of the assignment test.
+** It returns the number of ticks elapsed in the iteration.
+*/
+static ulong DoAssignIteration(farlong *arraybase,
+	ulong numarrays)
+{
+longptr abase;                  /* local pointer */
+ulong elapsed;          /* Elapsed ticks */
+ulong i;
+
+/*
+** Set up local pointer
+*/
+abase.ptrs.p=arraybase;
+
+/*
+** Load up the arrays with a random table.
+*/
+LoadAssignArrayWithRand(arraybase,numarrays);
+
+/*
+** Start the stopwatch
+*/
+elapsed=StartStopwatch();
+
+/*
+** Execute assignment algorithms
+*/
+for(i=0;i<numarrays;i++)
+{       /* abase.ptrs.p+=i*ASSIGNROWS*ASSIGNCOLS; */
+        /* Fixed  by Eike Dierks */
+	Assignment(*abase.ptrs.ap);
+	abase.ptrs.p+=ASSIGNROWS*ASSIGNCOLS;
+}
+
+/*
+** Get elapsed time
+*/
+return(StopStopwatch(elapsed));
+}
+
+/****************************
+** LoadAssignArrayWithRand **
+*****************************
+** Load the assignment arrays with random numbers.  All positive.
+** These numbers represent costs.
+*/
+static void LoadAssignArrayWithRand(farlong *arraybase,
+	ulong numarrays)
+{
+longptr abase,abase1;   /* Local for array pointer */
+ulong i;
+
+/*
+** Set local array pointer
+*/
+abase.ptrs.p=arraybase;
+abase1.ptrs.p=arraybase;
+
+/*
+** Set up the first array.  Then just copy it into the
+** others.
+*/
+LoadAssign(*(abase.ptrs.ap));
+if(numarrays>1)
+	for(i=1;i<numarrays;i++)
+	  {     /* abase1.ptrs.p+=i*ASSIGNROWS*ASSIGNCOLS; */
+	        /* Fixed  by Eike Dierks */
+	        abase1.ptrs.p+=ASSIGNROWS*ASSIGNCOLS;
+		CopyToAssign(*(abase.ptrs.ap),*(abase1.ptrs.ap));
+	}
+
+return;
+}
+
+/***************
+** LoadAssign **
+****************
+** The array given by arraybase is loaded with positive random
+** numbers.  Elements in the array are capped at 5,000,000.
+*/
+static void LoadAssign(farlong arraybase[][ASSIGNCOLS])
+{
+ushort i,j;
+
+/*
+** Reset random number generator so things repeat.
+*/
+/* randnum(13L); */
+randnum((int32)13);
+
+for(i=0;i<ASSIGNROWS;i++)
+  for(j=0;j<ASSIGNROWS;j++){
+    /* arraybase[i][j]=abs_randwc(5000000L);*/
+    arraybase[i][j]=abs_randwc((int32)5000000);
+  }
+
+return;
+}
+
+/*****************
+** CopyToAssign **
+******************
+** Copy the contents of one array to another.  This is called by
+** the routine that builds the initial array, and is used to copy
+** the contents of the intial array into all following arrays.
+*/
+static void CopyToAssign(farlong arrayfrom[ASSIGNROWS][ASSIGNCOLS],
+		farlong arrayto[ASSIGNROWS][ASSIGNCOLS])
+{
+ushort i,j;
+
+for(i=0;i<ASSIGNROWS;i++)
+	for(j=0;j<ASSIGNCOLS;j++)
+		arrayto[i][j]=arrayfrom[i][j];
+
+return;
+}
+
+/***************
+** Assignment **
+***************/
+static void Assignment(farlong arraybase[][ASSIGNCOLS])
+{
+short assignedtableau[ASSIGNROWS][ASSIGNCOLS];
+
+/*
+** First, calculate minimum costs
+*/
+calc_minimum_costs(arraybase);
+
+/*
+** Repeat following until the number of rows selected
+** equals the number of rows in the tableau.
+*/
+while(first_assignments(arraybase,assignedtableau)!=ASSIGNROWS)
+{         second_assignments(arraybase,assignedtableau);
+}
+
+#ifdef DEBUG
+{
+	int i,j;
+	printf("\nColumn choices for each row\n");
+	for(i=0;i<ASSIGNROWS;i++)
+	{
+	        printf("R%03d: ",i);
+		for(j=0;j<ASSIGNCOLS;j++)
+			if(assignedtableau[i][j]==1)
+				printf("%03d ",j);
+	}
+}
+#endif
+
+return;
+}
+
+/***********************
+** calc_minimum_costs **
+************************
+** Revise the tableau by calculating the minimum costs on a
+** row and column basis.  These minima are subtracted from
+** their rows and columns, creating a new tableau.
+*/
+static void calc_minimum_costs(long tableau[][ASSIGNCOLS])
+{
+ushort i,j;              /* Index variables */
+long currentmin;        /* Current minimum */
+/*
+** Determine minimum costs on row basis.  This is done by
+** subtracting -- on a row-per-row basis -- the minum value
+** for that row.
+*/
+for(i=0;i<ASSIGNROWS;i++)
+{
+	currentmin=MAXPOSLONG;  /* Initialize minimum */
+	for(j=0;j<ASSIGNCOLS;j++)
+		if(tableau[i][j]<currentmin)
+			currentmin=tableau[i][j];
+
+	for(j=0;j<ASSIGNCOLS;j++)
+		tableau[i][j]-=currentmin;
+}
+
+/*
+** Determine minimum cost on a column basis.  This works
+** just as above, only now we step through the array
+** column-wise
+*/
+for(j=0;j<ASSIGNCOLS;j++)
+{
+	currentmin=MAXPOSLONG;  /* Initialize minimum */
+	for(i=0;i<ASSIGNROWS;i++)
+		if(tableau[i][j]<currentmin)
+			currentmin=tableau[i][j];
+
+	/*
+	** Here, we'll take the trouble to see if the current
+	** minimum is zero.  This is likely worth it, since the
+	** preceding loop will have created at least one zero in
+	** each row.  We can save ourselves a few iterations.
+	*/
+	if(currentmin!=0)
+		for(i=0;i<ASSIGNROWS;i++)
+			tableau[i][j]-=currentmin;
+}
+
+return;
+}
+
+/**********************
+** first_assignments **
+***********************
+** Do first assignments.
+** The assignedtableau[] array holds a set of values that
+** indicate the assignment of a value, or its elimination.
+** The values are:
+**      0 = Item is neither assigned nor eliminated.
+**      1 = Item is assigned
+**      2 = Item is eliminated
+** Returns the number of selections made.  If this equals
+** the number of rows, then an optimum has been determined.
+*/
+static int first_assignments(long tableau[][ASSIGNCOLS],
+		short assignedtableau[][ASSIGNCOLS])
+{
+ushort i,j,k;                   /* Index variables */
+ushort numassigns;              /* # of assignments */
+ushort totnumassigns;           /* Total # of assignments */
+ushort numzeros;                /* # of zeros in row */
+int selected=0;                 /* Flag used to indicate selection */
+
+/*
+** Clear the assignedtableau, setting all members to show that
+** no one is yet assigned, eliminated, or anything.
+*/
+for(i=0;i<ASSIGNROWS;i++)
+	for(j=0;j<ASSIGNCOLS;j++)
+		assignedtableau[i][j]=0;
+
+totnumassigns=0;
+do {
+	numassigns=0;
+	/*
+	** Step through rows.  For each one that is not currently
+	** assigned, see if the row has only one zero in it.  If so,
+	** mark that as an assigned row/col.  Eliminate other zeros
+	** in the same column.
+	*/
+	for(i=0;i<ASSIGNROWS;i++)
+	{       numzeros=0;
+		for(j=0;j<ASSIGNCOLS;j++)
+			if(tableau[i][j]==0L)
+				if(assignedtableau[i][j]==0)
+				{       numzeros++;
+					selected=j;
+				}
+		if(numzeros==1)
+		{       numassigns++;
+			totnumassigns++;
+			assignedtableau[i][selected]=1;
+			for(k=0;k<ASSIGNROWS;k++)
+				if((k!=i) &&
+				   (tableau[k][selected]==0))
+					assignedtableau[k][selected]=2;
+		}
+	}
+	/*
+	** Step through columns, doing same as above.  Now, be careful
+	** of items in the other rows of a selected column.
+	*/
+	for(j=0;j<ASSIGNCOLS;j++)
+	{       numzeros=0;
+		for(i=0;i<ASSIGNROWS;i++)
+			if(tableau[i][j]==0L)
+				if(assignedtableau[i][j]==0)
+				{       numzeros++;
+					selected=i;
+				}
+		if(numzeros==1)
+		{       numassigns++;
+			totnumassigns++;
+			assignedtableau[selected][j]=1;
+			for(k=0;k<ASSIGNCOLS;k++)
+				if((k!=j) &&
+				   (tableau[selected][k]==0))
+					assignedtableau[selected][k]=2;
+		}
+	}
+	/*
+	** Repeat until no more assignments to be made.
+	*/
+} while(numassigns!=0);
+
+/*
+** See if we can leave at this point.
+*/
+if(totnumassigns==ASSIGNROWS) return(totnumassigns);
+
+/*
+** Now step through the array by row.  If you find any unassigned
+** zeros, pick the first in the row.  Eliminate all zeros from
+** that same row & column.  This occurs if there are multiple optima...
+** possibly.
+*/
+for(i=0;i<ASSIGNROWS;i++)
+{       selected=-1;
+	for(j=0;j<ASSIGNCOLS;j++)
+		if((tableau[i][j]==0L) &&
+		   (assignedtableau[i][j]==0))
+		{       selected=j;
+			break;
+		}
+	if(selected!=-1)
+	{       assignedtableau[i][selected]=1;
+		totnumassigns++;
+		for(k=0;k<ASSIGNCOLS;k++)
+			if((k!=selected) &&
+			   (tableau[i][k]==0L))
+				assignedtableau[i][k]=2;
+		for(k=0;k<ASSIGNROWS;k++)
+			if((k!=i) &&
+			   (tableau[k][selected]==0L))
+				assignedtableau[k][selected]=2;
+	}
+}
+
+return(totnumassigns);
+}
+
+/***********************
+** second_assignments **
+************************
+** This section of the algorithm creates the revised
+** tableau, and is difficult to explain.  I suggest you
+** refer to the algorithm's source, mentioned in comments
+** toward the beginning of the program.
+*/
+static void second_assignments(long tableau[][ASSIGNCOLS],
+		short assignedtableau[][ASSIGNCOLS])
+{
+int i,j;                                /* Indexes */
+short linesrow[ASSIGNROWS];
+short linescol[ASSIGNCOLS];
+long smallest;                          /* Holds smallest value */
+ushort numassigns;                      /* Number of assignments */
+ushort newrows;                         /* New rows to be considered */
+/*
+** Clear the linesrow and linescol arrays.
+*/
+for(i=0;i<ASSIGNROWS;i++)
+	linesrow[i]=0;
+for(i=0;i<ASSIGNCOLS;i++)
+	linescol[i]=0;
+
+/*
+** Scan rows, flag each row that has no assignment in it.
+*/
+for(i=0;i<ASSIGNROWS;i++)
+{       numassigns=0;
+	for(j=0;j<ASSIGNCOLS;j++)
+		if(assignedtableau[i][j]==1)
+		{       numassigns++;
+			break;
+		}
+	if(numassigns==0) linesrow[i]=1;
+}
+
+do {
+
+	newrows=0;
+	/*
+	** For each row checked above, scan for any zeros.  If found,
+	** check the associated column.
+	*/
+	for(i=0;i<ASSIGNROWS;i++)
+	{       if(linesrow[i]==1)
+			for(j=0;j<ASSIGNCOLS;j++)
+				if(tableau[i][j]==0)
+					linescol[j]=1;
+	}
+
+	/*
+	** Now scan checked columns.  If any contain assigned zeros, check
+	** the associated row.
+	*/
+	for(j=0;j<ASSIGNCOLS;j++)
+		if(linescol[j]==1)
+			for(i=0;i<ASSIGNROWS;i++)
+				if((assignedtableau[i][j]==1) &&
+					(linesrow[i]!=1))
+				{
+					linesrow[i]=1;
+					newrows++;
+				}
+} while(newrows!=0);
+
+/*
+** linesrow[n]==0 indicate rows covered by imaginary line
+** linescol[n]==1 indicate cols covered by imaginary line
+** For all cells not covered by imaginary lines, determine smallest
+** value.
+*/
+smallest=MAXPOSLONG;
+for(i=0;i<ASSIGNROWS;i++)
+	if(linesrow[i]!=0)
+		for(j=0;j<ASSIGNCOLS;j++)
+			if(linescol[j]!=1)
+				if(tableau[i][j]<smallest)
+					smallest=tableau[i][j];
+
+/*
+** Subtract smallest from all cells in the above set.
+*/
+for(i=0;i<ASSIGNROWS;i++)
+	if(linesrow[i]!=0)
+		for(j=0;j<ASSIGNCOLS;j++)
+			if(linescol[j]!=1)
+				tableau[i][j]-=smallest;
+
+/*
+** Add smallest to all cells covered by two lines.
+*/
+for(i=0;i<ASSIGNROWS;i++)
+	if(linesrow[i]==0)
+		for(j=0;j<ASSIGNCOLS;j++)
+			if(linescol[j]==1)
+				tableau[i][j]+=smallest;
+
+return;
+}
diff --git a/bitfield.c b/bitfield.c
new file mode 100644
index 0000000..78aba9e
--- /dev/null
+++ b/bitfield.c
@@ -0,0 +1,335 @@
+#include <stdint.h>
+/*
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <strings.h>
+#include <math.h>*/
+#include "nmglobal.h"
+#include "nbench1.h"
+
+#ifdef DEBUG
+static int numsort_status=0;
+static int stringsort_status=0;
+#endif
+
+/************************
+** BITFIELD OPERATIONS **
+*************************/
+
+/*************
+** DoBitops **
+**************
+** Perform the bit operations test portion of the CPU
+** benchmark.  Returns the iterations per second.
+*/
+void DoBitops(void)
+{
+BitOpStruct *locbitopstruct;    /* Local bitop structure */
+unsigned long *bitarraybase;         /* Base of bitmap array */
+unsigned long *bitoparraybase;       /* Base of bitmap operations array */
+unsigned long nbitops;                  /* # of bitfield operations */
+unsigned long accumtime;                /* Accumulated time in ticks */
+double iterations;              /* # of iterations */
+char *errorcontext;             /* Error context string */
+int systemerror;                /* For holding error codes */
+int ticks;
+
+/*
+** Link to global structure.
+*/
+locbitopstruct=&global_bitopstruct;
+
+/*
+** Set the error context.
+*/
+errorcontext="CPU:Bitfields";
+
+/*
+** See if we need to run adjustment code.
+*/
+if(locbitopstruct->adjust==0)
+{
+	bitarraybase=(unsigned long *)AllocateMemory(locbitopstruct->bitfieldarraysize *
+		sizeof(unsigned long),&systemerror);
+	if(systemerror)
+	{       ReportError(errorcontext,systemerror);
+	}
+
+	/*
+	** Initialize bitfield operations array to [2,30] elements
+	*/
+	locbitopstruct->bitoparraysize=30L;
+
+	while(1)
+	{
+		/*
+		** Allocate space for operations array
+		*/
+		bitoparraybase=(unsigned long *)AllocateMemory(locbitopstruct->bitoparraysize*2L*
+			sizeof(unsigned long),
+			&systemerror);
+		if(systemerror)
+		{       ReportError(errorcontext,systemerror);
+			FreeMemory((void *)bitarraybase,&systemerror);
+		}
+		/*
+		** Do an iteration of the bitmap test.  If the
+		** elapsed time is less than or equal to the permitted
+		** minimum, then de-allocate the array, reallocate a
+		** larger version, and try again.
+		*/
+		ticks=DoBitfieldIteration(bitarraybase,
+					   bitoparraybase,
+					   locbitopstruct->bitoparraysize,
+					   &nbitops);
+#ifdef DEBUG
+#ifdef LINUX
+	        if (locbitopstruct->bitoparraysize==30L){
+		  /* this is the first loop, write a debug file */
+		  FILE *file;
+		  unsigned long *running_base; /* same as unsigned long */
+		  long counter;
+		  file=fopen("debugbit.dat","w");
+		  running_base=bitarraybase;
+		  for (counter=0;counter<(long)(locbitopstruct->bitfieldarraysize);counter++){
+#ifdef _LP64
+		    fprintf(file,"%08X",(unsigned int)(*running_base&0xFFFFFFFFL));
+		    fprintf(file,"%08X",(unsigned int)((*running_base>>32)&0xFFFFFFFFL));
+		    if ((counter+1)%4==0) fprintf(file,"\n");
+#else
+		    fprintf(file,"%08lX",*running_base);
+		    if ((counter+1)%8==0) fprintf(file,"\n");
+#endif
+		    running_base=running_base+1;
+		  }
+		  fclose(file);
+		  printf("\nWrote the file debugbit.dat, you may want to compare it to debugbit.good\n");
+		}
+#endif
+#endif
+
+		if (ticks>global_min_ticks) break;      /* We're ok...exit */
+
+		FreeMemory((void *)bitoparraybase,&systemerror);
+		locbitopstruct->bitoparraysize+=100L;
+	}
+}
+else
+{
+	/*
+	** Don't need to do self adjustment, just allocate
+	** the array space.
+	*/
+	bitarraybase=(unsigned long *)AllocateMemory(locbitopstruct->bitfieldarraysize *
+		sizeof(unsigned long),&systemerror);
+	if(systemerror)
+	{       ReportError(errorcontext,systemerror);
+	}
+	bitoparraybase=(unsigned long *)AllocateMemory(locbitopstruct->bitoparraysize*2L*
+		sizeof(unsigned long),
+		&systemerror);
+	if(systemerror)
+	{       ReportError(errorcontext,systemerror);
+		FreeMemory((void *)bitarraybase,&systemerror);
+	}
+}
+
+/*
+** All's well if we get here.  Repeatedly perform bitops until the
+** accumulated elapsed time is greater than # of seconds requested.
+*/
+accumtime=0L;
+iterations=(double)0.0;
+do {
+	accumtime+=DoBitfieldIteration(bitarraybase,
+			bitoparraybase,
+			locbitopstruct->bitoparraysize,&nbitops);
+	iterations+=(double)nbitops;
+} while(TicksToSecs(accumtime)<locbitopstruct->request_secs);
+
+/*
+** Clean up, calculate results, and go home.
+** Also, set adjustment flag to show that we don't have
+** to do self adjusting in the future.
+*/
+FreeMemory((void *)bitarraybase,&systemerror);
+FreeMemory((void *)bitoparraybase,&systemerror);
+locbitopstruct->bitopspersec=iterations /TicksToFracSecs(accumtime);
+if(locbitopstruct->adjust==0)
+	locbitopstruct->adjust=1;
+
+return;
+}
+
+/************************
+** DoBitfieldIteration **
+*************************
+** Perform a single iteration of the bitfield benchmark.
+** Return the # of ticks accumulated by the operation.
+*/
+static unsigned long DoBitfieldIteration(unsigned long *bitarraybase,
+		unsigned long *bitoparraybase,
+		long bitoparraysize,
+		unsigned long *nbitops)
+{
+long i;                         /* Index */
+unsigned long bitoffset;                /* Offset into bitmap */
+unsigned long elapsed;                  /* Time to execute */
+/*
+** Clear # bitops counter
+*/
+*nbitops=0L;
+
+/*
+** Construct a set of bitmap offsets and run lengths.
+** The offset can be any random number from 0 to the
+** size of the bitmap (in bits).  The run length can
+** be any random number from 1 to the number of bits
+** between the offset and the end of the bitmap.
+** Note that the bitmap has 8192 * 32 bits in it.
+** (262,144 bits)
+*/
+/*
+** Reset random number generator so things repeat.
+** Also reset the bit array we work on.
+** added by Uwe F. Mayer
+*/
+randnum((int32_t)13);
+
+for (i=0;i<global_bitopstruct.bitfieldarraysize;i++)
+{
+#ifdef _LP64
+	*(bitarraybase+i)=(unsigned long)0x5555555555555555;
+#else
+	*(bitarraybase+i)=(unsigned long)0x55555555;
+#endif
+}
+
+randnum((int32_t)13);
+/* end of addition of code */
+
+for (i=0;i<bitoparraysize;i++)
+{
+	/* First item is offset */
+        /* *(bitoparraybase+i+i)=bitoffset=abs_randwc(262140L); */
+	*(bitoparraybase+i+i)=bitoffset=abs_randwc((int32_t)262140);
+
+	/* Next item is run length */
+	/* *nbitops+=*(bitoparraybase+i+i+1L)=abs_randwc(262140L-bitoffset);*/
+	*nbitops+=*(bitoparraybase+i+i+1L)=abs_randwc((int32_t)262140-bitoffset);
+}
+
+/*
+** Array of offset and lengths built...do an iteration of
+** the test.
+** Start the stopwatch.
+*/
+elapsed=StartStopwatch();
+
+/*
+** Loop through array off offset/run length pairs.
+** Execute operation based on modulus of index.
+*/
+for(i = 0; i < bitoparraysize; i++) {
+	int asdf = i % 3;
+	switch(asdf) {
+	case 2: /* Complement run of bits */
+		FlipBitRun(bitarraybase,
+			*(bitoparraybase+i+i),
+			*(bitoparraybase+i+i+1));
+		break;
+	default:
+		ToggleBitRun(bitarraybase,
+			*(bitoparraybase+i+i),
+			*(bitoparraybase+i+i+1),
+			!i);
+		break;
+	}
+}
+
+/*
+** Return elapsed time
+*/
+return(StopStopwatch(elapsed));
+}
+
+/***************
+** FlipBitRun **
+****************
+** Complements a run of bits.
+*/
+static void FlipBitRun(unsigned long *bitmap,        /* Bit map */
+		unsigned long bit_addr,                 /* Bit address */
+		unsigned long nbits)                    /* # of bits to flip */
+{
+unsigned long bindex;   /* Index into array */
+unsigned long bitnumb;  /* Bit number */
+
+while(nbits--)
+{
+#ifdef _LP64
+	bindex=bit_addr>>6;     /* Index is number /64 */
+	bitnumb=bit_addr % 64;  /* Bit number in longword */
+#else
+	bindex=bit_addr>>5;     /* Index is number /32 */
+	bitnumb=bit_addr % 32;  /* Bit number in longword */
+#endif
+	bitmap[bindex]^=(1L<<bitnumb);
+	bit_addr++;
+}
+
+return;
+}
+
+/*****************************
+**     ToggleBitRun          *
+******************************
+** Set or clear a run of nbits starting at
+** bit_addr in bitmap.
+*/
+void ToggleBitRun(unsigned long *bitmap, /* Bitmap */
+		unsigned long bit_addr,         /* Address of bits to set */
+		unsigned long nbits,            /* # of bits to set/clr */
+		unsigned int val)               /* 1 or 0 */
+{
+	unsigned long bindex;   /* Index into array */
+	unsigned long bitnumb;  /* Bit number */
+
+#if 0
+	while (nbits != 0) {
+		nbits--;
+
+		bindex = bit_addr >> 6;     /* Index is number /64 */
+		bitnumb = bit_addr % 64;    /* Bit number in word */
+
+		if (val) {
+			bitmap[bindex] |= (1L << bitnumb);
+		} else {
+			bitmap[bindex] &= ~(1L << bitnumb);
+		}
+		bit_addr++;
+	}
+#else
+	if (val) {
+		for (; nbits != 0; nbits--) {
+			bindex = bit_addr >> 6;
+			bitnumb = bit_addr % 64;
+
+			bitmap[bindex] |= (1L << bitnumb);
+
+			bit_addr++;
+		}
+	} else {
+		for (; nbits != 0; nbits--) {
+			bindex = bit_addr >> 6;
+			bitnumb = bit_addr % 64;
+
+			bitmap[bindex] &= ~(1L << bitnumb);
+
+			bit_addr++;
+		}
+	}	
+#endif
+	return;
+}
diff --git a/emfloat.h b/emfloat.h
index 41cc6d9..7699c1e 100644
--- a/emfloat.h
+++ b/emfloat.h
@@ -27,10 +27,6 @@
 
 #include <stdio.h>
 
-/* Is this a 64 bit architecture? If so, this will define LONG64 */
-/* Uwe F. Mayer 15 November 1997                                 */
-#include "pointer.h"
-
 /*
 ** DEFINES
 */
diff --git a/fourier.c b/fourier.c
new file mode 100644
index 0000000..1c95cb7
--- /dev/null
+++ b/fourier.c
@@ -0,0 +1,302 @@
+/*
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <strings.h>*/
+#include <math.h>
+#include "nmglobal.h"
+#include "nbench1.h"
+
+/*************************
+** FOURIER COEFFICIENTS **
+*************************/
+
+/**************
+** DoFourier **
+***************
+** Perform the transcendental/trigonometric portion of the
+** benchmark.  This benchmark calculates the first n
+** fourier coefficients of the function (x+1)^x defined
+** on the interval 0,2.
+*/
+void DoFourier(void)
+{
+FourierStruct *locfourierstruct;        /* Local fourier struct */
+fardouble *abase;               /* Base of A[] coefficients array */
+fardouble *bbase;               /* Base of B[] coefficients array */
+unsigned long accumtime;        /* Accumulated time in ticks */
+double iterations;              /* # of iterations */
+char *errorcontext;             /* Error context string pointer */
+int systemerror;                /* For error code */
+
+/*
+** Link to global structure
+*/
+locfourierstruct=&global_fourierstruct;
+
+/*
+** Set error context string
+*/
+errorcontext="FPU:Transcendental";
+
+/*
+** See if we need to do self-adjustment code.
+*/
+if(locfourierstruct->adjust==0)
+{
+	locfourierstruct->arraysize=100L;       /* Start at 100 elements */
+	while(1)
+	{
+
+		abase=(fardouble *)AllocateMemory(locfourierstruct->arraysize*sizeof(double),
+				&systemerror);
+		if(systemerror)
+		{       ReportError(errorcontext,systemerror);
+			ErrorExit();
+		}
+
+		bbase=(fardouble *)AllocateMemory(locfourierstruct->arraysize*sizeof(double),
+				&systemerror);
+		if(systemerror)
+		{       ReportError(errorcontext,systemerror);
+			FreeMemory((void *)abase,&systemerror);
+			ErrorExit();
+		}
+		/*
+		** Do an iteration of the tests.  If the elapsed time is
+		** less than or equal to the permitted minimum, re-allocate
+		** larger arrays and try again.
+		*/
+		if(DoFPUTransIteration(abase,bbase,
+			locfourierstruct->arraysize)>global_min_ticks)
+			break;          /* We're ok...exit */
+
+		/*
+		** Make bigger arrays and try again.
+		*/
+		FreeMemory((farvoid *)abase,&systemerror);
+		FreeMemory((farvoid *)bbase,&systemerror);
+		locfourierstruct->arraysize+=50L;
+	}
+}
+else
+{       /*
+	** Don't need self-adjustment.  Just allocate the
+	** arrays, and go.
+	*/
+	abase=(fardouble *)AllocateMemory(locfourierstruct->arraysize*sizeof(double),
+			&systemerror);
+	if(systemerror)
+	{       ReportError(errorcontext,systemerror);
+		ErrorExit();
+	}
+
+	bbase=(fardouble *)AllocateMemory(locfourierstruct->arraysize*sizeof(double),
+			&systemerror);
+	if(systemerror)
+	{       ReportError(errorcontext,systemerror);
+		FreeMemory((void *)abase,&systemerror);
+		ErrorExit();
+	}
+}
+/*
+** All's well if we get here.  Repeatedly perform integration
+** tests until the accumulated time is greater than the
+** # of seconds requested.
+*/
+accumtime=0L;
+iterations=(double)0.0;
+do {
+	accumtime+=DoFPUTransIteration(abase,bbase,locfourierstruct->arraysize);
+	iterations+=(double)locfourierstruct->arraysize*(double)2.0-(double)1.0;
+} while(TicksToSecs(accumtime)<locfourierstruct->request_secs);
+
+
+/*
+** Clean up, calculate results, and go home.
+** Also set adjustment flag to indicate no adjust code needed.
+*/
+FreeMemory((farvoid *)abase,&systemerror);
+FreeMemory((farvoid *)bbase,&systemerror);
+
+locfourierstruct->fflops=iterations/(double)TicksToFracSecs(accumtime);
+
+if(locfourierstruct->adjust==0)
+	locfourierstruct->adjust=1;
+
+return;
+}
+
+/************************
+** DoFPUTransIteration **
+*************************
+** Perform an iteration of the FPU Transcendental/trigonometric
+** benchmark.  Here, an iteration consists of calculating the
+** first n fourier coefficients of the function (x+1)^x on
+** the interval 0,2.  n is given by arraysize.
+** NOTE: The # of integration steps is fixed at
+** 200.
+*/
+static ulong DoFPUTransIteration(fardouble *abase,      /* A coeffs. */
+			fardouble *bbase,               /* B coeffs. */
+			ulong arraysize)                /* # of coeffs */
+{
+double omega;           /* Fundamental frequency */
+unsigned long i;        /* Index */
+unsigned long elapsed;  /* Elapsed time */
+
+/*
+** Start the stopwatch
+*/
+elapsed=StartStopwatch();
+
+/*
+** Calculate the fourier series.  Begin by
+** calculating A[0].
+*/
+
+*abase=TrapezoidIntegrate((double)0.0,
+			(double)2.0,
+			200,
+			(double)0.0,    /* No omega * n needed */
+			0 )/(double)2.0;
+
+/*
+** Calculate the fundamental frequency.
+** ( 2 * pi ) / period...and since the period
+** is 2, omega is simply pi.
+*/
+omega=(double)3.1415926535897932;
+
+for(i=1;i<arraysize;i++)
+{
+
+	/*
+	** Calculate A[i] terms.  Note, once again, that we
+	** can ignore the 2/period term outside the integral
+	** since the period is 2 and the term cancels itself
+	** out.
+	*/
+	*(abase+i)=TrapezoidIntegrate((double)0.0,
+			(double)2.0,
+			200,
+			omega * (double)i,
+			1);
+
+	/*
+	** Calculate the B[i] terms.
+	*/
+	*(bbase+i)=TrapezoidIntegrate((double)0.0,
+			(double)2.0,
+			200,
+			omega * (double)i,
+			2);
+
+}
+#ifdef DEBUG
+{
+  int i;
+  printf("\nA[i]=\n");
+  for (i=0;i<arraysize;i++) printf("%7.3g ",abase[i]);
+  printf("\nB[i]=\n(undefined) ");
+  for (i=1;i<arraysize;i++) printf("%7.3g ",bbase[i]);
+}
+#endif
+/*
+** All done, stop the stopwatch
+*/
+return(StopStopwatch(elapsed));
+}
+
+/***********************
+** TrapezoidIntegrate **
+************************
+** Perform a simple trapezoid integration on the
+** function (x+1)**x.
+** x0,x1 set the lower and upper bounds of the
+** integration.
+** nsteps indicates # of trapezoidal sections
+** omegan is the fundamental frequency times
+**  the series member #
+** select = 0 for the A[0] term, 1 for cosine terms, and
+**   2 for sine terms.
+** Returns the value.
+*/
+static double TrapezoidIntegrate( double x0,            /* Lower bound */
+			double x1,              /* Upper bound */
+			int nsteps,             /* # of steps */
+			double omegan,          /* omega * n */
+			int select)
+{
+double x;               /* Independent variable */
+double dx;              /* Stepsize */
+double rvalue;          /* Return value */
+
+
+/*
+** Initialize independent variable
+*/
+x=x0;
+
+/*
+** Calculate stepsize
+*/
+dx=(x1 - x0) / (double)nsteps;
+
+/*
+** Initialize the return value.
+*/
+rvalue=thefunction(x0,omegan,select)/(double)2.0;
+
+/*
+** Compute the other terms of the integral.
+*/
+if(nsteps!=1)
+{       --nsteps;               /* Already done 1 step */
+	while(--nsteps )
+	{
+		x+=dx;
+		rvalue+=thefunction(x,omegan,select);
+	}
+}
+/*
+** Finish computation
+*/
+rvalue=(rvalue+thefunction(x1,omegan,select)/(double)2.0)*dx;
+
+return(rvalue);
+}
+
+/****************
+** thefunction **
+*****************
+** This routine selects the function to be used
+** in the Trapezoid integration.
+** x is the independent variable
+** omegan is omega * n
+** select chooses which of the sine/cosine functions
+**  are used.  note the special case for select=0.
+*/
+static double thefunction(double x,             /* Independent variable */
+		double omegan,          /* Omega * term */
+		int select)             /* Choose term */
+{
+
+/*
+** Use select to pick which function we call.
+*/
+switch(select)
+{
+	case 0: return(pow(x+(double)1.0,x));
+
+	case 1: return(pow(x+(double)1.0,x) * cos(omegan * x));
+
+	case 2: return(pow(x+(double)1.0,x) * sin(omegan * x));
+}
+
+/*
+** We should never reach this point, but the following
+** keeps compilers from issuing a warning message.
+*/
+return(0.0);
+}
diff --git a/fpemulation.c b/fpemulation.c
new file mode 100644
index 0000000..d4332a3
--- /dev/null
+++ b/fpemulation.c
@@ -0,0 +1,147 @@
+#include <stdio.h>
+/*
+#include <stdlib.h>
+#include <string.h>
+#include <strings.h>
+#include <math.h>*/
+#include "nmglobal.h"
+#include "nbench1.h"
+
+
+/*****************************
+** FLOATING-POINT EMULATION **
+*****************************/
+
+/**************
+** DoEmFloat **
+***************
+** Perform the floating-point emulation routines portion of the
+** CPU benchmark.  Returns the operations per second.
+*/
+void DoEmFloat(void)
+{
+EmFloatStruct *locemfloatstruct;        /* Local structure */
+InternalFPF *abase;             /* Base of A array */
+InternalFPF *bbase;             /* Base of B array */
+InternalFPF *cbase;             /* Base of C array */
+unsigned long accumtime;        /* Accumulated time in ticks */
+double iterations;              /* # of iterations */
+unsigned long tickcount;        /* # of ticks */
+char *errorcontext;             /* Error context string pointer */
+int systemerror;                /* For holding error code */
+unsigned long loops;            /* # of loops */
+
+/*
+** Link to global structure
+*/
+locemfloatstruct=&global_emfloatstruct;
+
+/*
+** Set the error context
+*/
+errorcontext="CPU:Floating Emulation";
+
+
+/*
+** Test the emulation routines.
+*/
+#ifdef DEBUG
+#endif
+
+abase=(InternalFPF *)AllocateMemory(locemfloatstruct->arraysize*sizeof(InternalFPF),
+		&systemerror);
+if(systemerror)
+{       ReportError(errorcontext,systemerror);
+	ErrorExit();
+}
+
+bbase=(InternalFPF *)AllocateMemory(locemfloatstruct->arraysize*sizeof(InternalFPF),
+		&systemerror);
+if(systemerror)
+{       ReportError(errorcontext,systemerror);
+	FreeMemory((void *)abase,&systemerror);
+	ErrorExit();
+}
+
+cbase=(InternalFPF *)AllocateMemory(locemfloatstruct->arraysize*sizeof(InternalFPF),
+		&systemerror);
+if(systemerror)
+{       ReportError(errorcontext,systemerror);
+	FreeMemory((void *)abase,&systemerror);
+	FreeMemory((void *)bbase,&systemerror);
+	ErrorExit();
+}
+
+/*
+** Set up the arrays
+*/
+SetupCPUEmFloatArrays(abase,bbase,cbase,locemfloatstruct->arraysize);
+
+/*
+** See if we need to do self-adjusting code.
+*/
+if(locemfloatstruct->adjust==0)
+{
+	locemfloatstruct->loops=0;
+
+	/*
+	** Do an iteration of the tests.  If the elapsed time is
+	** less than minimum, increase the loop count and try
+	** again.
+	*/
+	for(loops=1;loops<CPUEMFLOATLOOPMAX;loops+=loops)
+	{       tickcount=DoEmFloatIteration(abase,bbase,cbase,
+			locemfloatstruct->arraysize,
+			loops);
+		if(tickcount>global_min_ticks)
+		{       locemfloatstruct->loops=loops;
+			break;
+		}
+	}
+}
+
+/*
+** Verify that selft adjustment code worked.
+*/
+if(locemfloatstruct->loops==0)
+{       printf("CPU:EMFPU -- CMPUEMFLOATLOOPMAX limit hit\n");
+	FreeMemory((void *)abase,&systemerror);
+	FreeMemory((void *)bbase,&systemerror);
+	FreeMemory((void *)cbase,&systemerror);
+	ErrorExit();
+}
+
+/*
+** All's well if we get here.  Repeatedly perform floating
+** tests until the accumulated time is greater than the
+** # of seconds requested.
+** Each iteration performs arraysize * 3 operations.
+*/
+accumtime=0L;
+iterations=(double)0.0;
+do {
+	accumtime+=DoEmFloatIteration(abase,bbase,cbase,
+			locemfloatstruct->arraysize,
+			locemfloatstruct->loops);
+	iterations+=(double)1.0;
+} while(TicksToSecs(accumtime)<locemfloatstruct->request_secs);
+
+
+/*
+** Clean up, calculate results, and go home.
+** Also, indicate that adjustment is done.
+*/
+FreeMemory((void *)abase,&systemerror);
+FreeMemory((void *)bbase,&systemerror);
+FreeMemory((void *)cbase,&systemerror);
+
+locemfloatstruct->emflops=(iterations*(double)locemfloatstruct->loops)/
+		(double)TicksToFracSecs(accumtime);
+if(locemfloatstruct->adjust==0)
+	locemfloatstruct->adjust=1;
+
+#ifdef DEBUG
+printf("----------------------------------------------------------------------------\n");
+#endif
+return;
+}
diff --git a/huffman.c b/huffman.c
new file mode 100644
index 0000000..961fc08
--- /dev/null
+++ b/huffman.c
@@ -0,0 +1,557 @@
+#include <string.h>
+#include <stdint.h>
+#include "nmglobal.h"
+#include "nbench1.h"
+
+/*
+** Word catalog
+*/
+#define WORDCATSIZE 50
+
+char *wordcatarray[WORDCATSIZE] =
+{	"Hello",
+	"He",
+	"Him",
+	"the",
+	"this",
+	"that",
+	"though",
+	"rough",
+	"cough",
+	"obviously",
+	"But",
+	"but",
+	"bye",
+	"begin",
+	"beginning",
+	"beginnings",
+	"of",
+	"our",
+	"ourselves",
+	"yourselves",
+	"to",
+	"together",
+	"togetherness",
+	"from",
+	"either",
+	"I",
+	"A",
+	"return",
+	"However",
+	"that",
+	"example",
+	"yet",
+	"quickly",
+	"all",
+	"if",
+	"were",
+	"includes",
+	"always",
+	"never",
+	"not",
+	"small",
+	"returns",
+	"set",
+	"basic",
+	"Entered",
+	"with",
+	"used",
+	"shown",
+	"you",
+	"know" };
+
+
+/************************
+** HUFFMAN COMPRESSION **
+************************/
+
+/**************
+** DoHuffman **
+***************
+** Execute a huffman compression on a block of plaintext.
+** Note that (as with IDEA encryption) an iteration of the
+** Huffman test includes a compression AND a decompression.
+** Also, the compression cycle includes building the
+** Huffman tree.
+*/
+void DoHuffman(void)
+{
+HuffStruct *lochuffstruct;      /* Loc pointer to global data */
+char *errorcontext;
+int systemerror;
+unsigned long accumtime;
+double iterations;
+char *comparray;
+char *decomparray;
+char *plaintext;
+
+/*
+** Link to global data
+*/
+lochuffstruct=&global_huffstruct;
+
+/*
+** Set error context.
+*/
+errorcontext="CPU:Huffman";
+
+/*
+** Allocate memory for the plaintext and the compressed text.
+** We'll be really pessimistic here, and allocate equal amounts
+** for both (though we know...well, we PRESUME) the compressed
+** stuff will take less than the plain stuff.
+** Also note that we'll build a 3rd buffer to decompress
+** into, and we preallocate space for the huffman tree.
+** (We presume that the Huffman tree will grow no larger
+** than 512 bytes.  This is actually a super-conservative
+** estimate...but, who cares?)
+*/
+plaintext=(char *)AllocateMemory(lochuffstruct->arraysize,&systemerror);
+if(systemerror)
+{       ReportError(errorcontext,systemerror);
+}
+comparray=(char *)AllocateMemory(lochuffstruct->arraysize,&systemerror);
+if(systemerror)
+{       ReportError(errorcontext,systemerror);
+	FreeMemory(plaintext,&systemerror);
+}
+decomparray=(char *)AllocateMemory(lochuffstruct->arraysize,&systemerror);
+if(systemerror)
+{       ReportError(errorcontext,systemerror);
+	FreeMemory(plaintext,&systemerror);
+	FreeMemory(comparray,&systemerror);
+}
+
+hufftree=(huff_node *)AllocateMemory(sizeof(huff_node) * 512,
+	&systemerror);
+if(systemerror)
+{       ReportError(errorcontext,systemerror);
+	FreeMemory(plaintext,&systemerror);
+	FreeMemory(comparray,&systemerror);
+	FreeMemory(decomparray,&systemerror);
+}
+
+/*
+** Build the plaintext buffer.  Since we want this to
+** actually be able to compress, we'll use the
+** wordcatalog to build the plaintext stuff.
+*/
+/*
+** Reset random number generator so things repeat.
+** added by Uwe F. Mayer
+*/
+randnum((int32_t)13);
+create_text_block(plaintext,lochuffstruct->arraysize-1,(unsigned short)500);
+plaintext[lochuffstruct->arraysize-1L]='\0';
+plaintextlen=lochuffstruct->arraysize;
+
+/*
+** See if we need to perform self adjustment loop.
+*/
+if(lochuffstruct->adjust==0)
+{
+	/*
+	** Do self-adjustment.  This involves initializing the
+	** # of loops and increasing the loop count until we
+	** get a number of loops that we can use.
+	*/
+	for(lochuffstruct->loops=100L;
+	  lochuffstruct->loops<MAXHUFFLOOPS;
+	  lochuffstruct->loops+=10L)
+		if(DoHuffIteration(plaintext,
+			comparray,
+			decomparray,
+		  lochuffstruct->arraysize,
+		  lochuffstruct->loops,
+		  hufftree)>global_min_ticks) break;
+}
+
+/*
+** All's well if we get here.  Do the test.
+*/
+accumtime=0L;
+iterations=(double)0.0;
+
+do {
+	accumtime+=DoHuffIteration(plaintext,
+		comparray,
+		decomparray,
+		lochuffstruct->arraysize,
+		lochuffstruct->loops,
+		hufftree);
+	iterations+=(double)lochuffstruct->loops;
+} while(TicksToSecs(accumtime)<lochuffstruct->request_secs);
+
+/*
+** Clean up, calculate results, and go home.  Be sure to
+** show that we don't have to rerun adjustment code.
+*/
+FreeMemory((void *)plaintext,&systemerror);
+FreeMemory((void *)comparray,&systemerror);
+FreeMemory((void *)decomparray,&systemerror);
+FreeMemory((void *)hufftree,&systemerror);
+lochuffstruct->iterspersec=iterations / TicksToFracSecs(accumtime);
+
+if(lochuffstruct->adjust==0)
+	lochuffstruct->adjust=1;
+
+}
+
+/*********************
+** create_text_line **
+**********************
+** Create a random line of text, stored at *dt.  The line may be
+** no more than nchars long.
+*/
+static void create_text_line(char *dt,
+			long nchars)
+{
+long charssofar;        /* # of characters so far */
+long tomove;            /* # of characters to move */
+char myword[40];        /* Local buffer for words */
+char *wordptr;       /* Pointer to word from catalog */
+
+charssofar=0;
+
+do {
+/*
+** Grab a random word from the wordcatalog
+*/
+/* wordptr=wordcatarray[abs_randwc((long)WORDCATSIZE)];*/
+wordptr=wordcatarray[abs_randwc((int32_t)WORDCATSIZE)];
+MoveMemory((void *)myword,
+	(void *)wordptr,
+	(unsigned long)strlen(wordptr)+1);
+
+/*
+** Append a blank.
+*/
+tomove=strlen(myword)+1;
+myword[tomove-1]=' ';
+
+/*
+** See how long it is.  If its length+charssofar > nchars, we have
+** to trim it.
+*/
+if((tomove+charssofar)>nchars)
+	tomove=nchars-charssofar;
+/*
+** Attach the word to the current line.  Increment counter.
+*/
+MoveMemory((void *)dt,(void *)myword,(unsigned long)tomove);
+charssofar+=tomove;
+dt+=tomove;
+
+/*
+** If we're done, bail out.  Otherwise, go get another word.
+*/
+} while(charssofar<nchars);
+
+return;
+}
+
+/**********************
+** create_text_block **
+***********************
+** Build a block of text randomly loaded with words.  The words
+** come from the wordcatalog (which must be loaded before you
+** call this).
+** *tb points to the memory where the text is to be built.
+** tblen is the # of bytes to put into the text block
+** maxlinlen is the maximum length of any line (line end indicated
+**  by a carriage return).
+*/
+static void create_text_block(char *tb,
+			unsigned long tblen,
+			unsigned short maxlinlen)
+{
+unsigned long bytessofar;       /* # of bytes so far */
+unsigned long linelen;          /* Line length */
+
+bytessofar=0L;
+do {
+
+/*
+** Pick a random length for a line and fill the line.
+** Make sure the line can fit (haven't exceeded tablen) and also
+** make sure you leave room to append a carriage return.
+*/
+linelen=abs_randwc(maxlinlen-6)+6;
+if((linelen+bytessofar)>tblen)
+	linelen=tblen-bytessofar;
+
+if(linelen>1)
+{
+	create_text_line(tb,linelen);
+}
+tb+=linelen-1;          /* Add the carriage return */
+*tb++='\n';
+
+bytessofar+=linelen;
+
+} while(bytessofar<tblen);
+
+}
+
+/********************
+** DoHuffIteration **
+*********************
+** Perform the huffman benchmark.  This routine
+**  (a) Builds the huffman tree
+**  (b) Compresses the text
+**  (c) Decompresses the text and verifies correct decompression
+*/
+static unsigned long DoHuffIteration(char *plaintext,
+	char *comparray,
+	char *decomparray,
+	unsigned long arraysize,
+	unsigned long nloops,
+	huff_node *hufftree)
+{
+int i;                          /* Index */
+long j;                         /* Bigger index */
+int root;                       /* Pointer to huffman tree root */
+float lowfreq1, lowfreq2;       /* Low frequency counters */
+int lowidx1, lowidx2;           /* Indexes of low freq. elements */
+long bitoffset;                 /* Bit offset into text */
+long textoffset;                /* Char offset into text */
+long maxbitoffset;              /* Holds limit of bit offset */
+long bitstringlen;              /* Length of bitstring */
+int c;                          /* Character from plaintext */
+char bitstring[30];             /* Holds bitstring */
+unsigned long elapsed;                  /* For stopwatch */
+#ifdef DEBUG
+int status=0;
+#endif
+
+/*
+** Start the stopwatch
+*/
+elapsed=StartStopwatch();
+
+/*
+** Do everything for nloops
+*/
+while(nloops--)
+{
+
+/*
+** Calculate the frequency of each byte value. Store the
+** results in what will become the "leaves" of the
+** Huffman tree.  Interior nodes will be built in those
+** nodes greater than node #255.
+*/
+for(i=0;i<256;i++)
+{
+	hufftree[i].freq=(float)0.0;
+	hufftree[i].c=(unsigned char)i;
+}
+
+for(j=0;j<arraysize;j++)
+	hufftree[(int)plaintext[j]].freq+=(float)1.0;
+
+for(i=0;i<256;i++)
+	if(hufftree[i].freq != (float)0.0)
+		hufftree[i].freq/=(float)arraysize;
+
+/* Reset the second half of the tree. Otherwise the loop below that
+** compares the frequencies up to index 512 makes no sense. Some
+** systems automatically zero out memory upon allocation, others (like
+** for example DEC Unix) do not. Depending on this the loop below gets
+** different data and different run times. On our alpha the data that
+** was arbitrarily assigned led to an underflow error at runtime. We
+** use that zeroed-out bits are in fact 0 as a float.
+** Uwe F. Mayer */
+bzero((char *)&(hufftree[256]),sizeof(huff_node)*256);
+/*
+** Build the huffman tree.  First clear all the parent
+** pointers and left/right pointers.  Also, discard all
+** nodes that have a frequency of true 0.  */
+for(i=0;i<512;i++)
+{       if(hufftree[i].freq==(float)0.0)
+		hufftree[i].parent=EXCLUDED;
+	else {
+		hufftree[i].right = -1;
+		hufftree[i].left = -1;
+		hufftree[i].parent = -1;
+	}
+}
+
+/*
+** Go through the tree. Finding nodes of really low
+** frequency.
+*/
+root=255;                       /* Starting root node-1 */
+while(1)
+{
+	lowfreq1=(float)2.0; lowfreq2=(float)2.0;
+	lowidx1=-1; lowidx2=-1;
+	/*
+	** Find first lowest frequency.
+	*/
+	for(i=0;i<=root;i++)
+		if(hufftree[i].parent<0)
+			if(hufftree[i].freq<lowfreq1)
+			{       lowfreq1=hufftree[i].freq;
+				lowidx1=i;
+			}
+
+	/*
+	** Did we find a lowest value?  If not, the
+	** tree is done.
+	*/
+	if(lowidx1==-1) break;
+
+	/*
+	** Find next lowest frequency
+	*/
+	for(i=0;i<=root;i++)
+		if((hufftree[i].parent<0) && (i!=lowidx1))
+			if(hufftree[i].freq<lowfreq2)
+			{       lowfreq2=hufftree[i].freq;
+				lowidx2=i;
+			}
+
+	/*
+	** If we could only find one item, then that
+	** item is surely the root, and (as above) the
+	** tree is done.
+	*/
+	if(lowidx2==-1) break;
+
+	/*
+	** Attach the two new nodes to the current root, and
+	** advance the current root.
+	*/
+	root++;                 /* New root */
+	hufftree[lowidx1].parent=root;
+	hufftree[lowidx2].parent=root;
+	hufftree[root].freq=lowfreq1+lowfreq2;
+	hufftree[root].left=lowidx1;
+	hufftree[root].right=lowidx2;
+	hufftree[root].parent=-2;       /* Show root */
+}
+
+/*
+** Huffman tree built...compress the plaintext
+*/
+bitoffset=0L;                           /* Initialize bit offset */
+for(i=0;i<arraysize;i++)
+{
+	c=(int)plaintext[i];                 /* Fetch character */
+	/*
+	** Build a bit string for byte c
+	*/
+	bitstringlen=0;
+	while(hufftree[c].parent!=-2)
+	{       if(hufftree[hufftree[c].parent].left==c)
+			bitstring[bitstringlen]='0';
+		else
+			bitstring[bitstringlen]='1';
+		c=hufftree[c].parent;
+		bitstringlen++;
+	}
+
+	/*
+	** Step backwards through the bit string, setting
+	** bits in the compressed array as you go.
+	*/
+	while(bitstringlen--)
+	{       SetCompBit((uint8_t *)comparray,(uint32_t)bitoffset,bitstring[bitstringlen]);
+		bitoffset++;
+	}
+}
+
+/*
+** Compression done.  Perform de-compression.
+*/
+maxbitoffset=bitoffset;
+bitoffset=0;
+textoffset=0;
+do {
+	i=root;
+	while(hufftree[i].left!=-1)
+	{       if(GetCompBit((uint8_t *)comparray,(uint32_t)bitoffset)==0)
+			i=hufftree[i].left;
+		else
+			i=hufftree[i].right;
+		bitoffset++;
+	}
+	decomparray[textoffset]=hufftree[i].c;
+
+#ifdef DEBUG
+	if(hufftree[i].c != plaintext[textoffset])
+	{
+		/* Show error */
+		printf("Error at textoffset %ld\n",textoffset);
+		status=1;
+	}
+#endif
+	textoffset++;
+} while(bitoffset<maxbitoffset);
+
+}       /* End the big while(nloops--) from above */
+
+/*
+** All done
+*/
+#ifdef DEBUG
+  if (status==0) printf("Huffman: OK\n");
+#endif
+return(StopStopwatch(elapsed));
+}
+
+/***************
+** SetCompBit **
+****************
+** Set a bit in the compression array.  The value of the
+** bit is set according to char bitchar.
+*/
+static void SetCompBit(uint8_t *comparray,
+		uint32_t bitoffset,
+		char bitchar)
+{
+uint32_t byteoffset;
+int bitnumb;
+
+/*
+** First calculate which element in the comparray to
+** alter. and the bitnumber.
+*/
+byteoffset=bitoffset>>3;
+bitnumb=bitoffset % 8;
+
+/*
+** Set or clear
+*/
+if(bitchar=='1')
+	comparray[byteoffset]|=(1<<bitnumb);
+else
+	comparray[byteoffset]&=~(1<<bitnumb);
+
+return;
+}
+
+/***************
+** GetCompBit **
+****************
+** Return the bit value of a bit in the comparession array.
+** Returns 0 if the bit is clear, nonzero otherwise.
+*/
+static int GetCompBit(uint8_t *comparray,
+		uint32_t bitoffset)
+{
+uint32_t byteoffset;
+int bitnumb;
+
+/*
+** Calculate byte offset and bit number.
+*/
+byteoffset=bitoffset>>3;
+bitnumb=bitoffset % 8;
+
+/*
+** Fetch
+*/
+return((1<<bitnumb) & comparray[byteoffset] );
+}
diff --git a/idea.c b/idea.c
new file mode 100644
index 0000000..43abad9
--- /dev/null
+++ b/idea.c
@@ -0,0 +1,392 @@
+#include "nmglobal.h"
+#include "nbench1.h"
+
+/********************
+** IDEA Encryption **
+*********************
+** IDEA - International Data Encryption Algorithm.
+** Based on code presented in Applied Cryptography by Bruce Schneier.
+** Which was based on code developed by Xuejia Lai and James L. Massey.
+** Other modifications made by Colin Plumb.
+**
+*/
+
+/***********
+** DoIDEA **
+************
+** Perform IDEA encryption.  Note that we time encryption & decryption
+** time as being a single loop.
+*/
+void DoIDEA(void)
+{
+IDEAStruct *locideastruct;      /* Loc pointer to global structure */
+int i;
+IDEAkey Z,DK;
+u16 userkey[8];
+ulong accumtime;
+double iterations;
+char *errorcontext;
+int systemerror;
+faruchar *plain1;               /* First plaintext buffer */
+faruchar *crypt1;               /* Encryption buffer */
+faruchar *plain2;               /* Second plaintext buffer */
+
+/*
+** Link to global data
+*/
+locideastruct=&global_ideastruct;
+
+/*
+** Set error context
+*/
+errorcontext="CPU:IDEA";
+
+/*
+** Re-init random-number generator.
+*/
+/* randnum(3L); */
+randnum((int32)3);
+
+/*
+** Build an encryption/decryption key
+*/
+for (i=0;i<8;i++)
+        /* userkey[i]=(u16)(abs_randwc(60000L) & 0xFFFF); */
+	userkey[i]=(u16)(abs_randwc((int32)60000) & 0xFFFF);
+for(i=0;i<KEYLEN;i++)
+	Z[i]=0;
+
+/*
+** Compute encryption/decryption subkeys
+*/
+en_key_idea(userkey,Z);
+de_key_idea(Z,DK);
+
+/*
+** Allocate memory for buffers.  We'll make 3, called plain1,
+** crypt1, and plain2.  It works like this:
+**   plain1 >>encrypt>> crypt1 >>decrypt>> plain2.
+** So, plain1 and plain2 should match.
+** Also, fill up plain1 with sample text.
+*/
+plain1=(faruchar *)AllocateMemory(locideastruct->arraysize,&systemerror);
+if(systemerror)
+{
+	ReportError(errorcontext,systemerror);
+	ErrorExit();
+}
+
+crypt1=(faruchar *)AllocateMemory(locideastruct->arraysize,&systemerror);
+if(systemerror)
+{
+	ReportError(errorcontext,systemerror);
+	FreeMemory((farvoid *)plain1,&systemerror);
+	ErrorExit();
+}
+
+plain2=(faruchar *)AllocateMemory(locideastruct->arraysize,&systemerror);
+if(systemerror)
+{
+	ReportError(errorcontext,systemerror);
+	FreeMemory((farvoid *)plain1,&systemerror);
+	FreeMemory((farvoid *)crypt1,&systemerror);
+	ErrorExit();
+}
+/*
+** Note that we build the "plaintext" by simply loading
+** the array up with random numbers.
+*/
+for(i=0;i<locideastruct->arraysize;i++)
+	plain1[i]=(uchar)(abs_randwc(255) & 0xFF);
+
+/*
+** See if we need to perform self adjustment loop.
+*/
+if(locideastruct->adjust==0)
+{
+	/*
+	** Do self-adjustment.  This involves initializing the
+	** # of loops and increasing the loop count until we
+	** get a number of loops that we can use.
+	*/
+	for(locideastruct->loops=100L;
+	  locideastruct->loops<MAXIDEALOOPS;
+	  locideastruct->loops+=10L)
+		if(DoIDEAIteration(plain1,crypt1,plain2,
+		  locideastruct->arraysize,
+		  locideastruct->loops,
+		  Z,DK)>global_min_ticks) break;
+}
+
+/*
+** All's well if we get here.  Do the test.
+*/
+accumtime=0L;
+iterations=(double)0.0;
+
+do {
+	accumtime+=DoIDEAIteration(plain1,crypt1,plain2,
+		locideastruct->arraysize,
+		locideastruct->loops,Z,DK);
+	iterations+=(double)locideastruct->loops;
+} while(TicksToSecs(accumtime)<locideastruct->request_secs);
+
+/*
+** Clean up, calculate results, and go home.  Be sure to
+** show that we don't have to rerun adjustment code.
+*/
+FreeMemory((farvoid *)plain1,&systemerror);
+FreeMemory((farvoid *)crypt1,&systemerror);
+FreeMemory((farvoid *)plain2,&systemerror);
+locideastruct->iterspersec=iterations / TicksToFracSecs(accumtime);
+
+if(locideastruct->adjust==0)
+	locideastruct->adjust=1;
+
+return;
+
+}
+
+/********************
+** DoIDEAIteration **
+*********************
+** Execute a single iteration of the IDEA encryption algorithm.
+** Actually, a single iteration is one encryption and one
+** decryption.
+*/
+static ulong DoIDEAIteration(faruchar *plain1,
+			faruchar *crypt1,
+			faruchar *plain2,
+			ulong arraysize,
+			ulong nloops,
+			IDEAkey Z,
+			IDEAkey DK)
+{
+register ulong i;
+register ulong j;
+ulong elapsed;
+#ifdef DEBUG
+int status=0;
+#endif
+
+/*
+** Start the stopwatch.
+*/
+elapsed=StartStopwatch();
+
+/*
+** Do everything for nloops.
+*/
+for(i=0;i<nloops;i++)
+{
+	for(j=0;j<arraysize;j+=(sizeof(u16)*4))
+		cipher_idea((u16 *)(plain1+j),(u16 *)(crypt1+j),Z);       /* Encrypt */
+
+	for(j=0;j<arraysize;j+=(sizeof(u16)*4))
+		cipher_idea((u16 *)(crypt1+j),(u16 *)(plain2+j),DK);      /* Decrypt */
+}
+
+#ifdef DEBUG
+for(j=0;j<arraysize;j++)
+	if(*(plain1+j)!=*(plain2+j)){
+		printf("IDEA Error! \n");
+                status=1;
+                }
+if (status==0) printf("IDEA: OK\n");
+#endif
+
+/*
+** Get elapsed time.
+*/
+return(StopStopwatch(elapsed));
+}
+
+/********
+** mul **
+*********
+** Performs multiplication, modulo (2**16)+1.  This code is structured
+** on the assumption that untaken branches are cheaper than taken
+** branches, and that the compiler doesn't schedule branches.
+*/
+static u16 mul(register u16 a, register u16 b)
+{
+register u32 p;
+if(a)
+{       if(b)
+	{       p=(u32)(a*b);
+		b=low16(p);
+		a=(u16)(p>>16);
+		return(b-a+(b<a));
+	}
+	else
+		return(1-a);
+}
+else
+	return(1-b);
+}
+
+/********
+** inv **
+*********
+** Compute multiplicative inverse of x, modulo (2**16)+1
+** using Euclid's GCD algorithm.  It is unrolled twice
+** to avoid swapping the meaning of the registers.  And
+** some subtracts are changed to adds.
+*/
+static u16 inv(u16 x)
+{
+u16 t0, t1;
+u16 q, y;
+
+if(x<=1)
+	return(x);      /* 0 and 1 are self-inverse */
+t1=0x10001 / x;
+y=0x10001 % x;
+if(y==1)
+	return(low16(1-t1));
+t0=1;
+do {
+	q=x/y;
+	x=x%y;
+	t0+=q*t1;
+	if(x==1) return(t0);
+	q=y/x;
+	y=y%x;
+	t1+=q*t0;
+} while(y!=1);
+return(low16(1-t1));
+}
+
+/****************
+** en_key_idea **
+*****************
+** Compute IDEA encryption subkeys Z
+*/
+static void en_key_idea(u16 *userkey, u16 *Z)
+{
+int i,j;
+
+/*
+** shifts
+*/
+for(j=0;j<8;j++)
+	Z[j]=*userkey++;
+for(i=0;j<KEYLEN;j++)
+{       i++;
+	Z[i+7]=(Z[i&7]<<9)| (Z[(i+1) & 7] >> 7);
+	Z+=i&8;
+	i&=7;
+}
+return;
+}
+
+/****************
+** de_key_idea **
+*****************
+** Compute IDEA decryption subkeys DK from encryption
+** subkeys Z.
+*/
+static void de_key_idea(IDEAkey Z, IDEAkey DK)
+{
+IDEAkey TT;
+int j;
+u16 t1, t2, t3;
+u16 *p;
+p=(u16 *)(TT+KEYLEN);
+
+t1=inv(*Z++);
+t2=-*Z++;
+t3=-*Z++;
+*--p=inv(*Z++);
+*--p=t3;
+*--p=t2;
+*--p=t1;
+
+for(j=1;j<ROUNDS;j++)
+{       t1=*Z++;
+	*--p=*Z++;
+	*--p=t1;
+	t1=inv(*Z++);
+	t2=-*Z++;
+	t3=-*Z++;
+	*--p=inv(*Z++);
+	*--p=t2;
+	*--p=t3;
+	*--p=t1;
+}
+t1=*Z++;
+*--p=*Z++;
+*--p=t1;
+t1=inv(*Z++);
+t2=-*Z++;
+t3=-*Z++;
+*--p=inv(*Z++);
+*--p=t3;
+*--p=t2;
+*--p=t1;
+/*
+** Copy and destroy temp copy
+*/
+for(j=0,p=TT;j<KEYLEN;j++)
+{       *DK++=*p;
+	*p++=0;
+}
+
+return;
+}
+
+/*
+** MUL(x,y)
+** This #define creates a macro that computes x=x*y modulo 0x10001.
+** Requires temps t16 and t32.  Also requires y to be strictly 16
+** bits.  Here, I am using the simplest form.  May not be the
+** fastest. -- RG
+*/
+/* #define MUL(x,y) (x=mul(low16(x),y)) */
+
+/****************
+** cipher_idea **
+*****************
+** IDEA encryption/decryption algorithm.
+*/
+static void cipher_idea(u16 in[4],
+		u16 out[4],
+		register IDEAkey Z)
+{
+register u16 x1, x2, x3, x4, t1, t2;
+/* register u16 t16;
+register u16 t32; */
+int r=ROUNDS;
+
+x1=*in++;
+x2=*in++;
+x3=*in++;
+x4=*in;
+
+do {
+	MUL(x1,*Z++);
+	x2+=*Z++;
+	x3+=*Z++;
+	MUL(x4,*Z++);
+
+	t2=x1^x3;
+	MUL(t2,*Z++);
+	t1=t2+(x2^x4);
+	MUL(t1,*Z++);
+	t2=t1+t2;
+
+	x1^=t1;
+	x4^=t2;
+
+	t2^=x2;
+	x2=x3^t1;
+	x3=t2;
+} while(--r);
+MUL(x1,*Z++);
+*out++=x1;
+*out++=x3+*Z++;
+*out++=x2+*Z++;
+MUL(x4,*Z);
+*out=x4;
+return;
+}
diff --git a/linear.c b/linear.c
new file mode 100644
index 0000000..ff1b1d7
--- /dev/null
+++ b/linear.c
@@ -0,0 +1,541 @@
+#include <stdio.h>
+/*#include <stdlib.h>
+#include <string.h>
+#include <strings.h>*/
+#include <math.h>
+#include <stddef.h>
+#include "nmglobal.h"
+#include "nbench1.h"
+
+/***********************
+**  LU DECOMPOSITION  **
+** (Linear Equations) **
+************************
+** These routines come from "Numerical Recipes in Pascal".
+** Note that, as in the assignment algorithm, though we
+** separately define LUARRAYROWS and LUARRAYCOLS, the two
+** must be the same value (this routine depends on a square
+** matrix).
+*/
+
+/*********
+** DoLU **
+**********
+** Perform the LU decomposition benchmark.
+*/
+void DoLU(void)
+{
+LUStruct *loclustruct;  /* Local pointer to global data */
+char *errorcontext;
+int systemerror;
+fardouble *a;
+fardouble *b;
+fardouble *abase;
+fardouble *bbase;
+LUdblptr ptra;
+int n;
+int i;
+ulong accumtime;
+double iterations;
+
+/*
+** Link to global data
+*/
+loclustruct=&global_lustruct;
+
+/*
+** Set error context.
+*/
+errorcontext="FPU:LU";
+
+/*
+** Our first step is to build a "solvable" problem.  This
+** will become the "seed" set that all others will be
+** derived from. (I.E., we'll simply copy these arrays
+** into the others.
+*/
+a=(fardouble *)AllocateMemory(sizeof(double) * LUARRAYCOLS * LUARRAYROWS,
+		&systemerror);
+b=(fardouble *)AllocateMemory(sizeof(double) * LUARRAYROWS,
+		&systemerror);
+n=LUARRAYROWS;
+
+/*
+** We need to allocate a temp vector that is used by the LU
+** algorithm.  This removes the allocation routine from the
+** timing.
+*/
+LUtempvv=(fardouble *)AllocateMemory(sizeof(double)*LUARRAYROWS,
+	&systemerror);
+
+/*
+** Build a problem to be solved.
+*/
+ptra.ptrs.p=a;                  /* Gotta coerce linear array to 2D array */
+build_problem(*ptra.ptrs.ap,n,b);
+
+/*
+** Now that we have a problem built, see if we need to do
+** auto-adjust.  If so, repeatedly call the DoLUIteration routine,
+** increasing the number of solutions per iteration as you go.
+*/
+if(loclustruct->adjust==0)
+{
+	loclustruct->numarrays=0;
+	for(i=1;i<=MAXLUARRAYS;i++)
+	{
+		abase=(fardouble *)AllocateMemory(sizeof(double) *
+			LUARRAYCOLS*LUARRAYROWS*(i+1),&systemerror);
+		if(systemerror)
+		{       ReportError(errorcontext,systemerror);
+			LUFreeMem(a,b,(fardouble *)NULL,(fardouble *)NULL);
+			ErrorExit();
+		}
+		bbase=(fardouble *)AllocateMemory(sizeof(double) *
+			LUARRAYROWS*(i+1),&systemerror);
+		if(systemerror)
+		{       ReportError(errorcontext,systemerror);
+			LUFreeMem(a,b,abase,(fardouble *)NULL);
+			ErrorExit();
+		}
+		if(DoLUIteration(a,b,abase,bbase,i)>global_min_ticks)
+		{       loclustruct->numarrays=i;
+			break;
+		}
+		/*
+		** Not enough arrays...free them all and try again
+		*/
+		FreeMemory((farvoid *)abase,&systemerror);
+		FreeMemory((farvoid *)bbase,&systemerror);
+	}
+	/*
+	** Were we able to do it?
+	*/
+	if(loclustruct->numarrays==0)
+	{       printf("FPU:LU -- Array limit reached\n");
+		LUFreeMem(a,b,abase,bbase);
+		ErrorExit();
+	}
+}
+else
+{       /*
+	** Don't need to adjust -- just allocate the proper
+	** number of arrays and proceed.
+	*/
+	abase=(fardouble *)AllocateMemory(sizeof(double) *
+		LUARRAYCOLS*LUARRAYROWS*loclustruct->numarrays,
+		&systemerror);
+	if(systemerror)
+	{       ReportError(errorcontext,systemerror);
+		LUFreeMem(a,b,(fardouble *)NULL,(fardouble *)NULL);
+		ErrorExit();
+	}
+	bbase=(fardouble *)AllocateMemory(sizeof(double) *
+		LUARRAYROWS*loclustruct->numarrays,&systemerror);
+	if(systemerror)
+	{
+		ReportError(errorcontext,systemerror);
+		LUFreeMem(a,b,abase,(fardouble *)NULL);
+		ErrorExit();
+	}
+}
+/*
+** All's well if we get here.  Do the test.
+*/
+accumtime=0L;
+iterations=(double)0.0;
+
+do {
+	accumtime+=DoLUIteration(a,b,abase,bbase,
+		loclustruct->numarrays);
+	iterations+=(double)loclustruct->numarrays;
+} while(TicksToSecs(accumtime)<loclustruct->request_secs);
+
+/*
+** Clean up, calculate results, and go home.  Be sure to
+** show that we don't have to rerun adjustment code.
+*/
+loclustruct->iterspersec=iterations / TicksToFracSecs(accumtime);
+
+if(loclustruct->adjust==0)
+	loclustruct->adjust=1;
+
+LUFreeMem(a,b,abase,bbase);
+return;
+}
+
+/**************
+** LUFreeMem **
+***************
+** Release memory associated with LU benchmark.
+*/
+static void LUFreeMem(fardouble *a, fardouble *b,
+			fardouble *abase,fardouble *bbase)
+{
+int systemerror;
+
+FreeMemory((farvoid *)a,&systemerror);
+FreeMemory((farvoid *)b,&systemerror);
+FreeMemory((farvoid *)LUtempvv,&systemerror);
+
+if(abase!=(fardouble *)NULL) FreeMemory((farvoid *)abase,&systemerror);
+if(bbase!=(fardouble *)NULL) FreeMemory((farvoid *)bbase,&systemerror);
+return;
+}
+
+/******************
+** DoLUIteration **
+*******************
+** Perform an iteration of the LU decomposition benchmark.
+** An iteration refers to the repeated solution of several
+** identical matrices.
+*/
+static ulong DoLUIteration(fardouble *a,fardouble *b,
+		fardouble *abase, fardouble *bbase,
+		ulong numarrays)
+{
+fardouble *locabase;
+fardouble *locbbase;
+LUdblptr ptra;  /* For converting ptr to 2D array */
+ulong elapsed;
+ulong j,i;              /* Indexes */
+
+
+/*
+** Move the seed arrays (a & b) into the destination
+** arrays;
+*/
+for(j=0;j<numarrays;j++)
+{       locabase=abase+j*LUARRAYROWS*LUARRAYCOLS;
+	locbbase=bbase+j*LUARRAYROWS;
+	for(i=0;i<LUARRAYROWS*LUARRAYCOLS;i++)
+		*(locabase+i)=*(a+i);
+	for(i=0;i<LUARRAYROWS;i++)
+		*(locbbase+i)=*(b+i);
+}
+
+/*
+** Do test...begin timing.
+*/
+elapsed=StartStopwatch();
+for(i=0;i<numarrays;i++)
+{       locabase=abase+i*LUARRAYROWS*LUARRAYCOLS;
+	locbbase=bbase+i*LUARRAYROWS;
+	ptra.ptrs.p=locabase;
+	lusolve(*ptra.ptrs.ap,LUARRAYROWS,locbbase);
+}
+
+return(StopStopwatch(elapsed));
+}
+
+/******************
+** build_problem **
+*******************
+** Constructs a solvable set of linear equations.  It does this by
+** creating an identity matrix, then loading the solution vector
+** with random numbers.  After that, the identity matrix and
+** solution vector are randomly "scrambled".  Scrambling is
+** done by (a) randomly selecting a row and multiplying that
+** row by a random number and (b) adding one randomly-selected
+** row to another.
+*/
+static void build_problem(double a[][LUARRAYCOLS],
+		int n,
+		double b[LUARRAYROWS])
+{
+long i,j,k,k1;  /* Indexes */
+double rcon;     /* Random constant */
+
+/*
+** Reset random number generator
+*/
+/* randnum(13L); */
+randnum((int32)13);
+
+/*
+** Build an identity matrix.
+** We'll also use this as a chance to load the solution
+** vector.
+*/
+for(i=0;i<n;i++)
+{       /* b[i]=(double)(abs_randwc(100L)+1L); */
+	b[i]=(double)(abs_randwc((int32)100)+(int32)1);
+	for(j=0;j<n;j++)
+		if(i==j)
+		        /* a[i][j]=(double)(abs_randwc(1000L)+1L); */
+			a[i][j]=(double)(abs_randwc((int32)1000)+(int32)1);
+		else
+			a[i][j]=(double)0.0;
+}
+
+#ifdef DEBUG
+printf("Problem:\n");
+for(i=0;i<n;i++)
+{
+/*
+	for(j=0;j<n;j++)
+		printf("%6.2f ",a[i][j]);
+*/
+	printf("%.0f/%.0f=%.2f\t",b[i],a[i][i],b[i]/a[i][i]);
+/*
+        printf("\n");
+*/
+}
+#endif
+
+/*
+** Scramble.  Do this 8n times.  See comment above for
+** a description of the scrambling process.
+*/
+
+for(i=0;i<8*n;i++)
+{
+	/*
+	** Pick a row and a random constant.  Multiply
+	** all elements in the row by the constant.
+	*/
+ /*       k=abs_randwc((long)n);
+	rcon=(double)(abs_randwc(20L)+1L);
+	for(j=0;j<n;j++)
+		a[k][j]=a[k][j]*rcon;
+	b[k]=b[k]*rcon;
+*/
+	/*
+	** Pick two random rows and add second to
+	** first.  Note that we also occasionally multiply
+	** by minus 1 so that we get a subtraction operation.
+	*/
+        /* k=abs_randwc((long)n); */
+        /* k1=abs_randwc((long)n); */
+	k=abs_randwc((int32)n);
+	k1=abs_randwc((int32)n);
+	if(k!=k1)
+	{
+		if(k<k1) rcon=(double)1.0;
+			else rcon=(double)-1.0;
+		for(j=0;j<n;j++)
+			a[k][j]+=a[k1][j]*rcon;;
+		b[k]+=b[k1]*rcon;
+	}
+}
+
+return;
+}
+
+
+/***********
+** ludcmp **
+************
+** From the procedure of the same name in "Numerical Recipes in Pascal",
+** by Press, Flannery, Tukolsky, and Vetterling.
+** Given an nxn matrix a[], this routine replaces it by the LU
+** decomposition of a rowwise permutation of itself.  a[] and n
+** are input.  a[] is output, modified as follows:
+**   --                       --
+**  |  b(1,1) b(1,2) b(1,3)...  |
+**  |  a(2,1) b(2,2) b(2,3)...  |
+**  |  a(3,1) a(3,2) b(3,3)...  |
+**  |  a(4,1) a(4,2) a(4,3)...  |
+**  |  ...                      |
+**   --                        --
+**
+** Where the b(i,j) elements form the upper triangular matrix of the
+** LU decomposition, and the a(i,j) elements form the lower triangular
+** elements.  The LU decomposition is calculated so that we don't
+** need to store the a(i,i) elements (which would have laid along the
+** diagonal and would have all been 1).
+**
+** indx[] is an output vector that records the row permutation
+** effected by the partial pivoting; d is output as +/-1 depending
+** on whether the number of row interchanges was even or odd,
+** respectively.
+** Returns 0 if matrix singular, else returns 1.
+*/
+static int ludcmp(double a[][LUARRAYCOLS],
+		int n,
+		int indx[],
+		int *d)
+{
+
+double big;     /* Holds largest element value */
+double sum;
+double dum;     /* Holds dummy value */
+int i,j,k;      /* Indexes */
+int imax=0;     /* Holds max index value */
+double tiny;    /* A really small number */
+
+tiny=(double)1.0e-20;
+
+*d=1;           /* No interchanges yet */
+
+for(i=0;i<n;i++)
+{       big=(double)0.0;
+	for(j=0;j<n;j++)
+		if((double)fabs(a[i][j]) > big)
+			big=fabs(a[i][j]);
+	/* Bail out on singular matrix */
+	if(big==(double)0.0) return(0);
+	LUtempvv[i]=1.0/big;
+}
+
+/*
+** Crout's algorithm...loop over columns.
+*/
+for(j=0;j<n;j++)
+{       if(j!=0)
+		for(i=0;i<j;i++)
+		{       sum=a[i][j];
+			if(i!=0)
+				for(k=0;k<i;k++)
+					sum-=(a[i][k]*a[k][j]);
+			a[i][j]=sum;
+		}
+	big=(double)0.0;
+	for(i=j;i<n;i++)
+	{       sum=a[i][j];
+		if(j!=0)
+			for(k=0;k<j;k++)
+				sum-=a[i][k]*a[k][j];
+		a[i][j]=sum;
+		dum=LUtempvv[i]*fabs(sum);
+		if(dum>=big)
+		{       big=dum;
+			imax=i;
+		}
+	}
+	if(j!=imax)             /* Interchange rows if necessary */
+	{       for(k=0;k<n;k++)
+		{       dum=a[imax][k];
+			a[imax][k]=a[j][k];
+			a[j][k]=dum;
+		}
+		*d=-*d;         /* Change parity of d */
+		dum=LUtempvv[imax];
+		LUtempvv[imax]=LUtempvv[j]; /* Don't forget scale factor */
+		LUtempvv[j]=dum;
+	}
+	indx[j]=imax;
+	/*
+	** If the pivot element is zero, the matrix is singular
+	** (at least as far as the precision of the machine
+	** is concerned.)  We'll take the original author's
+	** recommendation and replace 0.0 with "tiny".
+	*/
+	if(a[j][j]==(double)0.0)
+		a[j][j]=tiny;
+
+	if(j!=(n-1))
+	{       dum=1.0/a[j][j];
+		for(i=j+1;i<n;i++)
+			a[i][j]=a[i][j]*dum;
+	}
+}
+
+return(1);
+}
+
+/***********
+** lubksb **
+************
+** Also from "Numerical Recipes in Pascal".
+** This routine solves the set of n linear equations A X = B.
+** Here, a[][] is input, not as the matrix A, but as its
+** LU decomposition, created by the routine ludcmp().
+** Indx[] is input as the permutation vector returned by ludcmp().
+**  b[] is input as the right-hand side an returns the
+** solution vector X.
+** a[], n, and indx are not modified by this routine and
+** can be left in place for different values of b[].
+** This routine takes into account the possibility that b will
+** begin with many zero elements, so it is efficient for use in
+** matrix inversion.
+*/
+static void lubksb( double a[][LUARRAYCOLS],
+		int n,
+		int indx[LUARRAYROWS],
+		double b[LUARRAYROWS])
+{
+
+int i,j;        /* Indexes */
+int ip;         /* "pointer" into indx */
+int ii;
+double sum;
+
+/*
+** When ii is set to a positive value, it will become
+** the index of the first nonvanishing element of b[].
+** We now do the forward substitution. The only wrinkle
+** is to unscramble the permutation as we go.
+*/
+ii=-1;
+for(i=0;i<n;i++)
+{       ip=indx[i];
+	sum=b[ip];
+	b[ip]=b[i];
+	if(ii!=-1)
+		for(j=ii;j<i;j++)
+			sum=sum-a[i][j]*b[j];
+	else
+		/*
+		** If a nonzero element is encountered, we have
+		** to do the sums in the loop above.
+		*/
+		if(sum!=(double)0.0)
+			ii=i;
+	b[i]=sum;
+}
+/*
+** Do backsubstitution
+*/
+for(i=(n-1);i>=0;i--)
+{
+	sum=b[i];
+	if(i!=(n-1))
+		for(j=(i+1);j<n;j++)
+			sum=sum-a[i][j]*b[j];
+	b[i]=sum/a[i][i];
+}
+return;
+}
+
+/************
+** lusolve **
+*************
+** Solve a linear set of equations: A x = b
+** Original matrix A will be destroyed by this operation.
+** Returns 0 if matrix is singular, 1 otherwise.
+*/
+static int lusolve(double a[][LUARRAYCOLS],
+		int n,
+		double b[LUARRAYROWS])
+{
+int indx[LUARRAYROWS];
+int d;
+#ifdef DEBUG
+int i,j;
+#endif
+
+if(ludcmp(a,n,indx,&d)==0) return(0);
+
+/* Matrix not singular -- proceed */
+lubksb(a,n,indx,b);
+
+#ifdef DEBUG
+printf("Solution:\n");
+for(i=0;i<n;i++)
+{
+  for(j=0;j<n;j++){
+  /*
+    printf("%6.2f ",a[i][j]);
+  */
+  }
+  printf("%6.2f\t",b[i]);
+  /*
+    printf("\n");
+  */
+}
+printf("\n");
+#endif
+
+return(1);
+}
diff --git a/nbench0.h b/nbench0.h
index cef0928..46ef994 100644
--- a/nbench0.h
+++ b/nbench0.h
@@ -336,8 +336,6 @@ extern void DoHuffman(void);
 extern void DoNNET(void);
 extern void DoLU(void);
 
-extern void ErrorExit(void);    /* From SYSSPEC */
-
 /*
 ** Array of pointers to the benchmark functions.
 */
diff --git a/nbench1.c b/nbench1.c
deleted file mode 100644
index 05c35df..0000000
--- a/nbench1.c
+++ /dev/null
@@ -1,4445 +0,0 @@
-
-/*
-** nbench1.c
-*/
-
-/********************************
-**       BYTEmark (tm)         **
-** BYTE NATIVE MODE BENCHMARKS **
-**       VERSION 2             **
-**                             **
-** Included in this source     **
-** file:                       **
-**  Numeric Heapsort           **
-**  String Heapsort            **
-**  Bitfield test              **
-**  Floating point emulation   **
-**  Fourier coefficients       **
-**  Assignment algorithm       **
-**  IDEA Encyption             **
-**  Huffman compression        **
-**  Back prop. neural net      **
-**  LU Decomposition           **
-**    (linear equations)       **
-** ----------                  **
-** Rick Grehan, BYTE Magazine  **
-*********************************
-**
-** BYTEmark (tm)
-** BYTE's Native Mode Benchmarks
-** Rick Grehan, BYTE Magazine
-**
-** Creation:
-** Revision: 3/95;10/95
-**  10/95 - Removed allocation that was taking place inside
-**   the LU Decomposition benchmark. Though it didn't seem to
-**   make a difference on systems we ran it on, it nonetheless
-**   removes an operating system dependency that probably should
-**   not have been there.
-**
-** DISCLAIMER
-** The source, executable, and documentation files that comprise
-** the BYTEmark benchmarks are made available on an "as is" basis.
-** This means that we at BYTE Magazine have made every reasonable
-** effort to verify that the there are no errors in the source and
-** executable code.  We cannot, however, guarantee that the programs
-** are error-free.  Consequently, McGraw-HIll and BYTE Magazine make
-** no claims in regard to the fitness of the source code, executable
-** code, and documentation of the BYTEmark.
-**  Furthermore, BYTE Magazine, McGraw-Hill, and all employees
-** of McGraw-Hill cannot be held responsible for any damages resulting
-** from the use of this code or the results obtained from using
-** this code.
-*/
-
-/*
-** INCLUDES
-*/
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <strings.h>
-#include <math.h>
-#include "nmglobal.h"
-#include "nbench1.h"
-#include "wordcat.h"
-
-#ifdef DEBUG
-static int numsort_status=0;
-static int stringsort_status=0;
-#endif
-
-/*********************
-** NUMERIC HEAPSORT **
-**********************
-** This test implements a heapsort algorithm, performed on an
-** array of longs.
-*/
-
-/**************
-** DoNumSort **
-***************
-** This routine performs the CPU numeric sort test.
-** NOTE: Last version incorrectly stated that the routine
-**  returned result in # of longword sorted per second.
-**  Not so; the routine returns # of iterations per sec.
-*/
-
-void DoNumSort(void)
-{
-SortStruct *numsortstruct;      /* Local pointer to global struct */
-farlong *arraybase;     /* Base pointers of array */
-long accumtime;         /* Accumulated time */
-double iterations;      /* Iteration counter */
-char *errorcontext;     /* Error context string pointer */
-int systemerror;        /* For holding error codes */
-
-/*
-** Link to global structure
-*/
-numsortstruct=&global_numsortstruct;
-
-/*
-** Set the error context string.
-*/
-errorcontext="CPU:Numeric Sort";
-
-/*
-** See if we need to do self adjustment code.
-*/
-if(numsortstruct->adjust==0)
-{
-	/*
-	** Self-adjustment code.  The system begins by sorting 1
-	** array.  If it does that in no time, then two arrays
-	** are built and sorted.  This process continues until
-	** enough arrays are built to handle the tolerance.
-	*/
-	numsortstruct->numarrays=1;
-	while(1)
-	{
-		/*
-		** Allocate space for arrays
-		*/
-		arraybase=(farlong *)AllocateMemory(sizeof(long) *
-			numsortstruct->numarrays * numsortstruct->arraysize,
-			&systemerror);
-		if(systemerror)
-		{       ReportError(errorcontext,systemerror);
-			FreeMemory((farvoid *)arraybase,
-				  &systemerror);
-			ErrorExit();
-		}
-
-		/*
-		** Do an iteration of the numeric sort.  If the
-		** elapsed time is less than or equal to the permitted
-		** minimum, then allocate for more arrays and
-		** try again.
-		*/
-		if(DoNumSortIteration(arraybase,
-			numsortstruct->arraysize,
-			numsortstruct->numarrays)>global_min_ticks)
-			break;          /* We're ok...exit */
-
-		FreeMemory((farvoid *)arraybase,&systemerror);
-		if(numsortstruct->numarrays++>NUMNUMARRAYS)
-		{       printf("CPU:NSORT -- NUMNUMARRAYS hit.\n");
-			ErrorExit();
-		}
-	}
-}
-else
-{       /*
-	** Allocate space for arrays
-	*/
-	arraybase=(farlong *)AllocateMemory(sizeof(long) *
-		numsortstruct->numarrays * numsortstruct->arraysize,
-		&systemerror);
-	if(systemerror)
-	{       ReportError(errorcontext,systemerror);
-		FreeMemory((farvoid *)arraybase,
-			  &systemerror);
-		ErrorExit();
-	}
-
-}
-/*
-** All's well if we get here.  Repeatedly perform sorts until the
-** accumulated elapsed time is greater than # of seconds requested.
-*/
-accumtime=0L;
-iterations=(double)0.0;
-
-do {
-	accumtime+=DoNumSortIteration(arraybase,
-		numsortstruct->arraysize,
-		numsortstruct->numarrays);
-	iterations+=(double)1.0;
-} while(TicksToSecs(accumtime)<numsortstruct->request_secs);
-
-/*
-** Clean up, calculate results, and go home.  Be sure to
-** show that we don't have to rerun adjustment code.
-*/
-FreeMemory((farvoid *)arraybase,&systemerror);
-
-numsortstruct->sortspersec=iterations *
-	(double)numsortstruct->numarrays / TicksToFracSecs(accumtime);
-
-if(numsortstruct->adjust==0)
-	numsortstruct->adjust=1;
-
-#ifdef DEBUG
-if (numsort_status==0) printf("Numeric sort: OK\n");
-numsort_status=0;
-#endif
-return;
-}
-
-/***********************
-** DoNumSortIteration **
-************************
-** This routine executes one iteration of the numeric
-** sort benchmark.  It returns the number of ticks
-** elapsed for the iteration.
-*/
-static ulong DoNumSortIteration(farlong *arraybase,
-		ulong arraysize,
-		uint numarrays)
-{
-ulong elapsed;          /* Elapsed ticks */
-ulong i;
-/*
-** Load up the array with random numbers
-*/
-LoadNumArrayWithRand(arraybase,arraysize,numarrays);
-
-/*
-** Start the stopwatch
-*/
-elapsed=StartStopwatch();
-
-/*
-** Execute a heap of heapsorts
-*/
-for(i=0;i<numarrays;i++)
-	NumHeapSort(arraybase+i*arraysize,0L,arraysize-1L);
-
-/*
-** Get elapsed time
-*/
-elapsed=StopStopwatch(elapsed);
-#ifdef DEBUG
-{
-	for(i=0;i<arraysize-1;i++)
-	{       /*
-		** Compare to check for proper
-		** sort.
-		*/
-		if(arraybase[i+1]<arraybase[i])
-		{       printf("Sort Error\n");
-			numsort_status=1;
-                        break;
-		}
-	}
-}
-#endif
-
-return(elapsed);
-}
-
-/*************************
-** LoadNumArrayWithRand **
-**************************
-** Load up an array with random longs.
-*/
-static void LoadNumArrayWithRand(farlong *array,     /* Pointer to arrays */
-		ulong arraysize,
-		uint numarrays)         /* # of elements in array */
-{
-long i;                 /* Used for index */
-farlong *darray;        /* Destination array pointer */
-/*
-** Initialize the random number generator
-*/
-/* randnum(13L); */
-randnum((int32)13);
-
-/*
-** Load up first array with randoms
-*/
-for(i=0L;i<arraysize;i++)
-        /* array[i]=randnum(0L); */
-	array[i]=randnum((int32)0);
-
-/*
-** Now, if there's more than one array to load, copy the
-** first into each of the others.
-*/
-darray=array;
-while(--numarrays)
-{       darray+=arraysize;
-	for(i=0L;i<arraysize;i++)
-		darray[i]=array[i];
-}
-
-return;
-}
-
-/****************
-** NumHeapSort **
-*****************
-** Pass this routine a pointer to an array of long
-** integers.  Also pass in minimum and maximum offsets.
-** This routine performs a heap sort on that array.
-*/
-static void NumHeapSort(farlong *array,
-	ulong bottom,           /* Lower bound */
-	ulong top)              /* Upper bound */
-{
-ulong temp;                     /* Used to exchange elements */
-ulong i;                        /* Loop index */
-
-/*
-** First, build a heap in the array
-*/
-for(i=(top/2L); i>0; --i)
-	NumSift(array,i,top);
-
-/*
-** Repeatedly extract maximum from heap and place it at the
-** end of the array.  When we get done, we'll have a sorted
-** array.
-*/
-for(i=top; i>0; --i)
-{       NumSift(array,bottom,i);
-	temp=*array;                    /* Perform exchange */
-	*array=*(array+i);
-	*(array+i)=temp;
-}
-return;
-}
-
-/************
-** NumSift **
-*************
-** Peforms the sift operation on a numeric array,
-** constructing a heap in the array.
-*/
-static void NumSift(farlong *array,     /* Array of numbers */
-	ulong i,                /* Minimum of array */
-	ulong j)                /* Maximum of array */
-{
-unsigned long k;
-long temp;                              /* Used for exchange */
-
-while((i+i)<=j)
-{
-	k=i+i;
-	if(k<j)
-		if(array[k]<array[k+1L])
-			++k;
-	if(array[i]<array[k])
-	{
-		temp=array[k];
-		array[k]=array[i];
-		array[i]=temp;
-		i=k;
-	}
-	else
-		i=j+1;
-}
-return;
-}
-
-/********************
-** STRING HEAPSORT **
-********************/
-
-/*****************
-** DoStringSort **
-******************
-** This routine performs the CPU string sort test.
-** Arguments:
-**      requested_secs = # of seconds to execute test
-**      stringspersec = # of strings per second sorted (RETURNED)
-*/
-void DoStringSort(void)
-{
-
-SortStruct *strsortstruct;      /* Local for sort structure */
-faruchar *arraybase;            /* Base pointer of char array */
-long accumtime;                 /* Accumulated time */
-double iterations;              /* # of iterations */
-char *errorcontext;             /* Error context string pointer */
-int systemerror;                /* For holding error code */
-
-/*
-** Link to global structure
-*/
-strsortstruct=&global_strsortstruct;
-
-/*
-** Set the error context
-*/
-errorcontext="CPU:String Sort";
-
-/*
-** See if we have to perform self-adjustment code
-*/
-if(strsortstruct->adjust==0)
-{
-	/*
-	** Initialize the number of arrays.
-	*/
-	strsortstruct->numarrays=1;
-	while(1)
-	{
-		/*
-		** Allocate space for array.  We'll add an extra 100
-		** bytes to protect memory as strings move around
-		** (this can happen during string adjustment)
-		*/
-		arraybase=(faruchar *)AllocateMemory((strsortstruct->arraysize+100L) *
-			(long)strsortstruct->numarrays,&systemerror);
-		if(systemerror)
-		{       ReportError(errorcontext,systemerror);
-			ErrorExit();
-		}
-
-		/*
-		** Do an iteration of the string sort.  If the
-		** elapsed time is less than or equal to the permitted
-		** minimum, then de-allocate the array, reallocate a
-		** an additional array, and try again.
-		*/
-		if(DoStringSortIteration(arraybase,
-			strsortstruct->numarrays,
-			strsortstruct->arraysize)>global_min_ticks)
-			break;          /* We're ok...exit */
-
-		FreeMemory((farvoid *)arraybase,&systemerror);
-		strsortstruct->numarrays+=1;
-	}
-}
-else
-{
-	/*
-	** We don't have to perform self adjustment code.
-	** Simply allocate the space for the array.
-	*/
-	arraybase=(faruchar *)AllocateMemory((strsortstruct->arraysize+100L) *
-		(long)strsortstruct->numarrays,&systemerror);
-	if(systemerror)
-	{       ReportError(errorcontext,systemerror);
-		ErrorExit();
-	}
-}
-/*
-** All's well if we get here.  Repeatedly perform sorts until the
-** accumulated elapsed time is greater than # of seconds requested.
-*/
-accumtime=0L;
-iterations=(double)0.0;
-
-do {
-	accumtime+=DoStringSortIteration(arraybase,
-				strsortstruct->numarrays,
-				strsortstruct->arraysize);
-	iterations+=(double)strsortstruct->numarrays;
-} while(TicksToSecs(accumtime)<strsortstruct->request_secs);
-
-/*
-** Clean up, calculate results, and go home.
-** Set flag to show we don't need to rerun adjustment code.
-*/
-FreeMemory((farvoid *)arraybase,&systemerror);
-strsortstruct->sortspersec=iterations / (double)TicksToFracSecs(accumtime);
-if(strsortstruct->adjust==0)
-	strsortstruct->adjust=1;
-#ifdef DEBUG
-if (stringsort_status==0) printf("String sort: OK\n");
-stringsort_status=0;
-#endif
-return;
-}
-
-/**************************
-** DoStringSortIteration **
-***************************
-** This routine executes one iteration of the string
-** sort benchmark.  It returns the number of ticks
-** Note that this routine also builds the offset pointer
-** array.
-*/
-static ulong DoStringSortIteration(faruchar *arraybase,
-		uint numarrays,ulong arraysize)
-{
-farulong *optrarray;            /* Offset pointer array */
-unsigned long elapsed;          /* Elapsed ticks */
-unsigned long nstrings;         /* # of strings in array */
-int syserror;                   /* System error code */
-unsigned int i;                 /* Index */
-farulong *tempobase;            /* Temporary offset pointer base */
-faruchar *tempsbase;            /* Temporary string base pointer */
-
-/*
-** Load up the array(s) with random numbers
-*/
-optrarray=LoadStringArray(arraybase,numarrays,&nstrings,arraysize);
-
-/*
-** Set temp base pointers...they will be modified as the
-** benchmark proceeds.
-*/
-tempobase=optrarray;
-tempsbase=arraybase;
-
-/*
-** Start the stopwatch
-*/
-elapsed=StartStopwatch();
-
-/*
-** Execute heapsorts
-*/
-for(i=0;i<numarrays;i++)
-{       StrHeapSort(tempobase,tempsbase,nstrings,0L,nstrings-1);
-	tempobase+=nstrings;    /* Advance base pointers */
-	tempsbase+=arraysize+100;
-}
-
-/*
-** Record elapsed time
-*/
-elapsed=StopStopwatch(elapsed);
-
-#ifdef DEBUG
-{
-	unsigned long i;
-	for(i=0;i<nstrings-1;i++)
-	{       /*
-		** Compare strings to check for proper
-		** sort.
-		*/
-		if(str_is_less(optrarray,arraybase,nstrings,i+1,i))
-		{       printf("Sort Error\n");
-			stringsort_status=1;
-                        break;
-		}
-	}
-}
-#endif
-
-/*
-** Release the offset pointer array built by
-** LoadStringArray()
-*/
-FreeMemory((farvoid *)optrarray,&syserror);
-
-/*
-** Return elapsed ticks.
-*/
-return(elapsed);
-}
-
-/********************
-** LoadStringArray **
-*********************
-** Initialize the string array with random strings of
-** varying sizes.
-** Returns the pointer to the offset pointer array.
-** Note that since we're creating a number of arrays, this
-** routine builds one array, then copies it into the others.
-*/
-static farulong *LoadStringArray(faruchar *strarray, /* String array */
-	uint numarrays,                 /* # of arrays */
-	ulong *nstrings,                /* # of strings */
-	ulong arraysize)                /* Size of array */
-{
-faruchar *tempsbase;            /* Temporary string base pointer */
-farulong *optrarray;            /* Local for pointer */
-farulong *tempobase;            /* Temporary offset pointer base pointer */
-unsigned long curroffset;       /* Current offset */
-int fullflag;                   /* Indicates full array */
-unsigned char stringlength;     /* Length of string */
-unsigned char i;                /* Index */
-unsigned long j;                /* Another index */
-unsigned int k;                 /* Yet another index */
-unsigned int l;                 /* Ans still one more index */
-int systemerror;                /* For holding error code */
-
-/*
-** Initialize random number generator.
-*/
-/* randnum(13L); */
-randnum((int32)13);
-
-/*
-** Start with no strings.  Initialize our current offset pointer
-** to 0.
-*/
-*nstrings=0L;
-curroffset=0L;
-fullflag=0;
-
-do
-{
-	/*
-	** Allocate a string with a random length no
-	** shorter than 4 bytes and no longer than
-	** 80 bytes.  Note we have to also make sure
-	** there's room in the array.
-	*/
-        /* stringlength=(unsigned char)((1+abs_randwc(76L)) & 0xFFL);*/
-	stringlength=(unsigned char)((1+abs_randwc((int32)76)) & 0xFFL);
-	if((unsigned long)stringlength+curroffset+1L>=arraysize)
-	{       stringlength=(unsigned char)((arraysize-curroffset-1L) &
-				0xFF);
-		fullflag=1;     /* Indicates a full */
-	}
-
-	/*
-	** Store length at curroffset and advance current offset.
-	*/
-	*(strarray+curroffset)=stringlength;
-	curroffset++;
-
-	/*
-	** Fill up the rest of the string with random bytes.
-	*/
-	for(i=0;i<stringlength;i++)
-	{       *(strarray+curroffset)=
-		        /* (unsigned char)(abs_randwc((long)0xFE)); */
-			(unsigned char)(abs_randwc((int32)0xFE));
-		curroffset++;
-	}
-
-	/*
-	** Increment the # of strings counter.
-	*/
-	*nstrings+=1L;
-
-} while(fullflag==0);
-
-/*
-** We now have initialized a single full array.  If there
-** is more than one array, copy the original into the
-** others.
-*/
-k=1;
-tempsbase=strarray;
-while(k<numarrays)
-{       tempsbase+=arraysize+100;         /* Set base */
-	for(l=0;l<arraysize;l++)
-		tempsbase[l]=strarray[l];
-	k++;
-}
-
-/*
-** Now the array is full, allocate enough space for an
-** offset pointer array.
-*/
-optrarray=(farulong *)AllocateMemory(*nstrings * sizeof(unsigned long) *
-		numarrays,
-		&systemerror);
-if(systemerror)
-{       ReportError("CPU:Stringsort",systemerror);
-	FreeMemory((void *)strarray,&systemerror);
-	ErrorExit();
-}
-
-/*
-** Go through the newly-built string array, building
-** offsets and putting them into the offset pointer
-** array.
-*/
-curroffset=0;
-for(j=0;j<*nstrings;j++)
-{       *(optrarray+j)=curroffset;
-	curroffset+=(unsigned long)(*(strarray+curroffset))+1L;
-}
-
-/*
-** As above, we've made one copy of the offset pointers,
-** so duplicate this array in the remaining ones.
-*/
-k=1;
-tempobase=optrarray;
-while(k<numarrays)
-{       tempobase+=*nstrings;
-	for(l=0;l<*nstrings;l++)
-		tempobase[l]=optrarray[l];
-	k++;
-}
-
-/*
-** All done...go home.  Pass local pointer back.
-*/
-return(optrarray);
-}
-
-/**************
-** stradjust **
-***************
-** Used by the string heap sort.  Call this routine to adjust the
-** string at offset i to length l.  The members of the string array
-** are moved accordingly and the length of the string at offset i
-** is set to l.
-*/
-static void stradjust(farulong *optrarray,      /* Offset pointer array */
-	faruchar *strarray,                     /* String array */
-	ulong nstrings,                         /* # of strings */
-	ulong i,                                /* Offset to adjust */
-	uchar l)                                /* New length */
-{
-unsigned long nbytes;           /* # of bytes to move */
-unsigned long j;                /* Index */
-int direction;                  /* Direction indicator */
-unsigned char adjamount;        /* Adjustment amount */
-
-/*
-** If new length is less than old length, the direction is
-** down.  If new length is greater than old length, the
-** direction is up.
-*/
-direction=(int)l - (int)*(strarray+*(optrarray+i));
-adjamount=(unsigned char)abs(direction);
-
-/*
-** See if the adjustment is being made to the last
-** string in the string array.  If so, we don't have to
-** do anything more than adjust the length field.
-*/
-if(i==(nstrings-1L))
-{       *(strarray+*(optrarray+i))=l;
-	return;
-}
-
-/*
-** Calculate the total # of bytes in string array from
-** location i+1 to end of array.  Whether we're moving "up" or
-** down, this is how many bytes we'll have to move.
-*/
-nbytes=*(optrarray+nstrings-1L) +
-	(unsigned long)*(strarray+*(optrarray+nstrings-1L)) + 1L -
-	*(optrarray+i+1L);
-
-/*
-** Calculate the source and the destination.  Source is
-** string position i+1.  Destination is string position i+l
-** (i+"ell"...don't confuse 1 and l).
-** Hand this straight to memmove and let it handle the
-** "overlap" problem.
-*/
-MoveMemory((farvoid *)(strarray+*(optrarray+i)+l+1),
-	(farvoid *)(strarray+*(optrarray+i+1)),
-	(unsigned long)nbytes);
-
-/*
-** We have to adjust the offset pointer array.
-** This covers string i+1 to numstrings-1.
-*/
-for(j=i+1;j<nstrings;j++)
-	if(direction<0)
-		*(optrarray+j)=*(optrarray+j)-adjamount;
-	else
-		*(optrarray+j)=*(optrarray+j)+adjamount;
-
-/*
-** Store the new length and go home.
-*/
-*(strarray+*(optrarray+i))=l;
-return;
-}
-
-/****************
-** strheapsort **
-*****************
-** Pass this routine a pointer to an array of unsigned char.
-** The array is presumed to hold strings occupying at most
-** 80 bytes (counts a byte count).
-** This routine also needs a pointer to an array of offsets
-** which represent string locations in the array, and
-** an unsigned long indicating the number of strings
-** in the array.
-*/
-static void StrHeapSort(farulong *optrarray, /* Offset pointers */
-	faruchar *strarray,             /* Strings array */
-	ulong numstrings,               /* # of strings in array */
-	ulong bottom,                   /* Region to sort...bottom */
-	ulong top)                      /* Region to sort...top */
-{
-unsigned char temp[80];                 /* Used to exchange elements */
-unsigned char tlen;                     /* Temp to hold length */
-unsigned long i;                        /* Loop index */
-
-
-/*
-** Build a heap in the array
-*/
-for(i=(top/2L); i>0; --i)
-	strsift(optrarray,strarray,numstrings,i,top);
-
-/*
-** Repeatedly extract maximum from heap and place it at the
-** end of the array.  When we get done, we'll have a sorted
-** array.
-*/
-for(i=top; i>0; --i)
-{
-	strsift(optrarray,strarray,numstrings,0,i);
-
-	/* temp = string[0] */
-	tlen=*strarray;
-	MoveMemory((farvoid *)&temp[0], /* Perform exchange */
-		(farvoid *)strarray,
-		(unsigned long)(tlen+1));
-
-
-	/* string[0]=string[i] */
-	tlen=*(strarray+*(optrarray+i));
-	stradjust(optrarray,strarray,numstrings,0,tlen);
-	MoveMemory((farvoid *)strarray,
-		(farvoid *)(strarray+*(optrarray+i)),
-		(unsigned long)(tlen+1));
-
-	/* string[i]=temp */
-	tlen=temp[0];
-	stradjust(optrarray,strarray,numstrings,i,tlen);
-	MoveMemory((farvoid *)(strarray+*(optrarray+i)),
-		(farvoid *)&temp[0],
-		(unsigned long)(tlen+1));
-
-}
-return;
-}
-
-/****************
-** str_is_less **
-*****************
-** Pass this function:
-**      1) A pointer to an array of offset pointers
-**      2) A pointer to a string array
-**      3) The number of elements in the string array
-**      4) Offsets to two strings (a & b)
-** This function returns TRUE if string a is < string b.
-*/
-static int str_is_less(farulong *optrarray, /* Offset pointers */
-	faruchar *strarray,                     /* String array */
-	ulong numstrings,                       /* # of strings */
-	ulong a, ulong b)                       /* Offsets */
-{
-int slen;               /* String length */
-
-/*
-** Determine which string has the minimum length.  Use that
-** to call strncmp().  If they match up to that point, the
-** string with the longer length wins.
-*/
-slen=(int)*(strarray+*(optrarray+a));
-if(slen > (int)*(strarray+*(optrarray+b)))
-	slen=(int)*(strarray+*(optrarray+b));
-
-slen=strncmp((char *)(strarray+*(optrarray+a)),
-		(char *)(strarray+*(optrarray+b)),slen);
-
-if(slen==0)
-{
-	/*
-	** They match.  Return true if the length of a
-	** is greater than the length of b.
-	*/
-	if(*(strarray+*(optrarray+a)) >
-		*(strarray+*(optrarray+b)))
-		return(TRUE);
-	return(FALSE);
-}
-
-if(slen<0) return(TRUE);        /* a is strictly less than b */
-
-return(FALSE);                  /* Only other possibility */
-}
-
-/************
-** strsift **
-*************
-** Pass this function:
-**      1) A pointer to an array of offset pointers
-**      2) A pointer to a string array
-**      3) The number of elements in the string array
-**      4) Offset within which to sort.
-** Sift the array within the bounds of those offsets (thus
-** building a heap).
-*/
-static void strsift(farulong *optrarray,        /* Offset pointers */
-	faruchar *strarray,                     /* String array */
-	ulong numstrings,                       /* # of strings */
-	ulong i, ulong j)                       /* Offsets */
-{
-unsigned long k;                /* Temporaries */
-unsigned char temp[80];
-unsigned char tlen;             /* For string lengths */
-
-
-while((i+i)<=j)
-{
-	k=i+i;
-	if(k<j)
-		if(str_is_less(optrarray,strarray,numstrings,k,k+1L))
-			++k;
-	if(str_is_less(optrarray,strarray,numstrings,i,k))
-	{
-		/* temp=string[k] */
-		tlen=*(strarray+*(optrarray+k));
-		MoveMemory((farvoid *)&temp[0],
-			(farvoid *)(strarray+*(optrarray+k)),
-			(unsigned long)(tlen+1));
-
-		/* string[k]=string[i] */
-		tlen=*(strarray+*(optrarray+i));
-		stradjust(optrarray,strarray,numstrings,k,tlen);
-		MoveMemory((farvoid *)(strarray+*(optrarray+k)),
-			(farvoid *)(strarray+*(optrarray+i)),
-			(unsigned long)(tlen+1));
-
-		/* string[i]=temp */
-		tlen=temp[0];
-		stradjust(optrarray,strarray,numstrings,i,tlen);
-		MoveMemory((farvoid *)(strarray+*(optrarray+i)),
-			(farvoid *)&temp[0],
-			(unsigned long)(tlen+1));
-		i=k;
-	}
-	else
-		i=j+1;
-}
-return;
-}
-
-/************************
-** BITFIELD OPERATIONS **
-*************************/
-
-/*************
-** DoBitops **
-**************
-** Perform the bit operations test portion of the CPU
-** benchmark.  Returns the iterations per second.
-*/
-void DoBitops(void)
-{
-BitOpStruct *locbitopstruct;    /* Local bitop structure */
-farulong *bitarraybase;         /* Base of bitmap array */
-farulong *bitoparraybase;       /* Base of bitmap operations array */
-ulong nbitops;                  /* # of bitfield operations */
-ulong accumtime;                /* Accumulated time in ticks */
-double iterations;              /* # of iterations */
-char *errorcontext;             /* Error context string */
-int systemerror;                /* For holding error codes */
-int ticks;
-
-/*
-** Link to global structure.
-*/
-locbitopstruct=&global_bitopstruct;
-
-/*
-** Set the error context.
-*/
-errorcontext="CPU:Bitfields";
-
-/*
-** See if we need to run adjustment code.
-*/
-if(locbitopstruct->adjust==0)
-{
-	bitarraybase=(farulong *)AllocateMemory(locbitopstruct->bitfieldarraysize *
-		sizeof(ulong),&systemerror);
-	if(systemerror)
-	{       ReportError(errorcontext,systemerror);
-		ErrorExit();
-	}
-
-	/*
-	** Initialize bitfield operations array to [2,30] elements
-	*/
-	locbitopstruct->bitoparraysize=30L;
-
-	while(1)
-	{
-		/*
-		** Allocate space for operations array
-		*/
-		bitoparraybase=(farulong *)AllocateMemory(locbitopstruct->bitoparraysize*2L*
-			sizeof(ulong),
-			&systemerror);
-		if(systemerror)
-		{       ReportError(errorcontext,systemerror);
-			FreeMemory((farvoid *)bitarraybase,&systemerror);
-			ErrorExit();
-		}
-		/*
-		** Do an iteration of the bitmap test.  If the
-		** elapsed time is less than or equal to the permitted
-		** minimum, then de-allocate the array, reallocate a
-		** larger version, and try again.
-		*/
-		ticks=DoBitfieldIteration(bitarraybase,
-					   bitoparraybase,
-					   locbitopstruct->bitoparraysize,
-					   &nbitops);
-#ifdef DEBUG
-#ifdef LINUX
-	        if (locbitopstruct->bitoparraysize==30L){
-		  /* this is the first loop, write a debug file */
-		  FILE *file;
-		  unsigned long *running_base; /* same as farulong */
-		  long counter;
-		  file=fopen("debugbit.dat","w");
-		  running_base=bitarraybase;
-		  for (counter=0;counter<(long)(locbitopstruct->bitfieldarraysize);counter++){
-#ifdef LONG64
-		    fprintf(file,"%08X",(unsigned int)(*running_base&0xFFFFFFFFL));
-		    fprintf(file,"%08X",(unsigned int)((*running_base>>32)&0xFFFFFFFFL));
-		    if ((counter+1)%4==0) fprintf(file,"\n");
-#else
-		    fprintf(file,"%08lX",*running_base);
-		    if ((counter+1)%8==0) fprintf(file,"\n");
-#endif
-		    running_base=running_base+1;
-		  }
-		  fclose(file);
-		  printf("\nWrote the file debugbit.dat, you may want to compare it to debugbit.good\n");
-		}
-#endif
-#endif
-
-		if (ticks>global_min_ticks) break;      /* We're ok...exit */
-
-		FreeMemory((farvoid *)bitoparraybase,&systemerror);
-		locbitopstruct->bitoparraysize+=100L;
-	}
-}
-else
-{
-	/*
-	** Don't need to do self adjustment, just allocate
-	** the array space.
-	*/
-	bitarraybase=(farulong *)AllocateMemory(locbitopstruct->bitfieldarraysize *
-		sizeof(ulong),&systemerror);
-	if(systemerror)
-	{       ReportError(errorcontext,systemerror);
-		ErrorExit();
-	}
-	bitoparraybase=(farulong *)AllocateMemory(locbitopstruct->bitoparraysize*2L*
-		sizeof(ulong),
-		&systemerror);
-	if(systemerror)
-	{       ReportError(errorcontext,systemerror);
-		FreeMemory((farvoid *)bitarraybase,&systemerror);
-		ErrorExit();
-	}
-}
-
-/*
-** All's well if we get here.  Repeatedly perform bitops until the
-** accumulated elapsed time is greater than # of seconds requested.
-*/
-accumtime=0L;
-iterations=(double)0.0;
-do {
-	accumtime+=DoBitfieldIteration(bitarraybase,
-			bitoparraybase,
-			locbitopstruct->bitoparraysize,&nbitops);
-	iterations+=(double)nbitops;
-} while(TicksToSecs(accumtime)<locbitopstruct->request_secs);
-
-/*
-** Clean up, calculate results, and go home.
-** Also, set adjustment flag to show that we don't have
-** to do self adjusting in the future.
-*/
-FreeMemory((farvoid *)bitarraybase,&systemerror);
-FreeMemory((farvoid *)bitoparraybase,&systemerror);
-locbitopstruct->bitopspersec=iterations /TicksToFracSecs(accumtime);
-if(locbitopstruct->adjust==0)
-	locbitopstruct->adjust=1;
-
-return;
-}
-
-/************************
-** DoBitfieldIteration **
-*************************
-** Perform a single iteration of the bitfield benchmark.
-** Return the # of ticks accumulated by the operation.
-*/
-static ulong DoBitfieldIteration(farulong *bitarraybase,
-		farulong *bitoparraybase,
-		long bitoparraysize,
-		ulong *nbitops)
-{
-long i;                         /* Index */
-ulong bitoffset;                /* Offset into bitmap */
-ulong elapsed;                  /* Time to execute */
-/*
-** Clear # bitops counter
-*/
-*nbitops=0L;
-
-/*
-** Construct a set of bitmap offsets and run lengths.
-** The offset can be any random number from 0 to the
-** size of the bitmap (in bits).  The run length can
-** be any random number from 1 to the number of bits
-** between the offset and the end of the bitmap.
-** Note that the bitmap has 8192 * 32 bits in it.
-** (262,144 bits)
-*/
-/*
-** Reset random number generator so things repeat.
-** Also reset the bit array we work on.
-** added by Uwe F. Mayer
-*/
-randnum((int32)13);
-for (i=0;i<global_bitopstruct.bitfieldarraysize;i++)
-{
-#ifdef LONG64
-	*(bitarraybase+i)=(ulong)0x5555555555555555;
-#else
-	*(bitarraybase+i)=(ulong)0x55555555;
-#endif
-}
-randnum((int32)13);
-/* end of addition of code */
-
-for (i=0;i<bitoparraysize;i++)
-{
-	/* First item is offset */
-        /* *(bitoparraybase+i+i)=bitoffset=abs_randwc(262140L); */
-	*(bitoparraybase+i+i)=bitoffset=abs_randwc((int32)262140);
-
-	/* Next item is run length */
-	/* *nbitops+=*(bitoparraybase+i+i+1L)=abs_randwc(262140L-bitoffset);*/
-	*nbitops+=*(bitoparraybase+i+i+1L)=abs_randwc((int32)262140-bitoffset);
-}
-
-/*
-** Array of offset and lengths built...do an iteration of
-** the test.
-** Start the stopwatch.
-*/
-elapsed=StartStopwatch();
-
-/*
-** Loop through array off offset/run length pairs.
-** Execute operation based on modulus of index.
-*/
-for(i=0;i<bitoparraysize;i++)
-{
-	switch(i % 3)
-	{
-
-		case 0: /* Set run of bits */
-			ToggleBitRun(bitarraybase,
-				*(bitoparraybase+i+i),
-				*(bitoparraybase+i+i+1),
-				1);
-			break;
-
-		case 1: /* Clear run of bits */
-			ToggleBitRun(bitarraybase,
-				*(bitoparraybase+i+i),
-				*(bitoparraybase+i+i+1),
-				0);
-			break;
-
-		case 2: /* Complement run of bits */
-			FlipBitRun(bitarraybase,
-				*(bitoparraybase+i+i),
-				*(bitoparraybase+i+i+1));
-			break;
-	}
-}
-
-/*
-** Return elapsed time
-*/
-return(StopStopwatch(elapsed));
-}
-
-
-/*****************************
-**     ToggleBitRun          *
-******************************
-** Set or clear a run of nbits starting at
-** bit_addr in bitmap.
-*/
-static void ToggleBitRun(farulong *bitmap, /* Bitmap */
-		ulong bit_addr,         /* Address of bits to set */
-		ulong nbits,            /* # of bits to set/clr */
-		uint val)               /* 1 or 0 */
-{
-unsigned long bindex;   /* Index into array */
-unsigned long bitnumb;  /* Bit number */
-
-while(nbits--)
-{
-#ifdef LONG64
-	bindex=bit_addr>>6;     /* Index is number /64 */
-	bitnumb=bit_addr % 64;   /* Bit number in word */
-#else
-	bindex=bit_addr>>5;     /* Index is number /32 */
-	bitnumb=bit_addr % 32;  /* bit number in word */
-#endif
-	if(val)
-		bitmap[bindex]|=(1L<<bitnumb);
-	else
-		bitmap[bindex]&=~(1L<<bitnumb);
-	bit_addr++;
-}
-return;
-}
-
-/***************
-** FlipBitRun **
-****************
-** Complements a run of bits.
-*/
-static void FlipBitRun(farulong *bitmap,        /* Bit map */
-		ulong bit_addr,                 /* Bit address */
-		ulong nbits)                    /* # of bits to flip */
-{
-unsigned long bindex;   /* Index into array */
-unsigned long bitnumb;  /* Bit number */
-
-while(nbits--)
-{
-#ifdef LONG64
-	bindex=bit_addr>>6;     /* Index is number /64 */
-	bitnumb=bit_addr % 64;  /* Bit number in longword */
-#else
-	bindex=bit_addr>>5;     /* Index is number /32 */
-	bitnumb=bit_addr % 32;  /* Bit number in longword */
-#endif
-	bitmap[bindex]^=(1L<<bitnumb);
-	bit_addr++;
-}
-
-return;
-}
-
-/*****************************
-** FLOATING-POINT EMULATION **
-*****************************/
-
-/**************
-** DoEmFloat **
-***************
-** Perform the floating-point emulation routines portion of the
-** CPU benchmark.  Returns the operations per second.
-*/
-void DoEmFloat(void)
-{
-EmFloatStruct *locemfloatstruct;        /* Local structure */
-InternalFPF *abase;             /* Base of A array */
-InternalFPF *bbase;             /* Base of B array */
-InternalFPF *cbase;             /* Base of C array */
-ulong accumtime;                /* Accumulated time in ticks */
-double iterations;              /* # of iterations */
-ulong tickcount;                /* # of ticks */
-char *errorcontext;             /* Error context string pointer */
-int systemerror;                /* For holding error code */
-ulong loops;                    /* # of loops */
-
-/*
-** Link to global structure
-*/
-locemfloatstruct=&global_emfloatstruct;
-
-/*
-** Set the error context
-*/
-errorcontext="CPU:Floating Emulation";
-
-
-/*
-** Test the emulation routines.
-*/
-#ifdef DEBUG
-#endif
-
-abase=(InternalFPF *)AllocateMemory(locemfloatstruct->arraysize*sizeof(InternalFPF),
-		&systemerror);
-if(systemerror)
-{       ReportError(errorcontext,systemerror);
-	ErrorExit();
-}
-
-bbase=(InternalFPF *)AllocateMemory(locemfloatstruct->arraysize*sizeof(InternalFPF),
-		&systemerror);
-if(systemerror)
-{       ReportError(errorcontext,systemerror);
-	FreeMemory((farvoid *)abase,&systemerror);
-	ErrorExit();
-}
-
-cbase=(InternalFPF *)AllocateMemory(locemfloatstruct->arraysize*sizeof(InternalFPF),
-		&systemerror);
-if(systemerror)
-{       ReportError(errorcontext,systemerror);
-	FreeMemory((farvoid *)abase,&systemerror);
-	FreeMemory((farvoid *)bbase,&systemerror);
-	ErrorExit();
-}
-
-/*
-** Set up the arrays
-*/
-SetupCPUEmFloatArrays(abase,bbase,cbase,locemfloatstruct->arraysize);
-
-/*
-** See if we need to do self-adjusting code.
-*/
-if(locemfloatstruct->adjust==0)
-{
-	locemfloatstruct->loops=0;
-
-	/*
-	** Do an iteration of the tests.  If the elapsed time is
-	** less than minimum, increase the loop count and try
-	** again.
-	*/
-	for(loops=1;loops<CPUEMFLOATLOOPMAX;loops+=loops)
-	{       tickcount=DoEmFloatIteration(abase,bbase,cbase,
-			locemfloatstruct->arraysize,
-			loops);
-		if(tickcount>global_min_ticks)
-		{       locemfloatstruct->loops=loops;
-			break;
-		}
-	}
-}
-
-/*
-** Verify that selft adjustment code worked.
-*/
-if(locemfloatstruct->loops==0)
-{       printf("CPU:EMFPU -- CMPUEMFLOATLOOPMAX limit hit\n");
-	FreeMemory((farvoid *)abase,&systemerror);
-	FreeMemory((farvoid *)bbase,&systemerror);
-	FreeMemory((farvoid *)cbase,&systemerror);
-	ErrorExit();
-}
-
-/*
-** All's well if we get here.  Repeatedly perform floating
-** tests until the accumulated time is greater than the
-** # of seconds requested.
-** Each iteration performs arraysize * 3 operations.
-*/
-accumtime=0L;
-iterations=(double)0.0;
-do {
-	accumtime+=DoEmFloatIteration(abase,bbase,cbase,
-			locemfloatstruct->arraysize,
-			locemfloatstruct->loops);
-	iterations+=(double)1.0;
-} while(TicksToSecs(accumtime)<locemfloatstruct->request_secs);
-
-
-/*
-** Clean up, calculate results, and go home.
-** Also, indicate that adjustment is done.
-*/
-FreeMemory((farvoid *)abase,&systemerror);
-FreeMemory((farvoid *)bbase,&systemerror);
-FreeMemory((farvoid *)cbase,&systemerror);
-
-locemfloatstruct->emflops=(iterations*(double)locemfloatstruct->loops)/
-		(double)TicksToFracSecs(accumtime);
-if(locemfloatstruct->adjust==0)
-	locemfloatstruct->adjust=1;
-
-#ifdef DEBUG
-printf("----------------------------------------------------------------------------\n");
-#endif
-return;
-}
-
-/*************************
-** FOURIER COEFFICIENTS **
-*************************/
-
-/**************
-** DoFourier **
-***************
-** Perform the transcendental/trigonometric portion of the
-** benchmark.  This benchmark calculates the first n
-** fourier coefficients of the function (x+1)^x defined
-** on the interval 0,2.
-*/
-void DoFourier(void)
-{
-FourierStruct *locfourierstruct;        /* Local fourier struct */
-fardouble *abase;               /* Base of A[] coefficients array */
-fardouble *bbase;               /* Base of B[] coefficients array */
-unsigned long accumtime;        /* Accumulated time in ticks */
-double iterations;              /* # of iterations */
-char *errorcontext;             /* Error context string pointer */
-int systemerror;                /* For error code */
-
-/*
-** Link to global structure
-*/
-locfourierstruct=&global_fourierstruct;
-
-/*
-** Set error context string
-*/
-errorcontext="FPU:Transcendental";
-
-/*
-** See if we need to do self-adjustment code.
-*/
-if(locfourierstruct->adjust==0)
-{
-	locfourierstruct->arraysize=100L;       /* Start at 100 elements */
-	while(1)
-	{
-
-		abase=(fardouble *)AllocateMemory(locfourierstruct->arraysize*sizeof(double),
-				&systemerror);
-		if(systemerror)
-		{       ReportError(errorcontext,systemerror);
-			ErrorExit();
-		}
-
-		bbase=(fardouble *)AllocateMemory(locfourierstruct->arraysize*sizeof(double),
-				&systemerror);
-		if(systemerror)
-		{       ReportError(errorcontext,systemerror);
-			FreeMemory((void *)abase,&systemerror);
-			ErrorExit();
-		}
-		/*
-		** Do an iteration of the tests.  If the elapsed time is
-		** less than or equal to the permitted minimum, re-allocate
-		** larger arrays and try again.
-		*/
-		if(DoFPUTransIteration(abase,bbase,
-			locfourierstruct->arraysize)>global_min_ticks)
-			break;          /* We're ok...exit */
-
-		/*
-		** Make bigger arrays and try again.
-		*/
-		FreeMemory((farvoid *)abase,&systemerror);
-		FreeMemory((farvoid *)bbase,&systemerror);
-		locfourierstruct->arraysize+=50L;
-	}
-}
-else
-{       /*
-	** Don't need self-adjustment.  Just allocate the
-	** arrays, and go.
-	*/
-	abase=(fardouble *)AllocateMemory(locfourierstruct->arraysize*sizeof(double),
-			&systemerror);
-	if(systemerror)
-	{       ReportError(errorcontext,systemerror);
-		ErrorExit();
-	}
-
-	bbase=(fardouble *)AllocateMemory(locfourierstruct->arraysize*sizeof(double),
-			&systemerror);
-	if(systemerror)
-	{       ReportError(errorcontext,systemerror);
-		FreeMemory((void *)abase,&systemerror);
-		ErrorExit();
-	}
-}
-/*
-** All's well if we get here.  Repeatedly perform integration
-** tests until the accumulated time is greater than the
-** # of seconds requested.
-*/
-accumtime=0L;
-iterations=(double)0.0;
-do {
-	accumtime+=DoFPUTransIteration(abase,bbase,locfourierstruct->arraysize);
-	iterations+=(double)locfourierstruct->arraysize*(double)2.0-(double)1.0;
-} while(TicksToSecs(accumtime)<locfourierstruct->request_secs);
-
-
-/*
-** Clean up, calculate results, and go home.
-** Also set adjustment flag to indicate no adjust code needed.
-*/
-FreeMemory((farvoid *)abase,&systemerror);
-FreeMemory((farvoid *)bbase,&systemerror);
-
-locfourierstruct->fflops=iterations/(double)TicksToFracSecs(accumtime);
-
-if(locfourierstruct->adjust==0)
-	locfourierstruct->adjust=1;
-
-return;
-}
-
-/************************
-** DoFPUTransIteration **
-*************************
-** Perform an iteration of the FPU Transcendental/trigonometric
-** benchmark.  Here, an iteration consists of calculating the
-** first n fourier coefficients of the function (x+1)^x on
-** the interval 0,2.  n is given by arraysize.
-** NOTE: The # of integration steps is fixed at
-** 200.
-*/
-static ulong DoFPUTransIteration(fardouble *abase,      /* A coeffs. */
-			fardouble *bbase,               /* B coeffs. */
-			ulong arraysize)                /* # of coeffs */
-{
-double omega;           /* Fundamental frequency */
-unsigned long i;        /* Index */
-unsigned long elapsed;  /* Elapsed time */
-
-/*
-** Start the stopwatch
-*/
-elapsed=StartStopwatch();
-
-/*
-** Calculate the fourier series.  Begin by
-** calculating A[0].
-*/
-
-*abase=TrapezoidIntegrate((double)0.0,
-			(double)2.0,
-			200,
-			(double)0.0,    /* No omega * n needed */
-			0 )/(double)2.0;
-
-/*
-** Calculate the fundamental frequency.
-** ( 2 * pi ) / period...and since the period
-** is 2, omega is simply pi.
-*/
-omega=(double)3.1415926535897932;
-
-for(i=1;i<arraysize;i++)
-{
-
-	/*
-	** Calculate A[i] terms.  Note, once again, that we
-	** can ignore the 2/period term outside the integral
-	** since the period is 2 and the term cancels itself
-	** out.
-	*/
-	*(abase+i)=TrapezoidIntegrate((double)0.0,
-			(double)2.0,
-			200,
-			omega * (double)i,
-			1);
-
-	/*
-	** Calculate the B[i] terms.
-	*/
-	*(bbase+i)=TrapezoidIntegrate((double)0.0,
-			(double)2.0,
-			200,
-			omega * (double)i,
-			2);
-
-}
-#ifdef DEBUG
-{
-  int i;
-  printf("\nA[i]=\n");
-  for (i=0;i<arraysize;i++) printf("%7.3g ",abase[i]);
-  printf("\nB[i]=\n(undefined) ");
-  for (i=1;i<arraysize;i++) printf("%7.3g ",bbase[i]);
-}
-#endif
-/*
-** All done, stop the stopwatch
-*/
-return(StopStopwatch(elapsed));
-}
-
-/***********************
-** TrapezoidIntegrate **
-************************
-** Perform a simple trapezoid integration on the
-** function (x+1)**x.
-** x0,x1 set the lower and upper bounds of the
-** integration.
-** nsteps indicates # of trapezoidal sections
-** omegan is the fundamental frequency times
-**  the series member #
-** select = 0 for the A[0] term, 1 for cosine terms, and
-**   2 for sine terms.
-** Returns the value.
-*/
-static double TrapezoidIntegrate( double x0,            /* Lower bound */
-			double x1,              /* Upper bound */
-			int nsteps,             /* # of steps */
-			double omegan,          /* omega * n */
-			int select)
-{
-double x;               /* Independent variable */
-double dx;              /* Stepsize */
-double rvalue;          /* Return value */
-
-
-/*
-** Initialize independent variable
-*/
-x=x0;
-
-/*
-** Calculate stepsize
-*/
-dx=(x1 - x0) / (double)nsteps;
-
-/*
-** Initialize the return value.
-*/
-rvalue=thefunction(x0,omegan,select)/(double)2.0;
-
-/*
-** Compute the other terms of the integral.
-*/
-if(nsteps!=1)
-{       --nsteps;               /* Already done 1 step */
-	while(--nsteps )
-	{
-		x+=dx;
-		rvalue+=thefunction(x,omegan,select);
-	}
-}
-/*
-** Finish computation
-*/
-rvalue=(rvalue+thefunction(x1,omegan,select)/(double)2.0)*dx;
-
-return(rvalue);
-}
-
-/****************
-** thefunction **
-*****************
-** This routine selects the function to be used
-** in the Trapezoid integration.
-** x is the independent variable
-** omegan is omega * n
-** select chooses which of the sine/cosine functions
-**  are used.  note the special case for select=0.
-*/
-static double thefunction(double x,             /* Independent variable */
-		double omegan,          /* Omega * term */
-		int select)             /* Choose term */
-{
-
-/*
-** Use select to pick which function we call.
-*/
-switch(select)
-{
-	case 0: return(pow(x+(double)1.0,x));
-
-	case 1: return(pow(x+(double)1.0,x) * cos(omegan * x));
-
-	case 2: return(pow(x+(double)1.0,x) * sin(omegan * x));
-}
-
-/*
-** We should never reach this point, but the following
-** keeps compilers from issuing a warning message.
-*/
-return(0.0);
-}
-
-/*************************
-** ASSIGNMENT ALGORITHM **
-*************************/
-
-/*************
-** DoAssign **
-**************
-** Perform an assignment algorithm.
-** The algorithm was adapted from the step by step guide found
-** in "Quantitative Decision Making for Business" (Gordon,
-**  Pressman, and Cohn; Prentice-Hall)
-**
-**
-** NOTES:
-** 1. Even though the algorithm distinguishes between
-**    ASSIGNROWS and ASSIGNCOLS, as though the two might
-**    be different, it does presume a square matrix.
-**    I.E., ASSIGNROWS and ASSIGNCOLS must be the same.
-**    This makes for some algorithmically-correct but
-**    probably non-optimal constructs.
-**
-*/
-void DoAssign(void)
-{
-AssignStruct *locassignstruct;  /* Local structure ptr */
-farlong *arraybase;
-char *errorcontext;
-int systemerror;
-ulong accumtime;
-double iterations;
-
-/*
-** Link to global structure
-*/
-locassignstruct=&global_assignstruct;
-
-/*
-** Set the error context string.
-*/
-errorcontext="CPU:Assignment";
-
-/*
-** See if we need to do self adjustment code.
-*/
-if(locassignstruct->adjust==0)
-{
-	/*
-	** Self-adjustment code.  The system begins by working on 1
-	** array.  If it does that in no time, then two arrays
-	** are built.  This process continues until
-	** enough arrays are built to handle the tolerance.
-	*/
-	locassignstruct->numarrays=1;
-	while(1)
-	{
-		/*
-		** Allocate space for arrays
-		*/
-		arraybase=(farlong *) AllocateMemory(sizeof(long)*
-			ASSIGNROWS*ASSIGNCOLS*locassignstruct->numarrays,
-			 &systemerror);
-		if(systemerror)
-		{       ReportError(errorcontext,systemerror);
-			FreeMemory((farvoid *)arraybase,
-			  &systemerror);
-			ErrorExit();
-		}
-
-		/*
-		** Do an iteration of the assignment alg.  If the
-		** elapsed time is less than or equal to the permitted
-		** minimum, then allocate for more arrays and
-		** try again.
-		*/
-		if(DoAssignIteration(arraybase,
-			locassignstruct->numarrays)>global_min_ticks)
-			break;          /* We're ok...exit */
-
-		FreeMemory((farvoid *)arraybase, &systemerror);
-		locassignstruct->numarrays++;
-	}
-}
-else
-{       /*
-	** Allocate space for arrays
-	*/
-	arraybase=(farlong *)AllocateMemory(sizeof(long)*
-		ASSIGNROWS*ASSIGNCOLS*locassignstruct->numarrays,
-		 &systemerror);
-	if(systemerror)
-	{       ReportError(errorcontext,systemerror);
-		FreeMemory((farvoid *)arraybase,
-		  &systemerror);
-		ErrorExit();
-	}
-}
-
-/*
-** All's well if we get here.  Do the tests.
-*/
-accumtime=0L;
-iterations=(double)0.0;
-
-do {
-	accumtime+=DoAssignIteration(arraybase,
-		locassignstruct->numarrays);
-	iterations+=(double)1.0;
-} while(TicksToSecs(accumtime)<locassignstruct->request_secs);
-
-/*
-** Clean up, calculate results, and go home.  Be sure to
-** show that we don't have to rerun adjustment code.
-*/
-FreeMemory((farvoid *)arraybase,&systemerror);
-
-locassignstruct->iterspersec=iterations *
-	(double)locassignstruct->numarrays / TicksToFracSecs(accumtime);
-
-if(locassignstruct->adjust==0)
-	locassignstruct->adjust=1;
-
-return;
-
-}
-
-/**********************
-** DoAssignIteration **
-***********************
-** This routine executes one iteration of the assignment test.
-** It returns the number of ticks elapsed in the iteration.
-*/
-static ulong DoAssignIteration(farlong *arraybase,
-	ulong numarrays)
-{
-longptr abase;                  /* local pointer */
-ulong elapsed;          /* Elapsed ticks */
-ulong i;
-
-/*
-** Set up local pointer
-*/
-abase.ptrs.p=arraybase;
-
-/*
-** Load up the arrays with a random table.
-*/
-LoadAssignArrayWithRand(arraybase,numarrays);
-
-/*
-** Start the stopwatch
-*/
-elapsed=StartStopwatch();
-
-/*
-** Execute assignment algorithms
-*/
-for(i=0;i<numarrays;i++)
-{       /* abase.ptrs.p+=i*ASSIGNROWS*ASSIGNCOLS; */
-        /* Fixed  by Eike Dierks */
-	Assignment(*abase.ptrs.ap);
-	abase.ptrs.p+=ASSIGNROWS*ASSIGNCOLS;
-}
-
-/*
-** Get elapsed time
-*/
-return(StopStopwatch(elapsed));
-}
-
-/****************************
-** LoadAssignArrayWithRand **
-*****************************
-** Load the assignment arrays with random numbers.  All positive.
-** These numbers represent costs.
-*/
-static void LoadAssignArrayWithRand(farlong *arraybase,
-	ulong numarrays)
-{
-longptr abase,abase1;   /* Local for array pointer */
-ulong i;
-
-/*
-** Set local array pointer
-*/
-abase.ptrs.p=arraybase;
-abase1.ptrs.p=arraybase;
-
-/*
-** Set up the first array.  Then just copy it into the
-** others.
-*/
-LoadAssign(*(abase.ptrs.ap));
-if(numarrays>1)
-	for(i=1;i<numarrays;i++)
-	  {     /* abase1.ptrs.p+=i*ASSIGNROWS*ASSIGNCOLS; */
-	        /* Fixed  by Eike Dierks */
-	        abase1.ptrs.p+=ASSIGNROWS*ASSIGNCOLS;
-		CopyToAssign(*(abase.ptrs.ap),*(abase1.ptrs.ap));
-	}
-
-return;
-}
-
-/***************
-** LoadAssign **
-****************
-** The array given by arraybase is loaded with positive random
-** numbers.  Elements in the array are capped at 5,000,000.
-*/
-static void LoadAssign(farlong arraybase[][ASSIGNCOLS])
-{
-ushort i,j;
-
-/*
-** Reset random number generator so things repeat.
-*/
-/* randnum(13L); */
-randnum((int32)13);
-
-for(i=0;i<ASSIGNROWS;i++)
-  for(j=0;j<ASSIGNROWS;j++){
-    /* arraybase[i][j]=abs_randwc(5000000L);*/
-    arraybase[i][j]=abs_randwc((int32)5000000);
-  }
-
-return;
-}
-
-/*****************
-** CopyToAssign **
-******************
-** Copy the contents of one array to another.  This is called by
-** the routine that builds the initial array, and is used to copy
-** the contents of the intial array into all following arrays.
-*/
-static void CopyToAssign(farlong arrayfrom[ASSIGNROWS][ASSIGNCOLS],
-		farlong arrayto[ASSIGNROWS][ASSIGNCOLS])
-{
-ushort i,j;
-
-for(i=0;i<ASSIGNROWS;i++)
-	for(j=0;j<ASSIGNCOLS;j++)
-		arrayto[i][j]=arrayfrom[i][j];
-
-return;
-}
-
-/***************
-** Assignment **
-***************/
-static void Assignment(farlong arraybase[][ASSIGNCOLS])
-{
-short assignedtableau[ASSIGNROWS][ASSIGNCOLS];
-
-/*
-** First, calculate minimum costs
-*/
-calc_minimum_costs(arraybase);
-
-/*
-** Repeat following until the number of rows selected
-** equals the number of rows in the tableau.
-*/
-while(first_assignments(arraybase,assignedtableau)!=ASSIGNROWS)
-{         second_assignments(arraybase,assignedtableau);
-}
-
-#ifdef DEBUG
-{
-	int i,j;
-	printf("\nColumn choices for each row\n");
-	for(i=0;i<ASSIGNROWS;i++)
-	{
-	        printf("R%03d: ",i);
-		for(j=0;j<ASSIGNCOLS;j++)
-			if(assignedtableau[i][j]==1)
-				printf("%03d ",j);
-	}
-}
-#endif
-
-return;
-}
-
-/***********************
-** calc_minimum_costs **
-************************
-** Revise the tableau by calculating the minimum costs on a
-** row and column basis.  These minima are subtracted from
-** their rows and columns, creating a new tableau.
-*/
-static void calc_minimum_costs(long tableau[][ASSIGNCOLS])
-{
-ushort i,j;              /* Index variables */
-long currentmin;        /* Current minimum */
-/*
-** Determine minimum costs on row basis.  This is done by
-** subtracting -- on a row-per-row basis -- the minum value
-** for that row.
-*/
-for(i=0;i<ASSIGNROWS;i++)
-{
-	currentmin=MAXPOSLONG;  /* Initialize minimum */
-	for(j=0;j<ASSIGNCOLS;j++)
-		if(tableau[i][j]<currentmin)
-			currentmin=tableau[i][j];
-
-	for(j=0;j<ASSIGNCOLS;j++)
-		tableau[i][j]-=currentmin;
-}
-
-/*
-** Determine minimum cost on a column basis.  This works
-** just as above, only now we step through the array
-** column-wise
-*/
-for(j=0;j<ASSIGNCOLS;j++)
-{
-	currentmin=MAXPOSLONG;  /* Initialize minimum */
-	for(i=0;i<ASSIGNROWS;i++)
-		if(tableau[i][j]<currentmin)
-			currentmin=tableau[i][j];
-
-	/*
-	** Here, we'll take the trouble to see if the current
-	** minimum is zero.  This is likely worth it, since the
-	** preceding loop will have created at least one zero in
-	** each row.  We can save ourselves a few iterations.
-	*/
-	if(currentmin!=0)
-		for(i=0;i<ASSIGNROWS;i++)
-			tableau[i][j]-=currentmin;
-}
-
-return;
-}
-
-/**********************
-** first_assignments **
-***********************
-** Do first assignments.
-** The assignedtableau[] array holds a set of values that
-** indicate the assignment of a value, or its elimination.
-** The values are:
-**      0 = Item is neither assigned nor eliminated.
-**      1 = Item is assigned
-**      2 = Item is eliminated
-** Returns the number of selections made.  If this equals
-** the number of rows, then an optimum has been determined.
-*/
-static int first_assignments(long tableau[][ASSIGNCOLS],
-		short assignedtableau[][ASSIGNCOLS])
-{
-ushort i,j,k;                   /* Index variables */
-ushort numassigns;              /* # of assignments */
-ushort totnumassigns;           /* Total # of assignments */
-ushort numzeros;                /* # of zeros in row */
-int selected=0;                 /* Flag used to indicate selection */
-
-/*
-** Clear the assignedtableau, setting all members to show that
-** no one is yet assigned, eliminated, or anything.
-*/
-for(i=0;i<ASSIGNROWS;i++)
-	for(j=0;j<ASSIGNCOLS;j++)
-		assignedtableau[i][j]=0;
-
-totnumassigns=0;
-do {
-	numassigns=0;
-	/*
-	** Step through rows.  For each one that is not currently
-	** assigned, see if the row has only one zero in it.  If so,
-	** mark that as an assigned row/col.  Eliminate other zeros
-	** in the same column.
-	*/
-	for(i=0;i<ASSIGNROWS;i++)
-	{       numzeros=0;
-		for(j=0;j<ASSIGNCOLS;j++)
-			if(tableau[i][j]==0L)
-				if(assignedtableau[i][j]==0)
-				{       numzeros++;
-					selected=j;
-				}
-		if(numzeros==1)
-		{       numassigns++;
-			totnumassigns++;
-			assignedtableau[i][selected]=1;
-			for(k=0;k<ASSIGNROWS;k++)
-				if((k!=i) &&
-				   (tableau[k][selected]==0))
-					assignedtableau[k][selected]=2;
-		}
-	}
-	/*
-	** Step through columns, doing same as above.  Now, be careful
-	** of items in the other rows of a selected column.
-	*/
-	for(j=0;j<ASSIGNCOLS;j++)
-	{       numzeros=0;
-		for(i=0;i<ASSIGNROWS;i++)
-			if(tableau[i][j]==0L)
-				if(assignedtableau[i][j]==0)
-				{       numzeros++;
-					selected=i;
-				}
-		if(numzeros==1)
-		{       numassigns++;
-			totnumassigns++;
-			assignedtableau[selected][j]=1;
-			for(k=0;k<ASSIGNCOLS;k++)
-				if((k!=j) &&
-				   (tableau[selected][k]==0))
-					assignedtableau[selected][k]=2;
-		}
-	}
-	/*
-	** Repeat until no more assignments to be made.
-	*/
-} while(numassigns!=0);
-
-/*
-** See if we can leave at this point.
-*/
-if(totnumassigns==ASSIGNROWS) return(totnumassigns);
-
-/*
-** Now step through the array by row.  If you find any unassigned
-** zeros, pick the first in the row.  Eliminate all zeros from
-** that same row & column.  This occurs if there are multiple optima...
-** possibly.
-*/
-for(i=0;i<ASSIGNROWS;i++)
-{       selected=-1;
-	for(j=0;j<ASSIGNCOLS;j++)
-		if((tableau[i][j]==0L) &&
-		   (assignedtableau[i][j]==0))
-		{       selected=j;
-			break;
-		}
-	if(selected!=-1)
-	{       assignedtableau[i][selected]=1;
-		totnumassigns++;
-		for(k=0;k<ASSIGNCOLS;k++)
-			if((k!=selected) &&
-			   (tableau[i][k]==0L))
-				assignedtableau[i][k]=2;
-		for(k=0;k<ASSIGNROWS;k++)
-			if((k!=i) &&
-			   (tableau[k][selected]==0L))
-				assignedtableau[k][selected]=2;
-	}
-}
-
-return(totnumassigns);
-}
-
-/***********************
-** second_assignments **
-************************
-** This section of the algorithm creates the revised
-** tableau, and is difficult to explain.  I suggest you
-** refer to the algorithm's source, mentioned in comments
-** toward the beginning of the program.
-*/
-static void second_assignments(long tableau[][ASSIGNCOLS],
-		short assignedtableau[][ASSIGNCOLS])
-{
-int i,j;                                /* Indexes */
-short linesrow[ASSIGNROWS];
-short linescol[ASSIGNCOLS];
-long smallest;                          /* Holds smallest value */
-ushort numassigns;                      /* Number of assignments */
-ushort newrows;                         /* New rows to be considered */
-/*
-** Clear the linesrow and linescol arrays.
-*/
-for(i=0;i<ASSIGNROWS;i++)
-	linesrow[i]=0;
-for(i=0;i<ASSIGNCOLS;i++)
-	linescol[i]=0;
-
-/*
-** Scan rows, flag each row that has no assignment in it.
-*/
-for(i=0;i<ASSIGNROWS;i++)
-{       numassigns=0;
-	for(j=0;j<ASSIGNCOLS;j++)
-		if(assignedtableau[i][j]==1)
-		{       numassigns++;
-			break;
-		}
-	if(numassigns==0) linesrow[i]=1;
-}
-
-do {
-
-	newrows=0;
-	/*
-	** For each row checked above, scan for any zeros.  If found,
-	** check the associated column.
-	*/
-	for(i=0;i<ASSIGNROWS;i++)
-	{       if(linesrow[i]==1)
-			for(j=0;j<ASSIGNCOLS;j++)
-				if(tableau[i][j]==0)
-					linescol[j]=1;
-	}
-
-	/*
-	** Now scan checked columns.  If any contain assigned zeros, check
-	** the associated row.
-	*/
-	for(j=0;j<ASSIGNCOLS;j++)
-		if(linescol[j]==1)
-			for(i=0;i<ASSIGNROWS;i++)
-				if((assignedtableau[i][j]==1) &&
-					(linesrow[i]!=1))
-				{
-					linesrow[i]=1;
-					newrows++;
-				}
-} while(newrows!=0);
-
-/*
-** linesrow[n]==0 indicate rows covered by imaginary line
-** linescol[n]==1 indicate cols covered by imaginary line
-** For all cells not covered by imaginary lines, determine smallest
-** value.
-*/
-smallest=MAXPOSLONG;
-for(i=0;i<ASSIGNROWS;i++)
-	if(linesrow[i]!=0)
-		for(j=0;j<ASSIGNCOLS;j++)
-			if(linescol[j]!=1)
-				if(tableau[i][j]<smallest)
-					smallest=tableau[i][j];
-
-/*
-** Subtract smallest from all cells in the above set.
-*/
-for(i=0;i<ASSIGNROWS;i++)
-	if(linesrow[i]!=0)
-		for(j=0;j<ASSIGNCOLS;j++)
-			if(linescol[j]!=1)
-				tableau[i][j]-=smallest;
-
-/*
-** Add smallest to all cells covered by two lines.
-*/
-for(i=0;i<ASSIGNROWS;i++)
-	if(linesrow[i]==0)
-		for(j=0;j<ASSIGNCOLS;j++)
-			if(linescol[j]==1)
-				tableau[i][j]+=smallest;
-
-return;
-}
-
-/********************
-** IDEA Encryption **
-*********************
-** IDEA - International Data Encryption Algorithm.
-** Based on code presented in Applied Cryptography by Bruce Schneier.
-** Which was based on code developed by Xuejia Lai and James L. Massey.
-** Other modifications made by Colin Plumb.
-**
-*/
-
-/***********
-** DoIDEA **
-************
-** Perform IDEA encryption.  Note that we time encryption & decryption
-** time as being a single loop.
-*/
-void DoIDEA(void)
-{
-IDEAStruct *locideastruct;      /* Loc pointer to global structure */
-int i;
-IDEAkey Z,DK;
-u16 userkey[8];
-ulong accumtime;
-double iterations;
-char *errorcontext;
-int systemerror;
-faruchar *plain1;               /* First plaintext buffer */
-faruchar *crypt1;               /* Encryption buffer */
-faruchar *plain2;               /* Second plaintext buffer */
-
-/*
-** Link to global data
-*/
-locideastruct=&global_ideastruct;
-
-/*
-** Set error context
-*/
-errorcontext="CPU:IDEA";
-
-/*
-** Re-init random-number generator.
-*/
-/* randnum(3L); */
-randnum((int32)3);
-
-/*
-** Build an encryption/decryption key
-*/
-for (i=0;i<8;i++)
-        /* userkey[i]=(u16)(abs_randwc(60000L) & 0xFFFF); */
-	userkey[i]=(u16)(abs_randwc((int32)60000) & 0xFFFF);
-for(i=0;i<KEYLEN;i++)
-	Z[i]=0;
-
-/*
-** Compute encryption/decryption subkeys
-*/
-en_key_idea(userkey,Z);
-de_key_idea(Z,DK);
-
-/*
-** Allocate memory for buffers.  We'll make 3, called plain1,
-** crypt1, and plain2.  It works like this:
-**   plain1 >>encrypt>> crypt1 >>decrypt>> plain2.
-** So, plain1 and plain2 should match.
-** Also, fill up plain1 with sample text.
-*/
-plain1=(faruchar *)AllocateMemory(locideastruct->arraysize,&systemerror);
-if(systemerror)
-{
-	ReportError(errorcontext,systemerror);
-	ErrorExit();
-}
-
-crypt1=(faruchar *)AllocateMemory(locideastruct->arraysize,&systemerror);
-if(systemerror)
-{
-	ReportError(errorcontext,systemerror);
-	FreeMemory((farvoid *)plain1,&systemerror);
-	ErrorExit();
-}
-
-plain2=(faruchar *)AllocateMemory(locideastruct->arraysize,&systemerror);
-if(systemerror)
-{
-	ReportError(errorcontext,systemerror);
-	FreeMemory((farvoid *)plain1,&systemerror);
-	FreeMemory((farvoid *)crypt1,&systemerror);
-	ErrorExit();
-}
-/*
-** Note that we build the "plaintext" by simply loading
-** the array up with random numbers.
-*/
-for(i=0;i<locideastruct->arraysize;i++)
-	plain1[i]=(uchar)(abs_randwc(255) & 0xFF);
-
-/*
-** See if we need to perform self adjustment loop.
-*/
-if(locideastruct->adjust==0)
-{
-	/*
-	** Do self-adjustment.  This involves initializing the
-	** # of loops and increasing the loop count until we
-	** get a number of loops that we can use.
-	*/
-	for(locideastruct->loops=100L;
-	  locideastruct->loops<MAXIDEALOOPS;
-	  locideastruct->loops+=10L)
-		if(DoIDEAIteration(plain1,crypt1,plain2,
-		  locideastruct->arraysize,
-		  locideastruct->loops,
-		  Z,DK)>global_min_ticks) break;
-}
-
-/*
-** All's well if we get here.  Do the test.
-*/
-accumtime=0L;
-iterations=(double)0.0;
-
-do {
-	accumtime+=DoIDEAIteration(plain1,crypt1,plain2,
-		locideastruct->arraysize,
-		locideastruct->loops,Z,DK);
-	iterations+=(double)locideastruct->loops;
-} while(TicksToSecs(accumtime)<locideastruct->request_secs);
-
-/*
-** Clean up, calculate results, and go home.  Be sure to
-** show that we don't have to rerun adjustment code.
-*/
-FreeMemory((farvoid *)plain1,&systemerror);
-FreeMemory((farvoid *)crypt1,&systemerror);
-FreeMemory((farvoid *)plain2,&systemerror);
-locideastruct->iterspersec=iterations / TicksToFracSecs(accumtime);
-
-if(locideastruct->adjust==0)
-	locideastruct->adjust=1;
-
-return;
-
-}
-
-/********************
-** DoIDEAIteration **
-*********************
-** Execute a single iteration of the IDEA encryption algorithm.
-** Actually, a single iteration is one encryption and one
-** decryption.
-*/
-static ulong DoIDEAIteration(faruchar *plain1,
-			faruchar *crypt1,
-			faruchar *plain2,
-			ulong arraysize,
-			ulong nloops,
-			IDEAkey Z,
-			IDEAkey DK)
-{
-register ulong i;
-register ulong j;
-ulong elapsed;
-#ifdef DEBUG
-int status=0;
-#endif
-
-/*
-** Start the stopwatch.
-*/
-elapsed=StartStopwatch();
-
-/*
-** Do everything for nloops.
-*/
-for(i=0;i<nloops;i++)
-{
-	for(j=0;j<arraysize;j+=(sizeof(u16)*4))
-		cipher_idea((u16 *)(plain1+j),(u16 *)(crypt1+j),Z);       /* Encrypt */
-
-	for(j=0;j<arraysize;j+=(sizeof(u16)*4))
-		cipher_idea((u16 *)(crypt1+j),(u16 *)(plain2+j),DK);      /* Decrypt */
-}
-
-#ifdef DEBUG
-for(j=0;j<arraysize;j++)
-	if(*(plain1+j)!=*(plain2+j)){
-		printf("IDEA Error! \n");
-                status=1;
-                }
-if (status==0) printf("IDEA: OK\n");
-#endif
-
-/*
-** Get elapsed time.
-*/
-return(StopStopwatch(elapsed));
-}
-
-/********
-** mul **
-*********
-** Performs multiplication, modulo (2**16)+1.  This code is structured
-** on the assumption that untaken branches are cheaper than taken
-** branches, and that the compiler doesn't schedule branches.
-*/
-static u16 mul(register u16 a, register u16 b)
-{
-register u32 p;
-if(a)
-{       if(b)
-	{       p=(u32)(a*b);
-		b=low16(p);
-		a=(u16)(p>>16);
-		return(b-a+(b<a));
-	}
-	else
-		return(1-a);
-}
-else
-	return(1-b);
-}
-
-/********
-** inv **
-*********
-** Compute multiplicative inverse of x, modulo (2**16)+1
-** using Euclid's GCD algorithm.  It is unrolled twice
-** to avoid swapping the meaning of the registers.  And
-** some subtracts are changed to adds.
-*/
-static u16 inv(u16 x)
-{
-u16 t0, t1;
-u16 q, y;
-
-if(x<=1)
-	return(x);      /* 0 and 1 are self-inverse */
-t1=0x10001 / x;
-y=0x10001 % x;
-if(y==1)
-	return(low16(1-t1));
-t0=1;
-do {
-	q=x/y;
-	x=x%y;
-	t0+=q*t1;
-	if(x==1) return(t0);
-	q=y/x;
-	y=y%x;
-	t1+=q*t0;
-} while(y!=1);
-return(low16(1-t1));
-}
-
-/****************
-** en_key_idea **
-*****************
-** Compute IDEA encryption subkeys Z
-*/
-static void en_key_idea(u16 *userkey, u16 *Z)
-{
-int i,j;
-
-/*
-** shifts
-*/
-for(j=0;j<8;j++)
-	Z[j]=*userkey++;
-for(i=0;j<KEYLEN;j++)
-{       i++;
-	Z[i+7]=(Z[i&7]<<9)| (Z[(i+1) & 7] >> 7);
-	Z+=i&8;
-	i&=7;
-}
-return;
-}
-
-/****************
-** de_key_idea **
-*****************
-** Compute IDEA decryption subkeys DK from encryption
-** subkeys Z.
-*/
-static void de_key_idea(IDEAkey Z, IDEAkey DK)
-{
-IDEAkey TT;
-int j;
-u16 t1, t2, t3;
-u16 *p;
-p=(u16 *)(TT+KEYLEN);
-
-t1=inv(*Z++);
-t2=-*Z++;
-t3=-*Z++;
-*--p=inv(*Z++);
-*--p=t3;
-*--p=t2;
-*--p=t1;
-
-for(j=1;j<ROUNDS;j++)
-{       t1=*Z++;
-	*--p=*Z++;
-	*--p=t1;
-	t1=inv(*Z++);
-	t2=-*Z++;
-	t3=-*Z++;
-	*--p=inv(*Z++);
-	*--p=t2;
-	*--p=t3;
-	*--p=t1;
-}
-t1=*Z++;
-*--p=*Z++;
-*--p=t1;
-t1=inv(*Z++);
-t2=-*Z++;
-t3=-*Z++;
-*--p=inv(*Z++);
-*--p=t3;
-*--p=t2;
-*--p=t1;
-/*
-** Copy and destroy temp copy
-*/
-for(j=0,p=TT;j<KEYLEN;j++)
-{       *DK++=*p;
-	*p++=0;
-}
-
-return;
-}
-
-/*
-** MUL(x,y)
-** This #define creates a macro that computes x=x*y modulo 0x10001.
-** Requires temps t16 and t32.  Also requires y to be strictly 16
-** bits.  Here, I am using the simplest form.  May not be the
-** fastest. -- RG
-*/
-/* #define MUL(x,y) (x=mul(low16(x),y)) */
-
-/****************
-** cipher_idea **
-*****************
-** IDEA encryption/decryption algorithm.
-*/
-static void cipher_idea(u16 in[4],
-		u16 out[4],
-		register IDEAkey Z)
-{
-register u16 x1, x2, x3, x4, t1, t2;
-/* register u16 t16;
-register u16 t32; */
-int r=ROUNDS;
-
-x1=*in++;
-x2=*in++;
-x3=*in++;
-x4=*in;
-
-do {
-	MUL(x1,*Z++);
-	x2+=*Z++;
-	x3+=*Z++;
-	MUL(x4,*Z++);
-
-	t2=x1^x3;
-	MUL(t2,*Z++);
-	t1=t2+(x2^x4);
-	MUL(t1,*Z++);
-	t2=t1+t2;
-
-	x1^=t1;
-	x4^=t2;
-
-	t2^=x2;
-	x2=x3^t1;
-	x3=t2;
-} while(--r);
-MUL(x1,*Z++);
-*out++=x1;
-*out++=x3+*Z++;
-*out++=x2+*Z++;
-MUL(x4,*Z);
-*out=x4;
-return;
-}
-
-/************************
-** HUFFMAN COMPRESSION **
-************************/
-
-/**************
-** DoHuffman **
-***************
-** Execute a huffman compression on a block of plaintext.
-** Note that (as with IDEA encryption) an iteration of the
-** Huffman test includes a compression AND a decompression.
-** Also, the compression cycle includes building the
-** Huffman tree.
-*/
-void DoHuffman(void)
-{
-HuffStruct *lochuffstruct;      /* Loc pointer to global data */
-char *errorcontext;
-int systemerror;
-ulong accumtime;
-double iterations;
-farchar *comparray;
-farchar *decomparray;
-farchar *plaintext;
-
-/*
-** Link to global data
-*/
-lochuffstruct=&global_huffstruct;
-
-/*
-** Set error context.
-*/
-errorcontext="CPU:Huffman";
-
-/*
-** Allocate memory for the plaintext and the compressed text.
-** We'll be really pessimistic here, and allocate equal amounts
-** for both (though we know...well, we PRESUME) the compressed
-** stuff will take less than the plain stuff.
-** Also note that we'll build a 3rd buffer to decompress
-** into, and we preallocate space for the huffman tree.
-** (We presume that the Huffman tree will grow no larger
-** than 512 bytes.  This is actually a super-conservative
-** estimate...but, who cares?)
-*/
-plaintext=(farchar *)AllocateMemory(lochuffstruct->arraysize,&systemerror);
-if(systemerror)
-{       ReportError(errorcontext,systemerror);
-	ErrorExit();
-}
-comparray=(farchar *)AllocateMemory(lochuffstruct->arraysize,&systemerror);
-if(systemerror)
-{       ReportError(errorcontext,systemerror);
-	FreeMemory(plaintext,&systemerror);
-	ErrorExit();
-}
-decomparray=(farchar *)AllocateMemory(lochuffstruct->arraysize,&systemerror);
-if(systemerror)
-{       ReportError(errorcontext,systemerror);
-	FreeMemory(plaintext,&systemerror);
-	FreeMemory(comparray,&systemerror);
-	ErrorExit();
-}
-
-hufftree=(huff_node *)AllocateMemory(sizeof(huff_node) * 512,
-	&systemerror);
-if(systemerror)
-{       ReportError(errorcontext,systemerror);
-	FreeMemory(plaintext,&systemerror);
-	FreeMemory(comparray,&systemerror);
-	FreeMemory(decomparray,&systemerror);
-	ErrorExit();
-}
-
-/*
-** Build the plaintext buffer.  Since we want this to
-** actually be able to compress, we'll use the
-** wordcatalog to build the plaintext stuff.
-*/
-/*
-** Reset random number generator so things repeat.
-** added by Uwe F. Mayer
-*/
-randnum((int32)13);
-create_text_block(plaintext,lochuffstruct->arraysize-1,(ushort)500);
-plaintext[lochuffstruct->arraysize-1L]='\0';
-plaintextlen=lochuffstruct->arraysize;
-
-/*
-** See if we need to perform self adjustment loop.
-*/
-if(lochuffstruct->adjust==0)
-{
-	/*
-	** Do self-adjustment.  This involves initializing the
-	** # of loops and increasing the loop count until we
-	** get a number of loops that we can use.
-	*/
-	for(lochuffstruct->loops=100L;
-	  lochuffstruct->loops<MAXHUFFLOOPS;
-	  lochuffstruct->loops+=10L)
-		if(DoHuffIteration(plaintext,
-			comparray,
-			decomparray,
-		  lochuffstruct->arraysize,
-		  lochuffstruct->loops,
-		  hufftree)>global_min_ticks) break;
-}
-
-/*
-** All's well if we get here.  Do the test.
-*/
-accumtime=0L;
-iterations=(double)0.0;
-
-do {
-	accumtime+=DoHuffIteration(plaintext,
-		comparray,
-		decomparray,
-		lochuffstruct->arraysize,
-		lochuffstruct->loops,
-		hufftree);
-	iterations+=(double)lochuffstruct->loops;
-} while(TicksToSecs(accumtime)<lochuffstruct->request_secs);
-
-/*
-** Clean up, calculate results, and go home.  Be sure to
-** show that we don't have to rerun adjustment code.
-*/
-FreeMemory((farvoid *)plaintext,&systemerror);
-FreeMemory((farvoid *)comparray,&systemerror);
-FreeMemory((farvoid *)decomparray,&systemerror);
-FreeMemory((farvoid *)hufftree,&systemerror);
-lochuffstruct->iterspersec=iterations / TicksToFracSecs(accumtime);
-
-if(lochuffstruct->adjust==0)
-	lochuffstruct->adjust=1;
-
-}
-
-/*********************
-** create_text_line **
-**********************
-** Create a random line of text, stored at *dt.  The line may be
-** no more than nchars long.
-*/
-static void create_text_line(farchar *dt,
-			long nchars)
-{
-long charssofar;        /* # of characters so far */
-long tomove;            /* # of characters to move */
-char myword[40];        /* Local buffer for words */
-farchar *wordptr;       /* Pointer to word from catalog */
-
-charssofar=0;
-
-do {
-/*
-** Grab a random word from the wordcatalog
-*/
-/* wordptr=wordcatarray[abs_randwc((long)WORDCATSIZE)];*/
-wordptr=wordcatarray[abs_randwc((int32)WORDCATSIZE)];
-MoveMemory((farvoid *)myword,
-	(farvoid *)wordptr,
-	(unsigned long)strlen(wordptr)+1);
-
-/*
-** Append a blank.
-*/
-tomove=strlen(myword)+1;
-myword[tomove-1]=' ';
-
-/*
-** See how long it is.  If its length+charssofar > nchars, we have
-** to trim it.
-*/
-if((tomove+charssofar)>nchars)
-	tomove=nchars-charssofar;
-/*
-** Attach the word to the current line.  Increment counter.
-*/
-MoveMemory((farvoid *)dt,(farvoid *)myword,(unsigned long)tomove);
-charssofar+=tomove;
-dt+=tomove;
-
-/*
-** If we're done, bail out.  Otherwise, go get another word.
-*/
-} while(charssofar<nchars);
-
-return;
-}
-
-/**********************
-** create_text_block **
-***********************
-** Build a block of text randomly loaded with words.  The words
-** come from the wordcatalog (which must be loaded before you
-** call this).
-** *tb points to the memory where the text is to be built.
-** tblen is the # of bytes to put into the text block
-** maxlinlen is the maximum length of any line (line end indicated
-**  by a carriage return).
-*/
-static void create_text_block(farchar *tb,
-			ulong tblen,
-			ushort maxlinlen)
-{
-ulong bytessofar;       /* # of bytes so far */
-ulong linelen;          /* Line length */
-
-bytessofar=0L;
-do {
-
-/*
-** Pick a random length for a line and fill the line.
-** Make sure the line can fit (haven't exceeded tablen) and also
-** make sure you leave room to append a carriage return.
-*/
-linelen=abs_randwc(maxlinlen-6)+6;
-if((linelen+bytessofar)>tblen)
-	linelen=tblen-bytessofar;
-
-if(linelen>1)
-{
-	create_text_line(tb,linelen);
-}
-tb+=linelen-1;          /* Add the carriage return */
-*tb++='\n';
-
-bytessofar+=linelen;
-
-} while(bytessofar<tblen);
-
-}
-
-/********************
-** DoHuffIteration **
-*********************
-** Perform the huffman benchmark.  This routine
-**  (a) Builds the huffman tree
-**  (b) Compresses the text
-**  (c) Decompresses the text and verifies correct decompression
-*/
-static ulong DoHuffIteration(farchar *plaintext,
-	farchar *comparray,
-	farchar *decomparray,
-	ulong arraysize,
-	ulong nloops,
-	huff_node *hufftree)
-{
-int i;                          /* Index */
-long j;                         /* Bigger index */
-int root;                       /* Pointer to huffman tree root */
-float lowfreq1, lowfreq2;       /* Low frequency counters */
-int lowidx1, lowidx2;           /* Indexes of low freq. elements */
-long bitoffset;                 /* Bit offset into text */
-long textoffset;                /* Char offset into text */
-long maxbitoffset;              /* Holds limit of bit offset */
-long bitstringlen;              /* Length of bitstring */
-int c;                          /* Character from plaintext */
-char bitstring[30];             /* Holds bitstring */
-ulong elapsed;                  /* For stopwatch */
-#ifdef DEBUG
-int status=0;
-#endif
-
-/*
-** Start the stopwatch
-*/
-elapsed=StartStopwatch();
-
-/*
-** Do everything for nloops
-*/
-while(nloops--)
-{
-
-/*
-** Calculate the frequency of each byte value. Store the
-** results in what will become the "leaves" of the
-** Huffman tree.  Interior nodes will be built in those
-** nodes greater than node #255.
-*/
-for(i=0;i<256;i++)
-{
-	hufftree[i].freq=(float)0.0;
-	hufftree[i].c=(unsigned char)i;
-}
-
-for(j=0;j<arraysize;j++)
-	hufftree[(int)plaintext[j]].freq+=(float)1.0;
-
-for(i=0;i<256;i++)
-	if(hufftree[i].freq != (float)0.0)
-		hufftree[i].freq/=(float)arraysize;
-
-/* Reset the second half of the tree. Otherwise the loop below that
-** compares the frequencies up to index 512 makes no sense. Some
-** systems automatically zero out memory upon allocation, others (like
-** for example DEC Unix) do not. Depending on this the loop below gets
-** different data and different run times. On our alpha the data that
-** was arbitrarily assigned led to an underflow error at runtime. We
-** use that zeroed-out bits are in fact 0 as a float.
-** Uwe F. Mayer */
-bzero((char *)&(hufftree[256]),sizeof(huff_node)*256);
-/*
-** Build the huffman tree.  First clear all the parent
-** pointers and left/right pointers.  Also, discard all
-** nodes that have a frequency of true 0.  */
-for(i=0;i<512;i++)
-{       if(hufftree[i].freq==(float)0.0)
-		hufftree[i].parent=EXCLUDED;
-	else
-		hufftree[i].parent=hufftree[i].left=hufftree[i].right=-1;
-}
-
-/*
-** Go through the tree. Finding nodes of really low
-** frequency.
-*/
-root=255;                       /* Starting root node-1 */
-while(1)
-{
-	lowfreq1=(float)2.0; lowfreq2=(float)2.0;
-	lowidx1=-1; lowidx2=-1;
-	/*
-	** Find first lowest frequency.
-	*/
-	for(i=0;i<=root;i++)
-		if(hufftree[i].parent<0)
-			if(hufftree[i].freq<lowfreq1)
-			{       lowfreq1=hufftree[i].freq;
-				lowidx1=i;
-			}
-
-	/*
-	** Did we find a lowest value?  If not, the
-	** tree is done.
-	*/
-	if(lowidx1==-1) break;
-
-	/*
-	** Find next lowest frequency
-	*/
-	for(i=0;i<=root;i++)
-		if((hufftree[i].parent<0) && (i!=lowidx1))
-			if(hufftree[i].freq<lowfreq2)
-			{       lowfreq2=hufftree[i].freq;
-				lowidx2=i;
-			}
-
-	/*
-	** If we could only find one item, then that
-	** item is surely the root, and (as above) the
-	** tree is done.
-	*/
-	if(lowidx2==-1) break;
-
-	/*
-	** Attach the two new nodes to the current root, and
-	** advance the current root.
-	*/
-	root++;                 /* New root */
-	hufftree[lowidx1].parent=root;
-	hufftree[lowidx2].parent=root;
-	hufftree[root].freq=lowfreq1+lowfreq2;
-	hufftree[root].left=lowidx1;
-	hufftree[root].right=lowidx2;
-	hufftree[root].parent=-2;       /* Show root */
-}
-
-/*
-** Huffman tree built...compress the plaintext
-*/
-bitoffset=0L;                           /* Initialize bit offset */
-for(i=0;i<arraysize;i++)
-{
-	c=(int)plaintext[i];                 /* Fetch character */
-	/*
-	** Build a bit string for byte c
-	*/
-	bitstringlen=0;
-	while(hufftree[c].parent!=-2)
-	{       if(hufftree[hufftree[c].parent].left==c)
-			bitstring[bitstringlen]='0';
-		else
-			bitstring[bitstringlen]='1';
-		c=hufftree[c].parent;
-		bitstringlen++;
-	}
-
-	/*
-	** Step backwards through the bit string, setting
-	** bits in the compressed array as you go.
-	*/
-	while(bitstringlen--)
-	{       SetCompBit((u8 *)comparray,(u32)bitoffset,bitstring[bitstringlen]);
-		bitoffset++;
-	}
-}
-
-/*
-** Compression done.  Perform de-compression.
-*/
-maxbitoffset=bitoffset;
-bitoffset=0;
-textoffset=0;
-do {
-	i=root;
-	while(hufftree[i].left!=-1)
-	{       if(GetCompBit((u8 *)comparray,(u32)bitoffset)==0)
-			i=hufftree[i].left;
-		else
-			i=hufftree[i].right;
-		bitoffset++;
-	}
-	decomparray[textoffset]=hufftree[i].c;
-
-#ifdef DEBUG
-	if(hufftree[i].c != plaintext[textoffset])
-	{
-		/* Show error */
-		printf("Error at textoffset %ld\n",textoffset);
-		status=1;
-	}
-#endif
-	textoffset++;
-} while(bitoffset<maxbitoffset);
-
-}       /* End the big while(nloops--) from above */
-
-/*
-** All done
-*/
-#ifdef DEBUG
-  if (status==0) printf("Huffman: OK\n");
-#endif
-return(StopStopwatch(elapsed));
-}
-
-/***************
-** SetCompBit **
-****************
-** Set a bit in the compression array.  The value of the
-** bit is set according to char bitchar.
-*/
-static void SetCompBit(u8 *comparray,
-		u32 bitoffset,
-		char bitchar)
-{
-u32 byteoffset;
-int bitnumb;
-
-/*
-** First calculate which element in the comparray to
-** alter. and the bitnumber.
-*/
-byteoffset=bitoffset>>3;
-bitnumb=bitoffset % 8;
-
-/*
-** Set or clear
-*/
-if(bitchar=='1')
-	comparray[byteoffset]|=(1<<bitnumb);
-else
-	comparray[byteoffset]&=~(1<<bitnumb);
-
-return;
-}
-
-/***************
-** GetCompBit **
-****************
-** Return the bit value of a bit in the comparession array.
-** Returns 0 if the bit is clear, nonzero otherwise.
-*/
-static int GetCompBit(u8 *comparray,
-		u32 bitoffset)
-{
-u32 byteoffset;
-int bitnumb;
-
-/*
-** Calculate byte offset and bit number.
-*/
-byteoffset=bitoffset>>3;
-bitnumb=bitoffset % 8;
-
-/*
-** Fetch
-*/
-return((1<<bitnumb) & comparray[byteoffset] );
-}
-
-/********************************
-** BACK PROPAGATION NEURAL NET **
-*********************************
-** This code is a modified version of the code
-** that was submitted to BYTE Magazine by
-** Maureen Caudill.  It accomanied an article
-** that I CANNOT NOW RECALL.
-** The author's original heading/comment was
-** as follows:
-**
-**  Backpropagation Network
-**  Written by Maureen Caudill
-**  in Think C 4.0 on a Macintosh
-**
-**  (c) Maureen Caudill 1988-1991
-**  This network will accept 5x7 input patterns
-**  and produce 8 bit output patterns.
-**  The source code may be copied or modified without restriction,
-**  but no fee may be charged for its use.
-**
-** ++++++++++++++
-** I have modified the code so that it will work
-** on systems other than a Macintosh -- RG
-*/
-
-/***********
-** DoNNet **
-************
-** Perform the neural net benchmark.
-** Note that this benchmark is one of the few that
-** requires an input file.  That file is "NNET.DAT" and
-** should be on the local directory (from which the
-** benchmark program in launched).
-*/
-void DoNNET(void)
-{
-NNetStruct *locnnetstruct;      /* Local ptr to global data */
-char *errorcontext;
-ulong accumtime;
-double iterations;
-
-/*
-** Link to global data
-*/
-locnnetstruct=&global_nnetstruct;
-
-/*
-** Set error context
-*/
-errorcontext="CPU:NNET";
-
-/*
-** Init random number generator.
-** NOTE: It is important that the random number generator
-**  be re-initialized for every pass through this test.
-**  The NNET algorithm uses the random number generator
-**  to initialize the net.  Results are sensitive to
-**  the initial neural net state.
-*/
-/* randnum(3L); */
-randnum((int32)3);
-
-/*
-** Read in the input and output patterns.  We'll do this
-** only once here at the beginning.  These values don't
-** change once loaded.
-*/
-if(read_data_file()!=0)
-   ErrorExit();
-
-
-/*
-** See if we need to perform self adjustment loop.
-*/
-if(locnnetstruct->adjust==0)
-{
-	/*
-	** Do self-adjustment.  This involves initializing the
-	** # of loops and increasing the loop count until we
-	** get a number of loops that we can use.
-	*/
-	for(locnnetstruct->loops=1L;
-	  locnnetstruct->loops<MAXNNETLOOPS;
-	  locnnetstruct->loops++)
-	  {     /*randnum(3L); */
-		randnum((int32)3);
-		if(DoNNetIteration(locnnetstruct->loops)
-			>global_min_ticks) break;
-	  }
-}
-
-/*
-** All's well if we get here.  Do the test.
-*/
-accumtime=0L;
-iterations=(double)0.0;
-
-do {
-	/* randnum(3L); */    /* Gotta do this for Neural Net */
-	randnum((int32)3);    /* Gotta do this for Neural Net */
-	accumtime+=DoNNetIteration(locnnetstruct->loops);
-	iterations+=(double)locnnetstruct->loops;
-} while(TicksToSecs(accumtime)<locnnetstruct->request_secs);
-
-/*
-** Clean up, calculate results, and go home.  Be sure to
-** show that we don't have to rerun adjustment code.
-*/
-locnnetstruct->iterspersec=iterations / TicksToFracSecs(accumtime);
-
-if(locnnetstruct->adjust==0)
-	locnnetstruct->adjust=1;
-
-
-return;
-}
-
-/********************
-** DoNNetIteration **
-*********************
-** Do a single iteration of the neural net benchmark.
-** By iteration, we mean a "learning" pass.
-*/
-static ulong DoNNetIteration(ulong nloops)
-{
-ulong elapsed;          /* Elapsed time */
-int patt;
-
-/*
-** Run nloops learning cycles.  Notice that, counted with
-** the learning cycle is the weight randomization and
-** zeroing of changes.  This should reduce clock jitter,
-** since we don't have to stop and start the clock for
-** each iteration.
-*/
-elapsed=StartStopwatch();
-while(nloops--)
-{
-	randomize_wts();
-	zero_changes();
-	iteration_count=1;
-	learned = F;
-	numpasses = 0;
-	while (learned == F)
-	{
-		for (patt=0; patt<numpats; patt++)
-		{
-			worst_error = 0.0;      /* reset this every pass through data */
-			move_wt_changes();      /* move last pass's wt changes to momentum array */
-			do_forward_pass(patt);
-			do_back_pass(patt);
-			iteration_count++;
-		}
-		numpasses ++;
-		learned = check_out_error();
-	}
-#ifdef DEBUG
-printf("Learned in %d passes\n",numpasses);
-#endif
-}
-return(StopStopwatch(elapsed));
-}
-
-/*************************
-** do_mid_forward(patt) **
-**************************
-** Process the middle layer's forward pass
-** The activation of middle layer's neurode is the weighted
-** sum of the inputs from the input pattern, with sigmoid
-** function applied to the inputs.
-**/
-static void  do_mid_forward(int patt)
-{
-double  sum;
-int     neurode, i;
-
-for (neurode=0;neurode<MID_SIZE; neurode++)
-{
-	sum = 0.0;
-	for (i=0; i<IN_SIZE; i++)
-	{       /* compute weighted sum of input signals */
-		sum += mid_wts[neurode][i]*in_pats[patt][i];
-	}
-	/*
-	** apply sigmoid function f(x) = 1/(1+exp(-x)) to weighted sum
-	*/
-	sum = 1.0/(1.0+exp(-sum));
-	mid_out[neurode] = sum;
-}
-return;
-}
-
-/*********************
-** do_out_forward() **
-**********************
-** process the forward pass through the output layer
-** The activation of the output layer is the weighted sum of
-** the inputs (outputs from middle layer), modified by the
-** sigmoid function.
-**/
-static void  do_out_forward()
-{
-double sum;
-int neurode, i;
-
-for (neurode=0; neurode<OUT_SIZE; neurode++)
-{
-	sum = 0.0;
-	for (i=0; i<MID_SIZE; i++)
-	{       /*
-		** compute weighted sum of input signals
-		** from middle layer
-		*/
-		sum += out_wts[neurode][i]*mid_out[i];
-	}
-	/*
-	** Apply f(x) = 1/(1+exp(-x)) to weighted input
-	*/
-	sum = 1.0/(1.0+exp(-sum));
-	out_out[neurode] = sum;
-}
-return;
-}
-
-/*************************
-** display_output(patt) **
-**************************
-** Display the actual output vs. the desired output of the
-** network.
-** Once the training is complete, and the "learned" flag set
-** to TRUE, then display_output sends its output to both
-** the screen and to a text output file.
-**
-** NOTE: This routine has been disabled in the benchmark
-** version. -- RG
-**/
-/*
-void  display_output(int patt)
-{
-int             i;
-
-	fprintf(outfile,"\n Iteration # %d",iteration_count);
-	fprintf(outfile,"\n Desired Output:  ");
-
-	for (i=0; i<OUT_SIZE; i++)
-	{
-		fprintf(outfile,"%6.3f  ",out_pats[patt][i]);
-	}
-	fprintf(outfile,"\n Actual Output:   ");
-
-	for (i=0; i<OUT_SIZE; i++)
-	{
-		fprintf(outfile,"%6.3f  ",out_out[i]);
-	}
-	fprintf(outfile,"\n");
-	return;
-}
-*/
-
-/**********************
-** do_forward_pass() **
-***********************
-** control function for the forward pass through the network
-** NOTE: I have disabled the call to display_output() in
-**  the benchmark version -- RG.
-**/
-static void  do_forward_pass(int patt)
-{
-do_mid_forward(patt);   /* process forward pass, middle layer */
-do_out_forward();       /* process forward pass, output layer */
-/* display_output(patt);        ** display results of forward pass */
-return;
-}
-
-/***********************
-** do_out_error(patt) **
-************************
-** Compute the error for the output layer neurodes.
-** This is simply Desired - Actual.
-**/
-static void do_out_error(int patt)
-{
-int neurode;
-double error,tot_error, sum;
-
-tot_error = 0.0;
-sum = 0.0;
-for (neurode=0; neurode<OUT_SIZE; neurode++)
-{
-	out_error[neurode] = out_pats[patt][neurode] - out_out[neurode];
-	/*
-	** while we're here, also compute magnitude
-	** of total error and worst error in this pass.
-	** We use these to decide if we are done yet.
-	*/
-	error = out_error[neurode];
-	if (error <0.0)
-	{
-		sum += -error;
-		if (-error > tot_error)
-			tot_error = -error; /* worst error this pattern */
-	}
-	else
-	{
-		sum += error;
-		if (error > tot_error)
-			tot_error = error; /* worst error this pattern */
-	}
-}
-avg_out_error[patt] = sum/OUT_SIZE;
-tot_out_error[patt] = tot_error;
-return;
-}
-
-/***********************
-** worst_pass_error() **
-************************
-** Find the worst and average error in the pass and save it
-**/
-static void  worst_pass_error()
-{
-double error,sum;
-
-int i;
-
-error = 0.0;
-sum = 0.0;
-for (i=0; i<numpats; i++)
-{
-	if (tot_out_error[i] > error) error = tot_out_error[i];
-	sum += avg_out_error[i];
-}
-worst_error = error;
-average_error = sum/numpats;
-return;
-}
-
-/*******************
-** do_mid_error() **
-********************
-** Compute the error for the middle layer neurodes
-** This is based on the output errors computed above.
-** Note that the derivative of the sigmoid f(x) is
-**        f'(x) = f(x)(1 - f(x))
-** Recall that f(x) is merely the output of the middle
-** layer neurode on the forward pass.
-**/
-static void do_mid_error()
-{
-double sum;
-int neurode, i;
-
-for (neurode=0; neurode<MID_SIZE; neurode++)
-{
-	sum = 0.0;
-	for (i=0; i<OUT_SIZE; i++)
-		sum += out_wts[i][neurode]*out_error[i];
-
-	/*
-	** apply the derivative of the sigmoid here
-	** Because of the choice of sigmoid f(I), the derivative
-	** of the sigmoid is f'(I) = f(I)(1 - f(I))
-	*/
-	mid_error[neurode] = mid_out[neurode]*(1-mid_out[neurode])*sum;
-}
-return;
-}
-
-/*********************
-** adjust_out_wts() **
-**********************
-** Adjust the weights of the output layer.  The error for
-** the output layer has been previously propagated back to
-** the middle layer.
-** Use the Delta Rule with momentum term to adjust the weights.
-**/
-static void adjust_out_wts()
-{
-int weight, neurode;
-double learn,delta,alph;
-
-learn = BETA;
-alph  = ALPHA;
-for (neurode=0; neurode<OUT_SIZE; neurode++)
-{
-	for (weight=0; weight<MID_SIZE; weight++)
-	{
-		/* standard delta rule */
-		delta = learn * out_error[neurode] * mid_out[weight];
-
-		/* now the momentum term */
-		delta += alph * out_wt_change[neurode][weight];
-		out_wts[neurode][weight] += delta;
-
-		/* keep track of this pass's cum wt changes for next pass's momentum */
-		out_wt_cum_change[neurode][weight] += delta;
-	}
-}
-return;
-}
-
-/*************************
-** adjust_mid_wts(patt) **
-**************************
-** Adjust the middle layer weights using the previously computed
-** errors.
-** We use the Generalized Delta Rule with momentum term
-**/
-static void adjust_mid_wts(int patt)
-{
-int weight, neurode;
-double learn,alph,delta;
-
-learn = BETA;
-alph  = ALPHA;
-for (neurode=0; neurode<MID_SIZE; neurode++)
-{
-	for (weight=0; weight<IN_SIZE; weight++)
-	{
-		/* first the basic delta rule */
-		delta = learn * mid_error[neurode] * in_pats[patt][weight];
-
-		/* with the momentum term */
-		delta += alph * mid_wt_change[neurode][weight];
-		mid_wts[neurode][weight] += delta;
-
-		/* keep track of this pass's cum wt changes for next pass's momentum */
-		mid_wt_cum_change[neurode][weight] += delta;
-	}
-}
-return;
-}
-
-/*******************
-** do_back_pass() **
-********************
-** Process the backward propagation of error through network.
-**/
-void  do_back_pass(int patt)
-{
-
-do_out_error(patt);
-do_mid_error();
-adjust_out_wts();
-adjust_mid_wts(patt);
-
-return;
-}
-
-
-/**********************
-** move_wt_changes() **
-***********************
-** Move the weight changes accumulated last pass into the wt-change
-** array for use by the momentum term in this pass. Also zero out
-** the accumulating arrays after the move.
-**/
-static void move_wt_changes()
-{
-int i,j;
-
-for (i = 0; i<MID_SIZE; i++)
-	for (j = 0; j<IN_SIZE; j++)
-	{
-		mid_wt_change[i][j] = mid_wt_cum_change[i][j];
-		/*
-		** Zero it out for next pass accumulation.
-		*/
-		mid_wt_cum_change[i][j] = 0.0;
-	}
-
-for (i = 0; i<OUT_SIZE; i++)
-	for (j=0; j<MID_SIZE; j++)
-	{
-		out_wt_change[i][j] = out_wt_cum_change[i][j];
-		out_wt_cum_change[i][j] = 0.0;
-	}
-
-return;
-}
-
-/**********************
-** check_out_error() **
-***********************
-** Check to see if the error in the output layer is below
-** MARGIN*OUT_SIZE for all output patterns.  If so, then
-** assume the network has learned acceptably well.  This
-** is simply an arbitrary measure of how well the network
-** has learned -- many other standards are possible.
-**/
-static int check_out_error()
-{
-int result,i,error;
-
-result  = T;
-error   = F;
-worst_pass_error();     /* identify the worst error in this pass */
-
-/*
-#ifdef DEBUG
-printf("\n Iteration # %d",iteration_count);
-#endif
-*/
-for (i=0; i<numpats; i++)
-{
-/*      printf("\n Error pattern %d:   Worst: %8.3f; Average: %8.3f",
-	  i+1,tot_out_error[i], avg_out_error[i]);
-	fprintf(outfile,
-	 "\n Error pattern %d:   Worst: %8.3f; Average: %8.3f",
-	 i+1,tot_out_error[i]);
-*/
-
-	if (worst_error >= STOP) result = F;
-	if (tot_out_error[i] >= 16.0) error = T;
-}
-
-if (error == T) result = ERR;
-
-
-#ifdef DEBUG
-/* printf("\n Error this pass thru data:   Worst: %8.3f; Average: %8.3f",
- worst_error,average_error);
-*/
-/* fprintf(outfile,
- "\n Error this pass thru data:   Worst: %8.3f; Average: %8.3f",
-  worst_error, average_error); */
-#endif
-
-return(result);
-}
-
-
-/*******************
-** zero_changes() **
-********************
-** Zero out all the wt change arrays
-**/
-static void zero_changes()
-{
-int i,j;
-
-for (i = 0; i<MID_SIZE; i++)
-{
-	for (j=0; j<IN_SIZE; j++)
-	{
-		mid_wt_change[i][j] = 0.0;
-		mid_wt_cum_change[i][j] = 0.0;
-	}
-}
-
-for (i = 0; i< OUT_SIZE; i++)
-{
-	for (j=0; j<MID_SIZE; j++)
-	{
-		out_wt_change[i][j] = 0.0;
-		out_wt_cum_change[i][j] = 0.0;
-	}
-}
-return;
-}
-
-
-/********************
-** randomize_wts() **
-*********************
-** Intialize the weights in the middle and output layers to
-** random values between -0.25..+0.25
-** Function rand() returns a value between 0 and 32767.
-**
-** NOTE: Had to make alterations to how the random numbers were
-** created.  -- RG.
-**/
-static void randomize_wts()
-{
-int neurode,i;
-double value;
-
-/*
-** Following not used int benchmark version -- RG
-**
-**        printf("\n Please enter a random number seed (1..32767):  ");
-**        scanf("%d", &i);
-**        srand(i);
-*/
-
-for (neurode = 0; neurode<MID_SIZE; neurode++)
-{
-	for(i=0; i<IN_SIZE; i++)
-	{
-	        /* value=(double)abs_randwc(100000L); */
-		value=(double)abs_randwc((int32)100000);
-		value=value/(double)100000.0 - (double) 0.5;
-		mid_wts[neurode][i] = value/2;
-	}
-}
-for (neurode=0; neurode<OUT_SIZE; neurode++)
-{
-	for(i=0; i<MID_SIZE; i++)
-	{
-	        /* value=(double)abs_randwc(100000L); */
-		value=(double)abs_randwc((int32)100000);
-		value=value/(double)10000.0 - (double) 0.5;
-		out_wts[neurode][i] = value/2;
-	}
-}
-
-return;
-}
-
-
-/*********************
-** read_data_file() **
-**********************
-** Read in the input data file and store the patterns in
-** in_pats and out_pats.
-** The format for the data file is as follows:
-**
-** line#   data expected
-** -----   ------------------------------
-** 1               In-X-size,in-y-size,out-size
-** 2               number of patterns in file
-** 3               1st X row of 1st input pattern
-** 4..             following rows of 1st input pattern pattern
-**                 in-x+2  y-out pattern
-**                                 1st X row of 2nd pattern
-**                 etc.
-**
-** Each row of data is separated by commas or spaces.
-** The data is expected to be ascii text corresponding to
-** either a +1 or a 0.
-**
-** Sample input for a 1-pattern file (The comments to the
-** right may NOT be in the file unless more sophisticated
-** parsing of the input is done.):
-**
-** 5,7,8                      input is 5x7 grid, output is 8 bits
-** 1                          one pattern in file
-** 0,1,1,1,0                  beginning of pattern for "O"
-** 1,0,0,0,1
-** 1,0,0,0,1
-** 1,0,0,0,1
-** 1,0,0,0,1
-** 1,0,0,0,0
-** 0,1,1,1,0
-** 0,1,0,0,1,1,1,1            ASCII code for "O" -- 0100 1111
-**
-** Clearly, this simple scheme can be expanded or enhanced
-** any way you like.
-**
-** Returns -1 if any file error occurred, otherwise 0.
-**/
-static int read_data_file()
-{
-FILE *infile;
-
-int xinsize,yinsize,youtsize;
-int patt, element, i, row;
-int vals_read;
-int val1,val2,val3,val4,val5,val6,val7,val8;
-
-/* printf("\n Opening and retrieving data from file."); */
-
-infile = fopen(inpath, "r");
-if (infile == NULL)
-{
-	printf("\n CPU:NNET--error in opening file!");
-	return -1 ;
-}
-vals_read =fscanf(infile,"%d  %d  %d",&xinsize,&yinsize,&youtsize);
-if (vals_read != 3)
-{
-	printf("\n CPU:NNET -- Should read 3 items in line one; did read %d",vals_read);
-	return -1;
-}
-vals_read=fscanf(infile,"%d",&numpats);
-if (vals_read !=1)
-{
-	printf("\n CPU:NNET -- Should read 1 item in line 2; did read %d",vals_read);
-	return -1;
-}
-if (numpats > MAXPATS)
-	numpats = MAXPATS;
-
-for (patt=0; patt<numpats; patt++)
-{
-	element = 0;
-	for (row = 0; row<yinsize; row++)
-	{
-		vals_read = fscanf(infile,"%d  %d  %d  %d  %d",
-			&val1, &val2, &val3, &val4, &val5);
-		if (vals_read != 5)
-		{
-			printf ("\n CPU:NNET -- failure in reading input!");
-			return -1;
-		}
-		element=row*xinsize;
-
-		in_pats[patt][element] = (double) val1; element++;
-		in_pats[patt][element] = (double) val2; element++;
-		in_pats[patt][element] = (double) val3; element++;
-		in_pats[patt][element] = (double) val4; element++;
-		in_pats[patt][element] = (double) val5; element++;
-	}
-	for (i=0;i<IN_SIZE; i++)
-	{
-		if (in_pats[patt][i] >= 0.9)
-			in_pats[patt][i] = 0.9;
-		if (in_pats[patt][i] <= 0.1)
-			in_pats[patt][i] = 0.1;
-	}
-	element = 0;
-	vals_read = fscanf(infile,"%d  %d  %d  %d  %d  %d  %d  %d",
-		&val1, &val2, &val3, &val4, &val5, &val6, &val7, &val8);
-
-	out_pats[patt][element] = (double) val1; element++;
-	out_pats[patt][element] = (double) val2; element++;
-	out_pats[patt][element] = (double) val3; element++;
-	out_pats[patt][element] = (double) val4; element++;
-	out_pats[patt][element] = (double) val5; element++;
-	out_pats[patt][element] = (double) val6; element++;
-	out_pats[patt][element] = (double) val7; element++;
-	out_pats[patt][element] = (double) val8; element++;
-}
-
-/* printf("\n Closing the input file now. "); */
-
-fclose(infile);
-return(0);
-}
-
-/*********************
-** initialize_net() **
-**********************
-** Do all the initialization stuff before beginning
-*/
-/*
-static int initialize_net()
-{
-int err_code;
-
-randomize_wts();
-zero_changes();
-err_code = read_data_file();
-iteration_count = 1;
-return(err_code);
-}
-*/
-
-/**********************
-** display_mid_wts() **
-***********************
-** Display the weights on the middle layer neurodes
-** NOTE: This routine is not used in the benchmark
-**  test -- RG
-**/
-/* static void display_mid_wts()
-{
-int             neurode, weight, row, col;
-
-fprintf(outfile,"\n Weights of Middle Layer neurodes:");
-
-for (neurode=0; neurode<MID_SIZE; neurode++)
-{
-	fprintf(outfile,"\n  Mid Neurode # %d",neurode);
-	for (row=0; row<IN_Y_SIZE; row++)
-	{
-		fprintf(outfile,"\n ");
-		for (col=0; col<IN_X_SIZE; col++)
-		{
-			weight = IN_X_SIZE * row + col;
-			fprintf(outfile," %8.3f ", mid_wts[neurode][weight]);
-		}
-	}
-}
-return;
-}
-*/
-/**********************
-** display_out_wts() **
-***********************
-** Display the weights on the output layer neurodes
-** NOTE: This code is not used in the benchmark
-**  test -- RG
-*/
-/* void  display_out_wts()
-{
-int             neurode, weight;
-
-	fprintf(outfile,"\n Weights of Output Layer neurodes:");
-
-	for (neurode=0; neurode<OUT_SIZE; neurode++)
-	{
-		fprintf(outfile,"\n  Out Neurode # %d \n",neurode);
-		for (weight=0; weight<MID_SIZE; weight++)
-		{
-			fprintf(outfile," %8.3f ", out_wts[neurode][weight]);
-		}
-	}
-	return;
-}
-*/
-
-/***********************
-**  LU DECOMPOSITION  **
-** (Linear Equations) **
-************************
-** These routines come from "Numerical Recipes in Pascal".
-** Note that, as in the assignment algorithm, though we
-** separately define LUARRAYROWS and LUARRAYCOLS, the two
-** must be the same value (this routine depends on a square
-** matrix).
-*/
-
-/*********
-** DoLU **
-**********
-** Perform the LU decomposition benchmark.
-*/
-void DoLU(void)
-{
-LUStruct *loclustruct;  /* Local pointer to global data */
-char *errorcontext;
-int systemerror;
-fardouble *a;
-fardouble *b;
-fardouble *abase;
-fardouble *bbase;
-LUdblptr ptra;
-int n;
-int i;
-ulong accumtime;
-double iterations;
-
-/*
-** Link to global data
-*/
-loclustruct=&global_lustruct;
-
-/*
-** Set error context.
-*/
-errorcontext="FPU:LU";
-
-/*
-** Our first step is to build a "solvable" problem.  This
-** will become the "seed" set that all others will be
-** derived from. (I.E., we'll simply copy these arrays
-** into the others.
-*/
-a=(fardouble *)AllocateMemory(sizeof(double) * LUARRAYCOLS * LUARRAYROWS,
-		&systemerror);
-b=(fardouble *)AllocateMemory(sizeof(double) * LUARRAYROWS,
-		&systemerror);
-n=LUARRAYROWS;
-
-/*
-** We need to allocate a temp vector that is used by the LU
-** algorithm.  This removes the allocation routine from the
-** timing.
-*/
-LUtempvv=(fardouble *)AllocateMemory(sizeof(double)*LUARRAYROWS,
-	&systemerror);
-
-/*
-** Build a problem to be solved.
-*/
-ptra.ptrs.p=a;                  /* Gotta coerce linear array to 2D array */
-build_problem(*ptra.ptrs.ap,n,b);
-
-/*
-** Now that we have a problem built, see if we need to do
-** auto-adjust.  If so, repeatedly call the DoLUIteration routine,
-** increasing the number of solutions per iteration as you go.
-*/
-if(loclustruct->adjust==0)
-{
-	loclustruct->numarrays=0;
-	for(i=1;i<=MAXLUARRAYS;i++)
-	{
-		abase=(fardouble *)AllocateMemory(sizeof(double) *
-			LUARRAYCOLS*LUARRAYROWS*(i+1),&systemerror);
-		if(systemerror)
-		{       ReportError(errorcontext,systemerror);
-			LUFreeMem(a,b,(fardouble *)NULL,(fardouble *)NULL);
-			ErrorExit();
-		}
-		bbase=(fardouble *)AllocateMemory(sizeof(double) *
-			LUARRAYROWS*(i+1),&systemerror);
-		if(systemerror)
-		{       ReportError(errorcontext,systemerror);
-			LUFreeMem(a,b,abase,(fardouble *)NULL);
-			ErrorExit();
-		}
-		if(DoLUIteration(a,b,abase,bbase,i)>global_min_ticks)
-		{       loclustruct->numarrays=i;
-			break;
-		}
-		/*
-		** Not enough arrays...free them all and try again
-		*/
-		FreeMemory((farvoid *)abase,&systemerror);
-		FreeMemory((farvoid *)bbase,&systemerror);
-	}
-	/*
-	** Were we able to do it?
-	*/
-	if(loclustruct->numarrays==0)
-	{       printf("FPU:LU -- Array limit reached\n");
-		LUFreeMem(a,b,abase,bbase);
-		ErrorExit();
-	}
-}
-else
-{       /*
-	** Don't need to adjust -- just allocate the proper
-	** number of arrays and proceed.
-	*/
-	abase=(fardouble *)AllocateMemory(sizeof(double) *
-		LUARRAYCOLS*LUARRAYROWS*loclustruct->numarrays,
-		&systemerror);
-	if(systemerror)
-	{       ReportError(errorcontext,systemerror);
-		LUFreeMem(a,b,(fardouble *)NULL,(fardouble *)NULL);
-		ErrorExit();
-	}
-	bbase=(fardouble *)AllocateMemory(sizeof(double) *
-		LUARRAYROWS*loclustruct->numarrays,&systemerror);
-	if(systemerror)
-	{
-		ReportError(errorcontext,systemerror);
-		LUFreeMem(a,b,abase,(fardouble *)NULL);
-		ErrorExit();
-	}
-}
-/*
-** All's well if we get here.  Do the test.
-*/
-accumtime=0L;
-iterations=(double)0.0;
-
-do {
-	accumtime+=DoLUIteration(a,b,abase,bbase,
-		loclustruct->numarrays);
-	iterations+=(double)loclustruct->numarrays;
-} while(TicksToSecs(accumtime)<loclustruct->request_secs);
-
-/*
-** Clean up, calculate results, and go home.  Be sure to
-** show that we don't have to rerun adjustment code.
-*/
-loclustruct->iterspersec=iterations / TicksToFracSecs(accumtime);
-
-if(loclustruct->adjust==0)
-	loclustruct->adjust=1;
-
-LUFreeMem(a,b,abase,bbase);
-return;
-}
-
-/**************
-** LUFreeMem **
-***************
-** Release memory associated with LU benchmark.
-*/
-static void LUFreeMem(fardouble *a, fardouble *b,
-			fardouble *abase,fardouble *bbase)
-{
-int systemerror;
-
-FreeMemory((farvoid *)a,&systemerror);
-FreeMemory((farvoid *)b,&systemerror);
-FreeMemory((farvoid *)LUtempvv,&systemerror);
-
-if(abase!=(fardouble *)NULL) FreeMemory((farvoid *)abase,&systemerror);
-if(bbase!=(fardouble *)NULL) FreeMemory((farvoid *)bbase,&systemerror);
-return;
-}
-
-/******************
-** DoLUIteration **
-*******************
-** Perform an iteration of the LU decomposition benchmark.
-** An iteration refers to the repeated solution of several
-** identical matrices.
-*/
-static ulong DoLUIteration(fardouble *a,fardouble *b,
-		fardouble *abase, fardouble *bbase,
-		ulong numarrays)
-{
-fardouble *locabase;
-fardouble *locbbase;
-LUdblptr ptra;  /* For converting ptr to 2D array */
-ulong elapsed;
-ulong j,i;              /* Indexes */
-
-
-/*
-** Move the seed arrays (a & b) into the destination
-** arrays;
-*/
-for(j=0;j<numarrays;j++)
-{       locabase=abase+j*LUARRAYROWS*LUARRAYCOLS;
-	locbbase=bbase+j*LUARRAYROWS;
-	for(i=0;i<LUARRAYROWS*LUARRAYCOLS;i++)
-		*(locabase+i)=*(a+i);
-	for(i=0;i<LUARRAYROWS;i++)
-		*(locbbase+i)=*(b+i);
-}
-
-/*
-** Do test...begin timing.
-*/
-elapsed=StartStopwatch();
-for(i=0;i<numarrays;i++)
-{       locabase=abase+i*LUARRAYROWS*LUARRAYCOLS;
-	locbbase=bbase+i*LUARRAYROWS;
-	ptra.ptrs.p=locabase;
-	lusolve(*ptra.ptrs.ap,LUARRAYROWS,locbbase);
-}
-
-return(StopStopwatch(elapsed));
-}
-
-/******************
-** build_problem **
-*******************
-** Constructs a solvable set of linear equations.  It does this by
-** creating an identity matrix, then loading the solution vector
-** with random numbers.  After that, the identity matrix and
-** solution vector are randomly "scrambled".  Scrambling is
-** done by (a) randomly selecting a row and multiplying that
-** row by a random number and (b) adding one randomly-selected
-** row to another.
-*/
-static void build_problem(double a[][LUARRAYCOLS],
-		int n,
-		double b[LUARRAYROWS])
-{
-long i,j,k,k1;  /* Indexes */
-double rcon;     /* Random constant */
-
-/*
-** Reset random number generator
-*/
-/* randnum(13L); */
-randnum((int32)13);
-
-/*
-** Build an identity matrix.
-** We'll also use this as a chance to load the solution
-** vector.
-*/
-for(i=0;i<n;i++)
-{       /* b[i]=(double)(abs_randwc(100L)+1L); */
-	b[i]=(double)(abs_randwc((int32)100)+(int32)1);
-	for(j=0;j<n;j++)
-		if(i==j)
-		        /* a[i][j]=(double)(abs_randwc(1000L)+1L); */
-			a[i][j]=(double)(abs_randwc((int32)1000)+(int32)1);
-		else
-			a[i][j]=(double)0.0;
-}
-
-#ifdef DEBUG
-printf("Problem:\n");
-for(i=0;i<n;i++)
-{
-/*
-	for(j=0;j<n;j++)
-		printf("%6.2f ",a[i][j]);
-*/
-	printf("%.0f/%.0f=%.2f\t",b[i],a[i][i],b[i]/a[i][i]);
-/*
-        printf("\n");
-*/
-}
-#endif
-
-/*
-** Scramble.  Do this 8n times.  See comment above for
-** a description of the scrambling process.
-*/
-
-for(i=0;i<8*n;i++)
-{
-	/*
-	** Pick a row and a random constant.  Multiply
-	** all elements in the row by the constant.
-	*/
- /*       k=abs_randwc((long)n);
-	rcon=(double)(abs_randwc(20L)+1L);
-	for(j=0;j<n;j++)
-		a[k][j]=a[k][j]*rcon;
-	b[k]=b[k]*rcon;
-*/
-	/*
-	** Pick two random rows and add second to
-	** first.  Note that we also occasionally multiply
-	** by minus 1 so that we get a subtraction operation.
-	*/
-        /* k=abs_randwc((long)n); */
-        /* k1=abs_randwc((long)n); */
-	k=abs_randwc((int32)n);
-	k1=abs_randwc((int32)n);
-	if(k!=k1)
-	{
-		if(k<k1) rcon=(double)1.0;
-			else rcon=(double)-1.0;
-		for(j=0;j<n;j++)
-			a[k][j]+=a[k1][j]*rcon;;
-		b[k]+=b[k1]*rcon;
-	}
-}
-
-return;
-}
-
-
-/***********
-** ludcmp **
-************
-** From the procedure of the same name in "Numerical Recipes in Pascal",
-** by Press, Flannery, Tukolsky, and Vetterling.
-** Given an nxn matrix a[], this routine replaces it by the LU
-** decomposition of a rowwise permutation of itself.  a[] and n
-** are input.  a[] is output, modified as follows:
-**   --                       --
-**  |  b(1,1) b(1,2) b(1,3)...  |
-**  |  a(2,1) b(2,2) b(2,3)...  |
-**  |  a(3,1) a(3,2) b(3,3)...  |
-**  |  a(4,1) a(4,2) a(4,3)...  |
-**  |  ...                      |
-**   --                        --
-**
-** Where the b(i,j) elements form the upper triangular matrix of the
-** LU decomposition, and the a(i,j) elements form the lower triangular
-** elements.  The LU decomposition is calculated so that we don't
-** need to store the a(i,i) elements (which would have laid along the
-** diagonal and would have all been 1).
-**
-** indx[] is an output vector that records the row permutation
-** effected by the partial pivoting; d is output as +/-1 depending
-** on whether the number of row interchanges was even or odd,
-** respectively.
-** Returns 0 if matrix singular, else returns 1.
-*/
-static int ludcmp(double a[][LUARRAYCOLS],
-		int n,
-		int indx[],
-		int *d)
-{
-
-double big;     /* Holds largest element value */
-double sum;
-double dum;     /* Holds dummy value */
-int i,j,k;      /* Indexes */
-int imax=0;     /* Holds max index value */
-double tiny;    /* A really small number */
-
-tiny=(double)1.0e-20;
-
-*d=1;           /* No interchanges yet */
-
-for(i=0;i<n;i++)
-{       big=(double)0.0;
-	for(j=0;j<n;j++)
-		if((double)fabs(a[i][j]) > big)
-			big=fabs(a[i][j]);
-	/* Bail out on singular matrix */
-	if(big==(double)0.0) return(0);
-	LUtempvv[i]=1.0/big;
-}
-
-/*
-** Crout's algorithm...loop over columns.
-*/
-for(j=0;j<n;j++)
-{       if(j!=0)
-		for(i=0;i<j;i++)
-		{       sum=a[i][j];
-			if(i!=0)
-				for(k=0;k<i;k++)
-					sum-=(a[i][k]*a[k][j]);
-			a[i][j]=sum;
-		}
-	big=(double)0.0;
-	for(i=j;i<n;i++)
-	{       sum=a[i][j];
-		if(j!=0)
-			for(k=0;k<j;k++)
-				sum-=a[i][k]*a[k][j];
-		a[i][j]=sum;
-		dum=LUtempvv[i]*fabs(sum);
-		if(dum>=big)
-		{       big=dum;
-			imax=i;
-		}
-	}
-	if(j!=imax)             /* Interchange rows if necessary */
-	{       for(k=0;k<n;k++)
-		{       dum=a[imax][k];
-			a[imax][k]=a[j][k];
-			a[j][k]=dum;
-		}
-		*d=-*d;         /* Change parity of d */
-		dum=LUtempvv[imax];
-		LUtempvv[imax]=LUtempvv[j]; /* Don't forget scale factor */
-		LUtempvv[j]=dum;
-	}
-	indx[j]=imax;
-	/*
-	** If the pivot element is zero, the matrix is singular
-	** (at least as far as the precision of the machine
-	** is concerned.)  We'll take the original author's
-	** recommendation and replace 0.0 with "tiny".
-	*/
-	if(a[j][j]==(double)0.0)
-		a[j][j]=tiny;
-
-	if(j!=(n-1))
-	{       dum=1.0/a[j][j];
-		for(i=j+1;i<n;i++)
-			a[i][j]=a[i][j]*dum;
-	}
-}
-
-return(1);
-}
-
-/***********
-** lubksb **
-************
-** Also from "Numerical Recipes in Pascal".
-** This routine solves the set of n linear equations A X = B.
-** Here, a[][] is input, not as the matrix A, but as its
-** LU decomposition, created by the routine ludcmp().
-** Indx[] is input as the permutation vector returned by ludcmp().
-**  b[] is input as the right-hand side an returns the
-** solution vector X.
-** a[], n, and indx are not modified by this routine and
-** can be left in place for different values of b[].
-** This routine takes into account the possibility that b will
-** begin with many zero elements, so it is efficient for use in
-** matrix inversion.
-*/
-static void lubksb( double a[][LUARRAYCOLS],
-		int n,
-		int indx[LUARRAYROWS],
-		double b[LUARRAYROWS])
-{
-
-int i,j;        /* Indexes */
-int ip;         /* "pointer" into indx */
-int ii;
-double sum;
-
-/*
-** When ii is set to a positive value, it will become
-** the index of the first nonvanishing element of b[].
-** We now do the forward substitution. The only wrinkle
-** is to unscramble the permutation as we go.
-*/
-ii=-1;
-for(i=0;i<n;i++)
-{       ip=indx[i];
-	sum=b[ip];
-	b[ip]=b[i];
-	if(ii!=-1)
-		for(j=ii;j<i;j++)
-			sum=sum-a[i][j]*b[j];
-	else
-		/*
-		** If a nonzero element is encountered, we have
-		** to do the sums in the loop above.
-		*/
-		if(sum!=(double)0.0)
-			ii=i;
-	b[i]=sum;
-}
-/*
-** Do backsubstitution
-*/
-for(i=(n-1);i>=0;i--)
-{
-	sum=b[i];
-	if(i!=(n-1))
-		for(j=(i+1);j<n;j++)
-			sum=sum-a[i][j]*b[j];
-	b[i]=sum/a[i][i];
-}
-return;
-}
-
-/************
-** lusolve **
-*************
-** Solve a linear set of equations: A x = b
-** Original matrix A will be destroyed by this operation.
-** Returns 0 if matrix is singular, 1 otherwise.
-*/
-static int lusolve(double a[][LUARRAYCOLS],
-		int n,
-		double b[LUARRAYROWS])
-{
-int indx[LUARRAYROWS];
-int d;
-#ifdef DEBUG
-int i,j;
-#endif
-
-if(ludcmp(a,n,indx,&d)==0) return(0);
-
-/* Matrix not singular -- proceed */
-lubksb(a,n,indx,b);
-
-#ifdef DEBUG
-printf("Solution:\n");
-for(i=0;i<n;i++)
-{
-  for(j=0;j<n;j++){
-  /*
-    printf("%6.2f ",a[i][j]);
-  */
-  }
-  printf("%6.2f\t",b[i]);
-  /*
-    printf("\n");
-  */
-}
-printf("\n");
-#endif
-
-return(1);
-}
diff --git a/nbench1.h b/nbench1.h
deleted file mode 100644
index 13a5907..0000000
--- a/nbench1.h
+++ /dev/null
@@ -1,428 +0,0 @@
-/*
-** nbench1.h
-** Header for nbench1.c
-** BYTEmark (tm)
-** BYTE's Native Mode Benchmarks
-** Rick Grehan, BYTE Magazine
-**
-** Creation:
-** Revision: 3/95;10/95
-**
-** DISCLAIMER
-** The source, executable, and documentation files that comprise
-** the BYTEmark benchmarks are made available on an "as is" basis.
-** This means that we at BYTE Magazine have made every reasonable
-** effort to verify that the there are no errors in the source and
-** executable code.  We cannot, however, guarantee that the programs
-** are error-free.  Consequently, McGraw-HIll and BYTE Magazine make
-** no claims in regard to the fitness of the source code, executable
-** code, and documentation of the BYTEmark.
-**  Furthermore, BYTE Magazine, McGraw-Hill, and all employees
-** of McGraw-Hill cannot be held responsible for any damages resulting
-** from the use of this code or the results obtained from using
-** this code.
-*/
-
-/*
-** DEFINES
-*/
-/* #define DEBUG */
-
-/*
-** EXTERNALS
-*/
-extern ulong global_min_ticks;
-
-extern SortStruct global_numsortstruct;
-extern SortStruct global_strsortstruct;
-extern BitOpStruct global_bitopstruct;
-extern EmFloatStruct global_emfloatstruct;
-extern FourierStruct global_fourierstruct;
-extern AssignStruct global_assignstruct;
-extern IDEAStruct global_ideastruct;
-extern HuffStruct global_huffstruct;
-extern NNetStruct global_nnetstruct;
-extern LUStruct global_lustruct;
-
-/* External PROTOTYPES */
-/*extern unsigned long abs_randwc(unsigned long num);*/     /* From MISC */
-/*extern long randnum(long lngval);*/
-extern int32 randwc(int32 num);
-extern u32 abs_randwc(u32 num);
-extern int32 randnum(int32 lngval);
-
-extern farvoid *AllocateMemory(unsigned long nbytes,    /* From SYSSPEC */
-	int *errorcode);
-extern void FreeMemory(farvoid *mempointer,
-	int *errorcode);
-extern void MoveMemory(farvoid *destination,
-		farvoid *source, unsigned long nbytes);
-extern void ReportError(char *context, int errorcode);
-extern void ErrorExit();
-extern unsigned long StartStopwatch();
-extern unsigned long StopStopwatch(unsigned long startticks);
-extern unsigned long TicksToSecs(unsigned long tickamount);
-extern double TicksToFracSecs(unsigned long tickamount);
-
-/*****************
-** NUMERIC SORT **
-*****************/
-
-/*
-** PROTOTYPES
-*/
-void DoNumSort(void);
-static ulong DoNumSortIteration(farlong *arraybase,
-		ulong arraysize,
-		uint numarrays);
-static void LoadNumArrayWithRand(farlong *array,
-		ulong arraysize,
-		uint numarrays);
-static void NumHeapSort(farlong *array,
-		ulong bottom,
-		ulong top);
-static void NumSift(farlong *array,
-		ulong i,
-		ulong j);
-
-
-/****************
-** STRING SORT **
-*****************
-*/
-
-
-/*
-** PROTOTYPES
-*/
-void DoStringSort(void);
-static ulong DoStringSortIteration(faruchar *arraybase,
-		uint numarrays,
-		ulong arraysize);
-static farulong *LoadStringArray(faruchar *strarray,
-		uint numarrays,
-		ulong *strings,
-		ulong arraysize);
-static void stradjust(farulong *optrarray,
-		faruchar *strarray,
-		ulong nstrings,
-		ulong i,
-		uchar l);
-static void StrHeapSort(farulong *optrarray,
-		faruchar *strarray,
-		ulong numstrings,
-		ulong bottom,
-		ulong top);
-static int str_is_less(farulong *optrarray,
-		faruchar *strarray,
-		ulong numstrings,
-		ulong a,
-		ulong b);
-static void strsift(farulong *optrarray,
-		faruchar *strarray,
-		ulong numstrings,
-		ulong i,
-		ulong j);
-
-/************************
-** BITFIELD OPERATIONS **
-*************************
-*/
-
-/*
-** PROTOTYPES
-*/
-void DoBitops(void);
-static ulong DoBitfieldIteration(farulong *bitarraybase,
-		farulong *bitoparraybase,
-		long bitoparraysize,
-		ulong *nbitops);
-static void ToggleBitRun(farulong *bitmap,
-		ulong bit_addr,
-		ulong nbits,
-		uint val);
-static void FlipBitRun(farulong *bitmap,
-		ulong bit_addr,
-		ulong nbits);
-
-/****************************
-** EMULATED FLOATING POINT **
-****************************/
-typedef struct
-{
-	u8 type;        /* Indicates, NORMAL, SUBNORMAL, etc. */
-	u8 sign;        /* Mantissa sign */
-	short exp;      /* Signed exponent...no bias */
-	u16 mantissa[INTERNAL_FPF_PRECISION];
-} InternalFPF;
-
-/*
-** PROTOTYPES
-*/
-void DoEmFloat(void);
-
-/*
-** EXTERNALS
-*/
-extern void SetupCPUEmFloatArrays(InternalFPF *abase,
-	InternalFPF *bbase, InternalFPF *cbase,
-	ulong arraysize);
-extern ulong DoEmFloatIteration(InternalFPF *abase,
-	InternalFPF *bbase, InternalFPF *cbase,
-	ulong arraysize, ulong loops);
-
-/*************************
-** FOURIER COEFFICIENTS **
-*************************/
-
-/*
-** PROTOTYPES
-*/
-void DoFourier(void);
-static ulong DoFPUTransIteration(fardouble *abase,
-		fardouble *bbase,
-		ulong arraysize);
-static double TrapezoidIntegrate(double x0,
-		double x1,
-		int nsteps,
-		double omegan,
-		int select);
-static double thefunction(double x,
-		double omegan,
-		int select);
-
-/*************************
-** ASSIGNMENT ALGORITHM **
-*************************/
-
-/*
-** DEFINES
-*/
-
-#define ASSIGNROWS 101L
-#define ASSIGNCOLS 101L
-
-/*
-** TYPEDEFS
-*/
-typedef struct {
-	union {
-		long *p;
-		long (*ap)[ASSIGNROWS][ASSIGNCOLS];
-	} ptrs;
-} longptr;
-
-/*
-** PROTOTYPES
-*/
-void DoAssign(void);
-static ulong DoAssignIteration(farlong *arraybase,
-		ulong numarrays);
-static void LoadAssignArrayWithRand(farlong *arraybase,
-		ulong numarrays);
-static void LoadAssign(farlong arraybase[][ASSIGNCOLS]);
-static void CopyToAssign(farlong arrayfrom[][ASSIGNCOLS],
-		long arrayto[][ASSIGNCOLS]);
-static void Assignment(farlong arraybase[][ASSIGNCOLS]);
-static void calc_minimum_costs(long tableau[][ASSIGNCOLS]);
-static int first_assignments(long tableau[][ASSIGNCOLS],
-		short assignedtableau[][ASSIGNCOLS]);
-static void second_assignments(long tableau[][ASSIGNCOLS],
-		short assignedtableau[][ASSIGNCOLS]);
-
-/********************
-** IDEA ENCRYPTION **
-********************/
-
-/*
-** DEFINES
-*/
-#define IDEAKEYSIZE 16
-#define IDEABLOCKSIZE 8
-#define ROUNDS 8
-#define KEYLEN (6*ROUNDS+4)
-
-/*
-** MACROS
-*/
-#define low16(x) ((x) & 0x0FFFF)
-#define MUL(x,y) (x=mul(low16(x),y))
-
-
-typedef u16 IDEAkey[KEYLEN];
-
-/*
-** PROTOTYPES
-*/
-void DoIDEA(void);
-static ulong DoIDEAIteration(faruchar *plain1,
-	faruchar *crypt1, faruchar *plain2,
-	ulong arraysize, ulong nloops,
-	IDEAkey Z, IDEAkey DK);
-static u16 mul(register u16 a, register u16 b);
-static u16 inv(u16 x);
-static void en_key_idea(u16 userkey[8], IDEAkey Z);
-static void de_key_idea(IDEAkey Z, IDEAkey DK);
-static void cipher_idea(u16 in[4], u16 out[4], IDEAkey Z);
-
-/************************
-** HUFFMAN COMPRESSION **
-************************/
-
-/*
-** DEFINES
-*/
-#define EXCLUDED 32000L          /* Big positive value */
-
-/*
-** TYPEDEFS
-*/
-typedef struct {
-	uchar c;                /* Byte value */
-	float freq;             /* Frequency */
-	int parent;             /* Parent node */
-	int left;               /* Left pointer = 0 */
-	int right;              /* Right pointer = 1 */
-} huff_node;
-
-/*
-** GLOBALS
-*/
-static huff_node *hufftree;             /* The huffman tree */
-static long plaintextlen;               /* Length of plaintext */
-
-/*
-** PROTOTYPES
-*/
-void DoHuffman();
-static void create_text_line(farchar *dt,long nchars);
-static void create_text_block(farchar *tb, ulong tblen,
-		ushort maxlinlen);
-static ulong DoHuffIteration(farchar *plaintext,
-	farchar *comparray, farchar *decomparray,
-	ulong arraysize, ulong nloops, huff_node *hufftree);
-static void SetCompBit(u8 *comparray, u32 bitoffset, char bitchar);
-static int GetCompBit(u8 *comparray, u32 bitoffset);
-
-/********************************
-** BACK PROPAGATION NEURAL NET **
-********************************/
-
-/*
-** DEFINES
-*/
-#define T 1                     /* TRUE */
-#define F 0                     /* FALSE */
-#define ERR -1
-#define MAXPATS 10              /* max number of patterns in data file */
-#define IN_X_SIZE 5             /* number of neurodes/row of input layer */
-#define IN_Y_SIZE 7             /* number of neurodes/col of input layer */
-#define IN_SIZE 35              /* equals IN_X_SIZE*IN_Y_SIZE */
-#define MID_SIZE 8              /* number of neurodes in middle layer */
-#define OUT_SIZE 8              /* number of neurodes in output layer */
-#define MARGIN 0.1              /* how near to 1,0 do we have to come to stop? */
-#define BETA 0.09               /* beta learning constant */
-#define ALPHA 0.09              /* momentum term constant */
-#define STOP 0.1                /* when worst_error less than STOP, training is done */
-
-/*
-** GLOBALS
-*/
-double  mid_wts[MID_SIZE][IN_SIZE];     /* middle layer weights */
-double  out_wts[OUT_SIZE][MID_SIZE];    /* output layer weights */
-double  mid_out[MID_SIZE];              /* middle layer output */
-double  out_out[OUT_SIZE];              /* output layer output */
-double  mid_error[MID_SIZE];            /* middle layer errors */
-double  out_error[OUT_SIZE];            /* output layer errors */
-double  mid_wt_change[MID_SIZE][IN_SIZE]; /* storage for last wt change */
-double  out_wt_change[OUT_SIZE][MID_SIZE]; /* storage for last wt change */
-double  in_pats[MAXPATS][IN_SIZE];      /* input patterns */
-double  out_pats[MAXPATS][OUT_SIZE];    /* desired output patterns */
-double  tot_out_error[MAXPATS];         /* measure of whether net is done */
-double  out_wt_cum_change[OUT_SIZE][MID_SIZE]; /* accumulated wt changes */
-double  mid_wt_cum_change[MID_SIZE][IN_SIZE];  /* accumulated wt changes */
-
-double  worst_error; /* worst error each pass through the data */
-double  average_error; /* average error each pass through the data */
-double  avg_out_error[MAXPATS]; /* average error each pattern */
-
-int iteration_count;    /* number of passes thru network so far */
-int numpats;            /* number of patterns in data file */
-int numpasses;          /* number of training passes through data file */
-int learned;            /* flag--if TRUE, network has learned all patterns */
-
-/*
-** The Neural Net test requires an input data file.
-** The name is specified here.
-*/
-char *inpath="NNET.DAT";
-
-/*
-** PROTOTYPES
-*/
-void DoNNET(void);
-static ulong DoNNetIteration(ulong nloops);
-static void do_mid_forward(int patt);
-static void do_out_forward();
-void display_output(int patt);
-static void do_forward_pass(int patt);
-static void do_out_error(int patt);
-static void worst_pass_error();
-static void do_mid_error();
-static void adjust_out_wts();
-static void adjust_mid_wts();
-static void do_back_pass(int patt);
-static void move_wt_changes();
-static int check_out_error();
-static void zero_changes();
-static void randomize_wts();
-static int read_data_file();
-/* static int initialize_net(); */
-
-/***********************
-**  LU DECOMPOSITION  **
-** (Linear Equations) **
-***********************/
-
-/*
-** DEFINES
-*/
-
-#define LUARRAYROWS 101L
-#define LUARRAYCOLS 101L
-
-/*
-** TYPEDEFS
-*/
-typedef struct
-{       union
-	{       fardouble *p;
-		fardouble (*ap)[][LUARRAYCOLS];
-	} ptrs;
-} LUdblptr;
-
-/*
-** GLOBALS
-*/
-fardouble *LUtempvv;
-
-/*
-** PROTOTYPES
-*/
-void DoLU(void);
-static void LUFreeMem(fardouble *a, fardouble *b,
-	fardouble *abase, fardouble *bbase);
-static ulong DoLUIteration(fardouble *a, fardouble *b,
-	fardouble *abase, fardouble *bbase,
-	ulong numarrays);
-static void build_problem( double a[][LUARRAYCOLS],
-	int n, double b[LUARRAYROWS]);
-static int ludcmp(double a[][LUARRAYCOLS],
-	int n, int indx[], int *d);
-static void lubksb(double a[][LUARRAYCOLS],
-	int n, int indx[LUARRAYROWS],
-	double b[LUARRAYROWS]);
-static int lusolve(double a[][LUARRAYCOLS],
-	int n, double b[LUARRAYROWS]);
-
-
diff --git a/neural.c b/neural.c
new file mode 100644
index 0000000..0a1bced
--- /dev/null
+++ b/neural.c
@@ -0,0 +1,815 @@
+#include <stdio.h> /*
+#include <stdlib.h>
+#include <string.h>
+#include <strings.h>*/
+#include <math.h>
+#include "nmglobal.h"
+#include "nbench1.h"
+
+/*
+** The Neural Net test requires an input data file.
+** The name is specified here.
+*/
+char *inpath="NNET.DAT";
+
+/********************************
+** BACK PROPAGATION NEURAL NET **
+*********************************
+** This code is a modified version of the code
+** that was submitted to BYTE Magazine by
+** Maureen Caudill.  It accomanied an article
+** that I CANNOT NOW RECALL.
+** The author's original heading/comment was
+** as follows:
+**
+**  Backpropagation Network
+**  Written by Maureen Caudill
+**  in Think C 4.0 on a Macintosh
+**
+**  (c) Maureen Caudill 1988-1991
+**  This network will accept 5x7 input patterns
+**  and produce 8 bit output patterns.
+**  The source code may be copied or modified without restriction,
+**  but no fee may be charged for its use.
+**
+** ++++++++++++++
+** I have modified the code so that it will work
+** on systems other than a Macintosh -- RG
+*/
+
+/***********
+** DoNNet **
+************
+** Perform the neural net benchmark.
+** Note that this benchmark is one of the few that
+** requires an input file.  That file is "NNET.DAT" and
+** should be on the local directory (from which the
+** benchmark program in launched).
+*/
+void DoNNET(void)
+{
+NNetStruct *locnnetstruct;      /* Local ptr to global data */
+char *errorcontext;
+ulong accumtime;
+double iterations;
+
+/*
+** Link to global data
+*/
+locnnetstruct=&global_nnetstruct;
+
+/*
+** Set error context
+*/
+errorcontext="CPU:NNET";
+
+/*
+** Init random number generator.
+** NOTE: It is important that the random number generator
+**  be re-initialized for every pass through this test.
+**  The NNET algorithm uses the random number generator
+**  to initialize the net.  Results are sensitive to
+**  the initial neural net state.
+*/
+/* randnum(3L); */
+randnum((int32)3);
+
+/*
+** Read in the input and output patterns.  We'll do this
+** only once here at the beginning.  These values don't
+** change once loaded.
+*/
+if(read_data_file()!=0)
+   ErrorExit();
+
+
+/*
+** See if we need to perform self adjustment loop.
+*/
+if(locnnetstruct->adjust==0)
+{
+	/*
+	** Do self-adjustment.  This involves initializing the
+	** # of loops and increasing the loop count until we
+	** get a number of loops that we can use.
+	*/
+	for(locnnetstruct->loops=1L;
+	  locnnetstruct->loops<MAXNNETLOOPS;
+	  locnnetstruct->loops++)
+	  {     /*randnum(3L); */
+		randnum((int32)3);
+		if(DoNNetIteration(locnnetstruct->loops)
+			>global_min_ticks) break;
+	  }
+}
+
+/*
+** All's well if we get here.  Do the test.
+*/
+accumtime=0L;
+iterations=(double)0.0;
+
+do {
+	/* randnum(3L); */    /* Gotta do this for Neural Net */
+	randnum((int32)3);    /* Gotta do this for Neural Net */
+	accumtime+=DoNNetIteration(locnnetstruct->loops);
+	iterations+=(double)locnnetstruct->loops;
+} while(TicksToSecs(accumtime)<locnnetstruct->request_secs);
+
+/*
+** Clean up, calculate results, and go home.  Be sure to
+** show that we don't have to rerun adjustment code.
+*/
+locnnetstruct->iterspersec=iterations / TicksToFracSecs(accumtime);
+
+if(locnnetstruct->adjust==0)
+	locnnetstruct->adjust=1;
+
+
+return;
+}
+
+/********************
+** DoNNetIteration **
+*********************
+** Do a single iteration of the neural net benchmark.
+** By iteration, we mean a "learning" pass.
+*/
+static ulong DoNNetIteration(ulong nloops)
+{
+ulong elapsed;          /* Elapsed time */
+int patt;
+
+/*
+** Run nloops learning cycles.  Notice that, counted with
+** the learning cycle is the weight randomization and
+** zeroing of changes.  This should reduce clock jitter,
+** since we don't have to stop and start the clock for
+** each iteration.
+*/
+elapsed=StartStopwatch();
+while(nloops--)
+{
+	randomize_wts();
+	zero_changes();
+	iteration_count=1;
+	learned = F;
+	numpasses = 0;
+	while (learned == F)
+	{
+		for (patt=0; patt<numpats; patt++)
+		{
+			worst_error = 0.0;      /* reset this every pass through data */
+			move_wt_changes();      /* move last pass's wt changes to momentum array */
+			do_forward_pass(patt);
+			do_back_pass(patt);
+			iteration_count++;
+		}
+		numpasses ++;
+		learned = check_out_error();
+	}
+#ifdef DEBUG
+printf("Learned in %d passes\n",numpasses);
+#endif
+}
+return(StopStopwatch(elapsed));
+}
+
+/*************************
+** do_mid_forward(patt) **
+**************************
+** Process the middle layer's forward pass
+** The activation of middle layer's neurode is the weighted
+** sum of the inputs from the input pattern, with sigmoid
+** function applied to the inputs.
+**/
+static void  do_mid_forward(int patt)
+{
+double  sum;
+int     neurode, i;
+
+for (neurode=0;neurode<MID_SIZE; neurode++)
+{
+	sum = 0.0;
+	for (i=0; i<IN_SIZE; i++)
+	{       /* compute weighted sum of input signals */
+		sum += mid_wts[neurode][i]*in_pats[patt][i];
+	}
+	/*
+	** apply sigmoid function f(x) = 1/(1+exp(-x)) to weighted sum
+	*/
+	sum = 1.0/(1.0+exp(-sum));
+	mid_out[neurode] = sum;
+}
+return;
+}
+
+/*********************
+** do_out_forward() **
+**********************
+** process the forward pass through the output layer
+** The activation of the output layer is the weighted sum of
+** the inputs (outputs from middle layer), modified by the
+** sigmoid function.
+**/
+static void  do_out_forward()
+{
+double sum;
+int neurode, i;
+
+for (neurode=0; neurode<OUT_SIZE; neurode++)
+{
+	sum = 0.0;
+	for (i=0; i<MID_SIZE; i++)
+	{       /*
+		** compute weighted sum of input signals
+		** from middle layer
+		*/
+		sum += out_wts[neurode][i]*mid_out[i];
+	}
+	/*
+	** Apply f(x) = 1/(1+exp(-x)) to weighted input
+	*/
+	sum = 1.0/(1.0+exp(-sum));
+	out_out[neurode] = sum;
+}
+return;
+}
+
+/*************************
+** display_output(patt) **
+**************************
+** Display the actual output vs. the desired output of the
+** network.
+** Once the training is complete, and the "learned" flag set
+** to TRUE, then display_output sends its output to both
+** the screen and to a text output file.
+**
+** NOTE: This routine has been disabled in the benchmark
+** version. -- RG
+**/
+/*
+void  display_output(int patt)
+{
+int             i;
+
+	fprintf(outfile,"\n Iteration # %d",iteration_count);
+	fprintf(outfile,"\n Desired Output:  ");
+
+	for (i=0; i<OUT_SIZE; i++)
+	{
+		fprintf(outfile,"%6.3f  ",out_pats[patt][i]);
+	}
+	fprintf(outfile,"\n Actual Output:   ");
+
+	for (i=0; i<OUT_SIZE; i++)
+	{
+		fprintf(outfile,"%6.3f  ",out_out[i]);
+	}
+	fprintf(outfile,"\n");
+	return;
+}
+*/
+
+/**********************
+** do_forward_pass() **
+***********************
+** control function for the forward pass through the network
+** NOTE: I have disabled the call to display_output() in
+**  the benchmark version -- RG.
+**/
+static void  do_forward_pass(int patt)
+{
+do_mid_forward(patt);   /* process forward pass, middle layer */
+do_out_forward();       /* process forward pass, output layer */
+/* display_output(patt);        ** display results of forward pass */
+return;
+}
+
+/***********************
+** do_out_error(patt) **
+************************
+** Compute the error for the output layer neurodes.
+** This is simply Desired - Actual.
+**/
+static void do_out_error(int patt)
+{
+int neurode;
+double error,tot_error, sum;
+
+tot_error = 0.0;
+sum = 0.0;
+for (neurode=0; neurode<OUT_SIZE; neurode++)
+{
+	out_error[neurode] = out_pats[patt][neurode] - out_out[neurode];
+	/*
+	** while we're here, also compute magnitude
+	** of total error and worst error in this pass.
+	** We use these to decide if we are done yet.
+	*/
+	error = out_error[neurode];
+	if (error <0.0)
+	{
+		sum += -error;
+		if (-error > tot_error)
+			tot_error = -error; /* worst error this pattern */
+	}
+	else
+	{
+		sum += error;
+		if (error > tot_error)
+			tot_error = error; /* worst error this pattern */
+	}
+}
+avg_out_error[patt] = sum/OUT_SIZE;
+tot_out_error[patt] = tot_error;
+return;
+}
+
+/***********************
+** worst_pass_error() **
+************************
+** Find the worst and average error in the pass and save it
+**/
+static void  worst_pass_error()
+{
+double error,sum;
+
+int i;
+
+error = 0.0;
+sum = 0.0;
+for (i=0; i<numpats; i++)
+{
+	if (tot_out_error[i] > error) error = tot_out_error[i];
+	sum += avg_out_error[i];
+}
+worst_error = error;
+average_error = sum/numpats;
+return;
+}
+
+/*******************
+** do_mid_error() **
+********************
+** Compute the error for the middle layer neurodes
+** This is based on the output errors computed above.
+** Note that the derivative of the sigmoid f(x) is
+**        f'(x) = f(x)(1 - f(x))
+** Recall that f(x) is merely the output of the middle
+** layer neurode on the forward pass.
+**/
+static void do_mid_error()
+{
+double sum;
+int neurode, i;
+
+for (neurode=0; neurode<MID_SIZE; neurode++)
+{
+	sum = 0.0;
+	for (i=0; i<OUT_SIZE; i++)
+		sum += out_wts[i][neurode]*out_error[i];
+
+	/*
+	** apply the derivative of the sigmoid here
+	** Because of the choice of sigmoid f(I), the derivative
+	** of the sigmoid is f'(I) = f(I)(1 - f(I))
+	*/
+	mid_error[neurode] = mid_out[neurode]*(1-mid_out[neurode])*sum;
+}
+return;
+}
+
+/*********************
+** adjust_out_wts() **
+**********************
+** Adjust the weights of the output layer.  The error for
+** the output layer has been previously propagated back to
+** the middle layer.
+** Use the Delta Rule with momentum term to adjust the weights.
+**/
+static void adjust_out_wts()
+{
+int weight, neurode;
+double learn,delta,alph;
+
+learn = BETA;
+alph  = ALPHA;
+for (neurode=0; neurode<OUT_SIZE; neurode++)
+{
+	for (weight=0; weight<MID_SIZE; weight++)
+	{
+		/* standard delta rule */
+		delta = learn * out_error[neurode] * mid_out[weight];
+
+		/* now the momentum term */
+		delta += alph * out_wt_change[neurode][weight];
+		out_wts[neurode][weight] += delta;
+
+		/* keep track of this pass's cum wt changes for next pass's momentum */
+		out_wt_cum_change[neurode][weight] += delta;
+	}
+}
+return;
+}
+
+/*************************
+** adjust_mid_wts(patt) **
+**************************
+** Adjust the middle layer weights using the previously computed
+** errors.
+** We use the Generalized Delta Rule with momentum term
+**/
+static void adjust_mid_wts(int patt)
+{
+int weight, neurode;
+double learn,alph,delta;
+
+learn = BETA;
+alph  = ALPHA;
+for (neurode=0; neurode<MID_SIZE; neurode++)
+{
+	for (weight=0; weight<IN_SIZE; weight++)
+	{
+		/* first the basic delta rule */
+		delta = learn * mid_error[neurode] * in_pats[patt][weight];
+
+		/* with the momentum term */
+		delta += alph * mid_wt_change[neurode][weight];
+		mid_wts[neurode][weight] += delta;
+
+		/* keep track of this pass's cum wt changes for next pass's momentum */
+		mid_wt_cum_change[neurode][weight] += delta;
+	}
+}
+return;
+}
+
+/*******************
+** do_back_pass() **
+********************
+** Process the backward propagation of error through network.
+**/
+void  do_back_pass(int patt)
+{
+
+do_out_error(patt);
+do_mid_error();
+adjust_out_wts();
+adjust_mid_wts(patt);
+
+return;
+}
+
+
+/**********************
+** move_wt_changes() **
+***********************
+** Move the weight changes accumulated last pass into the wt-change
+** array for use by the momentum term in this pass. Also zero out
+** the accumulating arrays after the move.
+**/
+static void move_wt_changes()
+{
+int i,j;
+
+for (i = 0; i<MID_SIZE; i++)
+	for (j = 0; j<IN_SIZE; j++)
+	{
+		mid_wt_change[i][j] = mid_wt_cum_change[i][j];
+		/*
+		** Zero it out for next pass accumulation.
+		*/
+		mid_wt_cum_change[i][j] = 0.0;
+	}
+
+for (i = 0; i<OUT_SIZE; i++)
+	for (j=0; j<MID_SIZE; j++)
+	{
+		out_wt_change[i][j] = out_wt_cum_change[i][j];
+		out_wt_cum_change[i][j] = 0.0;
+	}
+
+return;
+}
+
+/**********************
+** check_out_error() **
+***********************
+** Check to see if the error in the output layer is below
+** MARGIN*OUT_SIZE for all output patterns.  If so, then
+** assume the network has learned acceptably well.  This
+** is simply an arbitrary measure of how well the network
+** has learned -- many other standards are possible.
+**/
+static int check_out_error()
+{
+int result,i,error;
+
+result  = T;
+error   = F;
+worst_pass_error();     /* identify the worst error in this pass */
+
+/*
+#ifdef DEBUG
+printf("\n Iteration # %d",iteration_count);
+#endif
+*/
+for (i=0; i<numpats; i++)
+{
+/*      printf("\n Error pattern %d:   Worst: %8.3f; Average: %8.3f",
+	  i+1,tot_out_error[i], avg_out_error[i]);
+	fprintf(outfile,
+	 "\n Error pattern %d:   Worst: %8.3f; Average: %8.3f",
+	 i+1,tot_out_error[i]);
+*/
+
+	if (worst_error >= STOP) result = F;
+	if (tot_out_error[i] >= 16.0) error = T;
+}
+
+if (error == T) result = ERR;
+
+
+#ifdef DEBUG
+/* printf("\n Error this pass thru data:   Worst: %8.3f; Average: %8.3f",
+ worst_error,average_error);
+*/
+/* fprintf(outfile,
+ "\n Error this pass thru data:   Worst: %8.3f; Average: %8.3f",
+  worst_error, average_error); */
+#endif
+
+return(result);
+}
+
+
+/*******************
+** zero_changes() **
+********************
+** Zero out all the wt change arrays
+**/
+static void zero_changes()
+{
+int i,j;
+
+for (i = 0; i<MID_SIZE; i++)
+{
+	for (j=0; j<IN_SIZE; j++)
+	{
+		mid_wt_change[i][j] = 0.0;
+		mid_wt_cum_change[i][j] = 0.0;
+	}
+}
+
+for (i = 0; i< OUT_SIZE; i++)
+{
+	for (j=0; j<MID_SIZE; j++)
+	{
+		out_wt_change[i][j] = 0.0;
+		out_wt_cum_change[i][j] = 0.0;
+	}
+}
+return;
+}
+
+
+/********************
+** randomize_wts() **
+*********************
+** Intialize the weights in the middle and output layers to
+** random values between -0.25..+0.25
+** Function rand() returns a value between 0 and 32767.
+**
+** NOTE: Had to make alterations to how the random numbers were
+** created.  -- RG.
+**/
+static void randomize_wts()
+{
+int neurode,i;
+double value;
+
+/*
+** Following not used int benchmark version -- RG
+**
+**        printf("\n Please enter a random number seed (1..32767):  ");
+**        scanf("%d", &i);
+**        srand(i);
+*/
+
+for (neurode = 0; neurode<MID_SIZE; neurode++)
+{
+	for(i=0; i<IN_SIZE; i++)
+	{
+	        /* value=(double)abs_randwc(100000L); */
+		value=(double)abs_randwc((int32)100000);
+		value=value/(double)100000.0 - (double) 0.5;
+		mid_wts[neurode][i] = value/2;
+	}
+}
+for (neurode=0; neurode<OUT_SIZE; neurode++)
+{
+	for(i=0; i<MID_SIZE; i++)
+	{
+	        /* value=(double)abs_randwc(100000L); */
+		value=(double)abs_randwc((int32)100000);
+		value=value/(double)10000.0 - (double) 0.5;
+		out_wts[neurode][i] = value/2;
+	}
+}
+
+return;
+}
+
+
+/*********************
+** read_data_file() **
+**********************
+** Read in the input data file and store the patterns in
+** in_pats and out_pats.
+** The format for the data file is as follows:
+**
+** line#   data expected
+** -----   ------------------------------
+** 1               In-X-size,in-y-size,out-size
+** 2               number of patterns in file
+** 3               1st X row of 1st input pattern
+** 4..             following rows of 1st input pattern pattern
+**                 in-x+2  y-out pattern
+**                                 1st X row of 2nd pattern
+**                 etc.
+**
+** Each row of data is separated by commas or spaces.
+** The data is expected to be ascii text corresponding to
+** either a +1 or a 0.
+**
+** Sample input for a 1-pattern file (The comments to the
+** right may NOT be in the file unless more sophisticated
+** parsing of the input is done.):
+**
+** 5,7,8                      input is 5x7 grid, output is 8 bits
+** 1                          one pattern in file
+** 0,1,1,1,0                  beginning of pattern for "O"
+** 1,0,0,0,1
+** 1,0,0,0,1
+** 1,0,0,0,1
+** 1,0,0,0,1
+** 1,0,0,0,0
+** 0,1,1,1,0
+** 0,1,0,0,1,1,1,1            ASCII code for "O" -- 0100 1111
+**
+** Clearly, this simple scheme can be expanded or enhanced
+** any way you like.
+**
+** Returns -1 if any file error occurred, otherwise 0.
+**/
+static int read_data_file()
+{
+FILE *infile;
+
+int xinsize,yinsize,youtsize;
+int patt, element, i, row;
+int vals_read;
+int val1,val2,val3,val4,val5,val6,val7,val8;
+
+/* printf("\n Opening and retrieving data from file."); */
+
+infile = fopen(inpath, "r");
+if (infile == NULL)
+{
+	printf("\n CPU:NNET--error in opening file!");
+	return -1 ;
+}
+vals_read =fscanf(infile,"%d  %d  %d",&xinsize,&yinsize,&youtsize);
+if (vals_read != 3)
+{
+	printf("\n CPU:NNET -- Should read 3 items in line one; did read %d",vals_read);
+	return -1;
+}
+vals_read=fscanf(infile,"%d",&numpats);
+if (vals_read !=1)
+{
+	printf("\n CPU:NNET -- Should read 1 item in line 2; did read %d",vals_read);
+	return -1;
+}
+if (numpats > MAXPATS)
+	numpats = MAXPATS;
+
+for (patt=0; patt<numpats; patt++)
+{
+	element = 0;
+	for (row = 0; row<yinsize; row++)
+	{
+		vals_read = fscanf(infile,"%d  %d  %d  %d  %d",
+			&val1, &val2, &val3, &val4, &val5);
+		if (vals_read != 5)
+		{
+			printf ("\n CPU:NNET -- failure in reading input!");
+			return -1;
+		}
+		element=row*xinsize;
+
+		in_pats[patt][element] = (double) val1; element++;
+		in_pats[patt][element] = (double) val2; element++;
+		in_pats[patt][element] = (double) val3; element++;
+		in_pats[patt][element] = (double) val4; element++;
+		in_pats[patt][element] = (double) val5; element++;
+	}
+	for (i=0;i<IN_SIZE; i++)
+	{
+		if (in_pats[patt][i] >= 0.9)
+			in_pats[patt][i] = 0.9;
+		if (in_pats[patt][i] <= 0.1)
+			in_pats[patt][i] = 0.1;
+	}
+	element = 0;
+	vals_read = fscanf(infile,"%d  %d  %d  %d  %d  %d  %d  %d",
+		&val1, &val2, &val3, &val4, &val5, &val6, &val7, &val8);
+
+	out_pats[patt][element] = (double) val1; element++;
+	out_pats[patt][element] = (double) val2; element++;
+	out_pats[patt][element] = (double) val3; element++;
+	out_pats[patt][element] = (double) val4; element++;
+	out_pats[patt][element] = (double) val5; element++;
+	out_pats[patt][element] = (double) val6; element++;
+	out_pats[patt][element] = (double) val7; element++;
+	out_pats[patt][element] = (double) val8; element++;
+}
+
+/* printf("\n Closing the input file now. "); */
+
+fclose(infile);
+return(0);
+}
+
+/*********************
+** initialize_net() **
+**********************
+** Do all the initialization stuff before beginning
+*/
+/*
+static int initialize_net()
+{
+int err_code;
+
+randomize_wts();
+zero_changes();
+err_code = read_data_file();
+iteration_count = 1;
+return(err_code);
+}
+*/
+
+/**********************
+** display_mid_wts() **
+***********************
+** Display the weights on the middle layer neurodes
+** NOTE: This routine is not used in the benchmark
+**  test -- RG
+**/
+/* static void display_mid_wts()
+{
+int             neurode, weight, row, col;
+
+fprintf(outfile,"\n Weights of Middle Layer neurodes:");
+
+for (neurode=0; neurode<MID_SIZE; neurode++)
+{
+	fprintf(outfile,"\n  Mid Neurode # %d",neurode);
+	for (row=0; row<IN_Y_SIZE; row++)
+	{
+		fprintf(outfile,"\n ");
+		for (col=0; col<IN_X_SIZE; col++)
+		{
+			weight = IN_X_SIZE * row + col;
+			fprintf(outfile," %8.3f ", mid_wts[neurode][weight]);
+		}
+	}
+}
+return;
+}
+*/
+/**********************
+** display_out_wts() **
+***********************
+** Display the weights on the output layer neurodes
+** NOTE: This code is not used in the benchmark
+**  test -- RG
+*/
+/* void  display_out_wts()
+{
+int             neurode, weight;
+
+	fprintf(outfile,"\n Weights of Output Layer neurodes:");
+
+	for (neurode=0; neurode<OUT_SIZE; neurode++)
+	{
+		fprintf(outfile,"\n  Out Neurode # %d \n",neurode);
+		for (weight=0; weight<MID_SIZE; weight++)
+		{
+			fprintf(outfile," %8.3f ", out_wts[neurode][weight]);
+		}
+	}
+	return;
+}
+*/
diff --git a/nmglobal.h b/nmglobal.h
index 2b57db5..a20fe62 100644
--- a/nmglobal.h
+++ b/nmglobal.h
@@ -25,9 +25,6 @@
 ** this code.
 */
 
-/* is this a 64 bit architecture? If so, this will define LONG64 */
-#include "pointer.h"
-
 /*
 ** SYSTEM DEFINES
 */
diff --git a/numsort.c b/numsort.c
new file mode 100644
index 0000000..936b390
--- /dev/null
+++ b/numsort.c
@@ -0,0 +1,292 @@
+#include <stdio.h>
+#include <stdint.h>
+/*
+#include <stdlib.h>
+#include <string.h>
+#include <strings.h>
+#include <math.h>
+*/
+#include "nmglobal.h"
+#include "nbench1.h"
+
+/*********************
+** NUMERIC HEAPSORT **
+**********************
+** This test implements a heapsort algorithm, performed on an
+** array of longs.
+*/
+
+/**************
+** DoNumSort **
+***************
+** This routine performs the CPU numeric sort test.
+** NOTE: Last version incorrectly stated that the routine
+**  returned result in # of longword sorted per second.
+**  Not so; the routine returns # of iterations per sec.
+*/
+
+void DoNumSort(void)
+{
+SortStruct *numsortstruct;      /* Local pointer to global struct */
+long *arraybase;     /* Base pointers of array */
+long accumtime;         /* Accumulated time */
+double iterations;      /* Iteration counter */
+char *errorcontext;     /* Error context string pointer */
+int systemerror;        /* For holding error codes */
+
+/*
+** Link to global structure
+*/
+numsortstruct=&global_numsortstruct;
+
+/*
+** Set the error context string.
+*/
+errorcontext="CPU:Numeric Sort";
+
+/*
+** See if we need to do self adjustment code.
+*/
+if(numsortstruct->adjust==0)
+{
+	/*
+	** Self-adjustment code.  The system begins by sorting 1
+	** array.  If it does that in no time, then two arrays
+	** are built and sorted.  This process continues until
+	** enough arrays are built to handle the tolerance.
+	*/
+	numsortstruct->numarrays=1;
+	while(1)
+	{
+		/*
+		** Allocate space for arrays
+		*/
+		arraybase=(long *)AllocateMemory(sizeof(long) *
+			numsortstruct->numarrays * numsortstruct->arraysize,
+			&systemerror);
+		if(systemerror)
+		{       ReportError(errorcontext,systemerror);
+			FreeMemory((void *)arraybase,
+				  &systemerror);
+		}
+
+		/*
+		** Do an iteration of the numeric sort.  If the
+		** elapsed time is less than or equal to the permitted
+		** minimum, then allocate for more arrays and
+		** try again.
+		*/
+		if(DoNumSortIteration(arraybase,
+			numsortstruct->arraysize,
+			numsortstruct->numarrays)>global_min_ticks)
+			break;          /* We're ok...exit */
+
+		FreeMemory((void *)arraybase,&systemerror);
+		if(numsortstruct->numarrays++>NUMNUMARRAYS)
+		{       printf("CPU:NSORT -- NUMNUMARRAYS hit.\n");
+		}
+	}
+}
+else
+{       /*
+	** Allocate space for arrays
+	*/
+	arraybase=(long *)AllocateMemory(sizeof(long) *
+		numsortstruct->numarrays * numsortstruct->arraysize,
+		&systemerror);
+	if(systemerror)
+	{       ReportError(errorcontext,systemerror);
+		FreeMemory((void *)arraybase,
+			  &systemerror);
+	}
+
+}
+/*
+** All's well if we get here.  Repeatedly perform sorts until the
+** accumulated elapsed time is greater than # of seconds requested.
+*/
+accumtime=0L;
+iterations=(double)0.0;
+
+do {
+	accumtime+=DoNumSortIteration(arraybase,
+		numsortstruct->arraysize,
+		numsortstruct->numarrays);
+	iterations+=(double)1.0;
+} while(TicksToSecs(accumtime)<numsortstruct->request_secs);
+
+/*
+** Clean up, calculate results, and go home.  Be sure to
+** show that we don't have to rerun adjustment code.
+*/
+FreeMemory((void *)arraybase,&systemerror);
+
+numsortstruct->sortspersec=iterations *
+	(double)numsortstruct->numarrays / TicksToFracSecs(accumtime);
+
+if(numsortstruct->adjust==0)
+	numsortstruct->adjust=1;
+
+#ifdef DEBUG
+if (numsort_status==0) printf("Numeric sort: OK\n");
+numsort_status=0;
+#endif
+return;
+}
+
+/***********************
+** DoNumSortIteration **
+************************
+** This routine executes one iteration of the numeric
+** sort benchmark.  It returns the number of ticks
+** elapsed for the iteration.
+*/
+static unsigned long DoNumSortIteration(long *arraybase,
+		unsigned long arraysize,
+		unsigned int numarrays)
+{
+unsigned long elapsed;          /* Elapsed ticks */
+unsigned long i;
+/*
+** Load up the array with random numbers
+*/
+LoadNumArrayWithRand(arraybase,arraysize,numarrays);
+
+/*
+** Start the stopwatch
+*/
+elapsed=StartStopwatch();
+
+/*
+** Execute a heap of heapsorts
+*/
+for(i=0;i<numarrays;i++)
+	NumHeapSort(arraybase+i*arraysize,0L,arraysize-1L);
+
+/*
+** Get elapsed time
+*/
+elapsed=StopStopwatch(elapsed);
+#ifdef DEBUG
+{
+	for(i=0;i<arraysize-1;i++)
+	{       /*
+		** Compare to check for proper
+		** sort.
+		*/
+		if(arraybase[i+1]<arraybase[i])
+		{       printf("Sort Error\n");
+			numsort_status=1;
+                        break;
+		}
+	}
+}
+#endif
+
+return(elapsed);
+}
+
+/*************************
+** LoadNumArrayWithRand **
+**************************
+** Load up an array with random longs.
+*/
+static void LoadNumArrayWithRand(long *array,     /* Pointer to arrays */
+		unsigned long arraysize,
+		unsigned int numarrays)         /* # of elements in array */
+{
+long i;                 /* Used for index */
+long *darray;        /* Destination array pointer */
+/*
+** Initialize the random number generator
+*/
+/* randnum(13L); */
+randnum(13);
+
+/*
+** Load up first array with randoms
+*/
+for(i=0L;i<arraysize;i++)
+        /* array[i]=randnum(0L); */
+	array[i]=randnum((int32_t)0);
+
+/*
+** Now, if there's more than one array to load, copy the
+** first into each of the others.
+*/
+darray=array;
+while(--numarrays)
+{       darray+=arraysize;
+	for(i=0L;i<arraysize;i++)
+		darray[i]=array[i];
+}
+
+return;
+}
+
+/****************
+** NumHeapSort **
+*****************
+** Pass this routine a pointer to an array of long
+** integers.  Also pass in minimum and maximum offsets.
+** This routine performs a heap sort on that array.
+*/
+static void NumHeapSort(long *array,
+	unsigned long bottom,           /* Lower bound */
+	unsigned long top)              /* Upper bound */
+{
+unsigned long temp;                     /* Used to exchange elements */
+unsigned long i;                        /* Loop index */
+
+/*
+** First, build a heap in the array
+*/
+for(i=(top/2L); i>0; --i)
+	NumSift(array,i,top);
+
+/*
+** Repeatedly extract maximum from heap and place it at the
+** end of the array.  When we get done, we'll have a sorted
+** array.
+*/
+for(i=top; i>0; --i)
+{       NumSift(array,bottom,i);
+	temp=*array;                    /* Perform exchange */
+	*array=*(array+i);
+	*(array+i)=temp;
+}
+return;
+}
+
+/************
+** NumSift **
+*************
+** Peforms the sift operation on a numeric array,
+** constructing a heap in the array.
+*/
+static void NumSift(long *array,     /* Array of numbers */
+	unsigned long i,                /* Minimum of array */
+	unsigned long j)                /* Maximum of array */
+{
+unsigned long k;
+long temp;                              /* Used for exchange */
+
+while((i+i)<=j)
+{
+	k=i+i;
+	if(k<j)
+		if(array[k]<array[k+1L])
+			++k;
+	if(array[i]<array[k])
+	{
+		temp=array[k];
+		array[k]=array[i];
+		array[i]=temp;
+		i=k;
+	}
+	else
+		i=j+1;
+}
+return;
+}
+
diff --git a/stringsort.c b/stringsort.c
new file mode 100644
index 0000000..5d7c67d
--- /dev/null
+++ b/stringsort.c
@@ -0,0 +1,575 @@
+#include <stdint.h>
+/*#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <strings.h>
+#include <math.h>*/
+#include "nmglobal.h"
+#include "nbench1.h"
+
+#ifdef DEBUG
+static int numsort_status=0;
+static int stringsort_status=0;
+#endif
+
+/********************
+** STRING HEAPSORT **
+********************/
+
+/*****************
+** DoStringSort **
+******************
+** This routine performs the CPU string sort test.
+** Arguments:
+**      requested_secs = # of seconds to execute test
+**      stringspersec = # of strings per second sorted (RETURNED)
+*/
+void DoStringSort(void)
+{
+
+SortStruct *strsortstruct;      /* Local for sort structure */
+unsigned char *arraybase;            /* Base pointer of char array */
+long accumtime;                 /* Accumulated time */
+double iterations;              /* # of iterations */
+char *errorcontext;             /* Error context string pointer */
+int systemerror;                /* For holding error code */
+
+/*
+** Link to global structure
+*/
+strsortstruct=&global_strsortstruct;
+
+/*
+** Set the error context
+*/
+errorcontext="CPU:String Sort";
+
+/*
+** See if we have to perform self-adjustment code
+*/
+if(strsortstruct->adjust==0)
+{
+	/*
+	** Initialize the number of arrays.
+	*/
+	strsortstruct->numarrays=1;
+	while(1)
+	{
+		/*
+		** Allocate space for array.  We'll add an extra 100
+		** bytes to protect memory as strings move around
+		** (this can happen during string adjustment)
+		*/
+		arraybase=(unsigned char *)AllocateMemory((strsortstruct->arraysize+100L) *
+			(long)strsortstruct->numarrays,&systemerror);
+		if(systemerror)
+		{       ReportError(errorcontext,systemerror);
+		}
+
+		/*
+		** Do an iteration of the string sort.  If the
+		** elapsed time is less than or equal to the permitted
+		** minimum, then de-allocate the array, reallocate a
+		** an additional array, and try again.
+		*/
+		if(DoStringSortIteration(arraybase,
+			strsortstruct->numarrays,
+			strsortstruct->arraysize)>global_min_ticks)
+			break;          /* We're ok...exit */
+
+		FreeMemory((void *)arraybase,&systemerror);
+		strsortstruct->numarrays+=1;
+	}
+}
+else
+{
+	/*
+	** We don't have to perform self adjustment code.
+	** Simply allocate the space for the array.
+	*/
+	arraybase=(unsigned char *)AllocateMemory((strsortstruct->arraysize+100L) *
+		(long)strsortstruct->numarrays,&systemerror);
+	if(systemerror)
+	{       ReportError(errorcontext,systemerror);
+	}
+}
+/*
+** All's well if we get here.  Repeatedly perform sorts until the
+** accumulated elapsed time is greater than # of seconds requested.
+*/
+accumtime=0L;
+iterations=(double)0.0;
+
+do {
+	accumtime+=DoStringSortIteration(arraybase,
+				strsortstruct->numarrays,
+				strsortstruct->arraysize);
+	iterations+=(double)strsortstruct->numarrays;
+} while(TicksToSecs(accumtime)<strsortstruct->request_secs);
+
+/*
+** Clean up, calculate results, and go home.
+** Set flag to show we don't need to rerun adjustment code.
+*/
+FreeMemory((void *)arraybase,&systemerror);
+strsortstruct->sortspersec=iterations / (double)TicksToFracSecs(accumtime);
+if(strsortstruct->adjust==0)
+	strsortstruct->adjust=1;
+#ifdef DEBUG
+if (stringsort_status==0) printf("String sort: OK\n");
+stringsort_status=0;
+#endif
+return;
+}
+
+/**************************
+** DoStringSortIteration **
+***************************
+** This routine executes one iteration of the string
+** sort benchmark.  It returns the number of ticks
+** Note that this routine also builds the offset pointer
+** array.
+*/
+static unsigned long DoStringSortIteration(unsigned char *arraybase,
+		unsigned int numarrays,unsigned long arraysize)
+{
+unsigned long *optrarray;            /* Offset pointer array */
+unsigned long elapsed;          /* Elapsed ticks */
+unsigned long nstrings;         /* # of strings in array */
+int syserror;                   /* System error code */
+unsigned int i;                 /* Index */
+unsigned long *tempobase;            /* Temporary offset pointer base */
+unsigned char *tempsbase;            /* Temporary string base pointer */
+
+/*
+** Load up the array(s) with random numbers
+*/
+optrarray=LoadStringArray(arraybase,numarrays,&nstrings,arraysize);
+
+/*
+** Set temp base pointers...they will be modified as the
+** benchmark proceeds.
+*/
+tempobase=optrarray;
+tempsbase=arraybase;
+
+/*
+** Start the stopwatch
+*/
+elapsed=StartStopwatch();
+
+/*
+** Execute heapsorts
+*/
+for(i=0;i<numarrays;i++)
+{       StrHeapSort(tempobase,tempsbase,nstrings,0L,nstrings-1);
+	tempobase+=nstrings;    /* Advance base pointers */
+	tempsbase+=arraysize+100;
+}
+
+/*
+** Record elapsed time
+*/
+elapsed=StopStopwatch(elapsed);
+
+#ifdef DEBUG
+{
+	unsigned long i;
+	for(i=0;i<nstrings-1;i++)
+	{       /*
+		** Compare strings to check for proper
+		** sort.
+		*/
+		if(str_is_less(optrarray,arraybase,nstrings,i+1,i))
+		{       printf("Sort Error\n");
+			stringsort_status=1;
+                        break;
+		}
+	}
+}
+#endif
+
+/*
+** Release the offset pointer array built by
+** LoadStringArray()
+*/
+FreeMemory((void *)optrarray,&syserror);
+
+/*
+** Return elapsed ticks.
+*/
+return(elapsed);
+}
+
+/********************
+** LoadStringArray **
+*********************
+** Initialize the string array with random strings of
+** varying sizes.
+** Returns the pointer to the offset pointer array.
+** Note that since we're creating a number of arrays, this
+** routine builds one array, then copies it into the others.
+*/
+static unsigned long *LoadStringArray(unsigned char *strarray, /* String array */
+	unsigned int numarrays,                 /* # of arrays */
+	unsigned long *nstrings,                /* # of strings */
+	unsigned long arraysize)                /* Size of array */
+{
+unsigned char *tempsbase;            /* Temporary string base pointer */
+unsigned long *optrarray;            /* Local for pointer */
+unsigned long *tempobase;            /* Temporary offset pointer base pointer */
+unsigned long curroffset;       /* Current offset */
+int fullflag;                   /* Indicates full array */
+unsigned char stringlength;     /* Length of string */
+unsigned char i;                /* Index */
+unsigned long j;                /* Another index */
+unsigned int k;                 /* Yet another index */
+unsigned int l;                 /* Ans still one more index */
+int systemerror;                /* For holding error code */
+
+/*
+** Initialize random number generator.
+*/
+/* randnum(13L); */
+randnum((int32_t)13);
+
+/*
+** Start with no strings.  Initialize our current offset pointer
+** to 0.
+*/
+*nstrings=0L;
+curroffset=0L;
+fullflag=0;
+
+do
+{
+	/*
+	** Allocate a string with a random length no
+	** shorter than 4 bytes and no longer than
+	** 80 bytes.  Note we have to also make sure
+	** there's room in the array.
+	*/
+        /* stringlength=(unsigned char)((1+abs_randwc(76L)) & 0xFFL);*/
+	stringlength=(unsigned char)((1+abs_randwc((int32_t)76)) & 0xFFL);
+	if((unsigned long)stringlength+curroffset+1L>=arraysize)
+	{       stringlength=(unsigned char)((arraysize-curroffset-1L) &
+				0xFF);
+		fullflag=1;     /* Indicates a full */
+	}
+
+	/*
+	** Store length at curroffset and advance current offset.
+	*/
+	*(strarray+curroffset)=stringlength;
+	curroffset++;
+
+	/*
+	** Fill up the rest of the string with random bytes.
+	*/
+	for(i=0;i<stringlength;i++)
+	{       *(strarray+curroffset)=
+		        /* (unsigned char)(abs_randwc((long)0xFE)); */
+			(unsigned char)(abs_randwc((int32_t)0xFE));
+		curroffset++;
+	}
+
+	/*
+	** Increment the # of strings counter.
+	*/
+	*nstrings+=1L;
+
+} while(fullflag==0);
+
+/*
+** We now have initialized a single full array.  If there
+** is more than one array, copy the original into the
+** others.
+*/
+k=1;
+tempsbase=strarray;
+while(k<numarrays)
+{       tempsbase+=arraysize+100;         /* Set base */
+	for(l=0;l<arraysize;l++)
+		tempsbase[l]=strarray[l];
+	k++;
+}
+
+/*
+** Now the array is full, allocate enough space for an
+** offset pointer array.
+*/
+optrarray=(unsigned long *)AllocateMemory(*nstrings * sizeof(unsigned long) *
+		numarrays,
+		&systemerror);
+if(systemerror)
+{       ReportError("CPU:Stringsort",systemerror);
+	FreeMemory((void *)strarray,&systemerror);
+}
+
+/*
+** Go through the newly-built string array, building
+** offsets and putting them into the offset pointer
+** array.
+*/
+curroffset=0;
+for(j=0;j<*nstrings;j++)
+{       *(optrarray+j)=curroffset;
+	curroffset+=(unsigned long)(*(strarray+curroffset))+1L;
+}
+
+/*
+** As above, we've made one copy of the offset pointers,
+** so duplicate this array in the remaining ones.
+*/
+k=1;
+tempobase=optrarray;
+while(k<numarrays)
+{       tempobase+=*nstrings;
+	for(l=0;l<*nstrings;l++)
+		tempobase[l]=optrarray[l];
+	k++;
+}
+
+/*
+** All done...go home.  Pass local pointer back.
+*/
+return(optrarray);
+}
+
+/**************
+** stradjust **
+***************
+** Used by the string heap sort.  Call this routine to adjust the
+** string at offset i to length l.  The members of the string array
+** are moved accordingly and the length of the string at offset i
+** is set to l.
+*/
+static void stradjust(unsigned long *optrarray,      /* Offset pointer array */
+	unsigned char *strarray,                     /* String array */
+	unsigned long nstrings,                         /* # of strings */
+	unsigned long i,                                /* Offset to adjust */
+	unsigned char l)                                /* New length */
+{
+unsigned long nbytes;           /* # of bytes to move */
+unsigned long j;                /* Index */
+int direction;                  /* Direction indicator */
+unsigned char adjamount;        /* Adjustment amount */
+
+/*
+** If new length is less than old length, the direction is
+** down.  If new length is greater than old length, the
+** direction is up.
+*/
+direction=(int)l - (int)*(strarray+*(optrarray+i));
+adjamount=(unsigned char)abs(direction);
+
+/*
+** See if the adjustment is being made to the last
+** string in the string array.  If so, we don't have to
+** do anything more than adjust the length field.
+*/
+if(i==(nstrings-1L))
+{       *(strarray+*(optrarray+i))=l;
+	return;
+}
+
+/*
+** Calculate the total # of bytes in string array from
+** location i+1 to end of array.  Whether we're moving "up" or
+** down, this is how many bytes we'll have to move.
+*/
+nbytes=*(optrarray+nstrings-1L) +
+	(unsigned long)*(strarray+*(optrarray+nstrings-1L)) + 1L -
+	*(optrarray+i+1L);
+
+/*
+** Calculate the source and the destination.  Source is
+** string position i+1.  Destination is string position i+l
+** (i+"ell"...don't confuse 1 and l).
+** Hand this straight to memmove and let it handle the
+** "overlap" problem.
+*/
+MoveMemory((void *)(strarray+*(optrarray+i)+l+1),
+	(void *)(strarray+*(optrarray+i+1)),
+	(unsigned long)nbytes);
+
+/*
+** We have to adjust the offset pointer array.
+** This covers string i+1 to numstrings-1.
+*/
+for(j=i+1;j<nstrings;j++)
+	if(direction<0)
+		*(optrarray+j)=*(optrarray+j)-adjamount;
+	else
+		*(optrarray+j)=*(optrarray+j)+adjamount;
+
+/*
+** Store the new length and go home.
+*/
+*(strarray+*(optrarray+i))=l;
+return;
+}
+
+/****************
+** strheapsort **
+*****************
+** Pass this routine a pointer to an array of unsigned char.
+** The array is presumed to hold strings occupying at most
+** 80 bytes (counts a byte count).
+** This routine also needs a pointer to an array of offsets
+** which represent string locations in the array, and
+** an unsigned long indicating the number of strings
+** in the array.
+*/
+static void StrHeapSort(unsigned long *optrarray, /* Offset pointers */
+	unsigned char *strarray,             /* Strings array */
+	unsigned long numstrings,               /* # of strings in array */
+	unsigned long bottom,                   /* Region to sort...bottom */
+	unsigned long top)                      /* Region to sort...top */
+{
+unsigned char temp[80];                 /* Used to exchange elements */
+unsigned char tlen;                     /* Temp to hold length */
+unsigned long i;                        /* Loop index */
+
+
+/*
+** Build a heap in the array
+*/
+for(i=(top/2L); i>0; --i)
+	strsift(optrarray,strarray,numstrings,i,top);
+
+/*
+** Repeatedly extract maximum from heap and place it at the
+** end of the array.  When we get done, we'll have a sorted
+** array.
+*/
+for(i=top; i>0; --i)
+{
+	strsift(optrarray,strarray,numstrings,0,i);
+
+	/* temp = string[0] */
+	tlen=*strarray;
+	MoveMemory((void *)&temp[0], /* Perform exchange */
+		(void *)strarray,
+		(unsigned long)(tlen+1));
+
+
+	/* string[0]=string[i] */
+	tlen=*(strarray+*(optrarray+i));
+	stradjust(optrarray,strarray,numstrings,0,tlen);
+	MoveMemory((void *)strarray,
+		(void *)(strarray+*(optrarray+i)),
+		(unsigned long)(tlen+1));
+
+	/* string[i]=temp */
+	tlen=temp[0];
+	stradjust(optrarray,strarray,numstrings,i,tlen);
+	MoveMemory((void *)(strarray+*(optrarray+i)),
+		(void *)&temp[0],
+		(unsigned long)(tlen+1));
+
+}
+return;
+}
+
+/****************
+** str_is_less **
+*****************
+** Pass this function:
+**      1) A pointer to an array of offset pointers
+**      2) A pointer to a string array
+**      3) The number of elements in the string array
+**      4) Offsets to two strings (a & b)
+** This function returns TRUE if string a is < string b.
+*/
+static int str_is_less(unsigned long *optrarray, /* Offset pointers */
+	unsigned char *strarray,                     /* String array */
+	unsigned long numstrings,                       /* # of strings */
+	unsigned long a, unsigned long b)                       /* Offsets */
+{
+int slen;               /* String length */
+
+/*
+** Determine which string has the minimum length.  Use that
+** to call strncmp().  If they match up to that point, the
+** string with the longer length wins.
+*/
+slen=(int)*(strarray+*(optrarray+a));
+if(slen > (int)*(strarray+*(optrarray+b)))
+	slen=(int)*(strarray+*(optrarray+b));
+
+slen=strncmp((char *)(strarray+*(optrarray+a)),
+		(char *)(strarray+*(optrarray+b)),slen);
+
+if(slen==0)
+{
+	/*
+	** They match.  Return true if the length of a
+	** is greater than the length of b.
+	*/
+	if(*(strarray+*(optrarray+a)) >
+		*(strarray+*(optrarray+b)))
+		return(TRUE);
+	return(FALSE);
+}
+
+if(slen<0) return(TRUE);        /* a is strictly less than b */
+
+return(FALSE);                  /* Only other possibility */
+}
+
+/************
+** strsift **
+*************
+** Pass this function:
+**      1) A pointer to an array of offset pointers
+**      2) A pointer to a string array
+**      3) The number of elements in the string array
+**      4) Offset within which to sort.
+** Sift the array within the bounds of those offsets (thus
+** building a heap).
+*/
+static void strsift(unsigned long *optrarray,        /* Offset pointers */
+	unsigned char *strarray,                     /* String array */
+	unsigned long numstrings,                       /* # of strings */
+	unsigned long i, unsigned long j)                       /* Offsets */
+{
+unsigned long k;                /* Temporaries */
+unsigned char temp[80];
+unsigned char tlen;             /* For string lengths */
+
+
+while((i+i)<=j)
+{
+	k=i+i;
+	if(k<j)
+		if(str_is_less(optrarray,strarray,numstrings,k,k+1L))
+			++k;
+	if(str_is_less(optrarray,strarray,numstrings,i,k))
+	{
+		/* temp=string[k] */
+		tlen=*(strarray+*(optrarray+k));
+		MoveMemory((void *)&temp[0],
+			(void *)(strarray+*(optrarray+k)),
+			(unsigned long)(tlen+1));
+
+		/* string[k]=string[i] */
+		tlen=*(strarray+*(optrarray+i));
+		stradjust(optrarray,strarray,numstrings,k,tlen);
+		MoveMemory((void *)(strarray+*(optrarray+k)),
+			(void *)(strarray+*(optrarray+i)),
+			(unsigned long)(tlen+1));
+
+		/* string[i]=temp */
+		tlen=temp[0];
+		stradjust(optrarray,strarray,numstrings,i,tlen);
+		MoveMemory((void *)(strarray+*(optrarray+i)),
+			(void *)&temp[0],
+			(unsigned long)(tlen+1));
+		i=k;
+	}
+	else
+		i=j+1;
+}
+return;
+}
diff --git a/wordcat.h b/wordcat.h
deleted file mode 100644
index 9f18b42..0000000
--- a/wordcat.h
+++ /dev/null
@@ -1,81 +0,0 @@
-/*
-** wordcat.h
-** Word catalog
-** BYTEmark (tm)
-** BYTE's Native Mode Benchmarks
-** Rick Grehan, BYTE Magazine
-**
-** Creation:
-** Revision: 3/95
-**
-** DISCLAIMER
-** The source, executable, and documentation files that comprise
-** the BYTEmark benchmarks are made available on an "as is" basis.
-** This means that we at BYTE Magazine have made every reasonable
-** effort to verify that the there are no errors in the source and
-** executable code.  We cannot, however, guarantee that the programs
-** are error-free.  Consequently, McGraw-HIll and BYTE Magazine make
-** no claims in regard to the fitness of the source code, executable
-** code, and documentation of the BYTEmark.
-**  Furthermore, BYTE Magazine, McGraw-Hill, and all employees
-** of McGraw-Hill cannot be held responsible for any damages resulting
-** from the use of this code or the results obtained from using
-** this code.
-*/
-
-/*
-** Word catalog
-*/
-#define WORDCATSIZE 50
-
-char *wordcatarray[WORDCATSIZE] =
-{	"Hello",
-	"He",
-	"Him",
-	"the",
-	"this",
-	"that",
-	"though",
-	"rough",
-	"cough",
-	"obviously",
-	"But",
-	"but",
-	"bye",
-	"begin",
-	"beginning",
-	"beginnings",
-	"of",
-	"our",
-	"ourselves",
-	"yourselves",
-	"to",
-	"together",
-	"togetherness",
-	"from",
-	"either",
-	"I",
-	"A",
-	"return",
-	"However",
-	"that",
-	"example",
-	"yet",
-	"quickly",
-	"all",
-	"if",
-	"were",
-	"includes",
-	"always",
-	"never",
-	"not",
-	"small",
-	"returns",
-	"set",
-	"basic",
-	"Entered",
-	"with",
-	"used",
-	"shown",
-	"you",
-	"know" };