From 91b4edf69e5adf9c40edcaff38b880218b7b0d9d Mon Sep 17 00:00:00 2001
From: Matt Turner <mattst88@gmail.com>
Date: Tue, 11 Nov 2008 21:27:09 +0000
Subject: Initial Import

git-svn-id: svn://mattst88.com/svn/cleanbench/trunk@1 0d43b9a7-5ab2-4d7b-af9d-f64450cef757
---
 COM.DAT             |   11 +
 Changes             |   42 +
 Makefile            |  153 ++
 NNET.DAT            |  210 +++
 README              |   66 +
 README.motorola     |   29 +
 README.nonlinux     |   50 +
 README.submit       |   33 +
 RESULTS             |  138 ++
 bdoc.txt            | 2109 ++++++++++++++++++++++++
 debugbit.good.gz    |  Bin 0 -> 1019 bytes
 emfloat.c           | 1343 ++++++++++++++++
 emfloat.h           |  154 ++
 hardware            |  Bin 0 -> 17013 bytes
 hardware.c          |  202 +++
 hardware.h          |    2 +
 hello.c             |    2 +
 misc.c              |  120 ++
 misc.h              |   41 +
 nbench0.c           | 1174 ++++++++++++++
 nbench0.h           |  356 +++++
 nbench1.c           | 4445 +++++++++++++++++++++++++++++++++++++++++++++++++++
 nbench1.h           |  428 +++++
 nmglobal.h          |  519 ++++++
 pointer.c           |    6 +
 sysinfo.c.example   |   10 +
 sysinfo.c.template  |   10 +
 sysinfo.sh          |   78 +
 sysinfoc.c.example  |    4 +
 sysinfoc.c.template |    4 +
 sysspec.c           |  884 ++++++++++
 sysspec.h           |  168 ++
 wordcat.h           |   81 +
 33 files changed, 12872 insertions(+)
 create mode 100644 COM.DAT
 create mode 100644 Changes
 create mode 100644 Makefile
 create mode 100644 NNET.DAT
 create mode 100644 README
 create mode 100644 README.motorola
 create mode 100644 README.nonlinux
 create mode 100644 README.submit
 create mode 100644 RESULTS
 create mode 100644 bdoc.txt
 create mode 100644 debugbit.good.gz
 create mode 100644 emfloat.c
 create mode 100644 emfloat.h
 create mode 100755 hardware
 create mode 100644 hardware.c
 create mode 100644 hardware.h
 create mode 100644 hello.c
 create mode 100644 misc.c
 create mode 100644 misc.h
 create mode 100644 nbench0.c
 create mode 100644 nbench0.h
 create mode 100644 nbench1.c
 create mode 100644 nbench1.h
 create mode 100644 nmglobal.h
 create mode 100644 pointer.c
 create mode 100644 sysinfo.c.example
 create mode 100644 sysinfo.c.template
 create mode 100755 sysinfo.sh
 create mode 100644 sysinfoc.c.example
 create mode 100644 sysinfoc.c.template
 create mode 100644 sysspec.c
 create mode 100644 sysspec.h
 create mode 100644 wordcat.h

diff --git a/COM.DAT b/COM.DAT
new file mode 100644
index 0000000..8dee49c
--- /dev/null
+++ b/COM.DAT
@@ -0,0 +1,11 @@
+ALLSTATS=T
+DONUMSORT=T
+DOSTRINGSORT=T
+DOBITFIELD=T
+DOEMF=T
+DOFOUR=T
+DOASSIGN=T
+DOIDEA=T
+DOHUFF=T
+DONNET=T
+DOLU=T
diff --git a/Changes b/Changes
new file mode 100644
index 0000000..111d8bd
--- /dev/null
+++ b/Changes
@@ -0,0 +1,42 @@
+This is about BYTE's beta version of the native-algorithm benchmark
+
+December 16, 1996:
+
+The source for DOS is obtainable at http://www.byte.com/bmark/bmark.htm
+Linux adaptation written by Uwe F. Mayer <mayer@tux.org>
+
+February 7, 1997:
+
+added -DSOLARIS flag to support solaris
+
+November 11, 1997:
+
+added index split suggested by Andrew D. Balsa
+re-baselined to a Linux machine
+added checking of CPU-type at run-time (cpuinfo.c)
+increased maximal number of loops in some tests
+removed -DSOLARIS flag, works now automatically (this also removed the
+  compiler warnings about redefined types and leads to 20% faster
+  code for "Bitfield" if compiled with -funroll-loops!)
+
+November 13-19, 1997:
+
+changed debugging information
+changed random number generator to be always 32 bits even on 64 bit OSs
+added data resets to Bitfield and Huffman
+created this Changes file
+added debug code for Bitfield
+
+December 6, 1997:
+
+got rid of cpuinfo.c
+added a RESULTS file
+
+December 7, 1997:
+
+fixed the statistical analysis used to compute the confidence coefficient
+fixed a bug in the DEBUG routine of "Assignment"
+
+December 11, 1997:
+added some entries to RESULTS
+
diff --git a/Makefile b/Makefile
new file mode 100644
index 0000000..5045c77
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,153 @@
+# Makefile for nbench, December 11, 1997, Uwe F. Mayer <mayer@tux.org>
+# Updated February 18, 2003
+
+default: nbench
+
+##########################################################################
+#   If you are using gcc-2.7.2.3 or earlier:
+#   The optimizer of gcc has a bug and in general you should not specify
+#   -funroll-loops together with -O (or -O2, -O3, etc.)
+#   This bug is supposed to be fixed with release 2.8 of gcc.
+#
+#   This bug does NOT seem to have an effect on the correct compilation
+#   of this benchmark suite on my Linux box. However, it leads to
+#   the dreaded "internal compiler error" message on our alpha
+#   running DEC Unix 4.0b. The Linux-binary that was used to obtain
+#   the baseline results was nevertheless compiled with
+#   CFLAGS = -s -static -Wall -O3 -fomit-frame-pointer -funroll-loops
+#
+# You should leave -static in the CFLAGS so that your sysinfo can be
+# compiled into the executable.
+
+CC = gcc
+
+# generic options for gcc
+CFLAGS = -s -static -Wall -O3
+
+# if your gcc lets you do it, then try this one
+#CFLAGS = -s -static -Wall -O3 -fomit-frame-pointer -funroll-loops
+
+# for gcc on an older Pentium type processor you can try the following
+#CFLAGS = -s -static -O3 -fomit-frame-pointer -Wall -m486 \
+#	-fforce-addr -fforce-mem -falign-loops=2 -falign-functions=2 \
+#	-falign-jumps=2 -funroll-loops
+
+# for a newer gcc on a newer Pentium type processor you can try the following
+#CFLAGS = -s -static -O3 -fomit-frame-pointer -Wall -march=i686 \
+#	-fforce-addr -fforce-mem -falign-loops=2 -falign-functions=2 \
+#	-falign-jumps=2 -funroll-loops
+
+# for a newer gcc on an Athlon XP type processor you can try the following
+#CFLAGS = -s -static -O3 -fomit-frame-pointer -Wall -march=athlon-xp \
+#	-fforce-addr -fforce-mem -falign-loops=2 -falign-functions=2 \
+#	-falign-jumps=2 -funroll-loops
+
+# For debugging using gcc
+#CFLAGS = -g -O3 -Wall -DDEBUG
+
+##########################################################################
+# For Linux machines with more than one binary format.
+# The default binaries; whether they are elf or a.out depends on your system.
+MACHINE=
+# a.out code for linux on an elf machine
+#MACHINE= -bi486-linuxaout
+# elf code for linux on an a.out machine
+#MACHINE= -bi486-linuxelf
+# if you want a different compiler version and different binaries, for example
+#MACHINE= -V2.7.2 -bi486-linuxaout
+
+##########################################################################
+# Read the file README.nonlinux if you are not using Linux
+
+# for DEC Unix using cc you can try
+#CC = cc
+#CFLAGS = -O3
+#LINKFLAGS = -s -non_shared
+
+# for SunOS using cc
+#CC = cc
+#CFLAGS = -O3 -s
+
+# for DEC Ultrix using cc
+#CC = cc
+#CFLAGS = -O2
+#LINKFLAGS = -s
+
+# for a Mac with OsX and the Darwin environment
+#CC = cc
+#CFLAGS = -O3 -DOSX
+
+# For debugging using cc
+#CC = cc
+#CFLAGS = -g -DDEBUG
+
+##########################################################################
+# If your system does not understand the system command "uname -s -r"
+# then comment this out
+
+# NO_UNAME= -DNO_UNAME
+
+##########################################################################
+# For any Unix flavor you need -DLINUX
+# You also need -DLINUX to get the new indices
+
+DEFINES= -DLINUX $(NO_UNAME)
+
+##########################################################################
+# For LINUX-like systems with gcc
+sysinfoc.c: Makefile
+	./sysinfo.sh $(CC) $(MACHINE) $(DEFINES) $(CFLAGS)
+
+sysinfo.c: Makefile
+	./sysinfo.sh $(CC) $(MACHINE) $(DEFINES) $(CFLAGS)
+
+##########################################################################
+# For non-LINUX systems
+# Edit the files sysinfo.c and sysinfoc.c to include your system information
+# and take sysinfo.c and sysinfoc.c out of the dependencies for nbench0.o
+
+hardware.o: hardware.c hardware.h Makefile
+	$(CC) $(MACHINE) $(DEFINES) $(CFLAGS)\
+		-c hardware.c
+
+nbench0.o: nbench0.h nbench0.c nmglobal.h pointer.h hardware.h\
+	   Makefile sysinfo.c sysinfoc.c
+	$(CC) $(MACHINE) $(DEFINES) $(CFLAGS)\
+		-c nbench0.c
+
+emfloat.o: emfloat.h emfloat.c nmglobal.h pointer.h Makefile
+	$(CC) $(MACHINE) $(DEFINES) $(CFLAGS)\
+		-c emfloat.c
+
+pointer.h: pointer Makefile
+	$(CC) $(MACHINE) $(DEFINES) $(CFLAGS)\
+		-o pointer pointer.c
+	rm -f pointer.h
+	if [ "4" = `./pointer` ] ; then touch pointer.h ;\
+	else echo "#define LONG64" >pointer.h ; fi
+
+misc.o: misc.h misc.c Makefile
+	$(CC) $(MACHINE) $(DEFINES) $(CFLAGS)\
+		-c misc.c
+
+nbench1.o: nbench1.h nbench1.c wordcat.h nmglobal.h pointer.h Makefile
+	$(CC) $(MACHINE) $(DEFINES) $(CFLAGS)\
+		-c nbench1.c
+
+sysspec.o: sysspec.h sysspec.c nmglobal.h pointer.h Makefile
+	$(CC) $(MACHINE) $(DEFINES) $(CFLAGS)\
+		-c sysspec.c
+
+nbench: emfloat.o misc.o nbench0.o nbench1.o sysspec.o hardware.o
+	$(CC) $(MACHINE) $(DEFINES) $(CFLAGS) $(LINKFLAGS)\
+		emfloat.o misc.o nbench0.o nbench1.o sysspec.o hardware.o\
+		-o nbench -lm
+
+##########################################################################
+
+clean:
+	- /bin/rm -f *.o *~ \#* core a.out hello sysinfo.c sysinfoc.c \
+		 bug pointer pointer.h debugbit.dat
+
+mrproper: clean
+	- /bin/rm -f nbench
diff --git a/NNET.DAT b/NNET.DAT
new file mode 100644
index 0000000..5711730
--- /dev/null
+++ b/NNET.DAT
@@ -0,0 +1,210 @@
+5  7  8 
+26
+0  0  1  0  0
+0  1  0  1  0
+1  0  0  0  1
+1  0  0  0  1
+1  1  1  1  1
+1  0  0  0  1
+1  0  0  0  1
+0  1  0  0  0  0  0  1
+1  1  1  1  0
+1  0  0  0  1
+1  0  0  0  1
+1  1  1  1  0
+1  0  0  0  1
+1  0  0  0  1
+1  1  1  1  0
+0  1  0  0  0  0  1  0
+0  1  1  1  0
+1  0  0  0  1
+1  0  0  0  0
+1  0  0  0  0
+1  0  0  0  0
+1  0  0  0  1
+0  1  1  1  0
+0  1  0  0  0  0  1  1
+1  1  1  1  0
+1  0  0  0  1
+1  0  0  0  1
+1  0  0  0  1
+1  0  0  0  1
+1  0  0  0  1
+1  1  1  1  0
+0  1  0  0  0  1  0  0
+1  1  1  1  1
+1  0  0  0  0
+1  0  0  0  0
+1  1  1  0  0
+1  0  0  0  0
+1  0  0  0  0
+1  1  1  1  1
+0  1  0  0  0  1  0  1
+1  1  1  1  1
+1  0  0  0  0
+1  0  0  0  0
+1  1  1  0  0
+1  0  0  0  0
+1  0  0  0  0
+1  0  0  0  0
+0  1  0  0  0  1  1  0
+0  1  1  1  0
+1  0  0  0  1
+1  0  0  0  0
+1  0  0  0  0
+1  0  0  1  1
+1  0  0  0  1
+0  1  1  1  0
+0  1  0  0  0  1  1  1
+1  0  0  0  1
+1  0  0  0  1
+1  0  0  0  1
+1  1  1  1  1
+1  0  0  0  1
+1  0  0  0  1
+1  0  0  0  1
+0  1  0  0  1  0  0  0
+0  1  1  1  0
+0  0  1  0  0
+0  0  1  0  0
+0  0  1  0  0
+0  0  1  0  0
+0  0  1  0  0
+0  1  1  1  0
+0  1  0  0  1  0  0  1
+0  0  0  0  1
+0  0  0  0  1
+0  0  0  0  1
+0  0  0  0  1
+1  0  0  0  1
+1  0  0  0  1
+0  1  1  1  0
+0  1  0  0  1  0  1  0
+1  0  0  0  1
+1  0  0  1  0
+1  0  1  0  0
+1  1  0  0  0
+1  0  1  0  0
+1  0  0  1  0
+1  0  0  0  1
+0  1  0  0  1  0  1  1
+1  0  0  0  0
+1  0  0  0  0
+1  0  0  0  0
+1  0  0  0  0
+1  0  0  0  0
+1  0  0  0  0
+1  1  1  1  1
+0  1  0  0  1  1  0  0
+1  0  0  0  1
+1  1  0  1  1
+1  0  1  0  1
+1  0  1  0  1
+1  0  0  0  1
+1  0  0  0  1
+1  0  0  0  1
+0  1  0  0  1  1  0  1
+1  0  0  0  1
+1  1  0  0  1
+1  0  1  0  1
+1  0  1  0  1
+1  0  1  0  1
+1  0  0  1  1
+1  0  0  0  1
+0  1  0  0  1  1  1  0
+0  1  1  1  0
+1  0  0  0  1
+1  0  0  0  1
+1  0  0  0  1
+1  0  0  0  1
+1  0  0  0  1
+0  1  1  1  0
+0  1  0  0  1  1  1  1
+1  1  1  1  0
+1  0  0  0  1
+1  0  0  0  1
+1  1  1  1  0
+1  0  0  0  0
+1  0  0  0  0
+1  0  0  0  0
+0  1  0  1  0  0  0  0
+0  1  1  1  0
+1  0  0  0  1
+1  0  0  0  1
+1  0  0  0  1
+1  0  1  0  1
+1  0  0  1  1
+0  1  1  1  1
+0  1  0  1  0  0  0  1
+1  1  1  1  0  
+1  0  0  0  1
+1  0  0  0  1
+1  1  1  1  0
+1  0  1  0  0
+1  0  0  1  0
+1  0  0  0  1
+0  1  0  1  0  0  1  0
+0  1  1  1  1
+1  0  0  0  0
+1  0  0  0  0
+0  1  1  1  0
+0  0  0  0  1
+0  0  0  0  1
+1  1  1  1  0
+0  1  0  1  0  0  1  1
+1  1  1  1  1
+0  0  1  0  0
+0  0  1  0  0
+0  0  1  0  0
+0  0  1  0  0
+0  0  1  0  0
+0  0  1  0  0
+0  1  0  1  0  1  0  0
+1  0  0  0  1
+1  0  0  0  1
+1  0  0  0  1
+1  0  0  0  1
+1  0  0  0  1
+1  0  0  0  1
+0  1  1  1  0
+0  1  0  1  0  1  0  1
+1  0  0  0  1
+1  0  0  0  1
+0  1  0  1  0
+0  1  0  1  0
+0  1  0  1  0
+0  1  0  1  0
+0  0  1  0  0
+0  1  0  1  0  1  1  0
+1  0  0  0  1
+1  0  0  0  1
+1  0  0  0  1
+1  0  1  0  1
+1  0  1  0  1
+1  0  1  0  1
+0  1  0  1  0
+0  1  0  1  0  1  1  1
+1  0  0  0  1
+0  1  0  1  0
+0  1  0  1  0
+0  0  1  0  0
+0  1  0  1  0
+0  1  0  1  0
+1  0  0  0  1
+0  1  0  1  1  0  0  0
+1  0  0  0  1
+0  1  0  1  0
+0  1  0  1  0
+0  0  1  0  0
+0  0  1  0  0
+0  0  1  0  0
+0  0  1  0  0
+0  1  0  1  1  0  0  1
+1  1  1  1  1
+0  0  0  1  0
+0  0  0  1  0
+0  0  1  0  0
+0  1  0  0  0
+0  1  0  0  0
+1  1  1  1  1
+0  1  0  1  1  0  1  0
diff --git a/README b/README
new file mode 100644
index 0000000..6863d46
--- /dev/null
+++ b/README
@@ -0,0 +1,66 @@
+February 18, 2003
+-----------------
+Bug-fix release.
+
+December 9, 1997
+----------------
+This release is based on beta release 2 of BYTE Magazine's BYTEmark
+benchmark program (previously known as BYTE's Native Mode
+Benchmarks). This document covers the Native Mode (a.k.a. Algorithm
+Level) tests; benchmarks designed to expose the capabilities of a
+system's CPU, FPU, and memory system.
+
+Running a "make" will create the binary if all goes well. It is called
+"nbench" and performs a suite of 10 tests and compares the results to
+a Dell Pentium 90 with 16 MB RAM and 256 KB L2 cache running MSDOS and
+compiling with the Watcom 10.0 C/C++ compiler. If you define -DLINUX
+during compilation (the default) then you also get a comparison to an
+AMD K6/233 with 32 MB RAM and 512 KB L2-cache running Linux 2.0.32 and
+using a binary which was compiled with GNU gcc version 2.7.2.3 and GNU
+libc-5.4.38.
+
+For more verbose output specify -v as an argument.
+
+The primary web site is: http://www.tux.org/~mayer/linux/bmark.html
+
+The port to Linux/Unix was done by Uwe F. Mayer <mayer@tux.org>.
+
+The index-split was done by Andrew D. Balsa, and reflects the
+realization that memory management is important in CPU design. The
+original tests have been left alone, however, the tests NUMERIC SORT,
+FP EMULATION, IDEA, and HUFFMAN now constitute the integer-arithmetic
+focused benchmark index, while the tests STRING SORT, BITFIELD, and
+ASSIGNMENT make up the new memory index.
+
+The algorithms were not changed from the source which was obtained
+from the BYTE web site at http://www.byte.com/bmark/bmark.htm on
+December 14, 1996.  However, the source was modified to better work
+with 64-bit machines (in particular the random number generator was
+modified to always work with 32 bit, no matter what kind of hardware
+you run it on). Furthermore, for some of the algorithms additional
+resettings of the data were added to increase the consistency across
+different hardware. Some extra debugging code was added, which has no
+impact on normal runs.
+
+In case there is uneven system load due to other processes while this
+benchmark suite executes, it might take longer to run than on an
+unloaded system. This is because the benchmark does some statistical
+analysis to make sure that the reported results are statistically
+significant, and an increased variation in individual runs requires
+more runs to achieve the required statistical confidence.
+
+This is a single-threaded benchmark and is not designed to measure the
+performance gain on multi-processor machines.
+
+For details and customization read bdoc.txt.
+
+THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
diff --git a/README.motorola b/README.motorola
new file mode 100644
index 0000000..223001b
--- /dev/null
+++ b/README.motorola
@@ -0,0 +1,29 @@
+The information in this file is old and no longer valid. It seems that
+the GNU C library has caught up with Motorola's libmoto, and now
+performance is just as good (or better) without libmoto. I'll include
+the old notice for historical reasons only. Currently libmoto is
+available at ftp://ftp.mcg.mot.com/pub/SPS/PowerPC/software/mklinux/libmoto/,
+but this is subject to change and not under my control.
+
+February 18, 2003
+Uwe F. Mayer
+
+---------------------------------------------------------------------------
+
+If you have a Motorola CPU or equivalent:
+
+When linked with the 'libmoto' (floating point library from Motorola)
+the results you obtain are much better. (FPU index of 0.896 without
+libmoto versus 1.910 with it in one example.)
+
+The Motorola math library is currently available at:
+http://www.mot.com/SPS/PowerPC/support/rsw_customer_support/mklinux/libmoto/libmoto_reg_mkdev.html
+
+If you have a Motorola CPU and you submit a result then please let me
+know whether you used libmoto or not. Please read the file README.submit.
+
+I do not have a Motorola CPU, and I can't help you with installing the
+library either.
+
+December 3, 1997
+Uwe F. Mayer
\ No newline at end of file
diff --git a/README.nonlinux b/README.nonlinux
new file mode 100644
index 0000000..641fe09
--- /dev/null
+++ b/README.nonlinux
@@ -0,0 +1,50 @@
+December 3, 1993
+================
+
+DEC Unix 4.0 or DEC OSF1 and gcc
+--------------------------------
+Compiles cleanly if you don't use -funroll-loops with gcc-2.7.2.3 or earlier
+
+DEC UNIX 4.0 or DEC OSF1 and cc
+-------------------------------
+CC = cc
+CFLAGS = -O3
+LINKFLAGS = -s -non_shared
+
+Compiles cleanly.
+
+SunOS and gcc
+-------------
+Compiles cleanly
+
+SunOS and cc
+------------
+CC = cc
+CFLAGS = -O3 -s
+
+Compiles with one warning during compilation of nbench1.c
+
+"/usr/ucbinclude/strings.h", line 48: warning: identifier redeclared: strlen
+        current : function() returning int
+        previous: function() returning uint : "/usr/include/string.h", line 98
+
+HP-UX and gcc
+-------------
+Compiles with one warning during compilation of sysspec.c
+
+In file included from /usr/local/lib/gcc-lib/hppa1.1-hp-hpux9.05/2.7.2.1/include/malloc.h:9,
+                 from sysspec.h:37,
+                 from sysspec.c:37:
+/usr/local/lib/gcc-lib/hppa1.1-hp-hpux9.05/2.7.2.1/include/sys/types.h:117: warning: empty declaration
+/usr/local/lib/gcc-lib/hppa1.1-hp-hpux9.05/2.7.2.1/include/sys/types.h:118: warning: empty declaration
+
+DEC Ultrix and cc
+-----------------
+CC = cc
+CFLAGS = -O2
+LINKFLAGS = -s
+
+Compiles with a warning about the correct usage of cut when running sysinfo.sh
+cut: Usage: cut [-s] [-d<char>] {-c<list> | -f<list>} file ...
+cut: Usage: cut [-s] [-d<char>] {-c<list> | -f<list>} file ...
+
diff --git a/README.submit b/README.submit
new file mode 100644
index 0000000..0dd3138
--- /dev/null
+++ b/README.submit
@@ -0,0 +1,33 @@
+I plan on posting a digest of results in case people mail me any.
+The URL will be linked to
+
+http://www.tux.org/~mayer/linux/bmark.html
+
+If you want to submit, then run the benchmark (use your own
+compilation, I don't care with what flags or compiler, but I want all
+numbers from a single benchmark run) and fill in the template as given
+in the example below:
+
+CPU                             : AMD 5x86P75 (486DX4/133MHz)
+L2 CACHE                        : 256 KB
+OS                              : Linux 2.0.32
+C COMPILER                      : gcc 2.7.2.3
+LIBC                            : libc-5.4.38
+Pentium 90 INTEGER INDEX        : 1.051
+Pentium 90 FLOATING-POINT INDEX : 0.450
+AMD K6/233 MEMORY INDEX         : 0.337
+AMD K6/233 INTEGER INDEX        : 0.238
+AMD K6/233 FLOATING-POINT INDEX : 0.230
+
+Any other format is fine as long as it contains the same info (write
+"unknown" or "?" for data you don't know). For example, you could just
+cut the summary from the output of nbench and mail it together with
+cache, CPU, and OS info in case it is not already present. Please do
+not email me the complete output of nbench, or any other unnecessarily
+long email, as this just eats up my hard-disk space.  However, long
+collections of results are of course welcome.
+
+Send your result to mayer@tux.org
+
+Uwe F. Mayer
+February 18, 2003
diff --git a/RESULTS b/RESULTS
new file mode 100644
index 0000000..ccf2336
--- /dev/null
+++ b/RESULTS
@@ -0,0 +1,138 @@
+December 7, 1997
+
+This file contains a few results so you may compare your machine.
+If you read this much after December 1997 then the results herein
+are probably obsolete.
+
+For a longer and hopefully more up-to-date list of results consult
+http://www.tux.org/~mayer/linux/bmark.html
+This web site, however, currently lists the old Pentium 90 indices!
+
+The indices below are with respect to the new AMD K6/233 baseline.
+
+OS                  : DEC Ultrix 4.4
+C compiler          : cc
+libc                : unknown version
+CPU                 : mips R6000
+L2 cache            : ?
+MEMORY INDEX        : 0.029
+INTEGER INDEX       : 0.046
+FLOATING-POINT INDEX: 0.077
+
+OS                  : LINUX 2.0.31
+C compiler          : gcc version 2.7.2.3
+libc                : libc.so.5.4.38
+CPU                 : Intel 486DX2/66 MHz
+L2 cache            : 256 KB
+MEMORY INDEX        : 0.098
+INTEGER INDEX       : 0.141
+FLOATING-POINT INDEX: 0.116
+
+OS                  : LINUX 2.0.32
+C compiler          : gcc version 2.7.2.3
+libc                : libc.so.5.4.38
+CPU                 : AMD 5x86P75 (486DX4/133MHz)
+L2 cache            : 256 KB
+MEMORY INDEX        : 0.234
+INTEGER INDEX       : 0.286
+FLOATING-POINT INDEX: 0.249
+
+OS                  : OSF1 V3.2 214
+C compiler          : cc
+libc                : unknown version
+CPU                 : 21064 alpha (DEC 3000 MODEL 300, year 1993)
+L2 cache            : 256 KB
+MEMORY INDEX        : 0.358
+INTEGER INDEX       : 0.362
+FLOATING-POINT INDEX: 0.656
+
+OS                  : HP-UX A.09.05
+C compiler          : gcc version 2.7.2.1
+libc                : unknown version
+CPU                 : 9000/715
+L2 cache            : ?
+MEMORY INDEX        : 0.208
+INTEGER INDEX       : 0.369
+FLOATING-POINT INDEX: 0.516
+
+OS                  : LINUX 2.0.31
+C compiler          : gcc version 2.7.2.3
+libc                : libc.so.5.4.38
+CPU                 : Intel Pentium 133 MHz
+L2 cache            : 512 KB
+MEMORY INDEX        : 0.383
+INTEGER INDEX       : 0.444
+FLOATING-POINT INDEX: 0.632
+
+OS                  : SunOS 5.5.1
+C compiler          : cc
+libc                : unknown version
+CPU                 : SUN-Ultra-Enterprise-2 sparc
+L2 cache            : ?
+MEMORY INDEX        : 0.417
+INTEGER INDEX       : 0.546
+FLOATING-POINT INDEX: 1.028
+
+OS                  : LINUX 2.0.29
+C compiler          : gcc version 2.7.2.3
+libc                : libc.so.5.4.38
+CPU                 : Cyrix 6x86L PR200+ (at 2 x 75 = 150 MHz)
+L2 cache            : 256 KB
+MEMORY INDEX        : 0.666
+INTEGER INDEX       : 0.599
+FLOATING-POINT INDEX: 0.508
+
+OS                  : LINUX 2.0.31
+C compiler          : gcc version 2.7.2.3
+libc                : libc.so.5.4.38
+CPU                 : Intel Pentium MMX 200 MHz
+L2 cache            : 512 KB
+MEMORY INDEX        : 0.601
+INTEGER INDEX       : 0.636
+FLOATING-POINT INDEX: 0.970
+
+OS                  : LINUX 2.0.31
+C compiler          : gcc version 2.7.2.3
+libc                : libc.so.5.4.38
+CPU                 : Intel 686 PentiumPro 200 MHz
+L2 cache            : 256 KB (internal)
+MEMORY INDEX        : 0.699
+INTEGER INDEX       : 0.732
+FLOATING-POINT INDEX: 1.140
+
+OS                  : LINUX 2.0.29
+C compiler          : gcc version 2.7.2.3
+libc                : libc.so.5.4.38
+CPU                 : Cyrix 6x86MX PR233 (at 2.5 x 75 = 187.5 MHz)
+L2 cache            : 512 KB
+MEMORY INDEX        : 0.861
+INTEGER INDEX       : 0.773
+FLOATING-POINT INDEX: 0.730
+
+OS                  : LINUX 2.0.32
+C compiler          : gcc version 2.7.2.3
+libc                : libc.so.5.4.38
+CPU                 : AMD K6/233
+L2 cache            : 512 KB
+MEMORY INDEX        : 1.000
+INTEGER INDEX       : 1.000
+FLOATING-POINT INDEX: 1.000
+
+OS                  : LINUX 2.0.31
+C compiler          : gcc version 2.7.2.3
+libc                : libc.so.5.4.38
+CPU                 : Intel 686 Pentium II 300 MHz
+L2 cache            : 512 KB
+MEMORY INDEX        : 1.255
+INTEGER INDEX       : 1.093
+FLOATING-POINT INDEX: 1.842
+
+OS                  : DEC UNIX 4.0b 564
+C compiler          : cc
+libc                : unknown version
+CPU                 : 21164 Alpha 300 MHz (dual CPU)
+L2 cache            : 96 KB
+L3 cache            : 4 MB per CPU
+MEMORY INDEX        : 0.973
+INTEGER INDEX       : 1.124
+FLOATING-POINT INDEX: 3.237
diff --git a/bdoc.txt b/bdoc.txt
new file mode 100644
index 0000000..e557bb0
--- /dev/null
+++ b/bdoc.txt
@@ -0,0 +1,2109 @@
+http://www.byte.com/bmark/bmark.htm
+----------------------------------------------------------------------------
+
+BYTEmark
+
+----------------------------------------------------------------------------
+
+This is release 2 of BYTE Magazine's BYTEmark benchmark program (previously
+known as BYTE's Native Mode Benchmarks). This document covers the Native
+Mode (a.k.a. Algorithm Level) tests; benchmarks designed to expose the
+capabilities of a system's CPU, FPU, and memory system. Another group of
+benchmarks within the BYTEmark suite includes the Application Simulation
+Benchmarks. They are detailed in a separate document. [NOTE: The
+documentation for the Application simulation benchmarks should appear before
+the end of March, 95. -- RG].
+
+The Tests
+
+The Native Mode portion of the BYTEmark consists of a number of well-known
+algorithms; some BYTE has used before in earlier versions of the benchmark,
+others are new. The complete suite consists of 10 tests:
+
+Numeric sort - Sorts an array of 32-bit integers.
+
+String sort - Sorts an array of strings of arbitrary length.
+
+Bitfield - Executes a variety of bit manipulation functions.
+
+Emulated floating-point - A small software floating-point package.
+
+Fourier coefficients - A numerical analysis routine for calculating series
+approximations of waveforms.
+
+Assignment algorithm - A well-known task allocation algorithm.
+
+Huffman compression - A well-known text and graphics compression algorithm.
+
+IDEA encryption - A relatively new block cipher algorithm.
+
+Neural Net - A small but functional back-propagation network simulator.
+
+LU Decomposition - A robust algorithm for solving linear equations.
+
+A more complete description of each test can be found in later sections of
+this document.
+
+BYTE built the BYTEmark with the multiplatform world foremost in mind. There
+were, of course, other considerations that we kept high on the list:
+
+Real-world algorithms. The algorithms should actually do something. Previous
+benchmarks often moved gobs of bytes from one point to another, added or
+subtracted piles and piles of numbers, or (in some cases) actually executed
+NOP instructions. We should not belittle those tests of yesterday; they had
+their place. However, we think it better that tests be based on activities
+that are more complex in nature.
+
+Easy to port. All the benchmarks are written in "vanilla" ANSI C. This
+provides us with the best chance of moving them quickly and accurately to
+new processors and operating systems as they appear. It also simplifies
+maintenance.
+
+This means that as new 64-bit (and, perhaps, 128-bit) processors appear, the
+benchmarks can test them as soon as a compiler is available.
+
+Comprehensive. The algorithms were derived from a variety of sources. Some
+are routines that BYTE had been using for some time. Others are routines
+derived from well-known texts in the computer science world. Furthermore,
+the algorithms differ in structure. Some simply "walk" sequentially through
+one-dimensional arrays. Others build and manipulate two-dimensional arrays.
+Finally, some benchmarks are "integer" tests, while others exercise the
+floating-point coprocessor (if one is available).
+
+Scalable. We wanted these benchmarks to be useful across as wide a variety
+of systems as possible. We also wanted to give them a lifetime beyond the
+next wave of new processors.
+
+To that end, we incorporated "dynamic workload adjustment." A complete
+description of this appears in a later section. In a nutshell, this allows
+the tests to "expand or contract" depending on the capabilities of the
+system under test, all the while providing consistent results so that fair
+and accurate comparisons are possible.
+
+Honesty In Advertising
+
+We'd be lying if we said that the BYTEmark was all the benchmarking that
+anyone would ever need to run on a system. It would be equally inaccurate to
+suggest that the tests are completely free of inadequacies. There are many
+things the tests do not do, there are shortcomings, and there are problems.
+
+BYTE will continue to improve the BYTEmark. The source code is freely
+available, and we encourage vendors and users to examine the routines and
+provide us with their feedback. In this way, we assure fairness,
+comprehensiveness, and accuracy.
+
+Still, as we mentioned, there are some shortcomings. Here are those we
+consider the most significant. Keep them in mind as you examine the results
+of the benchmarks now and in the future.
+
+At the mercy of C compilers. Being written in ANSI C, the benchmark program
+is highly portable. This is a reflection of the "world we live in." If this
+were a one-processor world, we might stand a chance at hand-crafting a
+benchmark in assembly language. (At one time, that's exactly what BYTE did.)
+Not today, no way.
+
+The upshot is that the benchmarks must be compiled. For broadest coverage,
+we selected ANSI C. And when they're compiled, the resulting executable's
+performance can be highly dependent on the capabilities of the C compiler.
+Today's benchmark results can be blown out of the water tomorrow if someone
+new enters the scene with an optimizing strategy that outperforms existing
+competition.
+
+This concern is not easily waved off. It will require you to keep careful
+track of compiler version and optimization switches. As BYTE builds its
+database of benchmark results, version number and switch setting will become
+an integral part of that data. This will be true for published information
+as well, so that you can make comparisons fairly and accurately. BYTE will
+control the distribution of test results so that all relevant compiler
+information is attached to the data.
+
+As a faint justification -- for those who think this situation results in
+"polluted" tests -- we should point out that we are in the same boat as all
+the other developers (at least, all those using C compilers -- and that's
+quite a sizeable group). If the only C compilers for a given system happen
+to be poor ones, everyone suffers. It's a fact that a given platform's
+ultimate potential depends as much on the development software available as
+on the technical achievements of the hardware design.
+
+It's just CPU and FPU. It's very tempting to try to capture the performance
+of a machine in a single number. That has never been possible -- though it's
+been tried a lot -- and the gap between that ideal and reality will forever
+widen.
+
+These benchmarks are meant to expose the theoretical upper limit of the CPU,
+FPU, and memory architecture of a system. They cannot measure video, disk,
+or network throughput (those are the domains of a different set of
+benchmarks). You should, therefore, use the results of these tests as part,
+not all, of any evaluation of a system.
+
+Single threaded. Currently, each benchmark test uses only a single execution
+thread. It's unlikely that you'll find any modern operating system that does
+not have some multitasking component. How a system "scales" as more tasks
+are run simultaneously is an effect that the current benchmarks cannot
+explore.
+
+BYTE is working on a future version of the tests that will solve this
+problem.
+
+The tests are synthetic. This quite reasonable argument is based on the fact
+that people don't run benchmarks for a living, they run applications.
+Consequently, the only true measure of a system is how well it performs
+whatever applications you will be running. This, in fact, is the philosophy
+behind the BAPCo benchmarks.
+
+This is not a point with which we would disagree. BYTE regularly makes use
+of a variety of application benchmarks. None of this suggests, however, that
+the BYTEmark benchmarks serve no purpose.
+
+BYTEmark's results should be used as predictors. They can be moved to a new
+platform long before native applications will be ported. The BYTEmark
+benchmarks will therefore provide an early look at the potential of the
+machine. Additionally, the BYTEmark permits you to "home in" on an aspect of
+the overall architecture. How well does the system perform when executing
+floating-point computations? Does its memory architecture help or hinder the
+management of memory buffers that may fall on arbitrary address boundaries?
+How does the cache work with a program whose memory access favors moving
+randomly through memory as opposed to moving sequentially through memory?
+
+The answers to these questions can give you a good idea of how well a system
+would support a particular class of applications. Only a synthetic benchmark
+can give the narrow view necessary to find the answers.
+
+Dynamic Workloads
+
+Our long history of benchmarking has taught us one thing above all others:
+Tomorrow's system will go faster than today's by an amount exceeding your
+wildest guess -- and then some. Dealing with this can become an unending
+race.
+
+It goes like this: You design a benchmark algorithm, you specify its
+parameters (how big the array is, how many loops, etc.), you run it on
+today's latest super-microcomputer, collect your data, and go home. A new
+machine arrives the next day, you run your benchmark, and discover that the
+test executes so quickly that the resolution of the clock routine you're
+using can't keep up with it (i.e., the test is over and done before the
+system clock even has a chance to tick).
+
+If you modify your routine, the figures you collected yesterday are no good.
+If you create a better clock routine by sneaking down into the system
+hardware, you can kiss portability goodbye.
+
+The BYTEmark benchmarks solve this problem by a process we'll refer to as
+"dynamic workload adjustment." In principle, it simply means that if the
+test runs so fast that the system clock can't time it, the benchmark
+increases the test workload -- and keeps increasing it -- until enough time
+is consumed to gather reliable test results.
+
+Here's an example.
+
+The BYTEmark benchmarks perform timing using a "stopwatch" paradigm. The
+routine StartStopwatch() begins timing; StopStopwatch() ends timing and
+reports the elapsed time in clock ticks. Now, "clock ticks" is a value that
+varies from system to system. We'll presume that our test system provides
+1000 clock ticks per second. (We'll also presume that the system actually
+updates its clock 1000 times per second. Surprisingly, some systems don't do
+that. One we know of will tell you that the clock provides 100 ticks per
+second, but updates the clock in 5- or 6-tick increments. The resolution is
+no better than somewhere around 1/18th of a second.) Here, when we say
+"system" we mean not only the computer system, but the environment provided
+by the C compiler. Interestingly, different C compilers for the same system
+will report different clock ticks per second.
+
+Built into the benchmarks is a global variable called GLOBALMINTICKS. This
+variable is the minimum number of clock ticks that the benchmark will allow
+StopStopwatch() to report.
+
+Suppose you run the Numeric Sort benchmark. The benchmark program will
+construct an array filled with random numbers, call StartStopwatch(), sort
+the array, and call StopStopwatch(). If the time reported in StopStopwatch()
+is less than GLOBALMINTICKS, then the benchmark will build two arrays, and
+try again. If sorting two arrays took less time than GLOBALMINTICKS, the
+process repeats with more arrays.
+
+This goes on until the benchmark makes enough work so that an interval
+between StartStopwatch() and StopStopwatch() exceeds GLOBALMINTICKS. Once
+that happens, the test is actually run, and scores are calculated.
+
+Notice that the benchmark didn't make bigger arrays, it made more arrays.
+That's because the time taken by the sort test does not increase linearly as
+the array grows; it increases in proportion to N*log(N) (where N is the size
+of the array).
+
+This principle is applied to all the benchmark tests. A machine with a less
+accurate clock may be forced to sort more arrays at a time, but the results
+are given in arrays per second. In this way fast machines, slow machines,
+machines with accurate clocks, machines with less accurate clocks, can all
+be tested with the same code.
+
+Confidence Intervals
+
+Another built-in feature of the BYTEmark is a set of statistical-analysis
+routines. Running benchmarks is one thing; the question arises as to how
+many times a test should be run until you know you have a good sampling.
+Also, can you determine whether the test is stable (i.e., do results vary
+widely from one execution of the benchmark to the next)?
+
+The BYTEmark keeps score as follows: Each test (a test being a numeric
+sort, a string sort, etc.) is run five times. These five scores are
+averaged, the standard deviation is determined, and a 95% confidence
+half-interval for the mean is calculated (using the student t
+distribution). This tells us that the true average lies -- with a 95%
+probability -- within plus or minus the confidence half-interval of
+the calculated average. If this half-interval is within 5% of the
+calculated average, the benchmarking stops. Otherwise, a new test is
+run and the calculations are repeated with all of the runs done so
+far, including the new one. The benchmark proceeds this way up to a
+total of 30 runs. If the length of the half-interval is still bigger
+than 5% of the calculated average then a warning is issued that the
+results might not be statistically certain before the average is
+displayed.
+
+** Fixed a statistical bug here. Uwe F. Mayer
+
+The upshot is that, for each benchmark test, the true average is -- with a
+95% level of confidence -- within 5% of the average reported. Here, the
+"true average" is the average we would get were we able to run the tests
+over and over again an infinite number of times.
+
+This specification ensures that the calculation of results is controlled;
+that someone running the tests in California will use the same technique for
+determining benchmark results as someone running the tests in New York.
+
+In case there is uneven system load due to other processes while this
+benchmark suite executes, it might take longer to run the benchmark suite
+as compared to a run on an unloaded system. This is because the benchmark does
+some statistical analysis to make sure that the reported results are
+statistically significant (as explained above), and a high variation in
+individual runs requires more runs to achieve the required statistical
+confidence.
+
+*** added the last paragraph, Uwe F. Mayer
+
+Interpreting Results
+
+Of course, running the benchmarks can present you with a boatload of data.
+It can get mystifying, and some of the more esoteric statistical information
+is valuable only to a limited audience. The big question is: What does it
+all mean?
+
+First, we should point out that the BYTEmark reports both "raw" and indexed
+scores for each test. The raw score for a particular test amounts to the
+"iterations per second" of that test. For example, the numeric sort test
+reports as its raw score the number of arrays it was able to sort per
+second.
+
+The indexed score is the raw score of the system under test divided by the
+raw score obtained on the baseline machine. As of this release, the
+baseline machine is a DELL 90 MHz Pentium XPS/90 with 16 MB of RAM and 256K
+of external processor cache. (The compiler used was the Watcom C/C++ 10.0
+compiler; optimizations set to "fastest possible code", 4-byte structure
+alignment, Pentium code generation with Pentium register-based calling. The
+operating system was MSDOS.) The indexed score serves to "normalize" the
+raw scores, reducing their dynamic range and making them easier to
+grasp. Simply put, if your machine has an index score of 2.0 on the numeric
+sort test, it performed that test twice as fast as this 90 MHz Pentium.
+
+If you run all the tests (as you'll see, it is possible to perform "custom
+runs", which execute only a subset of the tests) the BYTEmark will also
+produce two overall index figures: Integer index and Floating-point index.
+The Integer index is the geometric mean of those tests that involve only
+integer processing -- numeric sort, string sort, bitfield, emulated
+floating-point, assignment, Huffman, and IDEA -- while the Floating-point
+index is the geometric mean of those tests that require the floating-point
+coprocessor -- Fourier, neural net, and LU decomposition. You can use these
+scores to get a general feel for the performance of the machine under test
+as compared to the baseline 90 MHz Pentium.
+
+The Linux/Unix port has a second baseline machine: an AMD K6/233 with
+32 MB RAM and 512 KB L2-cache running Linux 2.0.32 and using GNU gcc
+version 2.7.2.3 and libc-5.4.38.  The integer index was split as suggested
+by Andrew D. Balsa <andrewbalsa@usa.net>, and reflects the realization that
+memory management is important in CPU design. The original tests have been
+left alone; however, the geometric mean of the tests NUMERIC SORT, FP
+EMULATION, IDEA, and HUFFMAN now constitutes the integer-arithmetic focused
+benchmark index, while the geometric mean of the tests STRING SORT,
+BITFIELD, and ASSIGNMENT makes up the new memory index. The floating point
+index has been left alone; it is still the geometric mean of FOURIER,
+NEURAL NET, and LU DECOMPOSITION.
+
+*** added the section on Linux, Uwe F. Mayer
+
+What follows is a list of the benchmarks and associated brief remarks that
+describe what the tests do: What they exercise; what a "good" result or a
+"bad" result means. Keep in mind that, in this expanding universe of faster
+processors, bigger caches, more elaborate memory architectures, "good" and
+"bad" are indeed relative terms. A good score on today's hot new processor
+will be a bad score on tomorrow's hot new processor.
+
+These remarks are based on empirical data and profiling that we have done to
+date. (NOTE: The profiling is limited to Intel and Motorola 68K on this
+release. As more data is gathered, we will be refining this section.
+3/14/95--RG)
+
+Benchmark                            Description
+
+Numeric sort                         Generic integer performance.  Should
+                                     exercise non-sequential performance
+                                     of cache (or memory if cache is less
+                                     than 8K).  Moves 32-bit longs at a
+                                     time, so 16-bit processors will be
+                                     at a disadvantage.
+
+
+
+String sort                          Tests memory-move performance.
+                                     Should exercise non-sequential
+                                     performance of cache, with added
+                                     burden that moves are byte-wide and
+                                     can occur on odd address boundaries.
+                                     May tax the performance of
+                                     cell-based processors that must
+                                     perform additional shift operations
+                                     to deal with bytes.
+
+
+
+Bitfield                             Exercises "bit twiddling"
+                                     performance.  Travels through memory
+                                     in a somewhat sequential fashion;
+                                     different from sorts in that data is
+                                     merely altered in place.  If
+                                     properly compiled, takes into
+                                     account 64-bit processors, which
+                                     should see a boost.
+
+
+
+Emulated F.P.                        Past experience has shown this test
+                                     to be a good measurement of overall
+                                     performance.
+
+
+
+Fourier                              Good measure of transcendental and
+                                     trigonometric performance of FPU.
+                                     Little array activity, so this test
+                                     should not be dependent on cache or
+                                     memory architecture.
+
+
+
+Assignment                           The test moves through large integer
+                                     arrays in both row-wise and
+                                     column-wise fashion.  Cache/memory
+                                     with good sequential performance
+                                     should see a boost (memory is
+                                     altered in place -- no moving as in
+                                     a sort operation).   Processing is
+                                     done in 32-bit chunks -- no
+                                     advantage given to 64-bit
+                                     processors.
+
+
+
+Huffman                              A combination of byte operations,
+                                     bit twiddling, and overall integer
+                                     manipulation.  Should be a good
+                                     general measurement.
+
+
+
+IDEA                                 Moves through data sequentially in
+                                     16-bit chunks.  Should provide a
+                                     good indication of raw speed.
+
+
+
+Neural Net                           Small-array floating-point test
+                                     heavily dependent on the exponential
+                                     function; less dependent on overall
+                                     FPU performance.  Small arrays, so
+                                     cache/memory architecture should not
+                                     come into play.
+
+
+
+LU decomposition                     A floating-point test that moves
+                                     through arrays in both row-wise and
+                                     column-wise fashion.  Exercises only
+                                     fundamental math operations (+, -,
+                                     *, /).
+
+The Command File
+
+Purpose
+
+The BYTEmark program allows you to override many of its default parameters
+using a command file. The command file also lets you request statistical
+information, as well as specify an output file to hold the test results for
+later use.
+
+You identify the command file using a command-line argument. E.G.,
+
+C:NBENCH -cCOMFILE.DAT
+
+tells the benchmark program to read from COMFILE.DAT in the current
+directory.
+
+The content of the command file is simply a series of parameter names and
+values, each on a single line. The parameters control internal variables
+that are either global in nature (i.e., they affect all tests in the
+program) or are specific to a given benchmark test.
+
+The parameters are listed in a reference guide that follows, arranged in the
+following groups:
+
+Global Parameters
+
+Numeric Sort
+
+String Sort
+
+Bitfield
+
+Emulated floating-point
+
+Fourier coefficients
+
+Assignment algorithm
+
+IDEA encryption
+
+Huffman compression
+
+Neural net
+
+LU decomposition
+
+As mentioned above, those items listed under "Global Parameters" affect all
+tests; the rest deal with specific benchmarks. There is no required ordering
+to parameters as they appear in the command file. You can specify them in
+any sequence you wish.
+
+You should be judicious in your use of a command file. Some parameters will
+override the "dynamic workload" adjustment that each test performs. Doing
+this completely bypasses the benchmark code that is designed to produce an
+accurate reading from your system clock. Other parameters will alter default
+settings, yielding test results that cannot be compared with published
+benchmark results.
+
+A Sample Command File
+
+Suppose you built a command file that contained the following:
+
+ALLSTATS=T
+
+CUSTOMRUN=T
+
+OUTFILE=D:\DATA.DAT
+
+DONUMSORT=T
+
+DOLU=T
+
+Here's what this file tells the benchmark program:
+
+ALLSTATS=T means that you've requested a "dump" of all the statistics the
+test gathers. This includes not only the standard deviations of tests run,
+it also produces test-specific information such as the number of arrays
+built, the array size, etc.
+
+CUSTOMRUN=T tells the system that this is a custom run. Only tests
+explicitly specified will be executed.
+
+OUTFILE=D:\DATA.DAT will write the output of the benchmark to the file
+DATA.DAT on the root of the D: drive. (If DATA.DAT already exists, output
+will be appended to the file.)
+
+DONUMSORT=T tells the system to run the numeric sort benchmark. (This was
+necessary on account of the CUSTOMRUN=T line, above.)
+
+DOLU=T tells the system to run the LU decomposition benchmark.
+
+Command File Parameters Reference
+
+(NOTE: Altering some global parameters can invalidate results for comparison
+purposes. Those parameters are indicated in the following section by a bold
+asterisk (*). If you alter any parameters so indicated, you may NOT publish
+the resulting data as BYTEmark scores.)
+
+Global Parameters
+
+GLOBALMINTICKS=<n>
+
+This overrides the default global_min_ticks value (defined in NBENCH1.H).
+The global_min_ticks value is defined as the minimum number of clock ticks
+per iteration of a particular benchmark. For example, if global_min_ticks is
+set to 100 and the numeric sort benchmark is run, each iteration MUST take
+at least 100 ticks, or the system will expand the work-per-iteration.
+
+MINSECONDS=<n>
+
+Sets the minimum number of seconds any particular test will run. This has
+the effect of controlling the number of repetitions done. Default: 5.
+
+ALLSTATS=<T|F>
+
+Set this flag to T for a "dump" of all statistics. The information displayed
+varies from test to test. Default: F.
+
+OUTFILE=<path>
+
+Specifies that output should go to the specified output file. Any test
+results and statistical data displayed on-screen will also be written to the
+file. If the file does not exist, it will be created; otherwise, new output
+will be appended to an existing file. This allows you to "capture" several
+runs into a single file for later review.
+
+Note: the path should not appear in quotes. For example, something like the
+following would work: OUTFILE=C:\BENCH\DUMP.DAT
+
+CUSTOMRUN=<T|F>
+
+Set this flag to T for a custom run. A "custom run" means that the program
+will run only the benchmark tests that you explicitly specify. So, use this
+flag to run a subset of the tests. Default: F.
+
+Numeric Sort
+
+DONUMSORT=<T|F>
+
+Indicates whether to do the numeric sort. Default is T, unless this is a
+custom run (CUSTOMRUN=T), in which case default is F.
+
+NUMNUMARRAYS=<n>
+
+Indicates the number of numeric arrays the system will build. Setting this
+value will override the program's "dynamic workload" adjustment for this
+test.*
+
+NUMARRAYSIZE=<n>
+
+Indicates the number of elements in each numeric array. Default is 8001
+entries. (NOTE: Altering this value will invalidate the test for comparison
+purposes. The performance of the numeric sort test is not related to the
+array size as a linear function; i.e., an array twice as big will not take
+twice as long. The relationship involves a logarithmic function.)*
+
+NUMMINSECONDS=<n>
+
+Overrides MINSECONDS for the numeric sort test.
+
+String Sort
+
+DOSTRINGSORT=<T|F>
+
+Indicates whether to do the string sort. Default is T, unless this is a
+custom run (CUSTOMRUN=T), in which case the default is F.
+
+STRARRAYSIZE=<n>
+
+Sets the size of the string array. Default is 8111. (NOTE: Altering this
+value will invalidate the test for comparison purposes. The performance of
+the string sort test is not related to the array size as a linear function;
+i.e., an array twice as big will not take twice as long. The relationship
+involves a logarithmic function.)*
+
+NUMSTRARRAYS=<n>
+
+Sets the number of string arrays that will be created to run the test.
+Setting this value will override the program's "dynamic workload" adjustment
+for this test.*
+
+STRMINSECONDS=<n>
+
+Overrides MINSECONDS for the string sort test.
+
+Bitfield
+
+DOBITFIELD=<T|F>
+
+Indicates whether to do the bitfield test. Default is T, unless this is a
+custom run (CUSTOMRUN=T), in which case the default is F.
+
+NUMBITOPS=<n>
+
+Sets the number of bitfield operations that will be performed. Setting this
+value will override the program's "dynamic workload" adjustment for this
+test.*
+
+BITFIELDSIZE=<n>
+
+Sets the number of 32-bit elements in the bitfield arrays. The default value
+is dependent on the size of a long as defined by the current compiler. For a
+typical compiler that defines a long to be 32 bits, the default is 32768.
+(NOTE: Altering this parameter will invalidate test results for comparison
+purposes.)*
+
+BITMINSECONDS=<n>
+
+Overrides MINSECONDS for the bitfield test.
+
+Emulated floating-point
+
+DOEMF=<T|F>
+
+Indicates whether to do the emulated floating-point test. Default is T,
+unless this is a custom run (CUSTOMRUN=T), in which case the default is F.
+
+EMFARRAYSIZE=<n>
+
+Sets the size (number of elements) of the emulated floating-point benchmark.
+Default is 3000. The test builds three arrays, each of equal size. This
+parameter sets the number of elements for EACH array. (NOTE: Altering this
+parameter will invalidate test results for comparison purposes.)*
+
+EMFLOOPS=<n>
+
+Sets the number of loops per iteration of the floating-point test. Setting
+this value will override the program's "dynamic workload" adjustment for
+this test.*
+
+EMFMINSECONDS=<n>
+
+Overrides MINSECONDS for the emulated floating-point test.
+
+Fourier coefficients
+
+DOFOUR=<T|F>
+
+Indicates whether to do the Fourier test. Default is T, unless this is a
+custom run (CUSTOMRUN=T), in which case the default is F.
+
+FOURASIZE=<n>
+
+Sets the size of the array for the Fourier test. This sets the number of
+coefficients the test will derive. NOTE: Specifying this value will override
+the system's "dynamic workload" adjustment for this test, and may make the
+results invalid for comparison purposes.*
+
+FOURMINSECONDS=<n>
+
+Overrides MINSECONDS for the Fourier test.
+
+Assignment Algorithm
+
+DOASSIGN=<T|F>
+
+Indicates whether to do the assignment algorithm test. Default is T, unless
+this is a custom run (CUSTOMRUN=T), in which case the default is F.
+
+ASSIGNARRAYS=<n>
+
+Indicates the number of arrays that will be built for the test. Specifying
+this value will override the system's "dynamic workload" adjustment for this
+test. (NOTE: The size of the arrays in the assignment algorithm is fixed at
+101 x 101. Altering the array size requires adjusting global constants and
+recompiling; to do so, however, would invalidate test results.)*
+
+ASSIGNMINSECONDS=<n>
+
+Overrides MINSECONDS for the assignment algorithm test.
+
+IDEA encryption
+
+DOIDEA=<T|F>
+
+Indicates whether to do the IDEA encryption test. Default is T, unless this
+is a custom run (CUSTOMRUN=T), in which case the default is F.
+
+IDEAARRAYSIZE=<n>
+
+Sets the size of the plain-text character array that will be encrypted by the
+test. Default is 4000. The benchmark actually builds 3 arrays: 1st
+plain-text, encrypted version, and 2nd plain-text. The 2nd plain-text array is
+the destination for the decryption process [part of the test]. All arrays
+are set to the same size. (NOTE: Specifying this value will invalidate test
+results for comparison purposes.)*
+
+IDEALOOPS=<n>
+
+Indicates the number of loops in the IDEA test. Specifying this value will
+override the system's "dynamic workload" adjustment for this test.*
+
+IDEAMINSECONDS=<n>
+
+Overrides MINSECONDS for the IDEA test.
+
+Huffman compression
+
+DOHUFF=<T|F>
+
+Indicates whether to do the Huffman test. Default is T, unless this is a
+custom run (CUSTOMRUN=T), in which case the default is F.
+
+HUFFARRAYSIZE=<n>
+
+Sets the size of the string buffer that will be compressed using the Huffman
+test. The default is 5000. (NOTE: Altering this value will invalidate test
+results for comparison purposes.)*
+
+HUFFLOOPS=<n>
+
+Sets the number of loops in the Huffman test. Specifying this value will
+override the system's "dynamic workload" adjustment for this test.*
+
+HUFFMINSECONDS=<n>
+
+Overrides MINSECONDS for the Huffman test.
+
+Neural net
+
+DONNET=<T|F>
+
+Indicates whether to do the Neural Net test. Default is T, unless this is a
+custom run (CUSTOMRUN=T), in which case the default is F.
+
+NNETLOOPS=<n>
+
+Sets the number of loops in the Neural Net test. NOTE: Altering this value
+overrides the benchmark's "dynamic workload" adjustment algorithm, and may
+invalidate the results for comparison purposes.*
+
+NNETMINSECONDS=<n>
+
+Overrides MINSECONDS for the Neural Net test.
+
+LU decomposition
+
+DOLU=<T|F>
+
+Indicates whether to do the LU decomposition test. Default is T, unless this
+is a custom run (CUSTOMRUN=T), in which case the default is F.
+
+LUNUMARRAYS=<n>
+
+Sets the number of arrays in each iteration of the LU decomposition test.
+Specifying this value will override the system's "dynamic workload"
+adjustment for this test.*
+
+LUMINSECONDS=<n>
+
+Overrides MINSECONDS for the LU decomposition test.
+
+Numeric Sort
+
+Description
+
+This benchmark is designed to explore how well the system sorts a numeric
+array. In this case, a numeric array is a one-dimensional collection of
+signed, 32-bit integers. The actual sorting is performed by a heapsort
+algorithm (see the text box following for a description of the heapsort
+algorithm).
+
+It's probably unnecessary to point out (but we'll do it anyway) that sorting
+is a fundamental operation in computer application software. You'll likely
+find sorting routines nestled deep inside a variety of applications;
+everything from database systems to operating-systems kernels.
+
+The numeric sort benchmark reports the number of arrays it was able to sort
+per second. The array size is set by a global constant (it can be overridden
+by the command file -- see below).
+
+Analysis
+
+Optimized 486 code: Profiling of the numeric sort benchmark using Watcom's
+profiler (Watcom C/C++ 10.0) indicates that the algorithm spends most of its
+time in the numsift() function (specifically, about 90% of the benchmark's
+time takes place in numsift()). Within numsift(), two if statements dominate
+time spent:
+
+if(array[k]<array[k+1L]) and if(array[i]<array[k])
+
+Both statements involve indexes into arrays, so it's likely the processor is
+spending a lot of time resolving the array references. (Though both
+statements involve "less-than" comparisons, we doubt that much time is
+consumed in performing the signed compare operation.) Though the first
+statement involves array elements that are adjacent to one another, the
+second does not. In fact, the second statement will probably involve
+elements that are far apart from one another during early passes through the
+sifting process. We expect that systems whose caching system pre-fetches
+contiguous elements (often in "burst" line fills) will not have any great
+advantage over systems without pre-fetch mechanisms.
+
+Similar results were found when we profiled the numeric sort algorithm under
+the Borland C/C++ compiler.
+
+680x0 Code (Macintosh CodeWarrior): CodeWarrior's profiler is function
+based; consequently, it does not allow for line-by-line analysis as does the
+Watcom compiler's profiler.
+
+However, the CodeWarrior profiler does give us enough information to note
+that NumSift() only accounts for about 28% of the time consumed by the
+benchmark. The outer routine, NumHeapSort() accounts for around 71% of the
+time taken. It will require additional analysis to determine why the two
+compilers -- Watcom and CodeWarrior -- divide the workload so differently. (It
+may have something to do with compiler architecture, or the act of profiling
+the code may produce results that are significantly different than how the
+program runs under normal conditions, though that would lead one to wonder
+what use profilers would be.)
+
+Porting Considerations
+
+The numeric sort routine should represent a trivial porting exercise. It is
+not an overly large benchmark in terms of source code. Additionally, the
+only external routines it calls on are for allocating and releasing memory,
+and managing the stopwatch.
+
+The numeric sort benchmark depends on the following global definitions (note
+that these may be overridden by the command file):
+
+NUMNUMARRAYS -- Sets the upper limit on the number of arrays that the
+benchmark will attempt to build. The numeric sort benchmark creates work for
+itself by requiring the system to sort more and more arrays...not bigger and
+bigger arrays. (The latter case would skew results, because the sorting time
+for heapsort is N log2 N - e.g., doubling the array size does not double the
+sort time.) This constant sets the upper limit to the number of arrays the
+system will build before it signals an error. The default value is 100, and
+may be changed if your system exceeds this limit.
+
+NUMARRAYSIZE - Determines the size of each array built. It has been set to
+8111L and should not be tampered with. The command file entry
+NUMARRAYSIZE=<n> can be used to change this value, but results produced by
+doing this will make your results incompatible with other runs of the
+benchmark (since results will be skewed -- see preceding paragraph).
+
+To test for a correct execution of the numeric sort benchmark, #define the
+DEBUG symbol. This will enable code that verifies that arrays are properly
+sorted. You should run the benchmark program using a command file that has
+only the numeric sort test enabled. If there is an error, the program will
+display "SORT ERROR" (If this happens, it's possible that tons of "SORT
+ERROR" messages will be emitted, so it's best not to redirect output to a
+file), otherwise it will print "Numeric sort: OK" (also quite a few times).
+
+References
+
+Gonnet, G.H. 1984, Handbook of Algorithms and Data Structures (Reading, MA:
+Addison-Wesley).
+
+Knuth, Donald E. 1968, Fundamental Algorithms, vol 1 of The Art of Computer
+Programming (Reading, MA: Addison-Wesley).
+
+Press, William H., Flannery, Brian P., Teukolsky, Saul A., and Vetterling,
+William T. 1989, Numerical Recipes in Pascal (Cambridge: Cambridge
+University Press).
+
+Heapsort
+
+The heapsort algorithm is well-covered in a number of the popular
+computer-science textbooks. In fact, it gets a pat on the back in Numerical
+Recipes (Press et al.), where the authors write:
+
+Heapsort is our favorite sorting routine. It can be recommended
+wholeheartedly for a variety of sorting applications. It is a true
+"in-place" sort, requiring no auxiliary storage.
+
+Heapsort works by building the array into a kind of a queue called a heap.
+You can imagine this heap as being a form of in-memory binary tree. The
+topmost (root) element of the tree is the element that -- were the array
+sorted -- would be the largest element in the array. Sorting takes place by
+first constructing the heap, then pulling the root off the tree, promoting
+the next largest element to the root, pulling it off, and so on. (The
+promotion process is known as "sifting up.")
+
+Heapsort executes in N log2 N time even in its worst case. Unlike some other
+sorting algorithms, it does not benefit from a partially sorted array
+(though Gonnet does refer to a variation of heapsort, called "smoothsort,"
+which does -- see references).
+
+String Sort
+
+Description
+
+This benchmark is designed to gauge how well the system moves bytes around.
+By that we mean, how well the system can copy a string of bytes from one
+location to another; source and destination being aligned to arbitrary
+addresses. (This is unlike the numeric sort array, which moves bytes
+longword-at-a-time.) The strings themselves are built so as to be of random
+length, ranging from no fewer than 4 bytes to no more than 80 bytes. The
+mixture of random lengths means that processors will be forced to deal with
+strings that begin and end on arbitrary address boundaries.
+
+The string sort benchmark uses the heapsort algorithm; this is the same
+algorithm as is used in the numeric sort benchmark (see the sidebar on the
+heapsort for a detailed description of the algorithm).
+
+Manipulation of the strings is actually handled by two arrays. One array
+holds the strings themselves; the other is a pointers array. Each member of
+the pointers array carries an offset that points into the string array, so
+that the ith pointer carries the offset to the ith string. This allows the
+benchmark to rapidly locate the position of the ith string. (The sorting
+algorithm requires exchanges of items that might be "distant" from one
+another in the array. It's critical that the routine be able to rapidly find
+a string based on its indexed position in the array.)
+
+The string sort benchmark reports the number of string arrays it was able to
+sort per second. The size of the array is set by a global constant.
+
+Analysis
+
+Optimized 486 code (Watcom C/C++ 10.0): Profiling of the string sort
+benchmark indicates that it spends most of its time in the C library routine
+memmove(). Within that routine, most of the execution is consumed by a pair
+of instructions: rep movsw and rep movsd. These are repeated string move --
+word width and repeated string move -- doubleword width, respectively.
+
+This is precisely where we want to see the time spent. It's interesting to
+note that the memmove() of the particular compiler/profiler tested (Watcom
+C/C++ 10.0) was "smart" enough to do most of the moving on word or
+doubleword boundaries. The string sort benchmark specifically sets arbitrary
+boundaries, so we'd expect to see lots of byte-wide moves. The "smart"
+memmove() is able to move bytes only when it has to, and does the remainder
+of the work via words and doublewords (which can move more bits at a time).
+
+680x0 Code (Macintosh CodeWarrior): Because CodeWarrior's profiler is
+function based, it is impossible to get an idea of how much time the test
+spends in library routines such as memmove(). Fortunately, as an artifact of
+the early version of the benchmark, the string sort algorithm makes use of
+the MoveMemory() routine in the sysspec.c file (system specific routines).
+This call, on anything other than a 16-bit DOS system, calls memmove()
+directly. Hence, we can get a good approximation of how much time is spent
+moving bytes.
+
+The answer is that nearly 78% of the benchmark's time is consumed by
+MoveMemory(), the rest being taken up by the other routines (the
+str_is_less() routine, which performs string comparisons, takes about 7% of
+the time). As above, we can guess that most of the benchmark's time is
+dependent on the performance of the library's memmove() routine.
+
+Porting Considerations
+
+As with the numeric sort routine, the string sort benchmark should be simple
+to port. Simpler, in fact. The string sort benchmark routine is not
+dependent on any typedef that may change from machine to machine (unless a
+char type is not 8 bits).
+
+The string sort benchmark depends on the following global definitions:
+
+NUMSTRARRAYS - Sets the upper limit on the number of arrays that the
+benchmark will attempt to build. The string sort benchmark creates work for
+itself by requiring the system to sort more and more arrays, not bigger and
+bigger arrays. (See section on Numeric Sort for an explanation.) This
+constant sets the upper limit to the number of arrays the system will build
+before it signals an error. The default value is 100, and may be changed if
+your system exceeds this limit.
+
+STRARRAYSIZE - Sets the default size of the string arrays built. We say
+"arrays" because, as with the numeric sort benchmark, the system adds work
+not by expanding the size of the array, but by adding more arrays. This
+value is set to 8111, and should not be modified, since results would not be
+comparable with other runs of the same benchmark on other machines.
+
+To test for a correct execution of the string sort benchmark, #define
+the DEBUG symbol. This will enable code that verifies the arrays are
+properly sorted. Set up a command file that runs only the string sort,
+and execute the benchmark program. If the routine is operating
+properly, the benchmark will print "String sort: OK" (this message is
+printed quite often). Otherwise, the program will display "SORT ERROR"
+for each pair of strings it finds out of order (which can be really
+often).
+
+References
+
+See the references for the Numeric Sort benchmark.
+
+Bitfield Operations
+
+Description
+
+The purpose of this benchmark is to explore how efficiently the system
+executes operations that deal with "twiddling bits." The test is set up to
+simulate a "bit map"; a data structure used to keep track of storage usage.
+(Don't confuse this meaning of "bitmap" with its use in describing a
+graphics data structure.)
+
+Systems often use bit maps to keep an inventory of memory blocks or (more
+frequently) disk blocks. In the case of a bit map that manages disk usage,
+an operating system will set aside a buffer in memory so that each bit in
+that buffer corresponds to a block on the disk drive. A 0 bit means that the
+corresponding block is free; a 1 bit means the block is in use. Whenever a
+file requests a new block of disk storage, the operating system searches the
+bit map for the first 0 bit, sets the bit (to indicate that the block is now
+spoken for), and returns the number of the corresponding disk block to the
+requesting file.
+
+These types of operations are precisely what this test simulates. A block of
+memory is allocated for the bit map. Another block of memory is
+allocated, and set up to hold a series of "bit map commands". Each bitmap
+command tells the simulation to do 1 of 3 things:
+
+1) Clear a series of consecutive bits,
+
+2) Set a series of consecutive bits, or
+
+3) Complement (1->0 and 0->1) a series of consecutive bits.
+
+The bit map command block is loaded with a set of random bit map commands
+(each command covers a random number of bits), and the simulation routine steps
+sequentially through the command block, grabbing a command and executing it.
+
+The bitfield benchmark reports the number of bits it was able to operate on
+per second. The size of the bit map is constant; the bitfield operations
+array is adjusted based on the capabilities of the processor. (See the
+section describing the auto-adjust feature of the benchmarks.)
+
+Analysis
+
+Optimized 486 code: Using the Watcom C/C++ 10.0 profiler, the Bitfield
+benchmark appears to spend all of its time in two routines: ToggleBitRun()
+(74% of the time) and DoBitFieldIteration() (24% of the time). We say
+"appears" because this is misleading, as we will explain.
+
+First, it is important to recall that the test performs one of three
+operations for each run of bits (see above). The routine ToggleBitRun()
+handles two of those three operations: setting a run of bits and clearing a
+run of bits. An if() statement inside ToggleBitRun() decides which of the
+two operations is performed. (Speed freaks will quite rightly point out that
+this slows the entire algorithm. ToggleBitRun() is called by a switch()
+statement which has already decided whether bits should be set or cleared;
+it's a waste of time to have ToggleBitRun() have to make that decision yet
+again.)
+
+DoBitFieldIteration() is the "outer" routine that calls ToggleBitRun().
+DoBitFieldIteration() also calls FlipBitRun(). This latter routine is the
+one that performs the third bitfield operation: complementing a run of bits.
+FlipBitRun() gets no "air time" at all (while DoBitFieldIteration() gets 24%
+of the time) simply because the compiler's optimizer recognizes that
+FlipBitRun() is only called by DoBitFieldIteration(), and is called only
+once. Consequently, the optimizer moves FlipBitRun() "inline", i.e., into
+DoBitFieldIteration(). This removes an unnecessary call/return cycle (and is
+probably part of the reason why the FlipBitRun() code gets 24% of the
+algorithm's time, instead of something closer to 30% of its time.)
+
+Within the routines, those lines of code that actually do the shifting, the
+and operations, and the or operations, consume time evenly. This should make
+for a good test of a processor's "bit twiddling" capabilities.
+
+680x0 Code (Macintosh CodeWarrior): The CodeWarrior profiler is function
+based. Consequently, it is impossible to produce a profile of machine
+instruction execution time. We can, however, get a good picture of how the
+algorithm divides its time among the various functions.
+
+Unlike the 486 compiler, the CodeWarrior compiler did not appear to collapse
+the FlipBitRun() routine into the outer DoBitFieldIteration() routine. (We
+don't know this for certain, of course. It's possible that the compiler
+would have done this had we not been profiling.)
+
+In any case, the time spent in the two "core" routines of the bitfield test
+is shown below:
+
+FlipBitRun() - 18031.2 microsecs (called 509 times)
+
+ToggleBitRun() - 50770.6 microsecs (called 1031 times)
+
+In terms of total time, FlipBitRun() takes about 35% of the time (it gets
+about 33% of the calls). Remember, ToggleBitRun() is a single routine that
+is called both to set and clear bits. Hence, ToggleBitRun() is called twice
+as often as FlipBitRun().
+
+We can conclude that time spent setting bits to 1, setting bits to 0, and
+changing the state of bits, is about equal; the load is balanced close to
+what we'd expect it to be, based on the structure of the algorithm.
+
+Porting Considerations
+
+The bitfield operations benchmark is dependent on the size of the long
+datatype. On most systems, this is 32 bits. However, on some of the newer
+RISC chips, a long can be 64 bits long. If your system does use 64-bit
+longs, you'll need to #define the symbol LONG64.
+
+If you are unsure of the size of a long in your system (some C compiler
+manuals make it difficult to discover), simply place an ALLSTATS=T line in
+the command file and run the benchmarks. This will cause the benchmark
+program to display (among other things) the size of the data types int,
+short, and long in bytes.
+
+BITFARRAYSIZE - Sets the number of longs in the bit map array. This number
+is fixed, and should not be altered. The bitfield test adjusts itself by
+adding more bitfield commands (see above), not by creating a larger bit map.
+
+Currently, there is no code added to test for correct execution. If you are
+concerned that your port was incorrect, you'll need to step through your
+favorite debugger and verify execution against the original source code.
+
+** I added a resetting of the random number generator, and a resetting
+** of the bitfield to each loop. Those operations are outside of the
+** timed loop, and should add to make the benchmark more consistent.
+** There also is now debugging information available. If you define
+** DEBUG then the program will write a file named "debugbit.dat",
+** which is the contents of the bitfield after the calibration loop of
+** 30 operations. You can compare this file with the file
+** "debugbit.good" that comes with the distribution.
+** Uwe F. Mayer <mayer@tux.edu>
+
+References
+
+None.
+
+Emulated Floating-point
+
+Description
+
+The emulated floating-point benchmark includes routines that are similar to
+those that would be executed whenever a system performs floating-point
+operations in the absence of a coprocessor. In general, this amounts to a
+mixture of integer instructions, including shift operations, integer
+addition and subtraction, and bit testing (among others).
+
+The benchmark itself is remarkably simple. The test builds three
+1-dimensional arrays and loads the first two up with random floating-point
+numbers. The arrays are then partitioned into 4 equal-sized groups, and the
+test proceeds by performing addition, subtraction, multiplication, and
+division -- one operation on each group. (For example, for the addition
+group, an element from the first array is added to the second array and the
+result is placed in the third array.)
+
+Of course, most of the work takes place inside the routines that perform the
+addition, subtraction, multiplication, and division. These routines operate
+on a special data type (referred to as an InternalFPF number) that -- though
+not strictly IEEE compliant -- carries all the necessary data fields to
+support an IEEE-compatible floating-point system. Specifically, an
+InternalFPF number is built up of the following fields:
+
+Type (indicates a NORMAL, SUBNORMAL, etc.)
+
+Mantissa sign
+
+Unbiased, signed 16-bit exponent
+
+4-word (16 bits each) mantissa.
+
+The emulated floating-point test reports its results in number of loops per
+second (where a "loop" is one pass through the arrays as described above).
+
+Finally, we are aware that this test could be on its way to becoming an
+anachronism. A growing number of systems are appearing that have
+coprocessors built into the main CPU. It's possible that floating-point
+emulation will one day be a thing of the past.
+
+Analysis
+
+Optimized 486 code (Watcom C/C++ 10.0): The algorithm's time is distributed
+across a number of routines. The distribution is:
+
+ShiftMantLeft1() - 60% of the time
+
+ShiftMantRight1() - 17% of the time
+
+DivideInternalFPF() - 14% of the time
+
+MultiplyInternalFPF() - 5% of the time.
+
+The first two routines are similar to one another; both shift bits about in
+a floating-point number's mantissa. It's reasonable that ShiftMantLeft1()
+should take a larger share of the system's time; it is called as part of the
+normalization process that concludes every emulated addition, subtraction,
+multiplication, and division.
+
+680x0 Code (Macintosh CodeWarrior): CodeWarrior's profiler is
+function-based; consequently, it isn't possible to get timing at the machine
+instruction level. However, the output to CodeWarrior's profiler has
+provided insight into the breakdown of time spent in various functions that
+forces us to rethink our 486 code analysis.
+
+Analyzing what goes on inside the emulated floating-point tests is a tough
+one to call because some of the routines that are part of the test are
+called by the function that builds the arrays. Consequently, a quick look at
+the profiler's output can be misleading; it's not obvious how much time a
+particular routine is spending in the test and how much time that same
+routine is spending setting up the test (an operation that does not get
+timed).
+
+Specifically, the routine that loads up the arrays with test data calls
+LongToInternalFPF() and DivideInternalFPF(). LongToInternalFPF() makes one
+call to normalize() if the number is not a true zero. In turn, normalize()
+makes an indeterminate number of calls to ShiftMantLeft1(), depending on the
+structure of the mantissa being normalized.
+
+What's worse, DivideInternalFPF() makes all sorts of calls to all kinds of
+important low-level routines such as Sub16Bits() and ShiftMantLeft1().
+Untangling the wiring of which routine is being called as part of the test,
+and which is being called as part of the setup could probably be done with
+the computer equivalent of detective work and spelunking, but in the
+interest of time we'll opt for approximation.
+
+Here's a breakdown of some of the important routines and their times:
+
+AddSubInternalFPF() - 1003.9 microsecs (called 9024 times)
+
+MultiplyInternalFPF() - 20143 microsecs (called 5610 times)
+
+DivideInternalFPF() - 18820.9 microsecs (called 3366 times).
+
+The 3366 calls to DivideInternalFPF() are timed calls, not setup calls --
+the profiler at least gives outputs of separate calls made to the same
+routine, so we can determine which call is being made by the benchmark, and
+which is being made by the setup routine. It turns out that the setup
+routine calls DivideInternalFPF() 30,000 times.
+
+Notice that though addition/subtraction are called most often,
+multiplication next, then finally division; the time spent in each is the
+reverse. Division takes the most time, then multiplication, finally
+addition/subtraction. (There's probably some universal truth lurking here
+somewhere, but we haven't found it yet.)
+
+Other routines, and their breakdown:
+
+Add16Bits() - 115.3 microsecs
+
+ShiftMantRight1() - 574.2 microsecs
+
+Sub16Bits() - 1762 microsecs
+
+StickyShiftRightMant() - 40.4 microsecs
+
+ShiftMantLeft1() - 17486.1 microsecs
+
+The times for the last three routines are suspect, since they are called by
+DivideInternalFPF(), and a large portion of their time could be part of the
+setup process. This is what leads us to question the results obtained in the
+486 analysis, since it, too, is unable to determine precisely who is calling
+whom.
+
+Porting Considerations
+
+Earlier versions of this benchmark were extremely sensitive to porting;
+particularly to the "endianism" of the target system. We have tried to
+eliminate many of these problems. The test is nonetheless more "sensitive"
+to porting than most others.
+
+Pay close attention to the following defines and typedefs. They can be found
+in the files EMFLOAT.H, NMGLOBAL.H, and NBENCH1.H:
+
+u8 - Stands for unsigned, 8-bit. Usually defined to be unsigned char.
+
+u16 - Stands for unsigned, 16-bit. Usually defined to be unsigned short.
+
+u32 - Stands for unsigned, 32-bit. Usually defined to be unsigned long.
+
+INTERNAL_FPF_PRECISION - Indicates the number of elements in the mantissa of
+an InternalFPF number. Should be set to 4.
+
+The exponent field of an InternalFPF number is of type short. It should be
+set to whatever minimal data type can hold a signed, 16-bit number.
+
+Other global definitions you will want to be aware of:
+
+CPUEMFLOATLOOPMAX - Sets the maximum number of loops the benchmark will
+attempt before flagging an error. Each execution of a loop in the emulated
+floating-point test is "non-destructive," since the test takes factors from
+two arrays, operates on the factors, and places the result in a third array.
+Consequently, the test makes more work for itself by increasing the number
+of times it passes through the arrays (# of loops). If the system exceeds
+the limit set by CPUEMFLOATLOOPMAX, it will signal an error.
+
+This value may be altered to suit your system; it will not affect the
+benchmark results (unless you reduce it so much the system can never
+generate enough loops to produce a good test run).
+
+EMFARRAYSIZE - Sets the size of the arrays to be used in the test. This
+value is the number of entries (InternalFPF numbers) per array. Currently,
+the number is fixed at 3000, and should not be altered.
+
+Currently, there is no means of testing correct execution of the benchmark
+other than via debugger. There are routines available to decode the internal
+floating point format and print out the numbers, but no formal correctness
+test has been constructed. (This should be available soon. -- 3/14/95 RG)
+
+** It now prints out the operations of 8 of the entries used in the
+** test. Assuming you leave EMFARRAYSIZE at 3000, your results should
+** look like the ones below. The number in front of the colon is the
+** index of the entry.
+**  
+**  	 2: (-1.1160E   0) + (-4.5159E   0) = -5.6320E   0
+**  	 6: (-4.4507E  -1) - (-8.2050E  -1) = +3.7543E  -1
+**  	10: (+1.2465E   0) * (+7.4667E  -1) = +9.3075E  -1
+**  	14: (-1.2781E   0) / (-1.7367E   0) = +7.3596E  -1
+**    2986: (-7.0390E   0) * (-2.0752E   0) = +1.4607E   1
+**    2990: (+8.3753E  -1) / (+2.3876E   1) = +3.5078E  -2
+**    2994: (-1.1393E   0) + (-1.6080E   1) = -1.7219E   1
+**    2998: (+7.2450E   0) - (-8.2654E  -1) = +8.0716E   0
+**
+** Uwe F. Mayer <mayer@tux.edu>
+
+References
+
+Microprocessor Programming for Computer Hobbyists, Neill Graham, Tab Books,
+Blue Ridge Summit, PA, 1977.
+
+Apple Numerics Manual, Second edition, Apple Computer, Addison-Wesley
+Publishing Co., Reading, MA, 1988.
+
+Fourier Series
+
+Description
+
+This is a floating-point benchmark designed primarily to exercise the
+trigonometric and transcendental functions of the system. It calculates the
+first n Fourier coefficients of the function (x+1)^x on the interval [0,2]. In
+this case, the function (x+1)^x is being treated as a cyclic waveform with a
+period of 2.
+
+The Fourier coefficients, when applied as factors to a properly constructed
+series of sine and cosine functions, allow you to approximate the original
+waveform. (In fact, if you can calculate all the Fourier coefficients --
+there'll be an infinite number -- you can reconstruct the waveform exactly).
+You have to calculate the coefficients via integration, and the algorithm
+does this using a simple trapezoidal rule for its numeric integration
+function.
+
+The upshot of all this is that it provides an exercise for the
+floating-point routines that calculate sine, cosine, and raising a number to
+a power. There are also some floating-point multiplications, divisions,
+additions, and subtractions mixed in.
+
+The benchmark reports its results as the number of coefficients calculated
+per second.
+
+As an additional note, we should point out that the performance of this
+benchmark is heavily dependent on how well-built the compiler's math library
+is. We have seen at least two cases where recompilation with new (and
+improved!) math libraries has resulted in two-fold and five-fold
+performance improvements. (Apparently, when a compiler gets moved to a new
+platform, the trigonometric and transcendental functions in the math
+libraries are among the last routines to be "hand optimized" for the new
+platform.) About all we can say about this is that whenever you run this
+test, verify that you have the latest and greatest math libraries.
+
+Analysis
+
+Optimized 486 code: The benchmark partitions its time almost evenly among
+the modules pow387, exp386, and trig387; giving between 25% and 28% of its
+time to each. This is based on profiling with the Watcom compiler running
+under Windows NT. These modules hold the routines that handle raising a
+number to a power and performing trigonometric (sine and cosine)
+calculations. For example, within trig387, time was nearly equally divided
+between the routine that calculates sine and the routine that calculates
+cosine.
+
+The remaining time (between 17% and 18%) was spent in the balance of the
+test. We noticed that most of that time occurred in the routine
+thefunction(). This is at the heart of the numerical integration routine the
+benchmark uses.
+
+Consequently, this benchmark should be a good test of the exponential and
+trigonometric capabilities of a processor. (Note that we recognize that the
+performance also depends on how well the compiler's math library is built.)
+
+680x0 Code (Macintosh CodeWarrior): The CodeWarrior profiler is function
+based, therefore it is impossible to get performance results for individual
+machine instructions. The CodeWarrior compiler is also unable to tell us how
+much time is spent within a given library routine; we can't see how much
+time gets spent executing the sin(), cos(), or pow() functions (which,
+unfortunately, was the whole idea behind the benchmark).
+
+About all we can glean from the results is that thefunction() takes about
+74% of the time in the test (this is where the heavy math calculations take
+place) while trapezoidintegrate() accounts for about 26% of the time on its
+own.
+
+Porting Considerations
+
+Necessarily, this benchmark is at the mercy of the efficiency of the
+floating-point support provided by whatever compiler you are using. It is
+recommended that, if you are doing the port yourself, you contact the
+designers of the compiler, and discuss with them what optimization switches
+should be set to produce the fastest code. (This sounds simple; usually it's
+not. Some systems let you decide between speed and true IEEE compliance.)
+
+As far as global definitions go, this benchmark is happily free of them. All
+the math is done using double data types. We have noticed that, on some Unix
+systems, you must be careful to include the correct math libraries.
+Typically, you'll discover this at link time.
+
+To test for correct execution of the benchmark: It's unlikely you'll need to
+do this, since the algorithm is so cut-and-dried. Furthermore, there are no
+explicit provisions made to verify the correctness. You can, however, either
+dip into your favorite debugger, or alter the code to print out the contents
+of the abase (which holds the A[i] terms) and bbase (which holds the B[i]
+terms) arrays as they are being filled (see routine DoFPUTransIteration).
+** This is exactly what I have done; it now prints out A[i] and B[i] data.
+** Uwe F. Mayer <mayer@tux.edu>
+Run the benchmark with a command file set to execute only the Fourier test,
+and examine the contents of the arrays. The first 100 are listed below.
+
+A[i]=
+   2.84 1.05 0.274 0.0824 0.0102 -0.024 -0.0426 -0.0536 -0.0605 -0.065
+-0.0679 -0.0698 -0.0709 -0.0715 -0.0717 -0.0715 -0.0711 -0.0704
+-0.0696 -0.0685 -0.0674 -0.0661 -0.0647 -0.0632 -0.0615 -0.0598 -0.058
+-0.0561 -0.0542 -0.0521 -0.0501 -0.0479 -0.0457 -0.0434 -0.0411
+-0.0387 -0.0363 -0.0338 -0.0313 -0.0288 -0.0262 -0.0236 -0.0209
+-0.0183 -0.0156 -0.0129 -0.0102 -0.00744 -0.0047 -0.00196 0.000794
+0.00355 0.0063 0.00905 0.0118 0.0145 0.0172 0.0199 0.0226 0.0253
+0.0279 0.0305 0.0331 0.0357 0.0382 0.0407 0.0431 0.0455 0.0479 0.0502
+0.0525 0.0547 0.0569 0.059 0.061 0.063 0.0649 0.0668 0.0686 0.0703
+0.072 0.0736 0.0751 0.0765 0.0779 0.0792 0.0804 0.0816 0.0826 0.0836
+0.0845 0.0853 0.0861 0.0867 0.0873 0.0877 0.0881 0.0884 0.0887 0.0888
+
+B[i]= 
+(undefined) -1.88 -1.16 -0.806 -0.61 -0.487 -0.402 -0.34 -0.293 -0.255
+-0.224 -0.199 -0.177 -0.158 -0.141 -0.126 -0.113 -0.101 -0.0901
+-0.0802 -0.071 -0.0625 -0.0546 -0.0473 -0.0404 -0.034 -0.0279 -0.0222
+-0.0168 -0.0117 -0.00693 -0.00238 0.00193 0.00601 0.00988 0.0135 0.017
+0.0203 0.0234 0.0263 0.0291 0.0317 0.0341 0.0364 0.0385 0.0405 0.0424
+0.0441 0.0457 0.0471 0.0484 0.0496 0.0507 0.0516 0.0525 0.0532 0.0538
+0.0543 0.0546 0.0549 0.055 0.0551 0.055 0.0549 0.0546 0.0543 0.0538
+0.0533 0.0527 0.052 0.0512 0.0503 0.0493 0.0483 0.0472 0.046 0.0447
+0.0434 0.042 0.0405 0.039 0.0374 0.0358 0.0341 0.0323 0.0305 0.0287
+0.0268 0.0249 0.023 0.021 0.019 0.0169 0.0149 0.0128 0.0107 0.00857
+0.00644 0.0043 0.00215
+
+Note that there is no B[0] coefficient. If the above numbers are in the
+arrays shown, you can feel pretty confident that the benchmark is working
+properly.
+
+References
+
+Engineering and Scientific Computations in Pascal, Lawrence P. Huelsman,
+Harper & Row, New York, 1986.
+
+Assignment Algorithm
+
+Description
+
+This test is built on an algorithm with direct application to the business
+world. The assignment algorithm solves the following problem: Say you have X
+machines and Y jobs. Any of the machines can do any of the jobs; however, the
+machines are sufficiently different so that the cost of doing a particular
+job can vary depending on what machine does it. Furthermore, the jobs are
+sufficiently different that the cost varies depending on which job a given
+machine does. You therefore construct a matrix; machines are the rows, jobs
+are the columns, and the [i,j] element of the array is the cost of doing the
+jth job on the ith machine. How can you assign the jobs so that the cost of
+completing them all is minimal? (This also assumes that one machine does one
+job.)
+
+Did you get that?
+
+The assignment algorithm benchmark is largely a test of how well the
+processor handles problems built around array manipulation. It is not a
+floating-point test; the "cost matrix" built by the algorithm is simply a 2D
+array of long integers. This benchmark considers an iteration to be a run of
+the assignment algorithm on a 101 x 101 - element matrix. It reports its
+results in iterations per second.
+
+Analysis
+
+Optimized 486 code (Watcom C/C++ 10.0): There are numerous loops within the
+assignment algorithm. The development system we were using (Watcom C/C++
+10.0) appears to have a fine time unrolling many of them. Consequently, it
+is difficult to pin down the execution impact of single lines (as in, for
+example, the numeric sort benchmark).
+
+On the level of functions, the benchmark spends around 70% of its time in
+the routine first_assignments(). This is where a) lone zeros in rows and
+columns are found and selected, and b) a choice is made between duplicate
+zeros. Around 23% of the time is spent in the second_assignments() routine
+where (if first_assignments() fails) the matrix is partitioned into smaller
+submatrices.
+
+Overall, we did a tally of instruction mix execution. The approximate
+breakdowns are:
+
+move - 38%
+
+conditional jump - 12%
+
+unconditional jump - 11%
+
+comparison - 14%
+
+math/logical/shift - 24%
+
+Many of the move instructions that appeared to consume the most time
+were referencing items on the local stack frame. This required an
+indirect reference through EBP, plus a constant offset to resolve the
+address.
+
+This should be a good exercise of a cache, since operations in the
+first_assignments() routine require both row-wise and column-wise movement
+through the array. Note that the routine could be made more "severe" by
+changing the assignedtableau[][] array to an array of unsigned char --
+forcing fetches on byte boundaries.
+
+680x0 Code (CodeWarrior): The CodeWarrior profiler is function-based.
+Consequently, it's not possible to determine what's going on at the machine
+instruction level. We can, however, get a good idea of how much time the
+algorithm spends in each routine. The important routines are broken down as
+follows:
+
+calc_minimum_costs() - approximately 0.3% of the time
+
+(250 microsecs)
+
+first_assignments() - approximately 79% of the time
+
+(96284.6 microsecs)
+
+second_assignments() - approximately 19% of the time
+
+(22758 microsecs)
+
+These times are approximate; some time is spent in the Assignment() routine
+itself.
+
+These figures are reasonably close to those of the 486, at least in terms of
+the mixture of time spent in a particular routine. Hence, this should still
+be a good test of system cache (as described in the preceding section),
+given the behavior of the first_assignments() routine.
+
+Porting Considerations
+
+The assignment algorithm test is purely an integer benchmark, and requires
+no special data types that might be affected by ports to different
+architectures. There are only two global constants that affect the
+algorithm:
+
+ASSIGNROWS and ASSIGNCOLS - These set the size of the assignment array. Both
+are defined to be 101 (so, the array that is benchmarked is a 101 x 101
+-element array of longs). These values should not be altered.
+
+To test for correct execution of the benchmark: #define the symbol DEBUG,
+recompile, set up a command file that executes only the assignment
+algorithm, and run the benchmark. (You may want to pipe the output through a
+paging filter, like the more program.) The act of defining DEBUG will enable
+a section of code that displays the assigned columns on a per-row basis. If
+the benchmark is working properly, the numbers to be displayed
+should be:
+
+R000: 056 R001: 066 R002: 052 R003: 065 R004: 043 R005: 023 R006: 016
+R007: 077 R008: 095 R009: 004 R010: 064 R011: 076 R012: 078 R013: 091
+R014: 013 R015: 029 R016: 044 R017: 014 R018: 041 R019: 042 R020: 020
+R021: 071 R022: 024 R023: 017 R024: 055 R025: 040 R026: 070 R027: 025
+R028: 031 R029: 019 R030: 073 R031: 002 R032: 047 R033: 009 R034: 035
+R035: 045 R036: 005 R037: 063 R038: 081 R039: 039 R040: 087 R041: 008
+R042: 053 R043: 093 R044: 049 R045: 092 R046: 061 R047: 046 R048: 026
+R049: 034 R050: 088 R051: 000 R052: 028 R053: 018 R054: 072 R055: 021
+R056: 037 R057: 082 R058: 006 R059: 058 R060: 096 R061: 068 R062: 069
+R063: 054 R064: 057 R065: 086 R066: 097 R067: 084 R068: 099 R069: 051
+R070: 098 R071: 003 R072: 074 R073: 062 R074: 080 R075: 033 R076: 011
+R077: 094 R078: 012 R079: 050 R080: 010 R081: 038 R082: 089 R083: 059
+R084: 022 R085: 079 R086: 015 R087: 007 R088: 075 R089: 083 R090: 060
+R091: 048 R092: 032 R093: 067 R094: 001 R095: 030 R096: 027 R097: 085
+R098: 090 R099: 036 R100: 100
+
+These are the column choices for each row made by the algorithm. If
+you see these numbers displayed, the algorithm is working correctly.
+
+*** The original debugging information was incorrect, as it not only
+*** displayed the chosen columns, but also displayed eliminated columns.
+*** Changed to show all 101 entries. Uwe F. Mayer <mayer@tux.edu>
+
+References
+
+Quantitative Decision Making for Business, Gordon, Pressman, and Cohn,
+Prentice-Hall, Englewood Cliffs, NJ, 1990.
+
+Quantitative Decision Making, Guiseppi A. Forgionne, Wadsworth Publishing
+Co., California, 1986.
+
+Huffman Compression
+
+Description
+
+This is a compression algorithm that -- while helpful for some time as a
+text compression technique -- has since fallen out of fashion on account of
+the superior performance by algorithms such as LZW compression. It is,
+however, still used in some graphics file formats in one form or another.
+
+The benchmark consists of three parts:
+
+Building a "Huffman Tree" (explained below),
+
+Compression, and
+
+Decompression.
+
+A "Huffman Tree" is a special data structure that guides the compression and
+decompression processes. If you were to diagram one, it would look like a
+large binary tree (i.e., two branches per each node). Describing its
+function in detail is beyond the scope of this paper (see the references for
+more information). We should, however, point out that the tree is built from
+the "bottom up"; and the procedure for constructing it requires that the
+algorithm scan the uncompressed buffer, building a frequency table for all
+the characters appearing in the buffer. (This version of the Huffman
+algorithm compresses byte-at-a-time, though there's no reason why the same
+principle could not be applied to tokens larger than one byte.)
+
+Once the tree is built, text compression is relatively straightforward. The
+algorithm fetches a character from the uncompressed buffer, navigates the
+tree based on the character's value, and produces a bit stream that is
+concatenated to the compressed buffer. Decompression is the reverse of that
+process. (We recognize that we are simplifying the algorithm. Again, we
+recommend you check the references.)
+
+The Huffman Compression benchmark considers an iteration to be the three
+operations described above, performed on an uncompressed text buffer of 5000
+bytes. It reports its results in iterations per second.
+
+Analysis
+
+Optimized 486 code (Watcom C/C++ 10.0): The Huffman compression algorithm --
+tree building, compression, and decompression -- is written as a single,
+large routine: DoHuffIteration(). All the benchmark's time is spent within
+that routine.
+
+Components of DoHuffIteration() that consume the most time are those that
+perform the compression and decompression.
+
+The code for performing the compression spends most of its time (accounting
+for about 13%) constructing the bit string for a character that is being
+compressed. It does this by seeking up the tree from a leaf, emitting 1's
+and 0's in the process, until it reaches the root. The stream of 1's and 0's
+are loaded into a character array; the algorithm then walks "backward"
+through the array, setting (or clearing) bits in the compression buffer as
+it goes.
+
+Similarly, the decompression portion takes about 12% of the time as the
+algorithm pulls bits out of the compressed buffer -- using them to navigate
+the Huffman tree -- and reconstructs the original text.
+
+680x0 Code (Macintosh CodeWarrior): CodeWarrior's profiler is function
+based. Consequently, it's impossible to get performance scores for
+individual machine instructions. Furthermore, as mentioned above, the
+Huffman compression algorithm is written as a monolithic routine. This makes
+the results from the CodeWarrior profiler all the more sparse.
+
+We can at least point out that the lowmost routines (GetCompBit() and
+SetCompBit()) that read and write individual bits, though called nearly 13
+million times each, account for only 0.7% and 0.3% of the total time,
+respectively.
+
+Porting Considerations
+
+The Huffman algorithm relies on no special data types. It should port
+readily. Global constants of interest include:
+
+EXCLUDED - This is a large, positive value. Currently it is set to 32000,
+and should be left alone. Basically, this is a token that the system uses to
+indicate an excluded character (one that does not appear in the plain-text).
+It is set to a ridiculously high value that will never appear in the
+pointers of the tree during normal construction.
+
+MAXHUFFLOOPS - This is another one of those "governor" constants. The
+Huffman benchmark creates more work for itself by doing multiple
+compression/decompression loops. This constant sets the maximum number of
+loops it will attempt per iteration before it gives up. Currently, it is set
+to 50000. Though it is unlikely you'll ever need to modify this value, you
+can increase it if your machine is too fast for the adjustment algorithm. Do
+not reduce the number.
+
+HUFFARRAYSIZE - This value sets the size of the plain-text array to be
+compressed. You can override this value with the command file to see how
+well your machine performs for larger or smaller arrays. The subsequent
+results, however, are invalid for comparison with other systems.
+
+To test for correct execution of the benchmark: #define the symbol DEBUG,
+recompile, build a command file that executes only the Huffman compression
+algorithm, and run the benchmark. Defining DEBUG will enable a section of
+code that verifies the decompression as it takes place (i.e., the routine
+compares -- character at a time -- the uncompressed data with the original
+plain-text). If there's an error, the program will repeatedly display: "Error
+at textoffset xxx". 
+
+** If everything is correct it will emit quite a few "Huffman: OK" messages.
+**
+** I added a resetting of the random number generator, outside of the
+** timed loop, and a resetting of the Huffman tree, inside of the
+** timed loop. That should help to make the benchmark more consistent.
+** The program did originally only reset half of the tree, which led
+** to runtime errors on some systems. The effect on the benchmark
+** should be negligible, and in fact comes out as being of the order
+** of less than 1% on my test system.
+** Uwe F. Mayer <mayer@tux.edu>
+
+References
+
+Data Compression: Methods and Theory, James A. Storer, Computer Science
+Press, Rockville, MD, 1988.
+
+An Introduction to Text Processing, Peter D. Smith, MIT Press, Cambridge,
+MA, 1990.
+
+IDEA Encryption
+
+Description
+
+This is another benchmark based on a "higher-level" algorithm; "higher
+-level" in the sense that it is more complex than a sort or a search
+operation.
+
+Security -- and, therefore, cryptography -- are becoming increasingly
+important issues in the computer realm. It's likely that more and more
+machines will be running routines like the IDEA encryption algorithm. (IDEA
+is an acronym for the International Data Encryption Algorithm.)
+
+A good description of the algorithm (and, in fact, the reference we used to
+create the source code for the test) can be found in Bruce Schneier's
+exhaustive exploration of encryption, "Applied Cryptography" (see
+references). To quote Mr. Schneier: "In my opinion, it [IDEA] is the best
+and most secure block algorithm available to the public at this time."
+
+IDEA is a symmetrical, block cipher algorithm. Symmetrical means that the
+same routine used to encrypt the data also decrypts the data. A block cipher
+works on the plain-text (the message to be encrypted) in fixed, discrete
+chunks. In the case of IDEA, the algorithm encrypts and decrypts 64 bits at
+a time.
+
+As pointed out in Schneier's book, there are three operations that the IDEA
+uses to do its work:
+
+XOR (exclusive-or)
+
+Addition modulo 2^16 (ignoring overflow)
+
+Multiplication modulo 2^16+1 (ignoring overflow).
+
+IDEA requires a key of 128 bits. However, keys and blocks are further
+subdivided into 16-bit chunks, so that any given operation within the IDEA
+encryption is performed on 16-bit quantities. (This is one of the many
+advantages of the algorithm: it is efficient even on 16-bit processors.)
+
+The IDEA benchmark considers an "iteration" to be an encryption and
+decryption of a buffer of 4000 bytes. The test actually builds 3 buffers:
+The first to hold the original plain-text, the second to hold the encrypted
+text, and the third to hold the decrypted text (the contents of which should
+match that of the first buffer). It reports its results in iterations per
+second.
+
+Analysis
+
+Optimized 486 code: The algorithm actually spends most of its time (nearly
+75%) within the mul() routine, which performs the multiplication modulo
+2^16+1. This is a super-simple routine, consisting primarily of if
+statements, shifts, and additions.
+
+The remaining time (around 24%) is spent in the balance of the cipher_idea()
+routine. (Note that cipher_idea() calls the mul() routine frequently; so,
+the 24% is comprised of the other lines of cipher_idea()). cipher_idea() is
+littered with simple pointer-fetch-and-increment operations, some addition,
+and some exclusive-or operations.
+
+Note that IDEA's exercise of system capabilities probably doesn't extend
+beyond testing simple integer math operations. Since the buffer size is set
+to 4000 bytes, the test will run entirely in processor cache on most
+systems. Even the cache won't get a heavy "internal" workout, since the
+algorithm proceeds sequentially through each buffer from lower to higher
+addresses.
+
+680x0 code (Macintosh CodeWarrior): CodeWarrior's profiler is function
+based; consequently, it is impossible to determine execution profiles for
+individual machine instructions. We can, however, get an idea of how much
+time is spent in each routine.
+
+As with Huffman compression, the IDEA algorithm is written monolithically --
+a single, large routine does most of the work. However, a special
+multiplication routine, mul(), is frequently called within each
+encryption/decryption iteration (see above).
+
+In this instance, the results for the 68K system diverge widely from those
+of the 486 system. The CodeWarrior profiler shows the mul() routine as
+taking only 4% of the total time in the benchmark, even though it is called
+over 20 million times. The outer routine is called 600,000 times, and
+accounts for about 96% of the whole program's entire time.
+
+Porting Considerations
+
+Since IDEA does its work in 16-bit units, it is particularly important that
+u16 be defined to whatever datatype provides an unsigned 16-bit integer on
+the test platform. Usually, unsigned short works for this. (You can verify
+the size of a short by running the benchmarks with a command file that
+includes ALLSTATS=T as one of the commands. This will cause the benchmark
+program to display a message that tells the size of the int, short, and long
+data-types in bytes.)
+
+Also, the mul() routine in IDEA requires the u32 datatype to define an
+unsigned 32-bit integer. In most cases, unsigned long works.
+
+To test for correct execution of the benchmark: #define the symbol DEBUG,
+recompile, build a command file that executes only the IDEA algorithm, and
+run the benchmark. Defining DEBUG will enable a section of code that
+compares the original plain-text with the output of the test. (Remember, the
+benchmark performs both encryption and decryption.) If the algorithm has
+failed, the output will not match the input, and you'll see "IDEA Error"
+messages all over your display.
+
+References
+
+Applied Cryptography: Protocols, Algorithms, and Source Code in C, Bruce
+Schneier, John Wiley & Sons, Inc., New York, 1994.
+
+Neural Net
+
+Description
+
+The Neural Net simulation benchmark is based on a simple back-propagation
+neural network presented by Maureen Caudill as part of a BYTE article that
+appeared in the October, 1991 issue (see "Expert Networks" in that issue).
+The network involved is a simple 3-layer (input neurodes, middle-layer
+neurodes, and output neurodes) network that accepts a number of 5 x 7 input
+patterns and produces a single 8-bit output pattern.
+
+The test involves sending the network an input pattern that is the 5 x 7
+"image" of a character (1's and 0's -- 1's representing lit pixels, 0's
+representing unlit pixels), and teaching it the 8-bit ASCII code for the
+character.
+
+A thorough description of how the back propagation algorithm works is beyond
+the scope of this paper. We recommend you search through the references
+given at the end of this paper, particularly Ms. Caudill's article, for
+detailed discussion. In brief, the benchmark is primarily an exercise in
+floating-point operations, with some frequent use of the exp() function. It
+also performs a great deal of array references, though the arrays in use are
+well under 300 elements each (and less than 100 in most cases).
+
+The Neural Net benchmark considers an iteration to be a single learning
+cycle. (A "learning cycle" is defined as the time it takes the network to be
+able to associate all input patterns to the correct output patterns within a
+specified tolerance.) It reports its results in iterations per second.
+
+Analysis
+
+Optimized 486 code: The forward pass of the network (i.e., calculating
+outputs from inputs) utilizes a sigmoid function. This function has, at its
+heart, a call to the exp() library routine. A small but non-negligible
+amount of time is spent in that function (a little over 5% for the 486
+system we tested).
+
+The learning portion of the network benchmark depends on the derivative of
+the sigmoid function, which turns out to require only multiplications and
+subtractions. Consequently, each learning pass exercises only simple
+floating-point operations.
+
+If we divide the time spent in the test into two parts -- forward pass and
+backward pass (the latter being the learning pass) -- then the test appears
+to spend the greatest part of its time in the learning phase. In fact, most
+time is spent in the adjust_mid_wts() routine. This is the part of the
+routine that alters the weights on the middle layer neurodes. (It accounts
+for over 40% of the benchmark's time.)
+
+680x0 Code (Macintosh CodeWarrior): Though CodeWarrior's profiler is
+function based, the neural net benchmark is highly modular. We can therefore
+get a good breakdown of routine usage:
+
+worst_pass_error() - 304 microsecs (called 4680 times)
+
+adjust_mid_wts() - 83277 microsecs (called 46800 times)
+
+adjust_out_wts() - 17394 microsecs (called 46800 times)
+
+do_mid_error() - 11512 microsecs (called 46800 times)
+
+do_out_error() - 3002 microsecs (called 46800 times)
+
+do_mid_forward() - 49559 microsecs (called 46800 times)
+
+do_out_forward() - 20634 microsecs (called 46800 times)
+
+Again, most time was spent in adjust_mid_wts() (as on the 486), accounting
+for almost twice as much time as do_mid_forward().
+
+Porting Considerations
+
+The Neural Net benchmark is not dependent on any special data types. There
+are a number of global variables and arrays that should not be altered in
+any way. Most importantly, the #defines found in NBENCH1.H under the Neural
+Net section should not be changed. These control not only the number of
+neurodes in each layer; they also include constants that govern the learning
+processes.
+
+Other globals to be aware of:
+
+MAXNNETLOOPS - This constant simply sets the upper limit on the number of
+training loops the test will permit per iteration. The Neural Net benchmark
+adjusts its workload by re-teaching itself over and over (each time it
+begins a new training session, the network is "cleared" -- loaded with
+random values). It is unlikely you will ever need to modify this constant.
+
+inpath - This string pointer is set to the path from which the neural net's
+input data is read. It is currently hardwired to "NNET.DAT". You shouldn't
+have to change this name, unless your file system requires directory
+information as part of the path.
+
+Note that the Neural Net benchmark is the only test that requires an
+external data file. The contents of the file are listed in an attachment to
+this paper. You should use the attachment to reconstruct the file should it
+become lost or corrupted. Any changes to the file will invalidate the test
+results.
+
+To test for correct execution of the benchmark: #define the symbol DEBUG,
+recompile, build a command file that executes only the Neural Net test, and
+run the benchmark. Defining DEBUG will enable a section of code that
+displays how many passes through the learning process were required for the
+net to learn. It should learn in 780 passes.
+
+References
+
+"Expert Networks," Maureen Caudill, BYTE Magazine, October, 1991.
+
+Simulating Neural Networks, Norbert Hoffmann, Verlag Vieweg, Wiesbaden,
+1994.
+
+Signal and Image Processing with Neural Networks, Timothy Masters, John
+Wiley and Sons, New York, 1994.
+
+Introduction to Neural Networks, Jeannette Stanley, California Scientific
+Software, CA, 1989.
+
+LU Decomposition
+
+Description
+
+LU Decomposition is an algorithm that can be used as the heart of a program
+for solving linear equations. Suppose you have a matrix A. LU Decomposition
+determines the matrices L and U such that
+
+L . U = A
+
+where L is a lower triangular matrix and U is an upper triangular matrix. (A
+lower triangular matrix has nonzero elements only on the main diagonal and
+below. An upper triangular matrix has nonzero elements only on the main
+diagonal and above.)
+
+Without going into the mathematical details too deeply, having the L and U
+matrices makes the solution of linear equations (i.e., equations of the form
+A . x = b) quite easy. It turns out that you can also use LU decomposition
+to determine matrix inverses and determinants.
+
+The algorithm used in the benchmarks was derived from Numerical Recipes in
+Pascal (there is a C version of the book, which we did not have on hand), a
+book we heartily recommend to anyone serious about mathematical and
+scientific computing. The authors are approving of LU decomposition as a
+means of solving linear equations, pointing out that their version (which
+makes use of what we would have to call "Crout's method with partial
+implicit pivoting") is a factor of 3 better than one of their Gauss-Jordan
+routines, a factor of 1.5 better than another. They go on to demonstrate the
+use of LU decomposition for iterative improvement of linear equation
+solutions.
+
+The benchmark begins by creating a "solvable" linear system. This is easily
+done by loading up the column vector b with random integers, then
+initializing A with an identity matrix. The equations are then "scrambled"
+by either multiplying a row by a constant, or adding one row to another. The
+scrambled matrices are handed to the LU algorithm.
+
+The LU Decomposition benchmark considers a single iteration to be the
+solution of one set of equations (the size of A is fixed at 101 x 101
+elements). It reports its results in iterations per second.
+
+Analysis
+
+Optimized 486 code (Watcom C/C++ 10.0): The entire algorithm consists of two
+parts: the LU decomposition itself, and the back substitution algorithm that
+builds the solution vector. The majority of the algorithm's time takes place
+within the former; the algorithm that builds the L and U matrices (this
+takes place in routine ludcmp()).
+
+Within ludcmp(), there are two extremely tight for loops forming the heart
+of Crout's algorithm that consume the majority of the time. The loops are
+"tight" in that they each consist of only one line of code; in both cases,
+the line of code is a "multiply and accumulate" operation (actually, it's
+sort of a multiply and de-accumulate, since the result of the multiplication
+is subtracted, not added).
+
+In both cases, the items multiplied are elements from the A array; and one
+factor's row index is varying more rapidly, while another factor's column
+index is varying more rapidly.
+
+Note that this is a good overall test of floating-point operations within
+matrices. Most of the math is floating-point; primarily additions,
+subtractions, and multiplications (only a few divisions).
+
+680x0 Code (Macintosh CodeWarrior): CodeWarrior's profiler is function
+based. It is therefore impossible to determine execution profiles at the
+machine-code level. The profiler does, however, allow us to determine how
+much time the benchmark spends in each routine. This breakdown is as
+follows:
+
+lusolve() - 3.4 microsecs (about 0% of the time)
+
+lubksb() - 1198 microsec (about 2% of the time)
+
+ludcmp() - 63171 microsec (about 91% of the time)
+
+The above percentages are for the whole program. Consequently, as a portion
+of actual benchmark time, the amount attributed to each will be slightly
+larger (though the proportions will remain the same).
+
+Since ludcmp() performs the actual LU decomposition, this is exactly where
+we'd want the benchmark to spend its time. The lubksb() routine calls
+ludcmp(), using the resulting matrix to "back-solve" the linear equation.
+
+Porting Considerations
+
+The LU Decomposition routine requires no special data types, and is immune
+to byte ordering. It does make use of a typedef (LUdblptr) that includes an
+embedded union; this allows the benchmark to "coerce" a pointer to double
+into a pointer to a 2D array of double. This arrangement has not caused
+problems with the compilers we have tested to date.
+
+Other constants and globals to be aware of:
+
+LUARRAYROWS and LUARRAYCOLS - These constants set the size of the
+coefficient matrix, A. They cannot be altered by command file. In fact, you
+shouldn't alter them at all, or your results will be invalid. Currently,
+they are both set to 101.
+
+MAXLUARRAYS - This is another "governor" constant. The algorithm performs
+dynamic workload adjustment by building more and more arrays to solve per
+timing round. This sets the maximum upper limit of arrays that it will
+build. Currently, it is set to 1000, which should be more than enough for
+the reasonable future (1000 arrays of 101 x 101 floating-point doubles would
+require somewhere around 80 megabytes of RAM -- and that's not counting the
+column vectors).
+
+To test for correct execution of the benchmark: Currently, there is no
+simple technique for doing this. You can, however, either use your favorite
+debugger (or embed a printf() statement) at the conclusion of the lubksb()
+routine. When this routine concludes, the array b will hold the solution
+vector. These items will be stored as floating-point doubles, and the first
+14 are (with rounding):
+
+46 20 23 22 85 86 97 95 8 89 75 67 6 86
+
+If you find these numbers as the first 14 in the array b[], then you're
+virtually guaranteed that the algorithm is working correctly.
+
+*** The above is not correct, as the initial matrix is not the identity,
+*** but a matrix with random nonzero entries on the diagonal (they have
+*** altered the algorithm since they wrote the documentation).
+*** I changed the output of the debugging routine, it now prints first
+*** what the array b should hold (as righthand side divided by diagonal
+*** entry), and then it prints what the array b does hold after the
+*** decomposition has been done to compute the solution of the system. If
+*** you get the same, then fine.
+*** And, by the way, my original right hand sides are
+***  46  23  85  97   8  75   6  81  88  76   6  84  31  53   2 ...
+*** and the diagonal entries are
+*** 520 922 186 495  89 267 786 571 175 600 738 321 897 541 859 ...
+*** You notice that one has every other number of the original sequence.
+*** This is due to BYTE's change of the algorithm, as they now also use the
+*** random number generator to generate the diagonal elements.
+*** Here is the complete set of data:
+*** 46/520=0.09  23/922=0.02  85/186=0.46   97/495=0.20  8/89=0.09
+*** 75/267=0.28  6/786=0.01   81/571=0.14   88/175=0.50  76/600=0.13
+*** 6/738=0.01   84/321=0.26  31/897=0.03   53/541=0.10  2/859=0.00
+*** 86/92=0.93   51/121=0.42  29/248=0.12   51/789=0.06  84/6=14.00
+*** 21/180=0.12  33/48=0.69   2/899=0.00    12/820=0.01  69/372=0.19
+*** 59/809=0.07  74/18=4.11   40/788=0.05   39/56=0.70   86/91=0.95
+*** 33/878=0.04  82/165=0.50  42/561=0.07   8/274=0.03   84/694=0.12
+*** 32/352=0.09  25/969=0.03  59/816=0.07   33/112=0.29  5/125=0.04
+*** 89/740=0.12  7/223=0.03   54/994=0.05   33/80=0.41   55/676=0.08
+*** 6/524=0.01   36/544=0.07  21/160=0.13   58/596=0.10  15/717=0.02
+*** 84/311=0.27  98/530=0.18  46/713=0.06   41/233=0.18  73/640=0.11
+*** 40/343=0.12  72/586=0.12  100/965=0.10  59/764=0.08  37/866=0.04
+*** 27/682=0.04  3/652=0.00   41/352=0.12   87/786=0.11  45/79=0.57
+*** 83/761=0.11  41/817=0.05  46/209=0.22   78/930=0.08  85/210=0.40
+*** 80/756=0.11  18/931=0.02  30/669=0.04   47/127=0.37  85/891=0.10
+*** 66/364=0.18  83/955=0.09  58/637=0.09   58/778=0.07  82/288=0.28
+*** 42/540=0.08  76/290=0.26  59/36=1.64    29/463=0.06  63/476=0.13
+*** 6/340=0.02   73/341=0.21  59/737=0.08   81/492=0.16  98/443=0.22
+*** 58/32=1.81   53/562=0.09  54/263=0.21   46/367=0.13  58/390=0.15
+*** 96/845=0.11  30/746=0.04  2/687=0.00    28/849=0.03  84/180=0.47
+*** 85/382=0.22
+*** Uwe F. Mayer <mayer@tux.edu>
+
+References
+
+Numerical Recipes in Pascal: The Art of Scientific Computing, Press,
+Flannery, Teukolsky, Vetterling, Cambridge University Press, New York, 1989.
diff --git a/debugbit.good.gz b/debugbit.good.gz
new file mode 100644
index 0000000..fdc893e
Binary files /dev/null and b/debugbit.good.gz differ
diff --git a/emfloat.c b/emfloat.c
new file mode 100644
index 0000000..5e73890
--- /dev/null
+++ b/emfloat.c
@@ -0,0 +1,1343 @@
+/*
+** emfloat.c
+** Source for emulated floating-point routines.
+** BYTEmark (tm)
+** BYTE's Native Mode Benchmarks
+** Rick Grehan, BYTE Magazine.
+**
+** Created:
+** Last update: 3/95
+**
+** DISCLAIMER
+** The source, executable, and documentation files that comprise
+** the BYTEmark benchmarks are made available on an "as is" basis.
+** This means that we at BYTE Magazine have made every reasonable
+** effort to verify that the there are no errors in the source and
+** executable code.  We cannot, however, guarantee that the programs
+** are error-free.  Consequently, McGraw-HIll and BYTE Magazine make
+** no claims in regard to the fitness of the source code, executable
+** code, and documentation of the BYTEmark.
+**  Furthermore, BYTE Magazine, McGraw-Hill, and all employees
+** of McGraw-Hill cannot be held responsible for any damages resulting
+** from the use of this code or the results obtained from using
+** this code.
+*/
+
+
+#include <stdio.h>
+#include <string.h>
+#include "nmglobal.h"
+#include "emfloat.h"
+
+/*
+** Floating-point emulator.
+** These routines are only "sort of" IEEE-compliant.  All work is
+** done using an internal representation.  Also, the routines do
+** not check for many of the exceptions that might occur.
+** Still, the external formats produced are IEEE-compatible,
+** with the restriction that they presume a little-endian machine
+** (though the endianness will not affect the performance).
+**
+** Some code here was based on work done by Steve Snelgrove of
+** Orem, UT.  Other code comes from routines presented in
+** the long-ago book: "Microprocessor Programming for
+** Computer Hobbyists" by Neill Graham.
+*/
+
+/**************************
+** SetupCPUEmFloatArrays **
+***************************
+** Fill the operand arrays for the emulated floating-point test.
+** The random number generator is reset first so every run sees
+** the same data.  For each index one random numerator is drawn and
+** divided by two separately drawn denominators (each a random
+** value plus one); the quotients go to abase[i] and bbase[i].
+** cbase is not touched here: it is overwritten by the benchmark
+** itself.
+*/
+void SetupCPUEmFloatArrays(InternalFPF *abase,
+                InternalFPF *bbase,
+                InternalFPF *cbase,
+                ulong arraysize)
+{
+extern int32 randnum(int32 lngval);
+InternalFPF numerator;          /* Shared dividend for both arrays */
+InternalFPF denominator;        /* Divisor, redrawn for each array */
+InternalFPF *aslot=abase;
+InternalFPF *bslot=bbase;
+ulong remaining;
+
+/* Reset the generator so things repeat (added by Uwe F. Mayer). */
+randnum((int32)13);
+
+for(remaining=arraysize;remaining>0;remaining--,aslot++,bslot++)
+{       /* The order of the randwc() calls fixes the data set. */
+        Int32ToInternalFPF(randwc((int32)50000),&numerator);
+        Int32ToInternalFPF(randwc((int32)50000)+(int32)1,&denominator);
+        DivideInternalFPF(&numerator,&denominator,aslot);
+        Int32ToInternalFPF(randwc((int32)50000)+(int32)1,&denominator);
+        DivideInternalFPF(&numerator,&denominator,bslot);
+}
+return;
+}
+
+/***********************
+** DoEmFloatIteration **
+************************
+** Run "loops" timed passes over the arrays, storing each result of
+** abase[i] op bbase[i] in cbase[i] (op is picked by i modulo 16).
+** Returns whatever StopStopwatch() reports for the elapsed time.
+*/
+ulong DoEmFloatIteration(InternalFPF *abase,
+                InternalFPF *bbase,
+                InternalFPF *cbase,
+                ulong arraysize, ulong loops)
+{
+ulong elapsed;          /* For the stopwatch */
+static uchar jtable[16] = {0,0,0,0,1,1,1,1,2,2,2,2,2,3,3,3};
+ulong i;
+#ifdef DEBUG
+int number_of_loops;
+#endif
+/*
+** Begin timing
+*/
+elapsed=StartStopwatch();
+#ifdef DEBUG
+number_of_loops=loops-1; /* value "loops" holds during the first pass */
+#endif
+
+/*
+** Each pass through the array performs operations in
+** the following ratios (see jtable above):
+**   4 adds, 4 subtracts, 5 multiplies, 3 divides
+** (adds and subtracts being nearly the same operation)
+*/
+while(loops--)
+{
+        for(i=0;i<arraysize;i++)
+                switch(jtable[i % 16])
+                {
+                        case 0: /* Add */
+                                AddSubInternalFPF(0,abase+i,
+                                  bbase+i,
+                                  cbase+i);
+                                break;
+                        case 1: /* Subtract */
+                                AddSubInternalFPF(1,abase+i,
+                                  bbase+i,
+                                  cbase+i);
+                                break;
+                        case 2: /* Multiply */
+                                MultiplyInternalFPF(abase+i,
+                                  bbase+i,
+                                  cbase+i);
+                                break;
+                        case 3: /* Divide */
+                                DivideInternalFPF(abase+i,
+                                  bbase+i,
+                                  cbase+i);
+                                break;
+                }
+#ifdef DEBUG
+{
+  ulong j[8];   /* we test 8 entries: 4 near each end of the arrays */
+  int k;
+  ulong i;
+  char buffer[1024];
+  if (number_of_loops==loops) /* the first loop */
+    {
+      j[0]=(ulong)2;
+      j[1]=(ulong)6;
+      j[2]=(ulong)10;
+      j[3]=(ulong)14;
+      j[4]=(ulong)(arraysize-14);
+      j[5]=(ulong)(arraysize-10);
+      j[6]=(ulong)(arraysize-6);
+      j[7]=(ulong)(arraysize-2);
+      for(k=0;k<8;k++){
+	i=j[k];
+	InternalFPFToString(buffer,abase+i);
+	printf("%6ld: (%s) ",i,buffer);
+	switch(jtable[i % 16])
+	  {
+	  case 0: strcpy(buffer,"+"); break;
+	  case 1: strcpy(buffer,"-"); break;
+	  case 2: strcpy(buffer,"*"); break;
+	  case 3: strcpy(buffer,"/"); break;
+	  }
+	printf("%s ",buffer);
+	InternalFPFToString(buffer,bbase+i);
+	printf("(%s) = ",buffer);
+	InternalFPFToString(buffer,cbase+i);
+	printf("%s\n",buffer);
+      }
+    }
+}
+#endif
+}
+return(StopStopwatch(elapsed));
+}
+
+/***********************
+** SetInternalFPFZero **
+************************
+** Turn *dest into a zero: type IFPF_IS_ZERO, exponent MIN_EXP,
+** every mantissa word cleared.  The caller supplies the sign.
+*/
+static void SetInternalFPFZero(InternalFPF *dest,
+                        uchar sign)
+{
+int word=INTERNAL_FPF_PRECISION;        /* Words left to clear */
+
+while(word-- > 0)
+        dest->mantissa[word]=0;
+dest->exp=MIN_EXP;
+dest->sign=sign;
+dest->type=IFPF_IS_ZERO;
+return;
+}
+
+/***************************
+** SetInternalFPFInfinity **
+****************************
+** Turn *dest into an infinity of the requested sign.  The
+** mantissa is cleared and the exponent is set to MIN_EXP,
+** exactly as is done for a zero.
+*/
+static void SetInternalFPFInfinity(InternalFPF *dest,
+                        uchar sign)
+{
+int word=INTERNAL_FPF_PRECISION;        /* Words left to clear */
+
+while(word-- > 0)
+        dest->mantissa[word]=0;
+dest->exp=MIN_EXP;
+dest->sign=sign;
+dest->type=IFPF_IS_INFINITY;
+return;
+}
+
+/**********************
+** SetInternalFPFNaN **
+***********************
+** Turn *dest into a NaN (not a number): sign set, exponent MAX_EXP,
+** and a mantissa whose top word is 0x4000 with all lower words
+** clear.  The mantissa bits "emulate" an 80x87.
+*/
+static void SetInternalFPFNaN(InternalFPF *dest)
+{
+int word;       /* Index of the mantissa word being cleared */
+
+for(word=INTERNAL_FPF_PRECISION-1;word>0;word--)
+        dest->mantissa[word]=0;
+dest->mantissa[0]=0x4000;
+
+dest->sign=1;
+dest->exp=MAX_EXP;
+dest->type=IFPF_IS_NAN;
+return;
+}
+
+/*******************
+** IsMantissaZero **
+********************
+** Test whether all INTERNAL_FPF_PRECISION words of the mantissa
+** that mant points to are zero.
+** Returns 1 when every word is zero and 0 otherwise.
+*/
+static int IsMantissaZero(u16 *mant)
+{
+int bits=0;     /* OR of every mantissa word seen so far */
+int word=0;     /* Index of the next word to fold in */
+
+while(word<INTERNAL_FPF_PRECISION)
+{       bits|=mant[word];
+        word++;
+}
+return(bits==0);
+}
+
+/**************
+** Add16Bits **
+***************
+** One step of a multi-word addition: *a receives the low 16 bits
+** of b + c + *carry, and *carry receives the bit carried out of
+** bit 15 (1 or 0).  Callers walk the mantissa from its last word
+** to its first, so the carry ripples toward mantissa[0].
+*/
+static void Add16Bits(u16 *carry,
+                u16 *a,
+                u16 b,
+                u16 c)
+{
+u32 sum;        /* Wide enough to expose the carry-out as bit 16 */
+
+sum=(u32)b+(u32)c+(u32)*carry;
+
+/* Bit 16 of the sum is the carry into the next word. */
+*carry=(u16)((sum>>16) & 1);
+
+*a=(u16)(sum & 0xFFFF);
+return;
+}
+
+/**************
+** Sub16Bits **
+***************
+** Subtraction twin of Add16Bits: *a = low 16 bits of b - c - *borrow.
+*/
+static void Sub16Bits(u16 *borrow,
+                u16 *a,
+                u16 b,
+                u16 c)
+{
+u32 diff;       /* Wraps below zero, which sets bit 16 */
+
+diff=(u32)b-(u32)c-(u32)*borrow;
+
+/* A set bit 16 means the subtraction needed a borrow. */
+*borrow=(u16)((diff>>16) & 1);
+*a=(u16)(diff & 0xFFFF);
+return;
+}
+
+/*******************
+** ShiftMantLeft1 **
+********************
+** Shift the whole mantissa left by one bit.  The incoming *carry
+** (any non-zero value counts as 1) enters at the low end of the
+** last word; the bit pushed out of mantissa[0] comes back in
+** *carry as either 0 or 0x8000.
+*/
+static void ShiftMantLeft1(u16 *carry,
+                        u16 *mantissa)
+{
+u16 *word;      /* Walks from the last word back to the first */
+u16 shifted;    /* Current word after the shift */
+int top_bit;    /* Bit 15 of the current word (0 or 0x8000) */
+
+for(word=mantissa+INTERNAL_FPF_PRECISION;word!=mantissa;)
+{       word--;
+        top_bit=*word & 0x8000;
+        shifted=(u16)(*word<<1);
+        if(*carry)
+                shifted|=1;     /* Bring in the bit from below */
+        *carry=top_bit;         /* Hand the lost bit to the next word */
+        *word=shifted;
+}
+return;
+}
+
+/********************
+** ShiftMantRight1 **
+*********************
+** Shift the whole mantissa right by one bit.  A non-zero *carry
+** enters as bit 15 of mantissa[0]; bit 0 of the last word is
+** returned in *carry (as 0 or 1).
+*/
+static void ShiftMantRight1(u16 *carry,
+                        u16 *mantissa)
+{
+u16 *word;      /* Walks from the first word to the last */
+u16 shifted;    /* Current word after the shift */
+int low_bit;    /* Bit 0 of the current word */
+
+for(word=mantissa;word!=mantissa+INTERNAL_FPF_PRECISION;word++)
+{       low_bit=*word & 1;              /* About to fall off */
+        shifted=(u16)(*word>>1);
+        if(*carry)
+                shifted|=0x8000;        /* Bring in bit from above */
+        *carry=low_bit;
+        *word=shifted;
+}
+return;
+}
+
+
+/*************************
+** StickyShiftRightMant **
+**************************
+** Shift the mantissa of *ptr right by "amount" bits, keeping a
+** "sticky" bit: whenever a 1 falls off the low end, bit 0 of the
+** last mantissa word is forced to 1.  Zeros are left untouched.
+*/
+static void StickyShiftRightMant(InternalFPF *ptr,
+                        int amount)
+{
+u16 *words=ptr->mantissa;
+u16 lost;       /* Bit shifted out by the latest one-bit shift */
+int n;          /* Loop counter */
+
+if(ptr->type==IFPF_IS_ZERO)     /* Nothing to do for a zero */
+        return;
+
+if(amount>=INTERNAL_FPF_PRECISION * 16)
+{
+        /*
+        ** Every bit would be shifted out: the result is an all-zero
+        ** mantissa with only the sticky bit set.
+        */
+        for(n=INTERNAL_FPF_PRECISION-2;n>=0;n--)
+                words[n]=0;
+        words[INTERNAL_FPF_PRECISION-1]=1;
+        return;
+}
+
+/* Otherwise shift one bit at a time so that each lost bit can be */
+/* folded into the sticky bit as soon as it falls off.            */
+for(n=amount;n>0;n--)
+{
+        lost=0;
+        ShiftMantRight1(&lost,words);
+        if(lost)
+                words[INTERNAL_FPF_PRECISION-1] |= 1;
+}
+return;
+}
+
+
+/**************************************************
+**         POST ARITHMETIC PROCESSING            **
+**  (NORMALIZE, ROUND, OVERFLOW, AND UNDERFLOW)  **
+**************************************************/
+
+/**************
+** normalize **
+***************
+** Normalize an internal-representation number: shift the mantissa
+** left until bit 15 of mantissa[0] is set.
+*/
+static void normalize(InternalFPF *ptr)
+{
+u16     shift_in;       /* Always 0: zeros enter at the low end */
+
+/*
+** Every one-bit left shift doubles the significand, so the
+** exponent is decremented to compensate.  The loop never ends
+** for an all-zero mantissa, so zeros must be filtered out first.
+*/
+while(!(ptr->mantissa[0] & 0x8000))
+{
+        shift_in=0;
+        ShiftMantLeft1(&shift_in,ptr->mantissa);
+        ptr->exp--;
+}
+return;
+}
+
+/****************
+** denormalize **
+*****************
+** Bring a number whose exponent is below minimum_exponent up to
+** that exponent by shifting its mantissa right (with a sticky
+** bit).  Numbers already at or above minimum_exponent are left
+** alone; numbers too small to survive the shift become zero.
+*/
+static void denormalize(InternalFPF *ptr,
+                int minimum_exponent)
+{
+long gap;       /* ptr->exp minus minimum_exponent, then its negation */
+
+/*
+** A zero significand is only reported; processing continues.
+*/
+if (IsMantissaZero(ptr->mantissa))
+        printf("Error:  zero significand in denormalize\n");
+
+gap = ptr->exp-minimum_exponent;
+if (gap >= 0)
+        return;         /* Exponent is already large enough */
+
+/*
+** The number is subnormal; "gap" becomes the shift distance.
+*/
+gap = -gap;
+if (gap >= (INTERNAL_FPF_PRECISION * 16))
+{
+        /* Underflow: nothing of the mantissa would remain */
+        SetInternalFPFZero(ptr, ptr->sign);
+        return;
+}
+
+ptr->exp+=gap;
+StickyShiftRightMant(ptr, gap);
+return;
+}
+
+
+/*********************
+** RoundInternalFPF **
+**********************
+** Round an internal-representation number.
+** The rounding mode is the simplest one, "chop": the surplus
+** low-order mantissa bits are simply cleared.
+*/
+void RoundInternalFPF(InternalFPF *ptr)
+{
+/*
+** Zeros, infinities and NaNs pass through unchanged.
+*/
+if (ptr->type != IFPF_IS_NORMAL &&
+        ptr->type != IFPF_IS_SUBNORMAL)
+        return;
+
+/*
+** Pull the exponent up to MIN_EXP if needed.  This may turn the
+** number into a zero, in which case there is nothing to chop.
+*/
+denormalize(ptr, MIN_EXP);
+if (ptr->type == IFPF_IS_ZERO)
+        return;
+
+/*
+** Clear the extraneous bits: the low three bits of mantissa[3].
+** Words beyond mantissa[3] used to be zeroed here as well; that
+** loop was disabled in the original source and stays disabled.
+*/
+ptr->mantissa[3] &= 0xfff8;
+
+/*
+** No overflow check is made.  The original source disabled the
+** test "ptr->exp > MAX_EXP" (which would have set an infinity),
+** noting that it cannot trigger because ptr->exp is a short.
+*/
+return;
+}
+
+/*******************************************************
+**  ARITHMETIC OPERATIONS ON INTERNAL REPRESENTATION  **
+*******************************************************/
+
+/***************
+** choose_nan **
+****************
+** Called by routines that are forced to perform math on
+** a pair of NaN's.  This routine "selects" which of the NaNs
+** x and y is the result and copies it into z.  (memmove() takes
+** its destination first; the copies used to run the wrong way,
+** overwriting the chosen operand and leaving z unset.)
+*/
+static void choose_nan(InternalFPF *x,
+                InternalFPF *y,
+                InternalFPF *z,
+                int intel_flag)
+{
+int i;
+
+/*
+** Compare the two mantissas, first word to last, and return
+** the operand with the larger one.  Note that we will be
+** emulating an 80387 in this operation.
+*/
+for (i=0; i<INTERNAL_FPF_PRECISION; i++)
+{
+        if (x->mantissa[i] > y->mantissa[i])
+        {
+                memmove((void *)z,(void *)x,sizeof(InternalFPF));
+                return;
+        }
+        if (x->mantissa[i] < y->mantissa[i])
+        {
+                memmove((void *)z,(void *)y,sizeof(InternalFPF));
+                return;
+        }
+}
+
+/* The mantissas are equal: intel_flag breaks the tie. */
+if (!intel_flag)
+        /* flag 0 (passed by multiply and divide): take x */
+        memmove((void *)z,(void *)x,sizeof(InternalFPF));
+else
+        /* flag non-zero (passed by add/subtract): take y */
+        memmove((void *)z,(void *)y,sizeof(InternalFPF));
+return;
+}
+
+
+/**********************
+** AddSubInternalFPF **
+***********************
+** Add (operation==0) or subtract (operation==1) the numbers that
+** x and y point to and return the rounded result in z.  Special
+** operands (zero, infinity, NaN) are resolved by copying into z.
+*/
+static void AddSubInternalFPF(uchar operation,
+                InternalFPF *x,
+                InternalFPF *y,
+                InternalFPF *z)
+{
+int exponent_difference;
+u16 borrow;
+u16 carry;
+int i;
+InternalFPF locx,locy;  /* Needed since we alter them */
+
+/*
+** Following big switch statement handles the various
+** combinations of operand types.  memmove() takes the
+** destination first, so every copy below has z as first argument.
+*/
+switch ((x->type * IFPF_TYPE_COUNT) + y->type)
+{
+case ZERO_ZERO:
+        memmove((void *)z,(void *)x,sizeof(InternalFPF));
+        if (x->sign ^ y->sign ^ operation)
+        {
+                z->sign = 0; /* positive */
+        }
+        break;
+
+case NAN_ZERO:
+case NAN_SUBNORMAL:
+case NAN_NORMAL:
+case NAN_INFINITY:
+case SUBNORMAL_ZERO:
+case NORMAL_ZERO:
+case INFINITY_ZERO:
+case INFINITY_SUBNORMAL:
+case INFINITY_NORMAL:
+        memmove((void *)z,(void *)x,sizeof(InternalFPF)); /* z = x */
+        break;
+
+
+case ZERO_NAN:
+case SUBNORMAL_NAN:
+case NORMAL_NAN:
+case INFINITY_NAN:
+        memmove((void *)z,(void *)y,sizeof(InternalFPF)); /* z = y */
+        break;
+
+case ZERO_SUBNORMAL:
+case ZERO_NORMAL:
+case ZERO_INFINITY:
+case SUBNORMAL_INFINITY:
+case NORMAL_INFINITY:
+        memmove((void *)z,(void *)y,sizeof(InternalFPF)); /* z = y */
+        z->sign ^= operation;   /* subtracting y flips its sign */
+        break;
+
+case SUBNORMAL_SUBNORMAL:
+case SUBNORMAL_NORMAL:
+case NORMAL_SUBNORMAL:
+case NORMAL_NORMAL:
+        /*
+        ** Copy x and y to locals, since we may have
+        ** to alter them.
+        */
+        memmove((void *)&locx,(void *)x,sizeof(InternalFPF));
+        memmove((void *)&locy,(void *)y,sizeof(InternalFPF));
+
+        /* compute sum/difference */
+        exponent_difference = locx.exp-locy.exp;
+        if (exponent_difference == 0)
+        {
+                /*
+                ** locx.exp == locy.exp
+                ** so, no shifting required
+                */
+                if (locx.type == IFPF_IS_SUBNORMAL ||
+                  locy.type == IFPF_IS_SUBNORMAL)
+                        z->type = IFPF_IS_SUBNORMAL;
+                else
+                        z->type = IFPF_IS_NORMAL;
+
+                /*
+                ** Assume that locx.mantissa > locy.mantissa
+                */
+                z->sign = locx.sign;
+                z->exp= locx.exp;
+        }
+        else
+                if (exponent_difference > 0)
+                {
+                        /*
+                        ** locx.exp > locy.exp
+                        */
+                        StickyShiftRightMant(&locy,
+                                 exponent_difference);
+                        z->type = locx.type;
+                        z->sign = locx.sign;
+                        z->exp = locx.exp;
+                }
+                else    /* if (exponent_difference < 0) */
+                {
+                        /*
+                        ** locx.exp < locy.exp
+                        */
+                        StickyShiftRightMant(&locx,
+                                -exponent_difference);
+                        z->type = locy.type;
+                        z->sign = locy.sign ^ operation;
+                        z->exp = locy.exp;
+                }
+
+                if (locx.sign ^ locy.sign ^ operation)
+                {
+                        /*
+                        ** Signs are different, subtract mantissas
+                        */
+                        borrow = 0;
+                        for (i=(INTERNAL_FPF_PRECISION-1); i>=0; i--)
+                                Sub16Bits(&borrow,
+                                        &z->mantissa[i],
+                                        locx.mantissa[i],
+                                        locy.mantissa[i]);
+
+                        if (borrow)
+                        {
+                                /* The y->mantissa was larger than the
+                                ** x->mantissa leaving a negative
+                                ** result.  Change the result back to
+                                ** an unsigned number and flip the
+                                ** sign flag.
+                                */
+                                z->sign = locy.sign ^ operation;
+                                borrow = 0;
+                                for (i=(INTERNAL_FPF_PRECISION-1); i>=0; i--)
+                                {
+                                        Sub16Bits(&borrow,
+                                                &z->mantissa[i],
+                                                0,
+                                                z->mantissa[i]);
+                                }
+                        }
+                        else
+                        {
+                                /* The assumption made above
+                                ** (i.e. x->mantissa >= y->mantissa)
+                                ** was correct.  Therefore, do nothing.
+                                ** z->sign = x->sign;
+                                */
+                        }
+
+                        if (IsMantissaZero(z->mantissa))
+                        {
+                                z->type = IFPF_IS_ZERO;
+                                z->sign = 0; /* positive */
+                        }
+                        else
+                                if (locx.type == IFPF_IS_NORMAL ||
+                                         locy.type == IFPF_IS_NORMAL)
+                                {
+                                        normalize(z);
+                                }
+                }
+                else
+                {
+                        /* signs are the same, add mantissas */
+                        carry = 0;
+                        for (i=(INTERNAL_FPF_PRECISION-1); i>=0; i--)
+                        {
+                                Add16Bits(&carry,
+                                        &z->mantissa[i],
+                                        locx.mantissa[i],
+                                        locy.mantissa[i]);
+                        }
+
+                        if (carry)
+                        {
+                                z->exp++;
+                                carry=0;
+                                ShiftMantRight1(&carry,z->mantissa);
+                                z->mantissa[0] |= 0x8000;
+                                z->type = IFPF_IS_NORMAL;
+                        }
+                        else
+                                if (z->mantissa[0] & 0x8000)
+                                        z->type = IFPF_IS_NORMAL;
+        }
+        break;
+
+case INFINITY_INFINITY:
+        SetInternalFPFNaN(z);
+        break;
+
+case NAN_NAN:
+        choose_nan(x, y, z, 1);
+        break;
+}
+
+/*
+** All the math is done; time to round.
+*/
+RoundInternalFPF(z);
+return;
+}
+
+
+/************************
+** MultiplyInternalFPF **
+*************************
+** Multiply the internal-representation numbers x and y by shift-
+** and-add and return the rounded product in z.
+*/
+static void MultiplyInternalFPF(InternalFPF *x,
+                        InternalFPF *y,
+                        InternalFPF *z)
+{
+int i;
+int j;
+u16 carry;
+u16 extra_bits[INTERNAL_FPF_PRECISION];
+InternalFPF locy;       /* Needed since this will be altered */
+/*
+** As in the preceding function, this large switch
+** statement selects among the many combinations
+** of operands.  memmove() takes the destination first,
+** so every copy below has z as its first argument.
+*/
+switch ((x->type * IFPF_TYPE_COUNT) + y->type)
+{
+case INFINITY_SUBNORMAL:
+case INFINITY_NORMAL:
+case INFINITY_INFINITY:
+case ZERO_ZERO:
+case ZERO_SUBNORMAL:
+case ZERO_NORMAL:
+        memmove((void *)z,(void *)x,sizeof(InternalFPF));
+        z->sign ^= y->sign;
+        break;
+
+case SUBNORMAL_INFINITY:
+case NORMAL_INFINITY:
+case SUBNORMAL_ZERO:
+case NORMAL_ZERO:
+        memmove((void *)z,(void *)y,sizeof(InternalFPF));
+        z->sign ^= x->sign;
+        break;
+
+case ZERO_INFINITY:
+case INFINITY_ZERO:
+        SetInternalFPFNaN(z);
+        break;
+
+case NAN_ZERO:
+case NAN_SUBNORMAL:
+case NAN_NORMAL:
+case NAN_INFINITY:
+        memmove((void *)z,(void *)x,sizeof(InternalFPF));
+        break;
+
+case ZERO_NAN:
+case SUBNORMAL_NAN:
+case NORMAL_NAN:
+case INFINITY_NAN:
+        memmove((void *)z,(void *)y,sizeof(InternalFPF));
+        break;
+
+
+case SUBNORMAL_SUBNORMAL:
+case SUBNORMAL_NORMAL:
+case NORMAL_SUBNORMAL:
+case NORMAL_NORMAL:
+        /*
+        ** Make a local copy of the y number, since we will be
+        ** altering it in the process of multiplying.
+        */
+        memmove((void *)&locy,(void *)y,sizeof(InternalFPF));
+
+        /* Unnormal zero operand: deliver the infinity and stop, since */
+        /* the normalizing loop below never ends on a zero product.    */
+        if (IsMantissaZero(x->mantissa) || IsMantissaZero(y->mantissa))
+        {
+                SetInternalFPFInfinity(z, 0);
+                break;
+        }
+
+        /* Initialize the result */
+        if (x->type == IFPF_IS_SUBNORMAL ||
+            y->type == IFPF_IS_SUBNORMAL)
+                z->type = IFPF_IS_SUBNORMAL;
+        else
+                z->type = IFPF_IS_NORMAL;
+
+        z->sign = x->sign ^ y->sign;
+        z->exp = x->exp + y->exp ;
+        for (i=0; i<INTERNAL_FPF_PRECISION; i++)
+        {
+                z->mantissa[i] = 0;
+                extra_bits[i] = 0;
+        }
+
+        for (i=0; i<(INTERNAL_FPF_PRECISION*16); i++)
+        {
+                /*
+                ** Get rightmost bit of the multiplier
+                */
+                carry = 0;
+                ShiftMantRight1(&carry, locy.mantissa);
+                if (carry)
+                {
+                        /*
+                        ** Add the multiplicand to the product
+                        */
+                        carry = 0;
+                        for (j=(INTERNAL_FPF_PRECISION-1); j>=0; j--)
+                                Add16Bits(&carry,
+                                        &z->mantissa[j],
+                                        z->mantissa[j],
+                                        x->mantissa[j]);
+                }
+                else
+                {
+                        carry = 0;
+                }
+
+                /*
+                ** Shift the product right.  Overflow bits get
+                ** shifted into extra_bits.  We'll use it later
+                ** to help with the "sticky" bit.
+                */
+                ShiftMantRight1(&carry, z->mantissa);
+                ShiftMantRight1(&carry, extra_bits);
+        }
+
+        /*
+        ** Normalize
+        ** Note that we use a "special" normalization routine
+        ** because we need to use the extra bits. (These are
+        ** bits that may have been shifted off the bottom that
+        ** we want to reclaim...if we can.)
+        */
+        while ((z->mantissa[0] & 0x8000) == 0)
+        {
+                carry = 0;
+                ShiftMantLeft1(&carry, extra_bits);
+                ShiftMantLeft1(&carry, z->mantissa);
+                z->exp--;
+        }
+
+        /*
+        ** Set the sticky bit if any bits set in extra bits.
+        */
+        if (!IsMantissaZero(extra_bits))
+        {
+                z->mantissa[INTERNAL_FPF_PRECISION-1] |= 1;
+        }
+        break;
+
+case NAN_NAN:
+        choose_nan(x, y, z, 0);
+        break;
+}
+
+/*
+** All math done...do rounding.
+*/
+RoundInternalFPF(z);
+return;
+}
+
+
+/**********************
+** DivideInternalFPF **
+***********************
+** Divide internal FPF number x by y.  Return rounded result in z.
+*/
+static void DivideInternalFPF(InternalFPF *x,
+                        InternalFPF *y,
+                        InternalFPF *z)
+{
+int i;
+int j;
+u16 carry;
+u16 extra_bits[INTERNAL_FPF_PRECISION];
+InternalFPF locx;       /* Local for x number */
+
+/*
+** Dispatch on the operand types.  Some cases below fall through
+** on purpose; memmove() takes its destination first, so the NaN
+** cases copy the NaN operand into z.
+*/
+switch ((x->type * IFPF_TYPE_COUNT) + y->type)
+{
+case ZERO_ZERO:
+case INFINITY_INFINITY:
+        SetInternalFPFNaN(z);
+        break;
+
+case ZERO_SUBNORMAL:
+case ZERO_NORMAL:
+        if (IsMantissaZero(y->mantissa))
+        {
+                SetInternalFPFNaN(z);
+                break;
+        }
+        /* fall through: zero divided by a non-zero divisor */
+case ZERO_INFINITY:
+case SUBNORMAL_INFINITY:
+case NORMAL_INFINITY:
+        SetInternalFPFZero(z, x->sign ^ y->sign);
+        break;
+
+case SUBNORMAL_ZERO:
+case NORMAL_ZERO:
+        if (IsMantissaZero(x->mantissa))
+        {
+                SetInternalFPFNaN(z);
+                break;
+        }
+        /* fall through: non-zero dividend divided by zero */
+case INFINITY_ZERO:
+case INFINITY_SUBNORMAL:
+case INFINITY_NORMAL:
+        SetInternalFPFInfinity(z, 0);
+        z->sign = x->sign ^ y->sign;
+        break;
+
+case NAN_ZERO:
+case NAN_SUBNORMAL:
+case NAN_NORMAL:
+case NAN_INFINITY:
+        memmove((void *)z,(void *)x,sizeof(InternalFPF));
+        break;
+
+case ZERO_NAN:
+case SUBNORMAL_NAN:
+case NORMAL_NAN:
+case INFINITY_NAN:
+        memmove((void *)z,(void *)y,sizeof(InternalFPF));
+        break;
+
+case SUBNORMAL_SUBNORMAL:
+case NORMAL_SUBNORMAL:
+case SUBNORMAL_NORMAL:
+case NORMAL_NORMAL:
+        /*
+        ** Make local copy of x number, since we'll be
+        ** altering it in the process of dividing.
+        */
+        memmove((void *)&locx,(void *)x,sizeof(InternalFPF));
+
+        /*
+        ** Check for unnormal zero arguments
+        */
+        if (IsMantissaZero(locx.mantissa))
+        {
+                if (IsMantissaZero(y->mantissa))
+                        SetInternalFPFNaN(z);
+                else
+                        SetInternalFPFZero(z, 0);
+                break;
+        }
+        if (IsMantissaZero(y->mantissa))
+        {
+                SetInternalFPFInfinity(z, 0);
+                break;
+        }
+
+        /*
+        ** Initialize the result
+        */
+        z->type = x->type;
+        z->sign = x->sign ^ y->sign;
+        z->exp = x->exp - y->exp +
+                        ((INTERNAL_FPF_PRECISION * 16 * 2));
+        for (i=0; i<INTERNAL_FPF_PRECISION; i++)
+        {
+                z->mantissa[i] = 0;
+                extra_bits[i] = 0;
+        }
+
+        while ((z->mantissa[0] & 0x8000) == 0)
+        {
+                carry = 0;
+                ShiftMantLeft1(&carry, locx.mantissa);
+                ShiftMantLeft1(&carry, extra_bits);
+
+                /*
+                ** Time to subtract yet?
+                */
+                if (carry == 0)
+                        for (j=0; j<INTERNAL_FPF_PRECISION; j++)
+                        {
+                                if (y->mantissa[j] > extra_bits[j])
+                                {
+                                        carry = 0;
+                                        goto no_subtract;
+                                }
+                                if (y->mantissa[j] < extra_bits[j])
+                                        break;
+                        }
+                /*
+                ** Divisor (y) <= dividend (x), subtract
+                */
+                carry = 0;
+                for (j=(INTERNAL_FPF_PRECISION-1); j>=0; j--)
+                        Sub16Bits(&carry,
+                                &extra_bits[j],
+                                extra_bits[j],
+                                y->mantissa[j]);
+                carry = 1;      /* 1 shifted into quotient */
+        no_subtract:
+                ShiftMantLeft1(&carry, z->mantissa);
+                z->exp--;
+        }
+        break;
+
+case NAN_NAN:
+        choose_nan(x, y, z, 0);
+        break;
+}
+
+/*
+** Math complete...do rounding
+*/
+RoundInternalFPF(z);
+}
+
+/***********************
+** Int32ToInternalFPF **
+************************
+** Convert a signed 32-bit integer into an internal FPF number.
+** (This routine replaces the older LongToInternalFPF.)
+*/
+static void Int32ToInternalFPF(int32 mylong,
+                InternalFPF *dest)
+{
+int i;          /* Index */
+u32 magnitude;  /* Absolute value of mylong */
+
+/*
+** Save the sign and get the absolute value.  The magnitude is
+** formed in unsigned arithmetic: negating the most negative
+** int32 as a signed value would overflow, whereas the unsigned
+** subtraction below is well defined and yields 0x80000000.
+*/
+if(mylong<(int32)0)
+{       dest->sign=1;
+        magnitude=(u32)0-(u32)mylong;
+}
+else
+{       dest->sign=0;
+        magnitude=(u32)mylong;
+}
+/*
+** Prepare the destination floating point number
+*/
+dest->type=IFPF_IS_NORMAL;
+for(i=0;i<INTERNAL_FPF_PRECISION;i++)
+        dest->mantissa[i]=0;
+
+/*
+** See if we've got a zero.  If so, make the resultant FP
+** number a true zero and go home.
+*/
+if(magnitude==0)
+{       dest->type=IFPF_IS_ZERO;
+        dest->exp=0;
+        return;
+}
+
+/*
+** Not a true zero.  Set the exponent to 32 (internal FPFs have
+** no bias) and load the high and low words into mantissa[0] and
+** mantissa[1].  Then normalize.  The action of normalizing
+** slides the mantissa bits into place and sets up the exponent
+** properly.
+*/
+dest->exp=32;
+dest->mantissa[0]=(u16)((magnitude >> 16) & 0xFFFF);
+dest->mantissa[1]=(u16)(magnitude & 0xFFFF);
+normalize(dest);
+return;
+}
+
+#ifdef DEBUG
+/************************
+** InternalFPFToString **
+*************************
+** FOR DEBUG PURPOSES
+** This routine converts an internal floating point representation
+** number to a string.  Used in debugging the package.
+** Returns length of converted number.
+** NOTE: dest must point to a buffer big enough to hold the
+**  result.  Also, this routine does append a null (an effect
+**  of using the sprintf() function).  It also returns
+**  a length count.
+** NOTE: This routine returns 5 significant digits.  That's
+**  about all I feel safe with, given the method of
+**  conversion.  It should be more than enough for programmers
+**  to determine whether the package is properly ported.
+*/
+static int InternalFPFToString(char *dest,
+                InternalFPF *src)
+{
+InternalFPF locFPFNum;          /* Local for src (will be altered) */
+InternalFPF IFPF10;             /* Floating-point 10 */
+InternalFPF IFPFComp;           /* For doing comparisons */
+int msign;                      /* Holding for mantissa sign */
+int expcount;                   /* Exponent counter */
+int ccount;                     /* Character counter */
+int i,j,k;                      /* Index */
+u16 carryaccum;                 /* Carry accumulator */
+u16 mycarry;                    /* Local for carry */
+
+/*
+** Check first for the simple things...Nan, Infinity, Zero.
+** If found, copy the proper string in (terminating null included,
+** which is not counted in the returned length) and go home.
+*/
+switch(src->type)
+{
+        case IFPF_IS_NAN:
+                memcpy(dest,"NaN",4);
+                return(3);
+
+        case IFPF_IS_INFINITY:
+                if(src->sign==0)
+                        memcpy(dest,"+Inf",5);
+                else
+                        memcpy(dest,"-Inf",5);
+                return(4);
+
+        case IFPF_IS_ZERO:
+                if(src->sign==0)
+                        memcpy(dest,"+0",3);
+                else
+                        memcpy(dest,"-0",3);
+                return(2);
+}
+
+/*
+** Move the internal number into our local holding area, since
+** we'll be altering it to print it out.
+*/
+memcpy((void *)&locFPFNum,(void *)src,sizeof(InternalFPF));
+
+/*
+** Set up a floating-point 10...which we'll use a lot in a minute.
+*/
+/* LongToInternalFPF(10L,&IFPF10); */
+Int32ToInternalFPF((int32)10,&IFPF10);
+
+/*
+** Save the mantissa sign and make it positive.
+*/
+msign=src->sign;
+
+/* src->sign=0 */ /* bug, fixed Nov. 13, 1997 */
+(&locFPFNum)->sign=0;
+
+expcount=0;             /* Init exponent counter */
+
+/*
+** See if the number is less than 10.  If so, multiply
+** the number repeatedly by 10 until it's not.   For each
+** multiplication, decrement a counter so we can keep track
+** of the exponent.
+*/
+
+while(1)
+{       AddSubInternalFPF(1,&locFPFNum,&IFPF10,&IFPFComp);
+        if(IFPFComp.sign==0) break;
+        MultiplyInternalFPF(&locFPFNum,&IFPF10,&IFPFComp);
+        expcount--;
+        memcpy((void *)&locFPFNum,(void *)&IFPFComp,sizeof(InternalFPF));
+}
+/*
+** Do the reverse of the above.  As long as the number is
+** greater than or equal to 10, divide it by 10.  Increment the
+** exponent counter for each division.
+*/
+
+while(1)
+{
+        AddSubInternalFPF(1,&locFPFNum,&IFPF10,&IFPFComp);
+        if(IFPFComp.sign!=0) break;
+        DivideInternalFPF(&locFPFNum,&IFPF10,&IFPFComp);
+        expcount++;
+        memcpy((void *)&locFPFNum,(void *)&IFPFComp,sizeof(InternalFPF));
+}
+
+/*
+** About time to start storing things.  First, store the
+** mantissa sign.
+*/
+ccount=1;               /* Init character counter */
+if(msign==0)
+        *dest++='+';
+else
+        *dest++='-';
+
+/*
+** At this point we know that the number is in the range
+** 10 > n >=1.  We need to "strip digits" out of the
+** mantissa.  We do this by treating the mantissa as
+** an integer and multiplying by 10 (not a floating-point
+** 10, but an integer 10).  Since this is debug code and we
+** couldn't care less about speed, we'll do it the stupid
+** way and simply add the number to itself 9 times.
+** Anything that makes it to the left of the implied binary point
+** gets stripped off and emitted.  We'll do this for
+** 5 significant digits (which should be enough to
+** verify things).
+*/
+/*
+** Re-position radix point
+*/
+carryaccum=0;
+while(locFPFNum.exp>0)
+{
+        mycarry=0;
+        ShiftMantLeft1(&mycarry,locFPFNum.mantissa);
+        carryaccum=(carryaccum<<1);
+        if(mycarry) carryaccum++;
+        locFPFNum.exp--;
+}
+
+while(locFPFNum.exp<0)
+{
+        mycarry=0;
+        ShiftMantRight1(&mycarry,locFPFNum.mantissa);
+        locFPFNum.exp++;
+}
+
+for(i=0;i<6;i++)
+        if(i==1)
+        {       /* Emit decimal point */
+                *dest++='.';
+                ccount++;
+        }
+        else
+        {       /* Emit a digit */
+                *dest++=('0'+carryaccum);
+                ccount++;
+
+                carryaccum=0;
+                memcpy((void *)&IFPF10,
+                        (void *)&locFPFNum,
+                        sizeof(InternalFPF));
+
+                /* Do multiply via repeated adds */
+                for(j=0;j<9;j++)
+                {
+                        mycarry=0;
+                        for(k=(INTERNAL_FPF_PRECISION-1);k>=0;k--)
+                                Add16Bits(&mycarry,&(IFPFComp.mantissa[k]),
+                                        locFPFNum.mantissa[k],
+                                        IFPF10.mantissa[k]);
+                        carryaccum+=mycarry ? 1 : 0;
+                        memcpy((void *)&locFPFNum,
+                                (void *)&IFPFComp,
+                                sizeof(InternalFPF));
+                }
+        }
+
+/*
+** Now move the 'E', the exponent sign, and the exponent
+** into the string.
+*/
+*dest++='E';
+
+/* sprint is supposed to return an integer, but it caused problems on SunOS
+ * with the native cc. Hence we force it.
+ * Uwe F. Mayer
+ */
+ccount+=(int)sprintf(dest,"%4d",expcount);
+
+/*
+** All done, go home.
+*/
+return(ccount);
+
+}
+
+#endif
diff --git a/emfloat.h b/emfloat.h
new file mode 100644
index 0000000..41cc6d9
--- /dev/null
+++ b/emfloat.h
@@ -0,0 +1,154 @@
+
+/*
+** emfloat.h
+** Header for emfloat.c
+**
+** BYTEmark (tm)
+** BYTE Magazine's Native Mode benchmarks
+** Rick Grehan, BYTE Magazine
+**
+** Create:
+** Revision: 3/95
+**
+** DISCLAIMER
+** The source, executable, and documentation files that comprise
+** the BYTEmark benchmarks are made available on an "as is" basis.
+** This means that we at BYTE Magazine have made every reasonable
+** effort to verify that the there are no errors in the source and
+** executable code.  We cannot, however, guarantee that the programs
+** are error-free.  Consequently, McGraw-HIll and BYTE Magazine make
+** no claims in regard to the fitness of the source code, executable
+** code, and documentation of the BYTEmark.
+**  Furthermore, BYTE Magazine, McGraw-Hill, and all employees
+** of McGraw-Hill cannot be held responsible for any damages resulting
+** from the use of this code or the results obtained from using
+** this code.
+*/
+
+#include <stdio.h>
+
+/* Is this a 64 bit architecture? If so, this will define LONG64 */
+/* Uwe F. Mayer 15 November 1997                                 */
+#include "pointer.h"
+
+/*
+** DEFINES
+*/
+#define u8 unsigned char
+#define u16 unsigned short
+#ifdef LONG64
+#define u32 unsigned int
+#else
+#define u32 unsigned long
+#endif
+#define uchar unsigned char
+#define ulong unsigned long
+
+#define MAX_EXP 32767L
+#define MIN_EXP (-32767L)
+
+#define IFPF_IS_ZERO 0
+#define IFPF_IS_SUBNORMAL 1
+#define IFPF_IS_NORMAL 2
+#define IFPF_IS_INFINITY 3
+#define IFPF_IS_NAN 4
+#define IFPF_TYPE_COUNT 5
+
+#define ZERO_ZERO                       0
+#define ZERO_SUBNORMAL                  1
+#define ZERO_NORMAL                     2
+#define ZERO_INFINITY                   3
+#define ZERO_NAN                        4
+
+#define SUBNORMAL_ZERO                  5
+#define SUBNORMAL_SUBNORMAL             6
+#define SUBNORMAL_NORMAL                7
+#define SUBNORMAL_INFINITY              8
+#define SUBNORMAL_NAN                   9
+
+#define NORMAL_ZERO                     10
+#define NORMAL_SUBNORMAL                11
+#define NORMAL_NORMAL                   12
+#define NORMAL_INFINITY                 13
+#define NORMAL_NAN                      14
+
+#define INFINITY_ZERO                   15
+#define INFINITY_SUBNORMAL              16
+#define INFINITY_NORMAL                 17
+#define INFINITY_INFINITY               18
+#define INFINITY_NAN                    19
+
+#define NAN_ZERO                        20
+#define NAN_SUBNORMAL                   21
+#define NAN_NORMAL                      22
+#define NAN_INFINITY                    23
+#define NAN_NAN                         24
+#define OPERAND_ZERO                    0
+#define OPERAND_SUBNORMAL               1
+#define OPERAND_NORMAL                  2
+#define OPERAND_INFINITY                3
+#define OPERAND_NAN                     4
+
+/*
+** Following already defined in NMGLOBAL.H
+**
+#define INTERNAL_FPF_PRECISION 4
+*/
+
+/*
+** TYPEDEFS
+*/
+
+typedef struct
+{
+        u8 type;        /* One of the IFPF_IS_* codes (zero, normal, NaN...) */
+        u8 sign;        /* Mantissa sign: 0 is positive, nonzero negative */
+        short exp;      /* Signed exponent...no bias */
+        u16 mantissa[INTERNAL_FPF_PRECISION];   /* word [0] is most significant */
+} InternalFPF;
+
+/*
+** PROTOTYPES
+*/
+void SetupCPUEmFloatArrays(InternalFPF *abase,
+        InternalFPF *bbase, InternalFPF *cbase, ulong arraysize);
+ulong DoEmFloatIteration(InternalFPF *abase,
+        InternalFPF *bbase, InternalFPF *cbase,
+        ulong arraysize, ulong loops);
+static void SetInternalFPFZero(InternalFPF *dest,
+                        uchar sign);
+static void SetInternalFPFInfinity(InternalFPF *dest,
+                        uchar sign);
+static void SetInternalFPFNaN(InternalFPF *dest);
+static int IsMantissaZero(u16 *mant);
+static void Add16Bits(u16 *carry,u16 *a,u16 b,u16 c);
+static void Sub16Bits(u16 *borrow,u16 *a,u16 b,u16 c);
+static void ShiftMantLeft1(u16 *carry,u16 *mantissa);
+static void ShiftMantRight1(u16 *carry,u16 *mantissa);
+static void StickyShiftRightMant(InternalFPF *ptr,int amount);
+static void normalize(InternalFPF *ptr);
+static void denormalize(InternalFPF *ptr,int minimum_exponent);
+void RoundInternalFPF(InternalFPF *ptr);
+static void choose_nan(InternalFPF *x,InternalFPF *y,InternalFPF *z,
+                int intel_flag);
+static void AddSubInternalFPF(uchar operation,InternalFPF *x,
+                InternalFPF *y,InternalFPF *z);
+static void MultiplyInternalFPF(InternalFPF *x,InternalFPF *y,
+                        InternalFPF *z);
+static void DivideInternalFPF(InternalFPF *x,InternalFPF *y, 
+                        InternalFPF *z);
+/* static void LongToInternalFPF(long mylong, */
+static void Int32ToInternalFPF(int32 mylong,
+                InternalFPF *dest);
+#ifdef DEBUG
+static int InternalFPFToString(char *dest,
+                InternalFPF *src);
+#endif
+
+/*
+** EXTERNALS
+*/
+extern ulong StartStopwatch(void);      /* prototyped: takes no arguments */
+extern ulong StopStopwatch(ulong elapsed);
+/* extern long randwc(long num); */
+extern int32 randwc(int32 num);
diff --git a/hardware b/hardware
new file mode 100755
index 0000000..6fb3293
Binary files /dev/null and b/hardware differ
diff --git a/hardware.c b/hardware.c
new file mode 100644
index 0000000..4838b2f
--- /dev/null
+++ b/hardware.c
@@ -0,0 +1,202 @@
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+
+#define BUF_SIZ 1024
+
+/******************
+** output_string **
+*******************
+** Displays a string on the screen.  Also, if the flag
+** write_to_file is set, outputs the string to the output file.
+** Note, this routine presumes that you've included a carriage
+** return at the end of the buffer.
+*/
+static void output_string(const char *buffer, const int write_to_file,
+                          FILE *global_ofile){
+  fputs(buffer,stdout);                 /* always echo to the screen */
+  if(write_to_file)
+    fputs(buffer,global_ofile);         /* mirror into the output file */
+  return;
+}
+
+
+/******************
+** removeNewLine **
+*******************
+** Removes a trailing newline character if present
+*/
+static void removeNewLine(char * s) {
+  size_t len = strlen(s);               /* measured once, reused below */
+  if(len>0 && s[len-1] == '\n')
+    s[len-1] = '\0';                    /* overwrite the newline */
+}
+
+
+/***************
+** runCommand **
+****************
+** Run the system command through a pipe
+** The pointer result must point to a pre-allocated array of at least BUF_SIZ
+*/
+static void runCommand (const char *command, char *result) {
+  FILE * stream = popen(command, "r");
+  int got_line = 0;             /* nonzero once a line has been read */
+
+  /* Read at most one line of the command's output */
+  if(stream != NULL) {
+    got_line = (fgets(result, BUF_SIZ, stream) != NULL);
+    pclose(stream);
+  }
+  if(!got_line) {
+    /* popen failed or nothing could be read: report an empty string */
+    result[0] = '\0';
+  }
+  /* fgets keeps the newline; callers want the bare text */
+  removeNewLine(result);
+}
+
+
+/*****************
+** cpuinfoField **
+******************
+** If line begins with key, copies what follows the key and its
+** separators (blanks, tabs, colons) into dest, without the trailing
+** newline, and returns 1.  Returns 0 and leaves dest alone otherwise.
+** dest must have room for BUF_SIZ characters.
+*/
+static int cpuinfoField (const char *line, const char *key, char *dest) {
+  size_t keylen = strlen(key);
+  const char * cp;
+
+  if(strncmp(line, key, keylen) != 0)
+    return 0;
+  cp = line + keylen;
+  while(*cp == ' ' || *cp == ':' || *cp == '\t')
+    cp++;
+  if(*cp != '\0')
+    strcpy(dest, cp);
+  removeNewLine(dest);
+  return 1;
+}
+
+
+/***************
+** appendWord **
+****************
+** Appends word to the string in dst, which holds cap bytes, putting one
+** space in front of it when dst is not empty.  An empty word is ignored
+** and a word that does not fit is truncated instead of overflowing dst.
+*/
+static void appendWord (char *dst, size_t cap, const char *word) {
+  size_t used = strlen(dst);
+
+  /* need room for the space, one character and the terminator */
+  if(word[0] == '\0' || used + 3 > cap)
+    return;
+  if(used > 0)
+    dst[used++] = ' ';
+  strncpy(dst + used, word, cap - used - 1);
+  dst[cap - 1] = '\0';
+}
+
+
+/********************
+** readProcCpuInfo **
+*********************
+** Reads and parses /proc/cpuinfo on a Linux system
+** The pointers must point to pre-allocated arrays of at least BUF_SIZ
+** model receives the CPU count ("Dual" or "<n> CPU", only when there is
+** more than one), vendor_id, model name and "<n>MHz", separated by single
+** spaces; parts that were not found are left out.  cache receives the
+** "cache size" value.  Both stay empty if the file cannot be opened.
+*/
+static void readProcCpuInfo (char *model, char *cache) {
+  FILE * info;
+  int cpus = 0;                 /* number of "processor" lines seen */
+  char buffer[BUF_SIZ];         /* current line of /proc/cpuinfo */
+  char vendor_id[BUF_SIZ];
+  char model_name[BUF_SIZ];
+  char cpu_MHz[BUF_SIZ];
+  int i;
+  float f;
+
+  vendor_id[0] = model_name[0] = cpu_MHz[0] = model[0] = cache[0] = '\0';
+  info = fopen("/proc/cpuinfo", "r");
+  if(info == NULL)
+    return;
+  while(NULL != fgets(buffer, BUF_SIZ, info)){
+    if(! strncmp(buffer, "processor", 9)) {
+      cpus++;
+      continue;
+    }
+    /* the first key that matches consumes the line; with several
+       CPUs the values of the last one listed win */
+    (void)(cpuinfoField(buffer, "vendor_id", vendor_id)
+        || cpuinfoField(buffer, "model name", model_name)
+        || cpuinfoField(buffer, "cpu MHz", cpu_MHz)
+        || cpuinfoField(buffer, "cache size", cache));
+  }
+  fclose(info);
+  /* CPU count prefix, only for multiprocessor machines */
+  if(cpus == 2) {
+    strcpy(model, "Dual");
+  } else if(cpus > 2) {
+    sprintf(model, "%d CPU", cpus);
+  }
+  appendWord(model, BUF_SIZ, vendor_id);
+  appendWord(model, BUF_SIZ, model_name);
+  if(cpu_MHz[0] != '\0'){
+    /* round the clock rate to whole MHz */
+    f = atof(cpu_MHz);
+    i = (int)(f+0.5f);
+    sprintf(cpu_MHz, "%dMHz", i);
+    appendWord(model, BUF_SIZ, cpu_MHz);
+  }
+}
+
+
+/*************
+** hardware **
+**************
+** Runs the system command "uname -s -r"
+** Reads /proc/cpuinfo if on a linux system
+** Writes output
+*/
+void hardware(const int write_to_file, FILE *global_ofile) {
+  char buffer[BUF_SIZ+32];      /* a BUF_SIZ value plus label and newline */
+  char os[BUF_SIZ];             /* first line printed by "uname -s -r" */
+  char model[BUF_SIZ];          /* CPU description, empty off Linux */
+  char cache[BUF_SIZ];          /* cache size text, empty off Linux */
+  char os_command[] = "uname -s -r";
+#ifdef NO_UNAME
+  os[0] = '\0';
+#else
+  runCommand(os_command, os);
+#endif
+  if(NULL != strstr(os, "Linux")) {
+    readProcCpuInfo (model, cache);
+  } else {
+    model[0] = '\0';
+    cache[0] = '\0';
+  }
+  sprintf(buffer, "CPU                 : %s\n", model);
+  output_string(buffer, write_to_file, global_ofile);
+  sprintf(buffer, "L2 Cache            : %s\n", cache);
+  output_string(buffer, write_to_file, global_ofile);
+  sprintf(buffer, "OS                  : %s\n", os);
+  output_string(buffer, write_to_file, global_ofile);
+}
+
+
+/************************
+** main for hardware.c **
+*************************
+** For testing of code only
+** Should be commented out
+*/
+/*
+int main(int argc, char * argv[]) {
+  hardware(0, NULL);
+  return 0;
+}
+*/
diff --git a/hardware.h b/hardware.h
new file mode 100644
index 0000000..2a07934
--- /dev/null
+++ b/hardware.h
@@ -0,0 +1,2 @@
+#include <stdio.h>      /* FILE, so this header can be included on its own */
+extern void hardware(const int write_to_file, FILE *global_ofile);
diff --git a/hello.c b/hello.c
new file mode 100644
index 0000000..c664483
--- /dev/null
+++ b/hello.c
@@ -0,0 +1,2 @@
+#include <stdio.h>
+int main (void) {puts("hello.");return 0;} /* prints "hello." and a newline */
diff --git a/misc.c b/misc.c
new file mode 100644
index 0000000..a5144e4
--- /dev/null
+++ b/misc.c
@@ -0,0 +1,120 @@
+
+/*
+** misc.c
+** BYTEmark (tm)
+** BYTE's Native Mode Benchmarks
+** Rick Grehan, BYTE Magazine
+** DISCLAIMER
+** The source, executable, and documentation files that comprise
+** the BYTEmark benchmarks are made available on an "as is" basis.
+** This means that we at BYTE Magazine have made every reasonable
+** effort to verify that the there are no errors in the source and
+** executable code.  We cannot, however, guarantee that the programs
+** are error-free.  Consequently, McGraw-HIll and BYTE Magazine make
+** no claims in regard to the fitness of the source code, executable
+** code, and documentation of the BYTEmark.
+**  Furthermore, BYTE Magazine, McGraw-Hill, and all employees
+** of McGraw-Hill cannot be held responsible for any damages resulting
+** from the use of this code or the results obtained from using
+** this code.
+*/
+
+#include <stdio.h>
+#include "misc.h"
+
+/***********************************************************
+**     MISCELLANEOUS BUT OTHERWISE NECESSARY ROUTINES     **
+***********************************************************/
+
+/****************************
+** RANDOM NUMBER GENERATOR **
+*****************************
+** This is a second-order linear congruential random number
+** generator.  Its advantage is (of course) that it can be
+** seeded and will thus produce repeatable sequences of
+** random numbers.
+*/
+
+/****************************
+*         randwc()          *
+*****************************
+** Returns signed long random modulo num.
+*/
+/*
+long randwc(long num)
+{
+	return(randnum(0L)%num);
+}
+*/
+/*
+** Returns signed 32-bit random modulo num.
+*/
+int32 randwc(int32 num)
+{	int32 next=randnum((int32)0);	/* 0 = take the next value, no restart */
+	return(next%num);
+}
+
+/***************************
+**      abs_randwc()      **
+****************************
+** Same as randwc(), only this routine never returns a
+** negative number (the sign of the result is dropped).
+*/
+/*
+unsigned long abs_randwc(unsigned long num)
+{
+long temp;
+
+temp=randwc(num);
+if(temp<0) temp=0L-temp;
+
+return((unsigned long)temp);
+}
+*/
+u32 abs_randwc(u32 num)
+{
+int32 drawn;		/* Signed value handed back by randwc() */
+
+drawn=randwc(num);
+
+/* Fold a negative result onto the non-negative side */
+return((u32)((drawn<0) ? (int32)0-drawn : drawn));
+}
+
+/****************************
+*        randnum()          *
+*****************************
+** Second order linear congruential generator.
+** Constants suggested by J. G. Skellam.
+** If lngval==0, returns next member of sequence.
+**    lngval!=0, restarts the generator first.
+*/
+/*
+long randnum(long lngval)
+{
+	register long interm;
+	static long randw[2] = { 13L , 117L };
+
+	if (lngval!=0L)
+	{	randw[0]=13L; randw[1]=117L; }
+
+	interm=(randw[0]*254754L+randw[1]*529562L)%999563L;
+	randw[1]=randw[0];
+	randw[0]=interm;
+	return(interm);
+}
+*/
+int32 randnum(int32 lngval)
+{
+	register int32 interm;
+	static int32 randw[2] = { (int32)13 , (int32)117 };
+	if (lngval!=(int32)0)	/* nonzero argument: restart from the seeds */
+	{	randw[0]=(int32)13; randw[1]=(int32)117; }
+
+	/* Products exceed 32 bits: wrap them as u32, signed overflow is undefined */
+	interm=(int32)((u32)randw[0]*(u32)254754+(u32)randw[1]*(u32)529562);
+	interm%=(int32)999563;
+	randw[1]=randw[0];
+	randw[0]=interm;
+	return(interm);
+}
+
diff --git a/misc.h b/misc.h
new file mode 100644
index 0000000..0f9bc13
--- /dev/null
+++ b/misc.h
@@ -0,0 +1,41 @@
+/*
+** misc.h
+** Header for misc.c
+** BYTEmark (tm)
+** BYTE's Native Mode Benchmarks
+** Rick Grehan, BYTE Magazine
+**
+** Creation:
+** Revision: 3/95
+**
+** DISCLAIMER
+** The source, executable, and documentation files that comprise
+** the BYTEmark benchmarks are made available on an "as is" basis.
+** This means that we at BYTE Magazine have made every reasonable
+** effort to verify that the there are no errors in the source and
+** executable code.  We cannot, however, guarantee that the programs
+** are error-free.  Consequently, McGraw-HIll and BYTE Magazine make
+** no claims in regard to the fitness of the source code, executable
+** code, and documentation of the BYTEmark.
+**  Furthermore, BYTE Magazine, McGraw-Hill, and all employees
+** of McGraw-Hill cannot be held responsible for any damages resulting
+** from the use of this code or the results obtained from using
+** this code.
+*/
+
+/************************
+** FUNCTION PROTOTYPES **
+************************/
+
+/*
+long randwc(long num);
+unsigned long abs_randwc(unsigned long num);
+long randnum(long lngval);
+*/
+
+#include "nmglobal.h"
+int32 randwc(int32 num);        /* next generator value modulo num (signed) */
+u32 abs_randwc(u32 num);        /* randwc() with the sign dropped */
+int32 randnum(int32 lngval);    /* next value; nonzero lngval reseeds first */
+
+
diff --git a/nbench0.c b/nbench0.c
new file mode 100644
index 0000000..784b501
--- /dev/null
+++ b/nbench0.c
@@ -0,0 +1,1174 @@
+
+/*
+** nbench0.c
+*/
+
+/*******************************************
+**             BYTEmark (tm)              **
+** BYTE MAGAZINE'S NATIVE MODE BENCHMARKS **
+**           FOR CPU/FPU                  **
+**             ver 2.0                    **
+**       Rick Grehan, BYTE Magazine       **
+********************************************
+** NOTE: These benchmarks do NOT check for the presence
+** of an FPU.  You have to find that out manually.
+**
+** REVISION HISTORY FOR BENCHMARKS
+**  9/94 -- First beta. --RG
+**  12/94 -- Bug discovered in some of the integer routines
+**    (IDEA, Huffman,...).  Routines were not accurately counting
+**    the number of loops.  Fixed. --RG (Thanks to Steve A.)
+**  12/94 -- Added routines to calculate and display index
+**    values. Indexes based on DELL XPS 90 (90 MHz Pentium).
+**  1/95 -- Added Mac time manager routines for more accurate
+**    timing on Macintosh (said to be good to 20 usecs) -- RG
+**  1/95 -- Re-did all the #defines so they made more
+**    sense.  See NMGLOBAL.H -- RG
+**  3/95 -- Fixed memory leak in LU decomposition.  Did not
+**    invalidate previous results, just made it easier to run.--RG
+**  3/95 -- Added TOOLHELP.DLL timing routine to Windows timer. --RG
+**  10/95 -- Added memory array & alignment; moved memory
+**      allocation out of LU Decomposition -- RG
+**
+** DISCLAIMER
+** The source, executable, and documentation files that comprise
+** the BYTEmark benchmarks are made available on an "as is" basis.
+** This means that we at BYTE Magazine have made every reasonable
+** effort to verify that the there are no errors in the source and
+** executable code.  We cannot, however, guarantee that the programs
+** are error-free.  Consequently, McGraw-HIll and BYTE Magazine make
+** no claims in regard to the fitness of the source code, executable
+** code, and documentation of the BYTEmark.
+**  Furthermore, BYTE Magazine, McGraw-Hill, and all employees
+** of McGraw-Hill cannot be held responsible for any damages resulting
+** from the use of this code or the results obtained from using
+** this code.
+*/
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <ctype.h>
+#include <string.h>
+#include <time.h>
+#include <math.h>
+#include "nmglobal.h"
+#include "nbench0.h"
+#include "hardware.h"
+
+/*************
+**** main ****
+*************/
+#ifdef MAC
+void main(void)
+#else
+int main(int argc, char *argv[])
+#endif
+{
+int i;                  /* Index over argv[] and over the tests */
+time_t time_and_date;   /* Current time, for the statistics header */
+struct tm *loctime;     /* time_and_date converted by localtime() */
+double bmean;           /* Benchmark mean */
+double bstdev;          /* Benchmark stdev */
+double lx_memindex;     /* Linux memory index (mainly integer operations)*/
+double lx_intindex;     /* Linux integer index */
+double lx_fpindex;      /* Linux floating-point index */
+double intindex;        /* Integer index */
+double fpindex;         /* Floating-point index */
+ulong bnumrun;          /* # of runs */
+/* Sets defaults, parses the arguments, runs each selected test, prints the indexes. */
+#ifdef MAC
+        MaxApplZone();
+#endif
+
+#ifdef MACTIMEMGR
+/* Set up high res timer */
+MacHSTdelay=600*1000*1000;      /* Delay is 10 minutes */
+
+memset((char *)&myTMTask,0,sizeof(TMTask));
+
+/* Prime and remove the task, calculating overhead */
+PrimeTime((QElemPtr)&myTMTask,-MacHSTdelay);
+RmvTime((QElemPtr)&myTMTask);
+MacHSTohead=MacHSTdelay+myTMTask.tmCount;
+#endif
+
+#ifdef WIN31TIMER
+/* Set up the size of the timer info structure */
+win31tinfo.dwSize=(DWORD)sizeof(TIMERINFO);
+/* Load library */
+if((hThlp=LoadLibrary("TOOLHELP.DLL"))<32)
+{       printf("Error loading TOOLHELP\n");
+        exit(0);
+}
+if(!(lpfn=GetProcAddress(hThlp,"TimerCount")))
+{       printf("TOOLHELP error\n");
+        exit(0);
+}
+#endif
+
+/*
+** Set global parameters to default.
+*/
+global_min_ticks=MINIMUM_TICKS;
+global_min_seconds=MINIMUM_SECONDS;
+global_allstats=0;
+global_custrun=0;
+global_align=8;
+write_to_file=0;
+lx_memindex=(double)1.0;        /* set for geometric mean computations */
+lx_intindex=(double)1.0;
+lx_fpindex=(double)1.0;
+intindex=(double)1.0;
+fpindex=(double)1.0;
+mem_array_ents=0;               /* Nothing in mem array */
+
+/*
+** We presume all tests will be run unless told
+** otherwise
+*/
+for(i=0;i<NUMTESTS;i++)
+        tests_to_do[i]=1;
+
+/*
+** Initialize test data structures to default
+** values.
+*/
+set_request_secs();     /* Set all request_secs fields */
+global_numsortstruct.adjust=0;
+global_numsortstruct.arraysize=NUMARRAYSIZE;
+
+global_strsortstruct.adjust=0;
+global_strsortstruct.arraysize=STRINGARRAYSIZE;
+
+global_bitopstruct.adjust=0;
+global_bitopstruct.bitfieldarraysize=BITFARRAYSIZE;
+
+global_emfloatstruct.adjust=0;
+global_emfloatstruct.arraysize=EMFARRAYSIZE;
+
+global_fourierstruct.adjust=0;
+
+global_assignstruct.adjust=0;
+
+global_ideastruct.adjust=0;
+global_ideastruct.arraysize=IDEAARRAYSIZE;
+
+global_huffstruct.adjust=0;
+global_huffstruct.arraysize=HUFFARRAYSIZE;
+
+global_nnetstruct.adjust=0;
+
+global_lustruct.adjust=0;
+
+/*
+** For Macintosh -- read the command line.
+*/
+#ifdef MAC
+UCommandLine();
+#endif
+
+/*
+** Handle any command-line arguments.  An argument that parse_arg()
+** rejects (-1) prints the help text and ends the program.
+*/
+if(argc>1)
+        for(i=1;i<argc;i++)
+                if(parse_arg(argv[i])==-1)
+                {       display_help(argv[0]);
+                        exit(0);
+                }
+/*
+** Output header
+*/
+#ifdef LINUX
+output_string("\nBYTEmark* Native Mode Benchmark ver. 2 (10/95)\n");
+output_string("Index-split by Andrew D. Balsa (11/97)\n");
+output_string("Linux/Unix* port by Uwe F. Mayer (12/96,11/97)\n");
+#else
+output_string("BBBBBB   YYY   Y  TTTTTTT  EEEEEEE\n");
+output_string("BBB   B  YYY   Y    TTT    EEE\n");
+output_string("BBB   B  YYY   Y    TTT    EEE\n");
+output_string("BBBBBB    YYY Y     TTT    EEEEEEE\n");
+output_string("BBB   B    YYY      TTT    EEE\n");
+output_string("BBB   B    YYY      TTT    EEE\n");
+output_string("BBBBBB     YYY      TTT    EEEEEEE\n\n");
+output_string("\nBYTEmark (tm) Native Mode Benchmark ver. 2 (10/95)\n");
+#endif
+/*
+** See if the user wants all stats.  Output heading info
+** if so.
+*/
+if(global_allstats)
+{
+                output_string("\n");
+                output_string("============================== ALL STATISTICS ===============================\n");
+        time(&time_and_date);
+        loctime=localtime(&time_and_date);
+        sprintf(buffer,"**Date and time of benchmark run: %s",asctime(loctime));
+        output_string(buffer);
+        sprintf(buffer,"**Sizeof: char:%u short:%u int:%u long:%u u8:%u u16:%u u32:%u int32:%u\n",
+                (unsigned int)sizeof(char),
+                (unsigned int)sizeof(short),
+                (unsigned int)sizeof(int),
+                (unsigned int)sizeof(long),
+                (unsigned int)sizeof(u8),
+                (unsigned int)sizeof(u16),
+                (unsigned int)sizeof(u32),
+                (unsigned int)sizeof(int32));
+        output_string(buffer);
+#ifdef LINUX
+#include "sysinfo.c"
+#else
+        sprintf(buffer,"**%s\n",sysname);
+        output_string(buffer);
+        sprintf(buffer,"**%s\n",compilername);
+        output_string(buffer);
+        sprintf(buffer,"**%s\n",compilerversion);
+        output_string(buffer);
+#endif
+                output_string("=============================================================================\n");
+}
+
+/*
+** Execute the tests.
+*/
+#ifdef LINUX
+output_string("\nTEST                : Iterations/sec.  : Old Index   : New Index\n");
+output_string("                    :                  : Pentium 90* : AMD K6/233*\n");
+output_string("--------------------:------------------:-------------:------------\n");
+#endif
+
+for(i=0;i<NUMTESTS;i++)
+{
+        if(tests_to_do[i])
+        {       sprintf(buffer,"%s    :",ftestnames[i]);
+                                output_string(buffer);
+                if (0!=bench_with_confidence(i,
+                        &bmean,
+                        &bstdev,
+                        &bnumrun)){
+		  output_string("\n** WARNING: The current test result is NOT 95 % statistically certain.\n");
+		  output_string("** WARNING: The variation among the individual results is too large.\n");
+		  output_string("                    :");
+		}
+#ifdef LINUX
+                sprintf(buffer," %15.5g  :  %9.2f  :  %9.2f\n",
+                        bmean,bmean/bindex[i],bmean/lx_bindex[i]);
+#else
+		sprintf(buffer,"  Iterations/sec.: %13.2f  Index: %6.2f\n",
+                        bmean,bmean/bindex[i]);
+#endif
+                output_string(buffer);
+		/*
+		** Gather integer or FP indexes.  Each index is a running
+		** product of mean/baseline ratios; the root is taken at the end.
+		*/
+		if((i==4)||(i==8)||(i==9)){	/* tests 4, 8 and 9 */
+		  /* FP index */
+		  fpindex=fpindex*(bmean/bindex[i]);
+		  /* Linux FP index */
+		  lx_fpindex=lx_fpindex*(bmean/lx_bindex[i]);
+		}
+		else{
+		  /* Integer index */
+		  intindex=intindex*(bmean/bindex[i]);
+		  if((i==0)||(i==3)||(i==6)||(i==7))	/* tests 0, 3, 6 and 7 */
+		    /* Linux integer index */
+		    lx_intindex=lx_intindex*(bmean/lx_bindex[i]);
+		  else
+		    /* Linux memory index */
+		    lx_memindex=lx_memindex*(bmean/lx_bindex[i]);
+		}
+
+                if(global_allstats)
+                {
+                        sprintf(buffer,"  Absolute standard deviation: %g\n",bstdev);
+                        output_string(buffer);
+			if (bmean>(double)1e-100){
+			  /* avoid division by zero */
+			  sprintf(buffer,"  Relative standard deviation: %g %%\n",
+				  (double)100*bstdev/bmean);
+			  output_string(buffer);
+			}
+                        sprintf(buffer,"  Number of runs: %lu\n",bnumrun);
+                        output_string(buffer);
+                        show_stats(i);
+                        sprintf(buffer,"Done with %s\n\n",ftestnames[i]);
+                        output_string(buffer);
+                }
+        }
+}
+/* printf("...done...\n"); */
+
+/*
+** Output the total indexes (skipped when global_custrun is nonzero)
+*/
+if(global_custrun==0)
+{
+        output_string("==========================ORIGINAL BYTEMARK RESULTS==========================\n");
+        sprintf(buffer,"INTEGER INDEX       : %.3f\n",
+                       pow(intindex,(double).142857));  /* .142857 ~ 1/7 */
+        output_string(buffer);
+        sprintf(buffer,"FLOATING-POINT INDEX: %.3f\n",
+                        pow(fpindex,(double).33333));   /* ~ 1/3: tests 4, 8, 9 */
+        output_string(buffer);
+        output_string("Baseline (MSDOS*)   : Pentium* 90, 256 KB L2-cache, Watcom* compiler 10.0\n");
+#ifdef LINUX
+        output_string("==============================LINUX DATA BELOW===============================\n");
+	hardware(write_to_file, global_ofile);
+#include "sysinfoc.c"
+        sprintf(buffer,"MEMORY INDEX        : %.3f\n",
+                       pow(lx_memindex,(double).3333333333));   /* ~ 1/3 */
+        output_string(buffer);
+        sprintf(buffer,"INTEGER INDEX       : %.3f\n",
+                       pow(lx_intindex,(double).25));   /* 4th root: tests 0, 3, 6, 7 */
+        output_string(buffer);
+        sprintf(buffer,"FLOATING-POINT INDEX: %.3f\n",
+                        pow(lx_fpindex,(double).3333333333));   /* ~ 1/3 */
+        output_string(buffer);
+        output_string("Baseline (LINUX)    : AMD K6/233*, 512 KB L2-cache, gcc 2.7.2.3, libc-5.4.38\n");
+#endif
+output_string("* Trademarks are property of their respective holder.\n");
+}
+
+exit(0);
+}
+
+/**************
+** parse_arg **
+***************
+** Parse one command-line argument of the form "-<letter>[text]".
+** -V turns on verbose statistics; -C<FILE> reads a command file.
+** Return 0 if ok, else return -1 (also for "-?", the help request).
+*/
+static int parse_arg(char *argptr)
+{
+int option;     /* Option letter, folded to upper case */
+FILE *cfile;    /* Command file identifier */
+
+/*
+** First character has got to be a hyphen.
+*/
+if(*argptr++!='-') return(-1);
+
+/*
+** Fold only the option letter to upper case.  What follows it may
+** be a file name, which is case-sensitive on many systems, so it
+** must stay as typed.  toupper() needs an unsigned char value.
+*/
+option=toupper((unsigned char)*argptr++);
+
+/*
+** The option letter picks the action.
+*/
+switch(option)
+{
+        case '?':       return(-1);     /* Will display help */
+
+        case 'V': global_allstats=1; return(0); /* verbose mode */
+
+        case 'C':                       /* Command file name */
+                /*
+                ** First try to open the file for reading.
+                */
+                cfile=fopen(argptr,"r");
+                if(cfile==(FILE *)NULL)
+                {       printf("**Error opening file: %s\n",argptr);
+                        return(-1);
+                }
+                read_comfile(cfile);    /* Read commands */
+                fclose(cfile);
+                break;
+        default:
+                return(-1);
+}
+return(0);
+}
+
+/*******************
+** display_help() **
+********************
+** Print the usage summary for the program named by progname,
+** then terminate the process with exit status 0.  Never returns.
+*/
+void display_help(char *progname)
+{
+        printf("Usage: %s [-v] [-c<FILE>]\n"
+               " -v = verbose\n"
+               " -c = input parameters thru command file <FILE>\n",progname);
+        exit(0);
+}
+
+
+/*****************
+** read_comfile **
+******************
+** Read "NAME=value" lines from the already-open command file
+** and set the matching global parameter for each.  Malformed
+** or unknown lines are reported on stdout and skipped.
+*/
+static void read_comfile(FILE *cfile)
+{
+char inbuf[40];         /* One line of the command file */
+char *eptr;             /* Offset to "=" sign */
+int i;                  /* Index */
+
+/*
+** Sit in a big loop, reading a line from the file at each
+** pass.  Terminate on EOF.
+*/
+while(fgets(inbuf,(int)sizeof(inbuf),cfile)!=(char *)NULL)
+{
+        /* Cut the line at its terminator, if it has one.  The last
+        ** line of a file may not, so never chop a byte blindly. */
+        inbuf[strcspn(inbuf,"\r\n")]='\0';
+
+        /*
+        ** Parse up to the "=" sign.  If we don't find an
+        ** "=", then flag an error.
+        */
+        if((eptr=strchr(inbuf,(int)'='))==(char *)NULL)
+        {       printf("**COMMAND FILE ERROR at LINE:\n %s\n",
+                        inbuf);
+                continue;               /* Skip this line */
+        }
+
+        /*
+        ** Replace the "=" with a null, uppercase the name, and
+        ** search paramnames from index MAXPARAM down to 0 (so
+        ** MAXPARAM must be a valid index -- presumably the last).
+        */
+        *eptr++='\0';
+        strtoupper((char *)&inbuf[0]);
+        i=MAXPARAM;
+        do {
+                if(strcmp(inbuf,paramnames[i])==0)
+                        break;
+        } while(--i>=0);
+
+        if(i<0)
+        {       printf("**COMMAND FILE ERROR -- UNKNOWN PARAM: %s\n",
+                        inbuf);
+                continue;
+        }
+
+        /*
+        ** eptr now points just past the "=", at the value
+        ** assigned to the parameter.  Store it where i says.
+        */
+        switch(i)
+        {
+                case PF_GMTICKS:        /* GLOBALMINTICKS */
+                        global_min_ticks=(ulong)atol(eptr);
+                        break;
+
+                case PF_MINSECONDS:     /* MINSECONDS */
+                        global_min_seconds=(ulong)atol(eptr);
+                        set_request_secs();
+                        break;
+
+                case PF_ALLSTATS:       /* ALLSTATS */
+                        global_allstats=getflag(eptr);
+                        break;
+
+                case PF_OUTFILE:        /* OUTFILE */
+                        strcpy(global_ofile_name,eptr);
+                        global_ofile=fopen(global_ofile_name,"a");
+                        /*
+                        ** The file is opened for append; give up if that failed.
+                        */
+                        if(global_ofile==(FILE *)NULL)
+                        {       printf("**Error opening output file: %s\n",
+                                        global_ofile_name);
+                                ErrorExit();
+                        }
+                        write_to_file=-1;
+                        break;
+
+                case PF_CUSTOMRUN:      /* CUSTOMRUN */
+                        global_custrun=getflag(eptr);
+                        for(i=0;i<NUMTESTS;i++) /* i is free for reuse here */
+                                tests_to_do[i]=1-global_custrun;
+                        break;
+
+                case PF_DONUM:          /* DONUMSORT */
+                        tests_to_do[TF_NUMSORT]=getflag(eptr);
+                        break;
+
+                case PF_NUMNUMA:        /* NUMNUMARRAYS */
+                        global_numsortstruct.numarrays=
+                                (ushort)atoi(eptr);
+                        global_numsortstruct.adjust=1;
+                        break;
+
+                case PF_NUMASIZE:       /* NUMARRAYSIZE */
+                        global_numsortstruct.arraysize=
+                                (ulong)atol(eptr);
+                        break;
+
+                case PF_NUMMINS:        /* NUMMINSECONDS */
+                        global_numsortstruct.request_secs=
+                                (ulong)atol(eptr);
+                        break;
+
+                case PF_DOSTR:          /* DOSTRINGSORT */
+                        tests_to_do[TF_SSORT]=getflag(eptr);
+                        break;
+
+                case PF_STRASIZE:       /* STRARRAYSIZE */
+                        global_strsortstruct.arraysize=
+                                (ulong)atol(eptr);
+                        break;
+
+                case PF_NUMSTRA:        /* NUMSTRARRAYS */
+                        global_strsortstruct.numarrays=
+                                (ushort)atoi(eptr);
+                        global_strsortstruct.adjust=1;
+                        break;
+
+                case PF_STRMINS:        /* STRMINSECONDS */
+                        global_strsortstruct.request_secs=
+                                (ulong)atol(eptr);
+                        break;
+
+                case PF_DOBITF: /* DOBITFIELD */
+                        tests_to_do[TF_BITOP]=getflag(eptr);
+                        break;
+
+                case PF_NUMBITOPS:      /* NUMBITOPS */
+                        global_bitopstruct.bitoparraysize=
+                                (ulong)atol(eptr);
+                        global_bitopstruct.adjust=1;
+                        break;
+
+                case PF_BITFSIZE:       /* BITFIELDSIZE */
+                        global_bitopstruct.bitfieldarraysize=
+                                (ulong)atol(eptr);
+                        break;
+
+                case PF_BITMINS:        /* BITMINSECONDS */
+                        global_bitopstruct.request_secs=
+                                (ulong)atol(eptr);
+                        break;
+
+                case PF_DOEMF:          /* DOEMF */
+                        tests_to_do[TF_FPEMU]=getflag(eptr);
+                        break;
+
+                case PF_EMFASIZE:       /* EMFARRAYSIZE */
+                        global_emfloatstruct.arraysize=
+                                (ulong)atol(eptr);
+                        break;
+
+                case PF_EMFLOOPS:       /* EMFLOOPS */
+                        global_emfloatstruct.loops=
+                                (ulong)atol(eptr);
+                        break;
+
+                case PF_EMFMINS:        /* EMFMINSECOND */
+                        global_emfloatstruct.request_secs=
+                                (ulong)atol(eptr);
+                        break;
+
+                case PF_DOFOUR: /* DOFOUR */
+                        tests_to_do[TF_FFPU]=getflag(eptr);
+                        break;
+
+                case PF_FOURASIZE:      /* FOURASIZE */
+                        global_fourierstruct.arraysize=
+                                (ulong)atol(eptr);
+                        global_fourierstruct.adjust=1;
+                        break;
+
+                case PF_FOURMINS:       /* FOURMINSECONDS */
+                        global_fourierstruct.request_secs=
+                                (ulong)atol(eptr);
+                        break;
+
+                case PF_DOASSIGN:       /* DOASSIGN */
+                        tests_to_do[TF_ASSIGN]=getflag(eptr);
+                        break;
+
+                case PF_AARRAYS:        /* ASSIGNARRAYS */
+                        global_assignstruct.numarrays=
+                                (ulong)atol(eptr);
+                        break;
+
+                case PF_ASSIGNMINS:     /* ASSIGNMINSECONDS */
+                        global_assignstruct.request_secs=
+                                (ulong)atol(eptr);
+                        break;
+
+                case PF_DOIDEA: /* DOIDEA */
+                        tests_to_do[TF_IDEA]=getflag(eptr);
+                        break;
+
+                case PF_IDEAASIZE:      /* IDEAARRAYSIZE */
+                        global_ideastruct.arraysize=
+                                (ulong)atol(eptr);
+                        break;
+
+                case PF_IDEALOOPS:      /* IDEALOOPS */
+                        global_ideastruct.loops=
+                                (ulong)atol(eptr);
+                        break;
+
+                case PF_IDEAMINS:       /* IDEAMINSECONDS */
+                        global_ideastruct.request_secs=
+                                (ulong)atol(eptr);
+                        break;
+
+                case PF_DOHUFF: /* DOHUFF */
+                        tests_to_do[TF_HUFF]=getflag(eptr);
+                        break;
+
+                case PF_HUFFASIZE:      /* HUFFARRAYSIZE */
+                        global_huffstruct.arraysize=
+                                (ulong)atol(eptr);
+                        break;
+
+                case PF_HUFFLOOPS:      /* HUFFLOOPS */
+                        global_huffstruct.loops=
+                                (ulong)atol(eptr);
+                        global_huffstruct.adjust=1;
+                        break;
+
+                case PF_HUFFMINS:       /* HUFFMINSECONDS */
+                        global_huffstruct.request_secs=
+                                (ulong)atol(eptr);
+                        break;
+
+                case PF_DONNET: /* DONNET */
+                        tests_to_do[TF_NNET]=getflag(eptr);
+                        break;
+
+                case PF_NNETLOOPS:      /* NNETLOOPS */
+                        global_nnetstruct.loops=
+                                (ulong)atol(eptr);
+                        global_nnetstruct.adjust=1;
+                        break;
+
+                case PF_NNETMINS:       /* NNETMINSECONDS */
+                        global_nnetstruct.request_secs=
+                                (ulong)atol(eptr);
+                        break;
+
+                case PF_DOLU:           /* DOLU */
+                        tests_to_do[TF_LU]=getflag(eptr);
+                        break;
+
+                case PF_LUNARRAYS:      /* LUNUMARRAYS */
+                        global_lustruct.numarrays=
+                                (ulong)atol(eptr);
+                        global_lustruct.adjust=1;
+                        break;
+
+                case PF_LUMINS: /* LUMINSECONDS */
+                        global_lustruct.request_secs=
+                                (ulong)atol(eptr);
+                        break;
+
+                case PF_ALIGN:          /* ALIGN */
+                        global_align=atoi(eptr);
+                        break;
+                default:                /* Not reached: i is validated above */
+                        break;
+        }
+}       /* End while */
+
+return;
+}
+
+/************
+** getflag **
+*************
+** Return 1 if cptr points to "T" or "t"; 0 otherwise.
+*/
+static int getflag(char *cptr)
+{
+        /* The cast keeps toupper() defined for negative char values */
+        return(toupper((unsigned char)*cptr)=='T');
+}
+
+/***************
+** strtoupper **
+****************
+** Converts a null-terminated string to upper case, in place.
+** The terminator is tested before each character is touched,
+** so an empty string is left alone and never overrun.
+*/
+static void strtoupper(char *s)
+{
+
+while(*s!=(char)'\0')
+{       /*
+        ** Convert and advance in separate statements: the form
+        ** "*s++=toupper(*s)" reads and modifies s unsequenced.
+        ** toupper() requires an unsigned char value, hence the cast.
+        */
+        *s=(char)toupper((unsigned char)*s);
+        s++;
+}
+return;
+}
+
+/*********************
+** set_request_secs **
+**********************
+** Copy global_min_seconds into the "request_secs" entry of
+** every benchmark structure.  read_comfile calls this when
+** the command file sets MINSECONDS; a later per-test
+** *MINSECONDS line can still override a single entry.
+*/
+static void set_request_secs(void)
+{
+ulong secs=global_min_seconds;  /* Value handed to every test */
+
+global_lustruct.request_secs=secs;
+global_nnetstruct.request_secs=secs;
+global_huffstruct.request_secs=secs;
+global_ideastruct.request_secs=secs;
+global_assignstruct.request_secs=secs;
+global_fourierstruct.request_secs=secs;
+global_emfloatstruct.request_secs=secs;
+global_bitopstruct.request_secs=secs;
+global_strsortstruct.request_secs=secs;
+global_numsortstruct.request_secs=secs;
+return;
+}
+
+
+/**************************
+** bench_with_confidence **
+***************************
+** Given a benchmark id that indicates a function, this routine
+** runs that benchmark 5 times, then keeps adding one run at a
+** time until the 95% confidence half-interval of the collected
+** scores is at most 5% of their mean, or 30 runs have been made.
+**
+** No score is ever discarded or replaced: the statistical theory
+** depends on independent observations, and exchanging data points
+** depending on what we already have would violate that
+** independence.  We simply do more runs and hope to get a big
+** enough sample size so that things stabilize. (Uwe F. Mayer)
+**
+** Return 0 if the criterion was met, -1 if it was not.  *mean
+** and *stdev describe all the runs made, and *numtries counts
+** them (values come from the last calc_confidence call).
+*/
+static int bench_with_confidence(int fid,       /* Function id */
+        double *mean,                   /* Mean of scores */
+        double *stdev,                  /* Standard deviation */
+        ulong *numtries)                /* # of attempts */
+{
+double myscores[30];            /* Need at least 5 scores, use at most 30 */
+double c_half_interval;         /* Confidence half interval */
+int i;                          /* Index */
+/* double newscore; */          /* For improving confidence interval */
+
+/*
+** Get first 5 scores.  Then begin confidence testing.
+*/
+for (i=0;i<5;i++)
+{       (*funcpointer[fid])();
+        myscores[i]=getscore(fid);
+#ifdef DEBUG
+	printf("score # %d = %g\n", i, myscores[i]);
+#endif
+}
+*numtries=5;            /* Show 5 attempts */
+
+/*
+** The system allows a maximum of 30 tries before it gives
+** up.  Since we've done 5 already, we'll allow 25 more.
+*/
+
+/*
+** Enter loop to test for confidence criteria.
+*/
+while(1)
+{
+        /*
+        ** Calculate confidence. Should always return 0.
+        */
+        if (0!=calc_confidence(myscores,
+		(int)*numtries,         /* 5..30, always fits an int */
+                &c_half_interval,
+                mean,
+                stdev)) return(-1);
+
+        /*
+        ** Is the length of the half interval 5% or less of mean?
+        ** If so, we can go home.  Otherwise, we have to continue.
+        */
+        if(c_half_interval/ (*mean) <= (double)0.05)
+                break;
+
+#ifdef OLDCODE
+#undef OLDCODE
+#endif
+#ifdef OLDCODE
+/* this code is no longer valid, we now do not replace but add new scores */
+/* Uwe F. Mayer */
+	      /*
+	      ** Go get a new score and see if it
+	      ** improves existing scores.
+	      */
+	      do {
+		      if(*numtries==10)
+			      return(-1);
+		      (*funcpointer[fid])();
+		      *numtries+=1;
+		      newscore=getscore(fid);
+	      } while(seek_confidence(myscores,&newscore,
+		      &c_half_interval,mean,stdev)==0);
+#endif
+	/* We now simply add a new test run and hope that the runs
+           finally stabilize, Uwe F. Mayer */
+	if(*numtries==30) return(-1);   /* myscores is full: give up */
+	(*funcpointer[fid])();
+	myscores[*numtries]=getscore(fid);
+#ifdef DEBUG
+	printf("score # %lu = %g\n", *numtries, myscores[*numtries]);
+#endif
+	*numtries+=1;
+}
+
+return(0);
+}
+
+#ifdef OLDCODE
+/* this procedure is no longer needed (OLDCODE is never defined here), Uwe F. Mayer */
+  /********************
+  ** seek_confidence **
+  *********************
+  ** Pass this routine an array of 5 scores PLUS a new score.
+  ** It substitutes the new score for each of the five in turn
+  ** and keeps the substitution giving the smallest standard
+  ** deviation, provided that beats the original deviation.
+  ** Return 0 if failure.  Original 5 scores unchanged.
+  ** Return -1 if success; the chosen score is then replaced.
+  ** NOTE: the calc_confidence calls below use an old 4-argument
+  ** form and do not match the current 5-parameter definition.
+  */
+  static int seek_confidence( double scores[5],
+  		double *newscore,
+  		double *c_half_interval,
+  		double *smean,
+  		double *sdev)
+  {
+  double sdev_to_beat;    /* Smallest standard deviation seen so far */
+  double temp;            /* Holds the score being tried out */
+  int is_beaten;          /* Index of score to replace, -1 if none */
+  int i;                  /* Index */
+
+  /*
+  ** First calculate original standard deviation
+  */
+  calc_confidence(scores,c_half_interval,smean,sdev);
+  sdev_to_beat=*sdev;
+  is_beaten=-1;
+
+  /*
+  ** Try the new score in each slot, restoring the slot afterwards.
+  ** is_beaten ends up naming the best slot, if any improved.
+  */
+  for(i=0;i<5;i++)
+  {
+  	temp=scores[i];
+  	scores[i]=*newscore;
+  	calc_confidence(scores,c_half_interval,smean,sdev);
+  	scores[i]=temp;
+  	if(sdev_to_beat>*sdev)
+  	{       is_beaten=i;
+  		sdev_to_beat=*sdev;
+  	}
+  }
+
+  if(is_beaten!=-1)
+  {       scores[is_beaten]=*newscore;
+  	return(-1);
+  }
+  return(0);
+  }
+#endif
+
+/********************
+** calc_confidence **
+*********************
+** Given a set of num_scores scores (2 to 30), calculate the 95%
+** confidence half-interval of their mean.  We'll also return the
+** sample mean and sample standard deviation.
+** NOTE: This routine presumes a confidence level of 95%, i.e. it
+** uses the upper .025 quantile of the t distribution.
+** Returns 0 on success, -1 if num_scores is out of range.
+*/
+static int calc_confidence(double scores[], /* Array of scores */
+                int num_scores,             /* number of scores in array */
+                double *c_half_interval,    /* Confidence half-int */
+                double *smean,              /* Sample mean */
+                double *sdev)               /* Sample stand dev */
+{
+/* Two-sided 95% critical values of the Student t distribution for
+   1 to 29 degrees of freedom, indexed by degrees of freedom.  Entry
+   0 is a placeholder; static const, so built once, not per call. */
+static const double student_t[30]={0.0 , 12.706 , 4.303 , 3.182 , 2.776 , 2.571 ,
+                             2.447 , 2.365 , 2.306 , 2.262 , 2.228 ,
+                             2.201 , 2.179 , 2.160 , 2.145 , 2.131 ,
+                             2.120 , 2.110 , 2.101 , 2.093 , 2.086 ,
+                             2.080 , 2.074 , 2.069 , 2.064 , 2.060 ,
+                             2.056 , 2.052 , 2.048 , 2.045 };
+int i;          /* Index */
+if ((num_scores<2) || (num_scores>30)) {
+  output_string("Internal error: calc_confidence called with an illegal number of scores\n");
+  return(-1);
+}
+/*
+** First calculate mean.
+*/
+*smean=(double)0.0;
+for(i=0;i<num_scores;i++){
+  *smean+=scores[i];
+}
+*smean/=(double)num_scores;
+
+/* Get standard deviation (divisor is num_scores-1: sample form) */
+*sdev=(double)0.0;
+for(i=0;i<num_scores;i++) {
+  *sdev+=(scores[i]-(*smean))*(scores[i]-(*smean));
+}
+*sdev/=(double)(num_scores-1);
+*sdev=sqrt(*sdev);
+
+/* Now calculate the length of the confidence half-interval.  For a
+** confidence level of 95% our confidence coefficient gives us a
+** multiplying factor of the upper .025 quantile of a t distribution
+** with num_scores-1 degrees of freedom, and dividing by sqrt(number of
+** observations). See any introduction to statistics.
+*/
+*c_half_interval=student_t[num_scores-1] * (*sdev) / sqrt((double)num_scores);
+return(0);
+}
+
+/*************
+** getscore **
+**************
+** Look up the latest score recorded for the benchmark fid.
+*/
+static double getscore(int fid)
+{
+double score=(double)0.0;       /* Unrecognized ids score zero */
+/*
+** Every benchmark keeps its result in its own global structure,
+** under its own field name; pick the one that matches fid.
+*/
+switch(fid)
+{
+        case TF_NUMSORT:
+                score=global_numsortstruct.sortspersec; break;
+        case TF_SSORT:
+                score=global_strsortstruct.sortspersec; break;
+        case TF_BITOP:
+                score=global_bitopstruct.bitopspersec; break;
+        case TF_FPEMU:
+                score=global_emfloatstruct.emflops; break;
+        case TF_FFPU:
+                score=global_fourierstruct.fflops; break;
+        case TF_ASSIGN:
+                score=global_assignstruct.iterspersec; break;
+        case TF_IDEA:
+                score=global_ideastruct.iterspersec; break;
+        case TF_HUFF:
+                score=global_huffstruct.iterspersec; break;
+        case TF_NNET:
+                score=global_nnetstruct.iterspersec; break;
+        case TF_LU:
+                score=global_lustruct.iterspersec; break;
+}
+return(score);
+}
+
+/******************
+** output_string **
+*******************
+** Print a string on standard output and, when write_to_file
+** is nonzero, append the same text to global_ofile as well.
+** Nothing is added to the text: the caller supplies any
+** newline it wants at the end of the buffer.
+*/
+static void output_string(char *buffer)
+{
+
+printf("%s",buffer);
+if(write_to_file==0)
+        return;         /* Screen only */
+fprintf(global_ofile,"%s",buffer);
+}
+
+/***************
+** show_stats **
+****************
+** Print the configuration values of one benchmark through
+** output_string.  bid selects the benchmark (a TF_ constant).
+*/
+static void show_stats (int bid)
+{
+char buffer[80];        /* Display buffer */
+
+switch(bid)
+{
+        case TF_NUMSORT:                /* Numeric sort */
+                sprintf(buffer,"  Number of arrays: %d\n",
+                        global_numsortstruct.numarrays);
+                output_string(buffer);
+                sprintf(buffer,"  Array size: %lu\n",
+                        global_numsortstruct.arraysize);
+                output_string(buffer);
+                break;
+
+        case TF_SSORT:          /* String sort */
+                sprintf(buffer,"  Number of arrays: %d\n",
+                        global_strsortstruct.numarrays);
+                output_string(buffer);
+                sprintf(buffer,"  Array size: %lu\n",
+                        global_strsortstruct.arraysize);
+                output_string(buffer);
+                break;
+
+        case TF_BITOP:          /* Bitmap operation */
+                sprintf(buffer,"  Operations array size: %lu\n",
+                        global_bitopstruct.bitoparraysize);
+                output_string(buffer);
+                sprintf(buffer,"  Bitfield array size: %lu\n",
+                        global_bitopstruct.bitfieldarraysize);
+                output_string(buffer);
+                break;
+
+        case TF_FPEMU:          /* Floating-point emulation */
+                sprintf(buffer,"  Number of loops: %lu\n",
+                        global_emfloatstruct.loops);
+                output_string(buffer);
+                sprintf(buffer,"  Array size: %lu\n",
+                        global_emfloatstruct.arraysize);
+                output_string(buffer);
+                break;
+
+        case TF_FFPU:           /* Fourier test */
+                sprintf(buffer,"  Number of coefficients: %lu\n",
+                        global_fourierstruct.arraysize);
+                output_string(buffer);
+                break;
+
+        case TF_ASSIGN:         /* Assignment test */
+                sprintf(buffer,"  Number of arrays: %lu\n",
+                        global_assignstruct.numarrays);
+                output_string(buffer);
+                break;
+
+        case TF_IDEA:           /* IDEA test */
+                sprintf(buffer,"  Array size: %lu\n",
+                        global_ideastruct.arraysize);
+                output_string(buffer);
+                sprintf(buffer,"  Number of loops: %lu\n",
+                        global_ideastruct.loops);
+                output_string(buffer);
+                break;
+
+        case TF_HUFF:           /* Huffman test */
+                sprintf(buffer,"  Array size: %lu\n",
+                        global_huffstruct.arraysize);
+                output_string(buffer);
+                sprintf(buffer,"  Number of loops: %lu\n",
+                        global_huffstruct.loops);
+                output_string(buffer);
+                break;
+
+        case TF_NNET:           /* Neural net test */
+                sprintf(buffer,"  Number of loops: %lu\n",
+                        global_nnetstruct.loops);
+                output_string(buffer);
+                break;
+
+        case TF_LU:             /* LU test */
+                sprintf(buffer,"  Number of arrays: %lu\n",
+                        global_lustruct.numarrays);
+                output_string(buffer);
+                break;
+}
+return;
+}
+
+/*
+** Following code added for Mac stuff, so that we can emulate command
+** lines.
+*/
+
+#ifdef MAC
+
+/*****************
+** UCommandLine **
+******************
+** Reads in a command line, and sets up argc and argv appropriately.
+** The line is read with fgets(), bounded by the 129-byte size that
+** is documented for Uargbuff, so over-long input is truncated
+** instead of overflowing the buffer.  EOF yields an empty line.
+*/
+void UCommandLine(void)
+{
+printf("Enter command line\n:");
+if(fgets((char *)Uargbuff,129,stdin)==(char *)NULL)
+        Uargbuff[0]='\0';       /* EOF or error: no arguments */
+UParse();       /* A trailing newline is whitespace to UParse */
+}
+
+/***********
+** UParse **
+************
+** Split the pseudo command-line held in Uargbuff into arguments.
+** This code appeared as part of the Small-C library in Dr. Dobb's
+** ToolBook of C.  It works on the following globals:
+** argc = arg count
+** argv = Pointer to array of char pointers
+** Uargbuff = Character array that holds the arguments.  Should be 129 bytes long.
+** Udummy = A 2-byte buffer that is set to "*" and used as the first
+**  argument in the list.  This maintains compatibility with other
+**  C's, though it does not provide access to the executable filename.
+** Up to 20 argv entries are filled, the dummy included; extra fields are dropped.
+** Also note that this routine does NOT allow for redirection.
+*/
+void UParse(void)
+{
+unsigned char *cursor;  /* Scan position within Uargbuff */
+
+Udummy[0]='*';  /* Set dummy first argument */
+Udummy[1]='\0';
+argv[0]=(char *)Udummy;
+argc=1;         /* The dummy is argument number 0 */
+
+for(cursor=Uargbuff;*cursor;)
+{
+        if(!isspace(*cursor))
+        {       /* Start of a field: record it, then step past it */
+                if(argc<20) argv[argc++]=(char *)cursor;
+                cursor=UField(cursor);
+                continue;
+        }
+        ++cursor;       /* Skip separator */
+}
+return;
+}
+/***********
+** UField **
+************
+** NUL-terminate the field at ptr; return where scanning resumes.
+*/
+unsigned char *UField(unsigned char *ptr)
+{
+while(*ptr)
+{       if(isspace(*ptr))
+        {       *ptr=(unsigned char)'\0';       /* Terminate the field */
+                return(++ptr);
+        }
+        ++ptr;
+}
+return(ptr);    /* Hit the end of the buffer: last field */
+}
+#endif
diff --git a/nbench0.h b/nbench0.h
new file mode 100644
index 0000000..cef0928
--- /dev/null
+++ b/nbench0.h
@@ -0,0 +1,356 @@
+/*
+** nbench0.h
+** Header for nbench0.c
+** BYTEmark (tm)
+** BYTE's Native Mode Benchmarks
+** Rick Grehan, BYTE Magazine
+**
+** Creation:
+** Revision: 3/95;10/95
+**  10/95 - Added memory array & alignment -- RG
+**
+** DISCLAIMER
+** The source, executable, and documentation files that comprise
+** the BYTEmark benchmarks are made available on an "as is" basis.
+** This means that we at BYTE Magazine have made every reasonable
+** effort to verify that there are no errors in the source and
+** executable code.  We cannot, however, guarantee that the programs
+** are error-free.  Consequently, McGraw-Hill and BYTE Magazine make
+** no claims in regard to the fitness of the source code, executable
+** code, and documentation of the BYTEmark.
+**  Furthermore, BYTE Magazine, McGraw-Hill, and all employees
+** of McGraw-Hill cannot be held responsible for any damages resulting
+** from the use of this code or the results obtained from using
+** this code.
+*/
+
+/*
+** Following should be modified accordingly per each
+** compilation.
+*/
+char *sysname="You can enter your system description in nbench0.h";
+char *compilername="It then will be printed here after you recompile";
+char *compilerversion="Have a nice day";
+
+/*  Parameter flags.  Each value is the index of the matching keyword
+** in the paramnames[] array, which appears below. */
+#define PF_GMTICKS 0            /* GLOBALMINTICKS */
+#define PF_MINSECONDS 1         /* MINSECONDS */
+#define PF_ALLSTATS 2           /* ALLSTATS */
+#define PF_OUTFILE 3            /* OUTFILE */
+#define PF_CUSTOMRUN 4          /* CUSTOMRUN */
+#define PF_DONUM 5              /* DONUMSORT */
+#define PF_NUMNUMA 6            /* NUMNUMARRAYS */
+#define PF_NUMASIZE 7           /* NUMARRAYSIZE */
+#define PF_NUMMINS 8            /* NUMMINSECONDS */
+#define PF_DOSTR 9              /* DOSTRINGSORT */
+#define PF_STRASIZE 10          /* STRARRAYSIZE */
+#define PF_NUMSTRA 11           /* NUMSTRARRAYS */
+#define PF_STRMINS 12           /* STRMINSECONDS */
+#define PF_DOBITF 13            /* DOBITFIELD */
+#define PF_NUMBITOPS 14         /* NUMBITOPS */
+#define PF_BITFSIZE 15          /* BITFIELDSIZE */
+#define PF_BITMINS 16           /* BITMINSECONDS */
+#define PF_DOEMF 17             /* DOEMF */
+#define PF_EMFASIZE 18          /* EMFARRAYSIZE */
+#define PF_EMFLOOPS 19          /* EMFLOOPS */
+#define PF_EMFMINS 20           /* EMFMINSECONDS */
+#define PF_DOFOUR 21            /* DOFOUR */
+#define PF_FOURASIZE 22         /* FOURSIZE (as spelled in paramnames[]) */
+#define PF_FOURMINS 23          /* FOURMINSECONDS */
+#define PF_DOASSIGN 24          /* DOASSIGN */
+#define PF_AARRAYS 25           /* ASSIGNARRAYS */
+#define PF_ASSIGNMINS 26        /* ASSIGNMINSECONDS */
+#define PF_DOIDEA 27            /* DOIDEA */
+#define PF_IDEAASIZE 28         /* IDEARRAYSIZE (as spelled in paramnames[]) */
+#define PF_IDEALOOPS 29         /* IDEALOOPS */
+#define PF_IDEAMINS 30          /* IDEAMINSECONDS */
+#define PF_DOHUFF 31            /* DOHUFF */
+#define PF_HUFFASIZE 32         /* HUFARRAYSIZE (as spelled in paramnames[]) */
+#define PF_HUFFLOOPS 33         /* HUFFLOOPS */
+#define PF_HUFFMINS 34          /* HUFFMINSECONDS */
+#define PF_DONNET 35            /* DONNET */
+#define PF_NNETLOOPS 36         /* NNETLOOPS */
+#define PF_NNETMINS 37          /* NNETMINSECONDS */
+#define PF_DOLU 38              /* DOLU */
+#define PF_LUNARRAYS 39         /* LUNUMARRAYS */
+#define PF_LUMINS 40            /* LUMINSECONDS */
+#define PF_ALIGN 41		        /* ALIGN */
+
+#define MAXPARAM 41
+
+/* Tests-to-do flags...must coincide with test. */
+#define TF_NUMSORT 0
+#define TF_SSORT 1
+#define TF_BITOP 2
+#define TF_FPEMU 3
+#define TF_FFPU 4
+#define TF_ASSIGN 5
+#define TF_IDEA 6
+#define TF_HUFF 7
+#define TF_NNET 8
+#define TF_LU 9
+
+#define NUMTESTS 10
+
+/*
+** GLOBALS
+*/
+
+#define BUF_SIZ 1024
+
+/*
+** Test names
+*/
+char *ftestnames[] = {
+        "NUMERIC SORT    ",
+        "STRING SORT     ",
+        "BITFIELD        ",
+        "FP EMULATION    ",
+        "FOURIER         ",
+        "ASSIGNMENT      ",
+        "IDEA            ",
+        "HUFFMAN         ",
+        "NEURAL NET      ",
+        "LU DECOMPOSITION" };
+
+/*
+** Indexes -- Baseline is DELL Pentium XP90
+** 11/28/94
+*/
+double bindex[] = {
+    38.993,                     /* Numeric sort */
+    2.238,                      /* String sort */
+    5829704,                    /* Bitfield */
+    2.084,                      /* FP Emulation */
+    879.278,                    /* Fourier */
+    .2628,                      /* Assignment */
+    65.382,                     /* IDEA */
+    36.062,                     /* Huffman */
+    .6225,                      /* Neural Net */
+    19.3031 };                  /* LU Decomposition */
+
+/*
+** Indices -- Baseline is a AMD K6-233, 32MB RAM (60ns SDRAM),512k L2 cache,
+** Linux kernel 2.0.32, libc-5.4.38, gcc-2.7.2.3)
+** Nov/30/97
+*/
+double lx_bindex[] = {
+      118.73, 	    /* Numeric sort */
+      14.459,	    /* String sort */
+    27910000,	    /* Bitfield */
+      9.0314,	    /* FP Emulation */
+      1565.5,	    /* Fourier */
+      1.0132,	    /* Assignment */
+      220.21,	    /* IDEA */
+      112.93,	    /* Huffman */
+      1.4799,	    /* Neural Net */
+      26.732};      /* LU Decomposition */
+
+/* Parameter names */
+char *paramnames[]= {
+        "GLOBALMINTICKS",
+        "MINSECONDS",
+        "ALLSTATS",
+        "OUTFILE",
+        "CUSTOMRUN",
+        "DONUMSORT",
+        "NUMNUMARRAYS",
+        "NUMARRAYSIZE",
+        "NUMMINSECONDS",
+        "DOSTRINGSORT",
+        "STRARRAYSIZE",
+        "NUMSTRARRAYS",
+        "STRMINSECONDS",
+        "DOBITFIELD",
+        "NUMBITOPS",
+        "BITFIELDSIZE",
+        "BITMINSECONDS",
+        "DOEMF",
+        "EMFARRAYSIZE",
+        "EMFLOOPS",
+        "EMFMINSECONDS",
+        "DOFOUR",
+        "FOURSIZE",
+        "FOURMINSECONDS",
+        "DOASSIGN",
+        "ASSIGNARRAYS",
+        "ASSIGNMINSECONDS",
+        "DOIDEA",
+        "IDEARRAYSIZE",
+        "IDEALOOPS",
+        "IDEAMINSECONDS",
+        "DOHUFF",
+        "HUFARRAYSIZE",
+        "HUFFLOOPS",
+        "HUFFMINSECONDS",
+        "DONNET",
+        "NNETLOOPS",
+        "NNETMINSECONDS",
+        "DOLU",
+        "LUNUMARRAYS",
+        "LUMINSECONDS",
+	"ALIGN" };
+
+/*
+** Following array is a collection of flags indicating which
+** tests to perform.
+*/
+int tests_to_do[NUMTESTS];
+
+/*
+** Buffer for holding output text.
+*/
+char buffer[BUF_SIZ];
+
+/*
+** Global parameters.
+*/
+ulong global_min_ticks;         /* Minimum ticks */
+ulong global_min_seconds;       /* Minimum seconds tests run */
+int global_allstats;            /* Statistics dump flag */
+char global_ofile_name[BUF_SIZ];/* Output file name */
+FILE *global_ofile;             /* Output file */
+int global_custrun;             /* Custom run flag */
+int write_to_file;              /* Write output to file */
+int global_align;		/* Memory alignment */
+
+/*
+** Following global is the memory array.  This is used to store
+** original and aligned (modified) memory addresses.
+*/
+ulong mem_array[2][MEM_ARRAY_SIZE];
+int mem_array_ents;		/* # of active entries */
+
+/*
+** Following are global structures, one built for
+** each of the tests.
+*/
+SortStruct global_numsortstruct;        /* For numeric sort */
+SortStruct global_strsortstruct;        /* For string sort */
+BitOpStruct global_bitopstruct;         /* For bitfield operations */
+EmFloatStruct global_emfloatstruct;     /* For emul. float. point */
+FourierStruct global_fourierstruct;     /* For fourier test */
+AssignStruct global_assignstruct;       /* For assignment algorithm */
+IDEAStruct global_ideastruct;           /* For IDEA encryption */
+HuffStruct global_huffstruct;           /* For Huffman compression */
+NNetStruct global_nnetstruct;           /* For Neural Net */
+LUStruct global_lustruct;               /* For LU decomposition */
+
+/*
+** The following array of function struct pointers lets
+** us very rapidly map a function to its controlling
+** data structure. NOTE: These must match the "TF_xxx"
+** constants above.
+*/
+void *global_fstruct[] =
+{       (void *)&global_numsortstruct,
+        (void *)&global_strsortstruct,
+        (void *)&global_bitopstruct,
+        (void *)&global_emfloatstruct,
+        (void *)&global_fourierstruct,
+        (void *)&global_assignstruct,
+        (void *)&global_ideastruct,
+        (void *)&global_huffstruct,
+        (void *)&global_nnetstruct,
+        (void *)&global_lustruct };
+
+/*
+** Following globals added to support command line emulation on
+** the Macintosh....which doesn't have command lines.
+*/
+#ifdef MAC
+int argc;                       /* Argument count */
+char *argv[20];                 /* Argument vectors */
+
+unsigned char Uargbuff[129];    /* Buffer holding arguments string */
+unsigned char Udummy[2];        /* Dummy buffer for first arg */
+
+#endif
+
+#ifdef MACTIMEMGR
+#include <Types.h>
+#include <Timer.h>
+/*
+** Timer globals for Mac
+*/
+struct TMTask myTMTask;
+long MacHSTdelay,MacHSTohead;
+
+#endif
+
+/*
+** Following globals used by Win 31 timing routines.
+** NOTE: This requires the includes of the w31timer.asm
+** file in your project!!
+*/
+#ifdef WIN31TIMER
+#include <windows.h>
+#include <toolhelp.h>
+extern TIMERINFO win31tinfo;
+extern HANDLE hThlp;
+extern FARPROC lpfn;
+#endif
+
+/*
+** PROTOTYPES
+*/
+static int parse_arg(char *argptr);
+static void display_help(char *progname);
+static void read_comfile(FILE *cfile);
+static int getflag(char *cptr);
+static void strtoupper(char *s);
+static void set_request_secs(void);
+static int bench_with_confidence(int fid,
+        double *mean, double *stdev, ulong *numtries);
+/*
+static int seek_confidence(double scores[5],
+        double *newscore, double *c_half_interval,
+        double *smean,double *sdev);
+*/
+static int calc_confidence(double scores[],
+        int num_scores,
+        double *c_half_interval,double *smean,
+        double *sdev);
+static double getscore(int fid);
+static void output_string(char *buffer);
+static void show_stats(int bid);
+
+#ifdef MAC
+void UCommandLine(void);
+void UParse(void);
+unsigned char *UField(unsigned char *ptr);
+#endif
+
+/*
+** EXTERNAL PROTOTYPES
+*/
+extern void DoNumSort(void);    /* From NBENCH1 */
+extern void DoStringSort(void);
+extern void DoBitops(void);
+extern void DoEmFloat(void);
+extern void DoFourier(void);
+extern void DoAssign(void);
+extern void DoIDEA(void);
+extern void DoHuffman(void);
+extern void DoNNET(void);
+extern void DoLU(void);
+
+extern void ErrorExit(void);    /* From SYSSPEC */
+
+/*
+** Array of pointers to the benchmark functions.
+*/
+void (*funcpointer[])(void) =
+{       DoNumSort,
+        DoStringSort,
+        DoBitops,
+        DoEmFloat,
+        DoFourier,
+        DoAssign,
+        DoIDEA,
+        DoHuffman,
+        DoNNET,
+        DoLU };
+
+
diff --git a/nbench1.c b/nbench1.c
new file mode 100644
index 0000000..05c35df
--- /dev/null
+++ b/nbench1.c
@@ -0,0 +1,4445 @@
+
+/*
+** nbench1.c
+*/
+
+/********************************
+**       BYTEmark (tm)         **
+** BYTE NATIVE MODE BENCHMARKS **
+**       VERSION 2             **
+**                             **
+** Included in this source     **
+** file:                       **
+**  Numeric Heapsort           **
+**  String Heapsort            **
+**  Bitfield test              **
+**  Floating point emulation   **
+**  Fourier coefficients       **
+**  Assignment algorithm       **
+**  IDEA Encryption            **
+**  Huffman compression        **
+**  Back prop. neural net      **
+**  LU Decomposition           **
+**    (linear equations)       **
+** ----------                  **
+** Rick Grehan, BYTE Magazine  **
+*********************************
+**
+** BYTEmark (tm)
+** BYTE's Native Mode Benchmarks
+** Rick Grehan, BYTE Magazine
+**
+** Creation:
+** Revision: 3/95;10/95
+**  10/95 - Removed allocation that was taking place inside
+**   the LU Decomposition benchmark. Though it didn't seem to
+**   make a difference on systems we ran it on, it nonetheless
+**   removes an operating system dependency that probably should
+**   not have been there.
+**
+** DISCLAIMER
+** The source, executable, and documentation files that comprise
+** the BYTEmark benchmarks are made available on an "as is" basis.
+** This means that we at BYTE Magazine have made every reasonable
+** effort to verify that there are no errors in the source and
+** executable code.  We cannot, however, guarantee that the programs
+** are error-free.  Consequently, McGraw-Hill and BYTE Magazine make
+** no claims in regard to the fitness of the source code, executable
+** code, and documentation of the BYTEmark.
+**  Furthermore, BYTE Magazine, McGraw-Hill, and all employees
+** of McGraw-Hill cannot be held responsible for any damages resulting
+** from the use of this code or the results obtained from using
+** this code.
+*/
+
+/*
+** INCLUDES
+*/
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <strings.h>
+#include <math.h>
+#include "nmglobal.h"
+#include "nbench1.h"
+#include "wordcat.h"
+
+#ifdef DEBUG
+static int numsort_status=0;
+static int stringsort_status=0;
+#endif
+
+/*********************
+** NUMERIC HEAPSORT **
+**********************
+** This test implements a heapsort algorithm, performed on an
+** array of longs.
+*/
+
+/**************
+** DoNumSort **
+***************
+** This routine performs the CPU numeric sort test.
+** NOTE: Last version incorrectly stated that the routine
+**  returned result in # of longword sorted per second.
+**  Not so; the routine returns # of iterations per sec.
+*/
+
+void DoNumSort(void)
+{
+SortStruct *numsortstruct;      /* Local pointer to global struct */
+farlong *arraybase;     /* Base of the block holding all arrays */
+long accumtime;         /* Accumulated elapsed ticks */
+double iterations;      /* Count of timed DoNumSortIteration() calls */
+char *errorcontext;     /* Error context string pointer */
+int systemerror;        /* For holding error codes */
+
+/*
+** Link to global structure
+*/
+numsortstruct=&global_numsortstruct;
+
+/*
+** Set the error context string (handed to ReportError on failure).
+*/
+errorcontext="CPU:Numeric Sort";
+
+/*
+** adjust==0 means numarrays has not been calibrated yet.
+*/
+if(numsortstruct->adjust==0)
+{
+	/*
+	** Self-adjustment code.  Start with 1 array; whenever one
+	** iteration does not take longer than global_min_ticks,
+	** free the block and retry with one more array, until an
+	** iteration exceeds global_min_ticks.
+	*/
+	numsortstruct->numarrays=1;
+	while(1)
+	{
+		/*
+		** Allocate one block big enough for all arrays
+		*/
+		arraybase=(farlong *)AllocateMemory(sizeof(long) *
+			numsortstruct->numarrays * numsortstruct->arraysize,
+			&systemerror);
+		if(systemerror)
+		{       ReportError(errorcontext,systemerror);
+			FreeMemory((farvoid *)arraybase,
+				  &systemerror);
+			ErrorExit();
+		}
+
+		/*
+		** Do an iteration of the numeric sort.  If the
+		** elapsed time is less than or equal to the permitted
+		** minimum, then allocate for more arrays and
+		** try again.
+		*/
+		if(DoNumSortIteration(arraybase,
+			numsortstruct->arraysize,
+			numsortstruct->numarrays)>global_min_ticks)
+			break;          /* We're ok...exit */
+
+		FreeMemory((farvoid *)arraybase,&systemerror);
+		if(numsortstruct->numarrays++>NUMNUMARRAYS)     /* Test, then grow */
+		{       printf("CPU:NSORT -- NUMNUMARRAYS hit.\n");
+			ErrorExit();
+		}
+	}
+}
+else
+{       /*
+	** Already adjusted: allocate for the stored numarrays.
+	*/
+	arraybase=(farlong *)AllocateMemory(sizeof(long) *
+		numsortstruct->numarrays * numsortstruct->arraysize,
+		&systemerror);
+	if(systemerror)
+	{       ReportError(errorcontext,systemerror);
+		FreeMemory((farvoid *)arraybase,
+			  &systemerror);
+		ErrorExit();
+	}
+
+}
+/*
+** All's well if we get here.  Repeatedly perform sorts until the
+** accumulated elapsed time reaches the # of seconds requested.
+*/
+accumtime=0L;
+iterations=(double)0.0;
+
+do {
+	accumtime+=DoNumSortIteration(arraybase,
+		numsortstruct->arraysize,
+		numsortstruct->numarrays);
+	iterations+=(double)1.0;
+} while(TicksToSecs(accumtime)<numsortstruct->request_secs);
+
+/*
+** Clean up, calculate results, and go home.  Be sure to
+** show that we don't have to rerun adjustment code.
+*/
+FreeMemory((farvoid *)arraybase,&systemerror);
+
+numsortstruct->sortspersec=iterations *         /* iterations x arrays / secs */
+	(double)numsortstruct->numarrays / TicksToFracSecs(accumtime);
+
+if(numsortstruct->adjust==0)
+	numsortstruct->adjust=1;
+
+#ifdef DEBUG
+if (numsort_status==0) printf("Numeric sort: OK\n");
+numsort_status=0;       /* Reset the flag for the next run */
+#endif
+return;
+}
+
+/***********************
+** DoNumSortIteration **
+************************
+** This routine executes one iteration of the numeric
+** sort benchmark.  It returns the number of ticks
+** elapsed for the iteration.
+*/
+static ulong DoNumSortIteration(farlong *arraybase,     /* Base of all arrays */
+		ulong arraysize,        /* # of elements in each array */
+		uint numarrays)         /* # of arrays to sort */
+{
+ulong elapsed;          /* Elapsed ticks */
+ulong i;
+/*
+** Load up the arrays with random numbers (before the stopwatch starts)
+*/
+LoadNumArrayWithRand(arraybase,arraysize,numarrays);
+
+/*
+** Start the stopwatch
+*/
+elapsed=StartStopwatch();
+
+/*
+** Heapsort each of the numarrays arrays in turn
+*/
+for(i=0;i<numarrays;i++)
+	NumHeapSort(arraybase+i*arraysize,0L,arraysize-1L);
+
+/*
+** Get elapsed time
+*/
+elapsed=StopStopwatch(elapsed);
+#ifdef DEBUG
+{
+	for(i=0;i<arraysize-1;i++)      /* Only the first array is checked */
+	{       /*
+		** Adjacent elements must be in
+		** ascending order.
+		*/
+		if(arraybase[i+1]<arraybase[i])
+		{       printf("Sort Error\n");
+			numsort_status=1;
+                        break;
+		}
+	}
+}
+#endif
+
+return(elapsed);
+}
+
+/*************************
+** LoadNumArrayWithRand **
+**************************
+** Load up an array with random longs.
+*/
+static void LoadNumArrayWithRand(farlong *array,     /* Base of all arrays */
+		ulong arraysize,        /* # of elements in each array */
+		uint numarrays)         /* # of arrays to fill */
+{
+ulong i;                /* Element index (unsigned, like arraysize) */
+uint n;                 /* Array counter */
+farlong *darray;        /* Destination array pointer */
+/*
+** Initialize the random number generator with the fixed value 13.
+*/
+randnum((int32)13);
+
+/*
+** Load up first array with randoms
+*/
+for(i=0;i<arraysize;i++)
+	array[i]=randnum((int32)0);
+
+/*
+** Copy the first array into each of the others.  Counting up from 1
+** (rather than pre-decrementing numarrays) means a count of 0 or 1
+** copies nothing instead of wrapping around and running off the end.
+*/
+darray=array;
+for(n=1;n<numarrays;n++)
+{       darray+=arraysize;
+	for(i=0;i<arraysize;i++)
+		darray[i]=array[i];
+}
+
+return;
+}
+
+/****************
+** NumHeapSort **
+*****************
+** Pass this routine a pointer to an array of long
+** integers.  Also pass in minimum and maximum offsets.
+** This routine performs a heap sort on that array.
+*/
+static void NumHeapSort(farlong *array,
+	ulong bottom,           /* Lower bound (index used as heap root) */
+	ulong top)              /* Upper bound (last index to sort) */
+{
+long temp;                      /* Exchange temp: long, as in NumSift() */
+ulong i;                        /* Loop index */
+
+/*
+** Build a heap; index 0 is sifted by the first pass of the loop below.
+*/
+for(i=(top/2L); i>0; --i)
+	NumSift(array,i,top);
+
+/*
+** Repeatedly sift the maximum to the root, then swap it with the
+** last element of the shrinking heap.  When we get done, we'll
+** have a sorted array.
+*/
+for(i=top; i>0; --i)
+{       NumSift(array,bottom,i);
+	temp=*array;                    /* Perform exchange */
+	*array=*(array+i);
+	*(array+i)=temp;
+}
+return;
+}
+
+/************
+** NumSift **
+*************
+** Performs the sift operation on a numeric array: element i is
+** moved down past larger children (at 2i and 2i+1) up to index j.
+*/
+static void NumSift(farlong *array,     /* Array of numbers */
+	ulong i,                /* Index of the element to sift down */
+	ulong j)                /* Last index that is part of the heap */
+{
+unsigned long k;                        /* Index of the child to compare */
+long temp;                              /* Used for exchange */
+
+while((i+i)<=j)
+{
+	k=i+i;                          /* First child */
+	if(k<j)
+		if(array[k]<array[k+1L])
+			++k;            /* Second child is larger: use it */
+	if(array[i]<array[k])
+	{
+		temp=array[k];
+		array[k]=array[i];
+		array[i]=temp;
+		i=k;                    /* Keep sifting from the child's slot */
+	}
+	else
+		i=j+1;                  /* Already in place: force loop exit */
+}
+return;
+}
+
+/********************
+** STRING HEAPSORT **
+********************/
+
+/*****************
+** DoStringSort **
+******************
+** This routine performs the CPU string sort test.
+** Arguments:
+**      requested_secs = # of seconds to execute test
+**      stringspersec = # of strings per second sorted (RETURNED)
+*/
+void DoStringSort(void)
+{
+
+SortStruct *strsortstruct;      /* Local for sort structure */
+faruchar *arraybase;            /* Base pointer of char array */
+long accumtime;                 /* Accumulated elapsed ticks */
+double iterations;              /* # of arrays sorted so far */
+char *errorcontext;             /* Error context string pointer */
+int systemerror;                /* For holding error code */
+
+/*
+** Link to global structure
+*/
+strsortstruct=&global_strsortstruct;
+
+/*
+** Set the error context (handed to ReportError on failure)
+*/
+errorcontext="CPU:String Sort";
+
+/*
+** adjust==0 means numarrays has not been calibrated yet
+*/
+if(strsortstruct->adjust==0)
+{
+	/*
+	** Initialize the number of arrays.
+	*/
+	strsortstruct->numarrays=1;
+	while(1)
+	{
+		/*
+		** Allocate space for array.  We'll add an extra 100
+		** bytes per array to protect memory as strings move
+		** around (this can happen during string adjustment)
+		*/
+		arraybase=(faruchar *)AllocateMemory((strsortstruct->arraysize+100L) *
+			(long)strsortstruct->numarrays,&systemerror);
+		if(systemerror)
+		{       ReportError(errorcontext,systemerror);
+			ErrorExit();
+		}
+
+		/*
+		** Do an iteration of the string sort.  If the
+		** elapsed time is less than or equal to the permitted
+		** minimum, then de-allocate the array, allocate room
+		** for one additional array, and try again.
+		*/
+		if(DoStringSortIteration(arraybase,
+			strsortstruct->numarrays,
+			strsortstruct->arraysize)>global_min_ticks)
+			break;          /* We're ok...exit */
+
+		FreeMemory((farvoid *)arraybase,&systemerror);
+		strsortstruct->numarrays+=1;    /* No upper limit is applied here */
+	}
+}
+else
+{
+	/*
+	** We don't have to perform self adjustment code.
+	** Simply allocate the space for the stored numarrays.
+	*/
+	arraybase=(faruchar *)AllocateMemory((strsortstruct->arraysize+100L) *
+		(long)strsortstruct->numarrays,&systemerror);
+	if(systemerror)
+	{       ReportError(errorcontext,systemerror);
+		ErrorExit();
+	}
+}
+/*
+** All's well if we get here.  Repeatedly perform sorts until the
+** accumulated elapsed time reaches the # of seconds requested.
+*/
+accumtime=0L;
+iterations=(double)0.0;
+
+do {
+	accumtime+=DoStringSortIteration(arraybase,
+				strsortstruct->numarrays,
+				strsortstruct->arraysize);
+	iterations+=(double)strsortstruct->numarrays;
+} while(TicksToSecs(accumtime)<strsortstruct->request_secs);
+
+/*
+** Clean up, calculate results (arrays sorted per second), and go
+** home.  Set flag to show we don't need to rerun adjustment code.
+*/
+FreeMemory((farvoid *)arraybase,&systemerror);
+strsortstruct->sortspersec=iterations / (double)TicksToFracSecs(accumtime);
+if(strsortstruct->adjust==0)
+	strsortstruct->adjust=1;
+#ifdef DEBUG
+if (stringsort_status==0) printf("String sort: OK\n");
+stringsort_status=0;    /* Reset the flag for the next run */
+#endif
+return;
+}
+
+/**************************
+** DoStringSortIteration **
+***************************
+** This routine executes one iteration of the string
+** sort benchmark.  It returns the number of ticks
+** elapsed.  Note that this routine also builds (and
+** frees) the offset pointer array.
+*/
+static ulong DoStringSortIteration(faruchar *arraybase,
+		uint numarrays,ulong arraysize)
+{
+farulong *optrarray;            /* Offset pointer array */
+unsigned long elapsed;          /* Elapsed ticks */
+unsigned long nstrings;         /* # of strings in each array */
+int syserror;                   /* System error code */
+unsigned int i;                 /* Index */
+farulong *tempobase;            /* Temporary offset pointer base */
+faruchar *tempsbase;            /* Temporary string base pointer */
+
+/*
+** Fill the array(s) with strings; also sets nstrings
+*/
+optrarray=LoadStringArray(arraybase,numarrays,&nstrings,arraysize);
+
+/*
+** Set temp base pointers...they are advanced from one array
+** to the next as the benchmark proceeds.
+*/
+tempobase=optrarray;
+tempsbase=arraybase;
+
+/*
+** Start the stopwatch
+*/
+elapsed=StartStopwatch();
+
+/*
+** Execute heapsorts, one per array
+*/
+for(i=0;i<numarrays;i++)
+{       StrHeapSort(tempobase,tempsbase,nstrings,0L,nstrings-1);
+	tempobase+=nstrings;    /* Advance base pointers */
+	tempsbase+=arraysize+100;       /* Each array has 100 pad bytes */
+}
+
+/*
+** Record elapsed time
+*/
+elapsed=StopStopwatch(elapsed);
+
+#ifdef DEBUG
+{
+	unsigned long i;
+	for(i=0;i<nstrings-1;i++)       /* Only the first array is checked */
+	{       /*
+		** Compare strings to check for proper
+		** sort.
+		*/
+		if(str_is_less(optrarray,arraybase,nstrings,i+1,i))
+		{       printf("Sort Error\n");
+			stringsort_status=1;
+                        break;
+		}
+	}
+}
+#endif
+
+/*
+** Release the offset pointer array built by
+** LoadStringArray()
+*/
+FreeMemory((farvoid *)optrarray,&syserror);
+
+/*
+** Return elapsed ticks.
+*/
+return(elapsed);
+}
+
+/********************
+** LoadStringArray **
+*********************
+** Initialize the string array with random strings of
+** varying sizes.
+** Returns the pointer to the offset pointer array.
+** Note that since we're creating a number of arrays, this
+** routine builds one array, then copies it into the others.
+*/
+static farulong *LoadStringArray(faruchar *strarray, /* String array */
+	uint numarrays,                 /* # of arrays */
+	ulong *nstrings,                /* # of strings per array (RETURNED) */
+	ulong arraysize)                /* Size of one array, in bytes */
+{
+faruchar *tempsbase;            /* Temporary string base pointer */
+farulong *optrarray;            /* Local for pointer */
+farulong *tempobase;            /* Temporary offset pointer base pointer */
+unsigned long curroffset;       /* Current offset */
+int fullflag;                   /* Indicates full array */
+unsigned char stringlength;     /* Length of string */
+unsigned char i;                /* Index */
+unsigned long j;                /* Another index */
+unsigned int k;                 /* Yet another index */
+unsigned int l;                 /* And still one more index */
+int systemerror;                /* For holding error code */
+
+/*
+** Initialize random number generator with the fixed value 13.
+*/
+/* randnum(13L); */
+randnum((int32)13);
+
+/*
+** Start with no strings.  Initialize our current offset pointer
+** to 0.
+*/
+*nstrings=0L;
+curroffset=0L;
+fullflag=0;
+
+do
+{
+	/*
+	** Pick a random string length: 1 plus whatever
+	** abs_randwc(76) returns (NOTE(review): older text said
+	** 4..80 bytes -- confirm range).  Clamp it if the
+	** string would not fit in the space left in the array.
+	*/
+        /* stringlength=(unsigned char)((1+abs_randwc(76L)) & 0xFFL);*/
+	stringlength=(unsigned char)((1+abs_randwc((int32)76)) & 0xFFL);
+	if((unsigned long)stringlength+curroffset+1L>=arraysize)
+	{       stringlength=(unsigned char)((arraysize-curroffset-1L) &
+				0xFF);
+		fullflag=1;     /* This is the last string */
+	}
+
+	/*
+	** Store length at curroffset and advance current offset.
+	*/
+	*(strarray+curroffset)=stringlength;
+	curroffset++;
+
+	/*
+	** Fill up the rest of the string with random bytes.
+	*/
+	for(i=0;i<stringlength;i++)
+	{       *(strarray+curroffset)=
+		        /* (unsigned char)(abs_randwc((long)0xFE)); */
+			(unsigned char)(abs_randwc((int32)0xFE));
+		curroffset++;
+	}
+
+	/*
+	** Increment the # of strings counter.
+	*/
+	*nstrings+=1L;
+
+} while(fullflag==0);
+
+/*
+** We now have initialized a single full array.  If there
+** is more than one array, copy the original into the
+** others (arrays are arraysize+100 bytes apart).
+*/
+k=1;
+tempsbase=strarray;
+while(k<numarrays)
+{       tempsbase+=arraysize+100;         /* Set base */
+	for(l=0;l<arraysize;l++)
+		tempsbase[l]=strarray[l];
+	k++;
+}
+
+/*
+** Now the array is full, allocate enough space for an
+** offset pointer array (nstrings entries per array).
+*/
+optrarray=(farulong *)AllocateMemory(*nstrings * sizeof(unsigned long) *
+		numarrays,
+		&systemerror);
+if(systemerror)
+{       ReportError("CPU:Stringsort",systemerror);
+	FreeMemory((void *)strarray,&systemerror);
+	ErrorExit();
+}
+
+/*
+** Go through the newly-built string array, building
+** offsets and putting them into the offset pointer
+** array.  Each string occupies its length byte + length.
+*/
+curroffset=0;
+for(j=0;j<*nstrings;j++)
+{       *(optrarray+j)=curroffset;
+	curroffset+=(unsigned long)(*(strarray+curroffset))+1L;
+}
+
+/*
+** As above, we've made one copy of the offset pointers,
+** so duplicate this array in the remaining ones.
+*/
+k=1;
+tempobase=optrarray;
+while(k<numarrays)
+{       tempobase+=*nstrings;
+	for(l=0;l<*nstrings;l++)
+		tempobase[l]=optrarray[l];
+	k++;
+}
+
+/*
+** All done...go home.  Pass local pointer back.
+*/
+return(optrarray);
+}
+
+/**************
+** stradjust **
+***************
+** Used by the string heap sort.  Call this routine to adjust the
+** string at offset i to length l.  The members of the string array
+** are moved accordingly and the length of the string at offset i
+** is set to l.
+*/
+static void stradjust(farulong *optrarray,      /* Offset pointer array */
+	faruchar *strarray,                     /* String array */
+	ulong nstrings,                         /* # of strings */
+	ulong i,                                /* Index of string to adjust */
+	uchar l)                                /* New length */
+{
+unsigned long nbytes;           /* # of bytes to move */
+unsigned long j;                /* Index */
+int direction;                  /* New length minus old length */
+unsigned char adjamount;        /* Adjustment amount (|direction|) */
+
+/*
+** If new length is less than old length, the direction is
+** down (negative).  If new length is greater than old length,
+** the direction is up (positive).
+*/
+direction=(int)l - (int)*(strarray+*(optrarray+i));
+adjamount=(unsigned char)abs(direction);
+
+/*
+** See if the adjustment is being made to the last
+** string in the string array.  If so, we don't have to
+** do anything more than adjust the length field.
+*/
+if(i==(nstrings-1L))
+{       *(strarray+*(optrarray+i))=l;
+	return;
+}
+
+/*
+** Calculate the total # of bytes in string array from the
+** start of string i+1 to the end of the last string.  Whether
+** we're moving "up" or down, this is how many bytes we'll move.
+*/
+nbytes=*(optrarray+nstrings-1L) +
+	(unsigned long)*(strarray+*(optrarray+nstrings-1L)) + 1L -
+	*(optrarray+i+1L);
+
+/*
+** Calculate the source and the destination.  Source is the
+** start of string i+1.  Destination is the start of string i
+** plus l+1 ("ell" plus the length byte...don't confuse 1 and l).
+** Hand this straight to MoveMemory and let it handle the
+** "overlap" problem.
+*/
+MoveMemory((farvoid *)(strarray+*(optrarray+i)+l+1),
+	(farvoid *)(strarray+*(optrarray+i+1)),
+	(unsigned long)nbytes);
+
+/*
+** We have to adjust the offset pointer array.
+** This covers string i+1 to nstrings-1.
+*/
+for(j=i+1;j<nstrings;j++)
+	if(direction<0)
+		*(optrarray+j)=*(optrarray+j)-adjamount;
+	else
+		*(optrarray+j)=*(optrarray+j)+adjamount;
+
+/*
+** Store the new length and go home.
+*/
+*(strarray+*(optrarray+i))=l;
+return;
+}
+
+/****************
+** StrHeapSort **
+*****************
+** Pass this routine a pointer to an array of unsigned char.
+** The array is presumed to hold strings occupying at most
+** 80 bytes each (a count that includes the length byte).
+** This routine also needs a pointer to an array of offsets
+** which represent string locations in the array, and
+** an unsigned long indicating the number of strings
+** in the array.
+*/
+static void StrHeapSort(farulong *optrarray, /* Offset pointers */
+	faruchar *strarray,             /* Strings array */
+	ulong numstrings,               /* # of strings in array */
+	ulong bottom,                   /* Region to sort...bottom */
+	ulong top)                      /* Region to sort...top */
+{
+unsigned char temp[80];                 /* Holds one string + length byte */
+unsigned char tlen;                     /* Temp to hold length */
+unsigned long i;                        /* Loop index */
+
+
+/*
+** Build a heap in the array
+*/
+for(i=(top/2L); i>0; --i)
+	strsift(optrarray,strarray,numstrings,i,top);
+
+/*
+** Repeatedly extract maximum from heap and place it at the
+** end of the array.  When we get done, we'll have a sorted
+** array.
+*/
+for(i=top; i>0; --i)
+{
+	strsift(optrarray,strarray,numstrings,0,i);
+
+	/* temp = string[0] (length byte plus tlen data bytes) */
+	tlen=*strarray;
+	MoveMemory((farvoid *)&temp[0], /* Perform exchange */
+		(farvoid *)strarray,
+		(unsigned long)(tlen+1));
+
+
+	/* string[0]=string[i]; slot 0 is resized first, which shifts offsets */
+	tlen=*(strarray+*(optrarray+i));
+	stradjust(optrarray,strarray,numstrings,0,tlen);
+	MoveMemory((farvoid *)strarray,
+		(farvoid *)(strarray+*(optrarray+i)),
+		(unsigned long)(tlen+1));
+
+	/* string[i]=temp; slot i is resized to the saved length first */
+	tlen=temp[0];
+	stradjust(optrarray,strarray,numstrings,i,tlen);
+	MoveMemory((farvoid *)(strarray+*(optrarray+i)),
+		(farvoid *)&temp[0],
+		(unsigned long)(tlen+1));
+
+}
+return;
+}
+
+/****************
+** str_is_less **
+*****************
+** Ordering predicate used by the string heap sort.
+**
+** optrarray  - array of offsets into strarray
+** strarray   - packed strings (length byte, then characters)
+** numstrings - number of strings (not referenced here)
+** a, b       - indices of the two strings to compare
+**
+** Returns TRUE when string a orders before string b, FALSE
+** otherwise.  Note that the strncmp() call starts at the length
+** byte itself and covers min(len a, len b) bytes from there.
+** If those bytes are all equal, a is reported as "less" only
+** when its length byte is larger than b's.
+*/
+static int str_is_less(farulong *optrarray, /* Offset pointers */
+	faruchar *strarray,                     /* String array */
+	ulong numstrings,                       /* # of strings */
+	ulong a, ulong b)                       /* Offsets */
+{
+	faruchar *stra;         /* Start (length byte) of string a */
+	faruchar *strb;         /* Start (length byte) of string b */
+	int span;               /* # of bytes handed to strncmp() */
+	int order;              /* Result of strncmp() */
+
+	stra = strarray + optrarray[a];
+	strb = strarray + optrarray[b];
+
+	/* Compare over the shorter of the two stored lengths */
+	span = (int)*stra;
+	if (span > (int)*strb)
+		span = (int)*strb;
+
+	order = strncmp((char *)stra, (char *)strb, span);
+
+	if (order < 0)
+		return(TRUE);           /* a is strictly less than b */
+	if (order > 0)
+		return(FALSE);
+
+	/* Equal over the compared span: the longer string wins */
+	return((*stra > *strb) ? TRUE : FALSE);
+}
+
+/************
+** strsift **
+*************
+** Sift the string at position i down through the heap that
+** occupies positions up to and including j.
+**
+** optrarray  - array of offsets into strarray
+** strarray   - packed strings (length byte, then characters)
+** numstrings - number of strings; forwarded to str_is_less()
+**              and stradjust()
+** i          - position to sift from
+** j          - last position that belongs to the heap
+**
+** The children of position i are taken to be i+i and i+i+1.
+** Strings are exchanged through an 80-byte buffer.
+*/
+static void strsift(farulong *optrarray,        /* Offset pointers */
+	faruchar *strarray,                     /* String array */
+	ulong numstrings,                       /* # of strings */
+	ulong i, ulong j)                       /* Offsets */
+{
+	unsigned long child;            /* Child chosen for comparison */
+	unsigned char saved[80];        /* Holds string[child] during a swap */
+	unsigned char len;              /* Length byte of string being moved */
+
+	for (;;) {
+		child = i + i;
+		if (child > j)
+			break;                  /* No children inside the heap */
+
+		/* Use the right-hand child when it orders after the left */
+		if (child < j &&
+		    str_is_less(optrarray, strarray, numstrings, child, child + 1L))
+			++child;
+
+		/* Parent already orders at or after the child: done */
+		if (!str_is_less(optrarray, strarray, numstrings, i, child))
+			break;
+
+		/* saved = string[child] */
+		len = strarray[optrarray[child]];
+		MoveMemory((farvoid *)saved,
+			(farvoid *)&strarray[optrarray[child]],
+			(unsigned long)(len + 1));
+
+		/*
+		** string[child] = string[i].  Offsets are looked up again
+		** after stradjust() because it rewrites the offset array.
+		*/
+		len = strarray[optrarray[i]];
+		stradjust(optrarray, strarray, numstrings, child, len);
+		MoveMemory((farvoid *)&strarray[optrarray[child]],
+			(farvoid *)&strarray[optrarray[i]],
+			(unsigned long)(len + 1));
+
+		/* string[i] = saved */
+		len = saved[0];
+		stradjust(optrarray, strarray, numstrings, i, len);
+		MoveMemory((farvoid *)&strarray[optrarray[i]],
+			(farvoid *)saved,
+			(unsigned long)(len + 1));
+
+		/* Continue from the child's position */
+		i = child;
+	}
+}
+
+/************************
+** BITFIELD OPERATIONS **
+*************************/
+
+/*************
+** DoBitops **
+**************
+** Perform the bit operations test portion of the CPU
+** benchmark.  The function returns nothing; the measured rate
+** is stored in global_bitopstruct.bitopspersec.
+**
+** When global_bitopstruct.adjust is 0 the size of the
+** operations array is tuned first: it starts at 30 entries and
+** grows by 100 until a single iteration takes more than
+** global_min_ticks.  adjust is then set to 1 so that later
+** calls reuse the size found here.
+*/
+void DoBitops(void)
+{
+BitOpStruct *locbitopstruct;    /* Local bitop structure */
+farulong *bitarraybase;         /* Base of bitmap array */
+farulong *bitoparraybase;       /* Base of bitmap operations array */
+ulong nbitops;                  /* # of bitfield operations */
+ulong accumtime;                /* Accumulated time in ticks */
+double iterations;              /* Running total of bit operations */
+char *errorcontext;             /* Error context string */
+int systemerror;                /* For holding error codes */
+ulong ticks;                    /* Ticks used by one trial iteration;
+				** same type as DoBitfieldIteration()
+				** returns, so nothing is narrowed */
+
+/*
+** Link to global structure.
+*/
+locbitopstruct=&global_bitopstruct;
+
+/*
+** Set the error context.
+*/
+errorcontext="CPU:Bitfields";
+
+/*
+** See if we need to run adjustment code.
+*/
+if(locbitopstruct->adjust==0)
+{
+	bitarraybase=(farulong *)AllocateMemory(locbitopstruct->bitfieldarraysize *
+		sizeof(ulong),&systemerror);
+	if(systemerror)
+	{       ReportError(errorcontext,systemerror);
+		ErrorExit();
+	}
+
+	/*
+	** Start the bitfield operations array at 30 offset/length
+	** pairs (two ulongs per pair).
+	*/
+	locbitopstruct->bitoparraysize=30L;
+
+	while(1)
+	{
+		/*
+		** Allocate space for operations array
+		*/
+		bitoparraybase=(farulong *)AllocateMemory(locbitopstruct->bitoparraysize*2L*
+			sizeof(ulong),
+			&systemerror);
+		if(systemerror)
+		{       ReportError(errorcontext,systemerror);
+			FreeMemory((farvoid *)bitarraybase,&systemerror);
+			ErrorExit();
+		}
+		/*
+		** Do an iteration of the bitmap test.  If the
+		** elapsed time is less than or equal to the permitted
+		** minimum, then de-allocate the array, reallocate a
+		** larger version, and try again.
+		*/
+		ticks=DoBitfieldIteration(bitarraybase,
+					   bitoparraybase,
+					   locbitopstruct->bitoparraysize,
+					   &nbitops);
+#ifdef DEBUG
+#ifdef LINUX
+		if (locbitopstruct->bitoparraysize==30L){
+		  /* this is the first loop, write a debug file */
+		  FILE *file;
+		  unsigned long *running_base; /* same as farulong */
+		  long counter;
+		  file=fopen("debugbit.dat","w");
+		  if (file==NULL){
+		    /* Dump is optional: report and carry on */
+		    printf("\nCould not open debugbit.dat for writing\n");
+		  } else {
+		    running_base=bitarraybase;
+		    for (counter=0;counter<(long)(locbitopstruct->bitfieldarraysize);counter++){
+#ifdef LONG64
+		      /* Low 32 bits are written first, then the high 32 */
+		      fprintf(file,"%08X",(unsigned int)(*running_base&0xFFFFFFFFL));
+		      fprintf(file,"%08X",(unsigned int)((*running_base>>32)&0xFFFFFFFFL));
+		      if ((counter+1)%4==0) fprintf(file,"\n");
+#else
+		      fprintf(file,"%08lX",*running_base);
+		      if ((counter+1)%8==0) fprintf(file,"\n");
+#endif
+		      running_base=running_base+1;
+		    }
+		    fclose(file);
+		    printf("\nWrote the file debugbit.dat, you may want to compare it to debugbit.good\n");
+		  }
+		}
+#endif
+#endif
+
+		if (ticks>global_min_ticks) break;      /* We're ok...exit */
+
+		FreeMemory((farvoid *)bitoparraybase,&systemerror);
+		locbitopstruct->bitoparraysize+=100L;
+	}
+}
+else
+{
+	/*
+	** Don't need to do self adjustment, just allocate
+	** the array space.
+	*/
+	bitarraybase=(farulong *)AllocateMemory(locbitopstruct->bitfieldarraysize *
+		sizeof(ulong),&systemerror);
+	if(systemerror)
+	{       ReportError(errorcontext,systemerror);
+		ErrorExit();
+	}
+	bitoparraybase=(farulong *)AllocateMemory(locbitopstruct->bitoparraysize*2L*
+		sizeof(ulong),
+		&systemerror);
+	if(systemerror)
+	{       ReportError(errorcontext,systemerror);
+		FreeMemory((farvoid *)bitarraybase,&systemerror);
+		ErrorExit();
+	}
+}
+
+/*
+** All's well if we get here.  Repeatedly perform bitops until the
+** accumulated elapsed time reaches the # of seconds requested.
+** "iterations" accumulates the bit-operation count reported by
+** each call, not the number of calls.
+*/
+accumtime=0L;
+iterations=(double)0.0;
+do {
+	accumtime+=DoBitfieldIteration(bitarraybase,
+			bitoparraybase,
+			locbitopstruct->bitoparraysize,&nbitops);
+	iterations+=(double)nbitops;
+} while(TicksToSecs(accumtime)<locbitopstruct->request_secs);
+
+/*
+** Clean up, calculate results, and go home.
+** Also, set adjustment flag to show that we don't have
+** to do self adjusting in the future.
+*/
+FreeMemory((farvoid *)bitarraybase,&systemerror);
+FreeMemory((farvoid *)bitoparraybase,&systemerror);
+locbitopstruct->bitopspersec=iterations /TicksToFracSecs(accumtime);
+if(locbitopstruct->adjust==0)
+	locbitopstruct->adjust=1;
+
+return;
+}
+
+/************************
+** DoBitfieldIteration **
+*************************
+** Perform a single iteration of the bitfield benchmark.
+**
+** bitarraybase   - the bitmap the operations act on
+** bitoparraybase - receives bitoparraysize (offset, run length)
+**                  pairs, stored as consecutive ulongs
+** bitoparraysize - number of pairs to generate and execute
+** nbitops        - out: sum of all generated run lengths
+**
+** Returns the # of ticks spent executing the operations.
+** Resetting the bitmap and generating the pairs happen before
+** the stopwatch is started.
+*/
+static ulong DoBitfieldIteration(farulong *bitarraybase,
+		farulong *bitoparraybase,
+		long bitoparraysize,
+		ulong *nbitops)
+{
+	long op;                /* Index of current offset/length pair */
+	long word;              /* Index of bitmap word being reset */
+	ulong bitoffset;        /* Offset into bitmap */
+	ulong runlength;        /* Length of the run at that offset */
+	ulong elapsed;          /* Stopwatch value */
+
+	/* Start with an empty operation count */
+	*nbitops = 0L;
+
+	/*
+	** Reset the random number generator and refill the bitmap
+	** with the alternating 0101... pattern so that every
+	** iteration starts from the same state.  The number of
+	** words comes from global_bitopstruct.bitfieldarraysize.
+	** (Reset code added by Uwe F. Mayer.)
+	*/
+	randnum((int32)13);
+	for (word = 0; word < global_bitopstruct.bitfieldarraysize; word++) {
+#ifdef LONG64
+		bitarraybase[word] = (ulong)0x5555555555555555;
+#else
+		bitarraybase[word] = (ulong)0x55555555;
+#endif
+	}
+	randnum((int32)13);
+
+	/*
+	** Build the list of operations.  Each offset is drawn with
+	** abs_randwc((int32)262140); the matching run length is
+	** drawn with the bound reduced by that offset.
+	*/
+	for (op = 0; op < bitoparraysize; op++) {
+		bitoffset = abs_randwc((int32)262140);
+		bitoparraybase[2 * op] = bitoffset;
+
+		runlength = abs_randwc((int32)262140-bitoffset);
+		bitoparraybase[2 * op + 1] = runlength;
+		*nbitops += runlength;
+	}
+
+	/*
+	** Timed section: run through the pairs, picking the
+	** operation from the pair index modulo 3.
+	*/
+	elapsed = StartStopwatch();
+
+	for (op = 0; op < bitoparraysize; op++) {
+		bitoffset = bitoparraybase[2 * op];
+		runlength = bitoparraybase[2 * op + 1];
+
+		switch (op % 3) {
+		case 0:         /* Set run of bits */
+			ToggleBitRun(bitarraybase, bitoffset, runlength, 1);
+			break;
+
+		case 1:         /* Clear run of bits */
+			ToggleBitRun(bitarraybase, bitoffset, runlength, 0);
+			break;
+
+		case 2:         /* Complement run of bits */
+			FlipBitRun(bitarraybase, bitoffset, runlength);
+			break;
+		}
+	}
+
+	/* Hand back the elapsed time */
+	return(StopStopwatch(elapsed));
+}
+
+
+/*****************************
+**     ToggleBitRun          *
+******************************
+** Set or clear a run of nbits starting at
+** bit_addr in bitmap.
+**
+** bitmap   - array of words holding the bits
+** bit_addr - number of the first bit to change
+** nbits    - how many consecutive bits to change
+** val      - non-zero sets the bits, zero clears them
+**
+** Bits are handled one at a time; the word index is
+** bit_addr/64 (LONG64) or bit_addr/32, and the bit within the
+** word is the remainder.
+*/
+static void ToggleBitRun(farulong *bitmap, /* Bitmap */
+		ulong bit_addr,         /* Address of bits to set */
+		ulong nbits,            /* # of bits to set/clr */
+		uint val)               /* 1 or 0 */
+{
+unsigned long bindex;   /* Index into array */
+unsigned long bitnumb;  /* Bit number */
+
+while(nbits--)
+{
+#ifdef LONG64
+	bindex=bit_addr>>6;     /* Index is number /64 */
+	bitnumb=bit_addr % 64;   /* Bit number in word */
+#else
+	bindex=bit_addr>>5;     /* Index is number /32 */
+	bitnumb=bit_addr % 32;  /* bit number in word */
+#endif
+	/*
+	** The mask is built from an unsigned 1: shifting a signed
+	** 1L into the top bit of the word is undefined behaviour.
+	*/
+	if(val)
+		bitmap[bindex]|=(1UL<<bitnumb);
+	else
+		bitmap[bindex]&=~(1UL<<bitnumb);
+	bit_addr++;
+}
+return;
+}
+
+/***************
+** FlipBitRun **
+****************
+** Complements a run of bits.
+**
+** bitmap   - array of words holding the bits
+** bit_addr - number of the first bit to complement
+** nbits    - how many consecutive bits to complement
+**
+** Bits are handled one at a time; the word index is
+** bit_addr/64 (LONG64) or bit_addr/32, and the bit within the
+** word is the remainder.
+*/
+static void FlipBitRun(farulong *bitmap,        /* Bit map */
+		ulong bit_addr,                 /* Bit address */
+		ulong nbits)                    /* # of bits to flip */
+{
+unsigned long bindex;   /* Index into array */
+unsigned long bitnumb;  /* Bit number */
+
+while(nbits--)
+{
+#ifdef LONG64
+	bindex=bit_addr>>6;     /* Index is number /64 */
+	bitnumb=bit_addr % 64;  /* Bit number in longword */
+#else
+	bindex=bit_addr>>5;     /* Index is number /32 */
+	bitnumb=bit_addr % 32;  /* Bit number in longword */
+#endif
+	/*
+	** Unsigned 1 for the mask: shifting a signed 1L into the
+	** top bit of the word is undefined behaviour.
+	*/
+	bitmap[bindex]^=(1UL<<bitnumb);
+	bit_addr++;
+}
+
+return;
+}
+
+/*****************************
+** FLOATING-POINT EMULATION **
+*****************************/
+
+/**************
+** DoEmFloat **
+***************
+** Perform the floating-point emulation routines portion of the
+** CPU benchmark.  The function returns nothing; the measured
+** rate is stored in global_emfloatstruct.emflops.
+**
+** When global_emfloatstruct.adjust is 0 the loop count is tuned
+** first: it starts at 1 and doubles (staying below
+** CPUEMFLOATLOOPMAX) until one iteration takes more than
+** global_min_ticks.  If no loop count satisfies that, the
+** benchmark reports the problem and exits.
+*/
+void DoEmFloat(void)
+{
+EmFloatStruct *locemfloatstruct;        /* Local structure */
+InternalFPF *abase;             /* Base of A array */
+InternalFPF *bbase;             /* Base of B array */
+InternalFPF *cbase;             /* Base of C array */
+ulong accumtime;                /* Accumulated time in ticks */
+double iterations;              /* # of iterations */
+ulong tickcount;                /* # of ticks */
+char *errorcontext;             /* Error context string pointer */
+int systemerror;                /* For holding error code */
+ulong loops;                    /* # of loops */
+
+/*
+** Link to global structure
+*/
+locemfloatstruct=&global_emfloatstruct;
+
+/*
+** Set the error context
+*/
+errorcontext="CPU:Floating Emulation";
+
+/*
+** Allocate the three arrays.  On failure, release whatever
+** was already allocated before exiting.
+*/
+abase=(InternalFPF *)AllocateMemory(locemfloatstruct->arraysize*sizeof(InternalFPF),
+		&systemerror);
+if(systemerror)
+{       ReportError(errorcontext,systemerror);
+	ErrorExit();
+}
+
+bbase=(InternalFPF *)AllocateMemory(locemfloatstruct->arraysize*sizeof(InternalFPF),
+		&systemerror);
+if(systemerror)
+{       ReportError(errorcontext,systemerror);
+	FreeMemory((farvoid *)abase,&systemerror);
+	ErrorExit();
+}
+
+cbase=(InternalFPF *)AllocateMemory(locemfloatstruct->arraysize*sizeof(InternalFPF),
+		&systemerror);
+if(systemerror)
+{       ReportError(errorcontext,systemerror);
+	FreeMemory((farvoid *)abase,&systemerror);
+	FreeMemory((farvoid *)bbase,&systemerror);
+	ErrorExit();
+}
+
+/*
+** Set up the arrays
+*/
+SetupCPUEmFloatArrays(abase,bbase,cbase,locemfloatstruct->arraysize);
+
+/*
+** See if we need to do self-adjusting code.
+*/
+if(locemfloatstruct->adjust==0)
+{
+	/* 0 means "no suitable loop count found yet" */
+	locemfloatstruct->loops=0;
+
+	/*
+	** Do an iteration of the tests.  If the elapsed time is
+	** less than minimum, double the loop count and try
+	** again.
+	*/
+	for(loops=1;loops<CPUEMFLOATLOOPMAX;loops+=loops)
+	{       tickcount=DoEmFloatIteration(abase,bbase,cbase,
+			locemfloatstruct->arraysize,
+			loops);
+		if(tickcount>global_min_ticks)
+		{       locemfloatstruct->loops=loops;
+			break;
+		}
+	}
+}
+
+/*
+** Verify that self adjustment code worked.  The message names
+** the limit that was reached, CPUEMFLOATLOOPMAX.
+*/
+if(locemfloatstruct->loops==0)
+{       printf("CPU:EMFPU -- CPUEMFLOATLOOPMAX limit hit\n");
+	FreeMemory((farvoid *)abase,&systemerror);
+	FreeMemory((farvoid *)bbase,&systemerror);
+	FreeMemory((farvoid *)cbase,&systemerror);
+	ErrorExit();
+}
+
+/*
+** All's well if we get here.  Repeatedly perform floating
+** tests until the accumulated time reaches the
+** # of seconds requested.
+*/
+accumtime=0L;
+iterations=(double)0.0;
+do {
+	accumtime+=DoEmFloatIteration(abase,bbase,cbase,
+			locemfloatstruct->arraysize,
+			locemfloatstruct->loops);
+	iterations+=(double)1.0;
+} while(TicksToSecs(accumtime)<locemfloatstruct->request_secs);
+
+
+/*
+** Clean up, calculate results, and go home.
+** Also, indicate that adjustment is done.
+*/
+FreeMemory((farvoid *)abase,&systemerror);
+FreeMemory((farvoid *)bbase,&systemerror);
+FreeMemory((farvoid *)cbase,&systemerror);
+
+/* Rate = (calls * loops per call) / elapsed seconds */
+locemfloatstruct->emflops=(iterations*(double)locemfloatstruct->loops)/
+		(double)TicksToFracSecs(accumtime);
+if(locemfloatstruct->adjust==0)
+	locemfloatstruct->adjust=1;
+
+#ifdef DEBUG
+printf("----------------------------------------------------------------------------\n");
+#endif
+return;
+}
+
+/*************************
+** FOURIER COEFFICIENTS **
+*************************/
+
+/**************
+** DoFourier **
+***************
+** Perform the transcendental/trigonometric portion of the
+** benchmark.  This benchmark calculates the first n
+** fourier coefficients of the function (x+1)^x defined
+** on the interval 0,2.
+**
+** The function returns nothing; the measured rate is stored in
+** global_fourierstruct.fflops.  When adjust is 0 the number of
+** coefficients is tuned first: it starts at 100 and grows by 50
+** until one iteration takes more than global_min_ticks.
+*/
+void DoFourier(void)
+{
+FourierStruct *locfourierstruct;        /* Local fourier struct */
+fardouble *abase;               /* Base of A[] coefficients array */
+fardouble *bbase;               /* Base of B[] coefficients array */
+unsigned long accumtime;        /* Accumulated time in ticks */
+double iterations;              /* # of iterations */
+char *errorcontext;             /* Error context string pointer */
+int systemerror;                /* For error code */
+
+/*
+** Link to global structure
+*/
+locfourierstruct=&global_fourierstruct;
+
+/*
+** Set error context string
+*/
+errorcontext="FPU:Transcendental";
+
+/*
+** See if we need to do self-adjustment code.
+*/
+if(locfourierstruct->adjust==0)
+{
+	locfourierstruct->arraysize=100L;       /* Start at 100 elements */
+	while(1)
+	{
+
+		abase=(fardouble *)AllocateMemory(locfourierstruct->arraysize*sizeof(double),
+				&systemerror);
+		if(systemerror)
+		{       ReportError(errorcontext,systemerror);
+			ErrorExit();
+		}
+
+		bbase=(fardouble *)AllocateMemory(locfourierstruct->arraysize*sizeof(double),
+				&systemerror);
+		if(systemerror)
+		{       ReportError(errorcontext,systemerror);
+			/* farvoid cast, as for every other FreeMemory call */
+			FreeMemory((farvoid *)abase,&systemerror);
+			ErrorExit();
+		}
+		/*
+		** Do an iteration of the tests.  If the elapsed time is
+		** less than or equal to the permitted minimum, re-allocate
+		** larger arrays and try again.
+		*/
+		if(DoFPUTransIteration(abase,bbase,
+			locfourierstruct->arraysize)>global_min_ticks)
+			break;          /* We're ok...exit */
+
+		/*
+		** Make bigger arrays and try again.
+		*/
+		FreeMemory((farvoid *)abase,&systemerror);
+		FreeMemory((farvoid *)bbase,&systemerror);
+		locfourierstruct->arraysize+=50L;
+	}
+}
+else
+{       /*
+	** Don't need self-adjustment.  Just allocate the
+	** arrays, and go.
+	*/
+	abase=(fardouble *)AllocateMemory(locfourierstruct->arraysize*sizeof(double),
+			&systemerror);
+	if(systemerror)
+	{       ReportError(errorcontext,systemerror);
+		ErrorExit();
+	}
+
+	bbase=(fardouble *)AllocateMemory(locfourierstruct->arraysize*sizeof(double),
+			&systemerror);
+	if(systemerror)
+	{       ReportError(errorcontext,systemerror);
+		FreeMemory((farvoid *)abase,&systemerror);
+		ErrorExit();
+	}
+}
+/*
+** All's well if we get here.  Repeatedly perform integration
+** tests until the accumulated time reaches the
+** # of seconds requested.  Each pass is counted as
+** 2*arraysize-1 units of work.
+*/
+accumtime=0L;
+iterations=(double)0.0;
+do {
+	accumtime+=DoFPUTransIteration(abase,bbase,locfourierstruct->arraysize);
+	iterations+=(double)locfourierstruct->arraysize*(double)2.0-(double)1.0;
+} while(TicksToSecs(accumtime)<locfourierstruct->request_secs);
+
+
+/*
+** Clean up, calculate results, and go home.
+** Also set adjustment flag to indicate no adjust code needed.
+*/
+FreeMemory((farvoid *)abase,&systemerror);
+FreeMemory((farvoid *)bbase,&systemerror);
+
+locfourierstruct->fflops=iterations/(double)TicksToFracSecs(accumtime);
+
+if(locfourierstruct->adjust==0)
+	locfourierstruct->adjust=1;
+
+return;
+}
+
+/************************
+** DoFPUTransIteration **
+*************************
+** Perform an iteration of the FPU Transcendental/trigonometric
+** benchmark.  Here, an iteration consists of calculating the
+** first n fourier coefficients of the function (x+1)^x on
+** the interval 0,2.  n is given by arraysize.
+** NOTE: The # of integration steps is fixed at
+** 200.
+**
+** abase     - receives A[0] .. A[arraysize-1]
+** bbase     - receives B[1] .. B[arraysize-1]; B[0] is not
+**             written
+** arraysize - number of coefficients
+**
+** Returns the # of ticks spent computing the coefficients.
+*/
+static ulong DoFPUTransIteration(fardouble *abase,      /* A coeffs. */
+			fardouble *bbase,               /* B coeffs. */
+			ulong arraysize)                /* # of coeffs */
+{
+double omega;           /* Fundamental frequency */
+unsigned long i;        /* Index */
+unsigned long elapsed;  /* Elapsed time */
+
+/*
+** Start the stopwatch
+*/
+elapsed=StartStopwatch();
+
+/*
+** Calculate the fourier series.  Begin by
+** calculating A[0].
+*/
+
+*abase=TrapezoidIntegrate((double)0.0,
+			(double)2.0,
+			200,
+			(double)0.0,    /* No omega * n needed */
+			0 )/(double)2.0;
+
+/*
+** Calculate the fundamental frequency.
+** ( 2 * pi ) / period...and since the period
+** is 2, omega is simply pi.
+*/
+omega=(double)3.1415926535897932;
+
+for(i=1;i<arraysize;i++)
+{
+
+	/*
+	** Calculate A[i] terms.  Note, once again, that we
+	** can ignore the 2/period term outside the integral
+	** since the period is 2 and the term cancels itself
+	** out.
+	*/
+	*(abase+i)=TrapezoidIntegrate((double)0.0,
+			(double)2.0,
+			200,
+			omega * (double)i,
+			1);
+
+	/*
+	** Calculate the B[i] terms.
+	*/
+	*(bbase+i)=TrapezoidIntegrate((double)0.0,
+			(double)2.0,
+			200,
+			omega * (double)i,
+			2);
+
+}
+
+/*
+** All done, stop the stopwatch.  This is done before any
+** debug output so that printing the coefficients is not
+** charged to the benchmark.
+*/
+elapsed=StopStopwatch(elapsed);
+
+#ifdef DEBUG
+/* Index is the unsigned loop variable, matching arraysize */
+printf("\nA[i]=\n");
+for (i=0;i<arraysize;i++) printf("%7.3g ",abase[i]);
+printf("\nB[i]=\n(undefined) ");
+for (i=1;i<arraysize;i++) printf("%7.3g ",bbase[i]);
+#endif
+
+return(elapsed);
+}
+
+/***********************
+** TrapezoidIntegrate **
+************************
+** Perform a simple trapezoid integration on the
+** function (x+1)**x.
+** x0,x1 set the lower and upper bounds of the
+** integration.
+** nsteps indicates # of trapezoidal sections
+** omegan is the fundamental frequency times
+**  the series member #
+** select = 0 for the A[0] term, 1 for cosine terms, and
+**   2 for sine terms.
+** Returns the value, or 0.0 when nsteps is less than 1.
+**
+** With nsteps sections the rule uses the two end points at
+** half weight and the nsteps-1 interior points at full weight.
+** NOTE(review): earlier versions of this routine decremented the
+** step counter twice and so skipped the last interior point;
+** results and per-call work therefore differ slightly from
+** those versions.
+*/
+static double TrapezoidIntegrate( double x0,            /* Lower bound */
+			double x1,              /* Upper bound */
+			int nsteps,             /* # of steps */
+			double omegan,          /* omega * n */
+			int select)
+{
+double x;               /* Independent variable */
+double dx;              /* Stepsize */
+double rvalue;          /* Return value */
+int step;               /* Interior point counter */
+
+/*
+** Nothing to integrate over without at least one section;
+** this also keeps the stepsize division well defined.
+*/
+if(nsteps<1)
+	return((double)0.0);
+
+/*
+** Initialize independent variable
+*/
+x=x0;
+
+/*
+** Calculate stepsize
+*/
+dx=(x1 - x0) / (double)nsteps;
+
+/*
+** Initialize the return value with the half-weighted
+** lower end point.
+*/
+rvalue=thefunction(x0,omegan,select)/(double)2.0;
+
+/*
+** Add the nsteps-1 interior points: x0+dx .. x0+(nsteps-1)*dx.
+*/
+for(step=1;step<nsteps;step++)
+{
+	x+=dx;
+	rvalue+=thefunction(x,omegan,select);
+}
+
+/*
+** Finish computation with the half-weighted upper end point.
+*/
+rvalue=(rvalue+thefunction(x1,omegan,select)/(double)2.0)*dx;
+
+return(rvalue);
+}
+
+/****************
+** thefunction **
+*****************
+** Integrand used by the trapezoid integration.
+**
+** x      - the independent variable
+** omegan - omega * n
+** select - which form of the integrand to evaluate:
+**            0 : (x+1)^x
+**            1 : (x+1)^x * cos(omegan * x)
+**            2 : (x+1)^x * sin(omegan * x)
+**
+** Any other value of select yields 0.0.
+*/
+static double thefunction(double x,             /* Independent variable */
+		double omegan,          /* Omega * term */
+		int select)             /* Choose term */
+{
+	if (select == 0)
+		return(pow(x+(double)1.0,x));
+
+	if (select == 1)
+		return(pow(x+(double)1.0,x) * cos(omegan * x));
+
+	if (select == 2)
+		return(pow(x+(double)1.0,x) * sin(omegan * x));
+
+	/*
+	** Callers are expected to pass 0, 1 or 2; this final
+	** return also keeps compilers from warning about a
+	** missing return value.
+	*/
+	return(0.0);
+}
+
+/*************************
+** ASSIGNMENT ALGORITHM **
+*************************/
+
+/*************
+** DoAssign **
+**************
+** Perform an assignment algorithm.
+** The algorithm was adapted from the step by step guide found
+** in "Quantitative Decision Making for Business" (Gordon,
+**  Pressman, and Cohn; Prentice-Hall)
+**
+** The function returns nothing; the measured rate is stored in
+** global_assignstruct.iterspersec.  When adjust is 0 the number
+** of arrays is tuned first: it starts at 1 and is increased by
+** one until a single iteration takes more than
+** global_min_ticks.
+**
+** NOTES:
+** 1. Even though the algorithm distinguishes between
+**    ASSIGNROWS and ASSIGNCOLS, as though the two might
+**    be different, it does presume a square matrix.
+**    I.E., ASSIGNROWS and ASSIGNCOLS must be the same.
+**    This makes for some algorithmically-correct but
+**    probably non-optimal constructs.
+**
+*/
+void DoAssign(void)
+{
+AssignStruct *locassignstruct;  /* Local structure ptr */
+farlong *arraybase;             /* Base of the block of arrays */
+char *errorcontext;             /* Error context string */
+int systemerror;                /* For holding error codes */
+ulong accumtime;                /* Accumulated time in ticks */
+double iterations;              /* # of iterations */
+
+/*
+** Link to global structure
+*/
+locassignstruct=&global_assignstruct;
+
+/*
+** Set the error context string.
+*/
+errorcontext="CPU:Assignment";
+
+/*
+** See if we need to do self adjustment code.
+*/
+if(locassignstruct->adjust==0)
+{
+	/*
+	** Self-adjustment code.  The system begins by working on 1
+	** array.  If it does that in no time, then two arrays
+	** are built.  This process continues until
+	** enough arrays are built to handle the tolerance.
+	*/
+	locassignstruct->numarrays=1;
+	while(1)
+	{
+		/*
+		** Allocate space for arrays.  If the allocation
+		** fails there is no block to release, so just
+		** report the error and exit.
+		*/
+		arraybase=(farlong *) AllocateMemory(sizeof(long)*
+			ASSIGNROWS*ASSIGNCOLS*locassignstruct->numarrays,
+			 &systemerror);
+		if(systemerror)
+		{       ReportError(errorcontext,systemerror);
+			ErrorExit();
+		}
+
+		/*
+		** Do an iteration of the assignment alg.  If the
+		** elapsed time is less than or equal to the permitted
+		** minimum, then allocate for more arrays and
+		** try again.
+		*/
+		if(DoAssignIteration(arraybase,
+			locassignstruct->numarrays)>global_min_ticks)
+			break;          /* We're ok...exit */
+
+		FreeMemory((farvoid *)arraybase, &systemerror);
+		locassignstruct->numarrays++;
+	}
+}
+else
+{       /*
+	** Allocate space for arrays.  As above, a failed
+	** allocation leaves nothing to release.
+	*/
+	arraybase=(farlong *)AllocateMemory(sizeof(long)*
+		ASSIGNROWS*ASSIGNCOLS*locassignstruct->numarrays,
+		 &systemerror);
+	if(systemerror)
+	{       ReportError(errorcontext,systemerror);
+		ErrorExit();
+	}
+}
+
+/*
+** All's well if we get here.  Do the tests until the
+** accumulated time reaches the # of seconds requested.
+*/
+accumtime=0L;
+iterations=(double)0.0;
+
+do {
+	accumtime+=DoAssignIteration(arraybase,
+		locassignstruct->numarrays);
+	iterations+=(double)1.0;
+} while(TicksToSecs(accumtime)<locassignstruct->request_secs);
+
+/*
+** Clean up, calculate results, and go home.  Be sure to
+** show that we don't have to rerun adjustment code.
+*/
+FreeMemory((farvoid *)arraybase,&systemerror);
+
+/* Rate = (calls * arrays solved per call) / elapsed seconds */
+locassignstruct->iterspersec=iterations *
+	(double)locassignstruct->numarrays / TicksToFracSecs(accumtime);
+
+if(locassignstruct->adjust==0)
+	locassignstruct->adjust=1;
+
+return;
+
+}
+
+/**********************
+** DoAssignIteration **
+***********************
+** Run one iteration of the assignment test.
+**
+** arraybase - start of numarrays consecutive tableaus, each
+**             ASSIGNROWS*ASSIGNCOLS longs
+** numarrays - number of tableaus to solve
+**
+** The tableaus are reloaded before the stopwatch is started,
+** so only the Assignment() calls are timed.  Returns the
+** number of ticks elapsed.
+*/
+static ulong DoAssignIteration(farlong *arraybase,
+	ulong numarrays)
+{
+	longptr cursor;         /* Walks from tableau to tableau */
+	ulong started;          /* Stopwatch value */
+	ulong remaining;        /* Tableaus still to be solved */
+
+	/* Fill every tableau with the same repeatable data */
+	LoadAssignArrayWithRand(arraybase,numarrays);
+
+	/* Point the cursor at the first tableau */
+	cursor.ptrs.p=arraybase;
+
+	started=StartStopwatch();
+
+	/*
+	** Solve each tableau in turn.  The cursor is advanced by
+	** exactly one tableau per pass (fix by Eike Dierks).
+	*/
+	for(remaining=numarrays;remaining>0;remaining--)
+	{
+		Assignment(*cursor.ptrs.ap);
+		cursor.ptrs.p+=ASSIGNROWS*ASSIGNCOLS;
+	}
+
+	return(StopStopwatch(started));
+}
+
+/****************************
+** LoadAssignArrayWithRand **
+*****************************
+** Fill all assignment arrays with the same cost table.
+**
+** arraybase - start of numarrays consecutive tableaus, each
+**             ASSIGNROWS*ASSIGNCOLS longs
+** numarrays - number of tableaus
+**
+** The first tableau is generated by LoadAssign(); every
+** following tableau is a copy of the first.
+*/
+static void LoadAssignArrayWithRand(farlong *arraybase,
+	ulong numarrays)
+{
+	longptr first;          /* Always the first tableau */
+	longptr target;         /* Tableau currently being filled */
+	ulong n;
+
+	first.ptrs.p=arraybase;
+	target.ptrs.p=arraybase;
+
+	/* Generate the master copy */
+	LoadAssign(*(first.ptrs.ap));
+
+	/*
+	** Replicate it.  The target is advanced by exactly one
+	** tableau per pass (fix by Eike Dierks).  The loop body is
+	** never entered when there is only one tableau.
+	*/
+	for(n=1;n<numarrays;n++)
+	{
+		target.ptrs.p+=ASSIGNROWS*ASSIGNCOLS;
+		CopyToAssign(*(first.ptrs.ap),*(target.ptrs.ap));
+	}
+}
+
+/***************
+** LoadAssign **
+****************
+** The array given by arraybase is loaded with values drawn
+** from abs_randwc((int32)5000000), after the random number
+** generator has been reset with randnum((int32)13) so that
+** every call produces the same table.
+**
+** arraybase - tableau of ASSIGNROWS rows by ASSIGNCOLS columns
+*/
+static void LoadAssign(farlong arraybase[][ASSIGNCOLS])
+{
+ushort i,j;
+
+/*
+** Reset random number generator so things repeat.
+*/
+randnum((int32)13);
+
+/*
+** Rows are bounded by ASSIGNROWS and columns by ASSIGNCOLS,
+** matching the declared shape of the tableau.
+*/
+for(i=0;i<ASSIGNROWS;i++)
+  for(j=0;j<ASSIGNCOLS;j++){
+    arraybase[i][j]=abs_randwc((int32)5000000);
+  }
+
+return;
+}
+
+/*****************
+** CopyToAssign **
+******************
+** Copy one ASSIGNROWS x ASSIGNCOLS tableau into another,
+** element by element.
+**
+** arrayfrom - tableau to read
+** arrayto   - tableau to overwrite
+*/
+static void CopyToAssign(farlong arrayfrom[ASSIGNROWS][ASSIGNCOLS],
+		farlong arrayto[ASSIGNROWS][ASSIGNCOLS])
+{
+	ushort row,col;
+
+	for(row=0;row<ASSIGNROWS;row++)
+	{
+		farlong *src=arrayfrom[row];    /* Row being read */
+		farlong *dst=arrayto[row];      /* Row being written */
+
+		for(col=0;col<ASSIGNCOLS;col++)
+			dst[col]=src[col];
+	}
+}
+
+/***************
+** Assignment **
+****************
+** Solve one assignment tableau in place.
+**
+** arraybase - the cost tableau; it is modified by the
+**             routines called here.
+**
+** The tableau is first reduced by calc_minimum_costs().  Then
+** first_assignments() is called repeatedly, with
+** second_assignments() run between attempts, until it reports
+** ASSIGNROWS selections.
+*/
+static void Assignment(farlong arraybase[][ASSIGNCOLS])
+{
+	short marks[ASSIGNROWS][ASSIGNCOLS];    /* Per-cell assignment state */
+
+	/* Reduce rows and columns by their minima */
+	calc_minimum_costs(arraybase);
+
+	/*
+	** Keep going until the number of rows selected equals the
+	** number of rows in the tableau.
+	*/
+	for(;;)
+	{
+		if(first_assignments(arraybase,marks)==ASSIGNROWS)
+			break;
+		second_assignments(arraybase,marks);
+	}
+
+#ifdef DEBUG
+	{
+		int row,col;
+
+		/* Show which column(s) carry a mark of 1 in each row */
+		printf("\nColumn choices for each row\n");
+		for(row=0;row<ASSIGNROWS;row++)
+		{
+			printf("R%03d: ",row);
+			for(col=0;col<ASSIGNCOLS;col++)
+				if(marks[row][col]==1)
+					printf("%03d ",col);
+		}
+	}
+#endif
+}
+
+/***********************
+** calc_minimum_costs **
+************************
+** Reduce the tableau in place: subtract from every row the
+** smallest value found in that row, then subtract from every
+** column the smallest value found in that column.
+**
+** tableau - ASSIGNROWS x ASSIGNCOLS cost tableau
+*/
+static void calc_minimum_costs(long tableau[][ASSIGNCOLS])
+{
+	ushort row,col;         /* Index variables */
+	long lowest;            /* Smallest value seen so far */
+
+	/*
+	** Row pass: find each row's minimum and take it off every
+	** element of that row.
+	*/
+	for(row=0;row<ASSIGNROWS;row++)
+	{
+		long *cells=tableau[row];
+
+		lowest=MAXPOSLONG;
+		for(col=0;col<ASSIGNCOLS;col++)
+			if(cells[col]<lowest)
+				lowest=cells[col];
+
+		for(col=0;col<ASSIGNCOLS;col++)
+			cells[col]-=lowest;
+	}
+
+	/*
+	** Column pass: the same, stepping through the array
+	** column-wise.
+	*/
+	for(col=0;col<ASSIGNCOLS;col++)
+	{
+		lowest=MAXPOSLONG;
+		for(row=0;row<ASSIGNROWS;row++)
+			if(tableau[row][col]<lowest)
+				lowest=tableau[row][col];
+
+		/*
+		** A zero minimum means there is nothing to subtract,
+		** so the column can be skipped.
+		*/
+		if(lowest==0)
+			continue;
+
+		for(row=0;row<ASSIGNROWS;row++)
+			tableau[row][col]-=lowest;
+	}
+}
+
+/**********************
+** first_assignments **
+***********************
+** Do first assignments.
+** The assignedtableau[] array holds a set of values that
+** indicate the assignment of a value, or its elimination.
+** The values are:
+**      0 = Item is neither assigned nor eliminated.
+**      1 = Item is assigned
+**      2 = Item is eliminated
+** Returns the number of selections made.  If this equals
+** the number of rows, then an optimum has been determined.
+*/
+static int first_assignments(long tableau[][ASSIGNCOLS],
+		short assignedtableau[][ASSIGNCOLS])
+{
+ushort i,j,k;                   /* Index variables */
+ushort numassigns;              /* # of assignments made in current sweep */
+ushort totnumassigns;           /* Total # of assignments (the return value) */
+ushort numzeros;                /* # of unmarked zeros in current row/column */
+int selected=0;                 /* Index of the zero chosen; -1 = none (last pass) */
+
+/*
+** Clear the assignedtableau, setting all members to show that
+** no one is yet assigned, eliminated, or anything.
+*/
+for(i=0;i<ASSIGNROWS;i++)
+	for(j=0;j<ASSIGNCOLS;j++)
+		assignedtableau[i][j]=0;
+
+totnumassigns=0;
+do {
+	numassigns=0;
+	/*
+	** Step through rows.  For each row, count the zeros that are
+	** still unmarked (neither assigned nor eliminated).  If there
+	** is exactly one, mark it as assigned and eliminate the other
+	** zeros in the same column.
+	*/
+	for(i=0;i<ASSIGNROWS;i++)
+	{       numzeros=0;
+		for(j=0;j<ASSIGNCOLS;j++)
+			if(tableau[i][j]==0L)
+				if(assignedtableau[i][j]==0)
+				{       numzeros++;
+					selected=j;     /* remembers the last unmarked zero seen */
+				}
+		if(numzeros==1)
+		{       numassigns++;
+			totnumassigns++;
+			assignedtableau[i][selected]=1;
+			for(k=0;k<ASSIGNROWS;k++)
+				if((k!=i) &&
+				   (tableau[k][selected]==0))
+					assignedtableau[k][selected]=2;
+		}
+	}
+	/*
+	** Step through columns, doing the same as above, except that
+	** here the other zeros of the selected row are eliminated.
+	*/
+	for(j=0;j<ASSIGNCOLS;j++)
+	{       numzeros=0;
+		for(i=0;i<ASSIGNROWS;i++)
+			if(tableau[i][j]==0L)
+				if(assignedtableau[i][j]==0)
+				{       numzeros++;
+					selected=i;     /* here selected is a row index */
+				}
+		if(numzeros==1)
+		{       numassigns++;
+			totnumassigns++;
+			assignedtableau[selected][j]=1;
+			for(k=0;k<ASSIGNCOLS;k++)
+				if((k!=j) &&
+				   (tableau[selected][k]==0))
+					assignedtableau[selected][k]=2;
+		}
+	}
+	/*
+	** Repeat until a full sweep makes no new assignment.
+	*/
+} while(numassigns!=0);
+
+/*
+** Every row assigned already?  Then there is nothing left to do.
+*/
+if(totnumassigns==ASSIGNROWS) return(totnumassigns);
+
+/*
+** Now step through the array by row.  If you find any unassigned
+** zeros, pick the first in the row.  Eliminate all zeros from
+** that same row & column.  This occurs if there are multiple optima...
+** possibly.
+*/
+for(i=0;i<ASSIGNROWS;i++)
+{       selected=-1;            /* -1 = no unmarked zero found in this row */
+	for(j=0;j<ASSIGNCOLS;j++)
+		if((tableau[i][j]==0L) &&
+		   (assignedtableau[i][j]==0))
+		{       selected=j;
+			break;
+		}
+	if(selected!=-1)
+	{       assignedtableau[i][selected]=1;
+		totnumassigns++;
+		for(k=0;k<ASSIGNCOLS;k++)       /* rest of the row */
+			if((k!=selected) &&
+			   (tableau[i][k]==0L))
+				assignedtableau[i][k]=2;
+		for(k=0;k<ASSIGNROWS;k++)       /* rest of the column */
+			if((k!=i) &&
+			   (tableau[k][selected]==0L))
+				assignedtableau[k][selected]=2;
+	}
+}
+
+return(totnumassigns);
+}
+
+/***********************
+** second_assignments **
+************************
+** This section of the algorithm creates the revised
+** tableau, and is difficult to explain.  I suggest you
+** refer to the algorithm's source, mentioned in comments
+** toward the beginning of the program.
+*/
+static void second_assignments(long tableau[][ASSIGNCOLS],
+		short assignedtableau[][ASSIGNCOLS])
+{
+int row,col;                            /* Row and column cursors */
+short rowmark[ASSIGNROWS];              /* 1 = row is checked */
+short colmark[ASSIGNCOLS];              /* 1 = column is checked */
+long minval;                            /* Smallest uncovered cell value */
+ushort found;                           /* Set once a row shows an assignment */
+ushort grown;                           /* Rows newly checked in current sweep */
+
+/*
+** Start with no row and no column checked.
+*/
+for(row=0;row<ASSIGNROWS;row++)
+	rowmark[row]=0;
+for(col=0;col<ASSIGNCOLS;col++)
+	colmark[col]=0;
+
+/*
+** Check every row that does not contain an assigned cell.
+*/
+for(row=0;row<ASSIGNROWS;row++)
+{
+	found=0;
+	for(col=0;(col<ASSIGNCOLS) && (found==0);col++)
+		if(assignedtableau[row][col]==1)
+			found=1;
+	if(found==0)
+		rowmark[row]=1;
+}
+
+/*
+** Propagate the checks until a sweep adds no further row:
+**  - every zero in a checked row checks its column;
+**  - every assigned cell in a checked column checks its row.
+*/
+do {
+	grown=0;
+	for(row=0;row<ASSIGNROWS;row++)
+	{
+		if(rowmark[row]!=1)
+			continue;
+		for(col=0;col<ASSIGNCOLS;col++)
+			if(tableau[row][col]==0)
+				colmark[col]=1;
+	}
+
+	for(col=0;col<ASSIGNCOLS;col++)
+	{
+		if(colmark[col]!=1)
+			continue;
+		for(row=0;row<ASSIGNROWS;row++)
+		{
+			if(assignedtableau[row][col]!=1)
+				continue;
+			if(rowmark[row]==1)
+				continue;
+			rowmark[row]=1;
+			grown++;
+		}
+	}
+} while(grown!=0);
+
+/*
+** A cell is uncovered when its row is checked and its column is
+** not.  Find the smallest value among the uncovered cells.
+*/
+minval=MAXPOSLONG;
+for(row=0;row<ASSIGNROWS;row++)
+{
+	if(rowmark[row]==0)
+		continue;
+	for(col=0;col<ASSIGNCOLS;col++)
+		if((colmark[col]!=1) && (tableau[row][col]<minval))
+			minval=tableau[row][col];
+}
+
+/*
+** Revise the tableau in one sweep: uncovered cells lose minval,
+** cells lying in an unchecked row and a checked column (covered
+** by two lines) gain it.  All other cells are left alone.
+*/
+for(row=0;row<ASSIGNROWS;row++)
+	for(col=0;col<ASSIGNCOLS;col++)
+	{
+		if((rowmark[row]!=0) && (colmark[col]!=1))
+			tableau[row][col]-=minval;
+		else if((rowmark[row]==0) && (colmark[col]==1))
+			tableau[row][col]+=minval;
+	}
+
+return;
+}
+
+/********************
+** IDEA Encryption **
+*********************
+** IDEA - International Data Encryption Algorithm.
+** Based on code presented in Applied Cryptography by Bruce Schneier.
+** Which was based on code developed by Xuejia Lai and James L. Massey.
+** Other modifications made by Colin Plumb.
+**
+*/
+
+/***********
+** DoIDEA **
+************
+** Perform IDEA encryption.  Note that we time encryption & decryption
+** time as being a single loop.
+*/
+void DoIDEA(void)
+{
+IDEAStruct *ideacfg;            /* Shortcut to the global IDEA settings */
+int n;                          /* General-purpose counter */
+IDEAkey Z,DK;                   /* Encryption and decryption subkeys */
+u16 userkey[8];                 /* User key the subkeys derive from */
+ulong ticks;                    /* Stopwatch ticks accumulated so far */
+double iterations;              /* Loops executed while timing */
+char *errorcontext;             /* Tag handed to ReportError() */
+int systemerror;                /* Status from the memory routines */
+faruchar *plain1;               /* Source plaintext */
+faruchar *crypt1;               /* Ciphertext produced from plain1 */
+faruchar *plain2;               /* Result of decrypting crypt1 */
+
+/*
+** Bind to the global settings and name the error context.
+*/
+ideacfg=&global_ideastruct;
+errorcontext="CPU:IDEA";
+
+/*
+** Re-seed the random-number generator, then draw the eight
+** words of the user key from it.  The encryption subkey
+** array is cleared before it gets filled in.
+*/
+randnum((int32)3);
+n=0;
+while(n<8)
+{
+	userkey[n]=(u16)(abs_randwc((int32)60000) & 0xFFFF);
+	n++;
+}
+for(n=0;n<KEYLEN;n++)
+	Z[n]=0;
+
+/*
+** Expand the user key into encryption subkeys, then derive the
+** matching decryption subkeys.
+*/
+en_key_idea(userkey,Z);
+de_key_idea(Z,DK);
+
+/*
+** Three equally sized buffers are needed:
+**   plain1 >>encrypt>> crypt1 >>decrypt>> plain2
+** so plain1 and plain2 should end up matching.
+** If an allocation fails, report it, release whatever was
+** already obtained and leave through ErrorExit().
+*/
+plain1=(faruchar *)AllocateMemory(ideacfg->arraysize,&systemerror);
+if(systemerror!=0)
+{
+	ReportError(errorcontext,systemerror);
+	ErrorExit();
+}
+
+crypt1=(faruchar *)AllocateMemory(ideacfg->arraysize,&systemerror);
+if(systemerror!=0)
+{
+	ReportError(errorcontext,systemerror);
+	FreeMemory((farvoid *)plain1,&systemerror);
+	ErrorExit();
+}
+
+plain2=(faruchar *)AllocateMemory(ideacfg->arraysize,&systemerror);
+if(systemerror!=0)
+{
+	ReportError(errorcontext,systemerror);
+	FreeMemory((farvoid *)plain1,&systemerror);
+	FreeMemory((farvoid *)crypt1,&systemerror);
+	ErrorExit();
+}
+
+/*
+** The "plaintext" is nothing more than random byte values.
+*/
+for(n=0;n<ideacfg->arraysize;n++)
+	plain1[n]=(uchar)(abs_randwc(255) & 0xFF);
+
+/*
+** First time through (adjust==0) the loop count has to be
+** calibrated: start at 100 and step by 10 until one timed call
+** runs longer than global_min_ticks or the MAXIDEALOOPS ceiling
+** is reached.
+*/
+if(ideacfg->adjust==0)
+{
+	ideacfg->loops=100L;
+	while(ideacfg->loops<MAXIDEALOOPS)
+	{
+		if(DoIDEAIteration(plain1,crypt1,plain2,
+		  ideacfg->arraysize,
+		  ideacfg->loops,
+		  Z,DK)>global_min_ticks)
+			break;
+		ideacfg->loops+=10L;
+	}
+}
+
+/*
+** The timed run proper: keep calling the iteration routine until
+** the requested number of seconds has been accumulated.
+*/
+ticks=0L;
+iterations=(double)0.0;
+do {
+	ticks+=DoIDEAIteration(plain1,crypt1,plain2,
+		ideacfg->arraysize,
+		ideacfg->loops,Z,DK);
+	iterations+=(double)ideacfg->loops;
+} while(TicksToSecs(ticks)<ideacfg->request_secs);
+
+/*
+** Release the buffers and store the result.
+*/
+FreeMemory((farvoid *)plain1,&systemerror);
+FreeMemory((farvoid *)crypt1,&systemerror);
+FreeMemory((farvoid *)plain2,&systemerror);
+ideacfg->iterspersec=iterations / TicksToFracSecs(ticks);
+
+/*
+** Calibration is a one-time affair; flag it as done.
+*/
+if(ideacfg->adjust==0)
+	ideacfg->adjust=1;
+
+return;
+
+}
+
+/********************
+** DoIDEAIteration **
+*********************
+** Execute a single iteration of the IDEA encryption algorithm.
+** Actually, a single iteration is one encryption and one
+** decryption.
+*/
+static ulong DoIDEAIteration(faruchar *plain1,
+			faruchar *crypt1,
+			faruchar *plain2,
+			ulong arraysize,
+			ulong nloops,
+			IDEAkey Z,
+			IDEAkey DK)
+{
+ulong off;                      /* Byte offset of the current block */
+ulong mark;                     /* Stopwatch reading at start */
+#ifdef DEBUG
+int mismatch=0;                 /* Set when plain2 differs from plain1 */
+#endif
+
+mark=StartStopwatch();
+
+/*
+** Each pass walks the buffers in blocks of four u16 words:
+** first plain1 is enciphered into crypt1 under Z, then crypt1
+** is deciphered into plain2 under DK.
+*/
+while(nloops--)
+{
+	off=0;
+	while(off<arraysize)
+	{       cipher_idea((u16 *)&plain1[off],(u16 *)&crypt1[off],Z);
+		off+=sizeof(u16)*4;
+	}
+	off=0;
+	while(off<arraysize)
+	{       cipher_idea((u16 *)&crypt1[off],(u16 *)&plain2[off],DK);
+		off+=sizeof(u16)*4;
+	}
+}
+
+#ifdef DEBUG
+/* Byte-for-byte check that the round trip restored the plaintext. */
+for(off=0;off<arraysize;off++)
+	if(plain1[off]!=plain2[off])
+	{       printf("IDEA Error! \n");
+		mismatch=1;
+	}
+if(mismatch==0) printf("IDEA: OK\n");
+#endif
+return(StopStopwatch(mark));
+}
+
+/********
+** mul **
+*********
+** Performs multiplication, modulo (2**16)+1.  This code is structured
+** on the assumption that untaken branches are cheaper than taken
+** branches, and that the compiler doesn't schedule branches.
+*/
+static u16 mul(register u16 a, register u16 b)  /* returns a*b modulo 0x10001 */
+{
+register u32 p;         /* Full product; in IDEA an operand of 0 stands for 2**16 */
+if(a)
+{       if(b)
+	{       p=(u32)a*b;     /* Widen first: u16*u16 done in int can overflow (UB) */
+		b=low16(p);     /* Low half of the product */
+		a=(u16)(p>>16); /* High half of the product */
+		return(b-a+(b<a));      /* lo-hi, plus 1 when that borrows */
+	}
+	else
+		return(1-a);    /* b==0: result is 1-a, truncated to u16 on return */
+}
+else
+	return(1-b);            /* a==0: result is 1-b, truncated to u16 on return */
+}
+
+/********
+** inv **
+*********
+** Compute multiplicative inverse of x, modulo (2**16)+1
+** using Euclid's GCD algorithm.  It is unrolled twice
+** to avoid swapping the meaning of the registers.  And
+** some subtracts are changed to adds.
+*/
+static u16 inv(u16 x)
+{
+u16 t0, t1;             /* Coefficients built up from the quotients */
+u16 q, y;               /* Current quotient; the other remainder of the pair */
+
+if(x<=1)
+	return(x);      /* 0 and 1 are self-inverse */
+t1=0x10001 / x;         /* First step uses the constant: 0x10001 does not fit a u16 */
+y=0x10001 % x;
+if(y==1)
+	return(low16(1-t1));    /* low16() presumably keeps the low 16 bits of 1-t1 */
+t0=1;
+do {                    /* Two division steps per pass; x and y trade roles */
+	q=x/y;
+	x=x%y;
+	t0+=q*t1;
+	if(x==1) return(t0);    /* Remainder hit 1 on the x side */
+	q=y/x;
+	y=y%x;
+	t1+=q*t0;
+} while(y!=1);          /* Remainder hit 1 on the y side */
+return(low16(1-t1));
+}
+
+/****************
+** en_key_idea **
+*****************
+** Compute IDEA encryption subkeys Z
+*/
+static void en_key_idea(u16 *userkey, u16 *Z)
+{
+int i,j;                /* i: slot inside the 8-word window; j: subkeys produced */
+
+/*
+** First 8 subkeys = user key; the rest are built by shifting earlier ones.
+*/
+for(j=0;j<8;j++)
+	Z[j]=*userkey++;
+for(i=0;j<KEYLEN;j++)   /* j carries on from 8; only i is initialised here */
+{       i++;
+	Z[i+7]=(Z[i&7]<<9)| (Z[(i+1) & 7] >> 7);        /* low 7 bits | top 9 of next */
+	Z+=i&8;         /* When i reaches 8, slide the window forward 8 words */
+	i&=7;           /* ...and wrap i back to 0 */
+}
+return;
+}
+
+/****************
+** de_key_idea **
+*****************
+** Compute IDEA decryption subkeys DK from encryption
+** subkeys Z.
+*/
+static void de_key_idea(IDEAkey Z, IDEAkey DK)
+{
+IDEAkey TT;             /* Scratch key schedule, filled from the end backwards */
+int j;
+u16 t1, t2, t3;         /* Holding slots so a group can be stored in a new order */
+u16 *p;                 /* Write cursor into TT; moves downwards */
+p=(u16 *)(TT+KEYLEN);   /* Start one past the last element of TT */
+
+t1=inv(*Z++);           /* First group of four: inv() on the 1st and 4th, */
+t2=-*Z++;               /* negation on the 2nd and 3rd */
+t3=-*Z++;
+*--p=inv(*Z++);
+*--p=t3;
+*--p=t2;
+*--p=t1;
+
+for(j=1;j<ROUNDS;j++)
+{       t1=*Z++;        /* Next pair is copied unchanged, order kept */
+	*--p=*Z++;
+	*--p=t1;
+	t1=inv(*Z++);
+	t2=-*Z++;
+	t3=-*Z++;
+	*--p=inv(*Z++);
+	*--p=t2;        /* Note: t2/t3 are stored swapped relative to */
+	*--p=t3;        /* the groups before and after this loop */
+	*--p=t1;
+}
+t1=*Z++;                /* Final pair and final group of four */
+*--p=*Z++;
+*--p=t1;
+t1=inv(*Z++);
+t2=-*Z++;
+t3=-*Z++;
+*--p=inv(*Z++);
+*--p=t3;
+*--p=t2;
+*--p=t1;
+/*
+** Copy the finished schedule to DK and zero the temp copy
+*/
+for(j=0,p=TT;j<KEYLEN;j++)
+{       *DK++=*p;
+	*p++=0;
+}
+
+return;
+}
+
+/*
+** MUL(x,y)
+** This #define creates a macro that computes x=x*y modulo 0x10001.
+** Requires temps t16 and t32.  Also requires y to be strictly 16
+** bits.  Here, I am using the simplest form.  May not be the
+** fastest. -- RG
+*/
+/* #define MUL(x,y) (x=mul(low16(x),y)) */
+
+/****************
+** cipher_idea **
+*****************
+** IDEA encryption/decryption algorithm.
+*/
+static void cipher_idea(u16 in[4],
+		u16 out[4],
+		register IDEAkey Z)
+{
+register u16 x1, x2, x3, x4, t1, t2;    /* Four data words, two temporaries */
+/* register u16 t16;
+register u16 t32; */
+int r=ROUNDS;                           /* Rounds left to run */
+
+x1=*in++;
+x2=*in++;
+x3=*in++;
+x4=*in;
+
+do {                    /* One round per pass; each consumes six subkeys */
+	MUL(x1,*Z++);   /* MUL is a macro defined elsewhere in the file */
+	x2+=*Z++;
+	x3+=*Z++;
+	MUL(x4,*Z++);
+
+	t2=x1^x3;
+	MUL(t2,*Z++);
+	t1=t2+(x2^x4);
+	MUL(t1,*Z++);
+	t2=t1+t2;
+
+	x1^=t1;
+	x4^=t2;
+
+	t2^=x2;         /* x2 and x3 are XOR-updated and change places */
+	x2=x3^t1;
+	x3=t2;
+} while(--r);
+MUL(x1,*Z++);           /* Output step: four more subkeys */
+*out++=x1;
+*out++=x3+*Z++;         /* x3 is written before x2 here */
+*out++=x2+*Z++;
+MUL(x4,*Z);
+*out=x4;
+return;
+}
+
+/************************
+** HUFFMAN COMPRESSION **
+************************/
+
+/**************
+** DoHuffman **
+***************
+** Execute a huffman compression on a block of plaintext.
+** Note that (as with IDEA encryption) an iteration of the
+** Huffman test includes a compression AND a decompression.
+** Also, the compression cycle includes building the
+** Huffman tree.
+*/
+void DoHuffman(void)
+{
+HuffStruct *huffcfg;            /* Shortcut to the global Huffman settings */
+char *errorcontext;             /* Tag handed to ReportError() */
+int systemerror;                /* Status from the memory routines */
+ulong ticks;                    /* Stopwatch ticks accumulated so far */
+double iterations;              /* Loops executed while timing */
+farchar *comparray;             /* Receives the compressed bits */
+farchar *decomparray;           /* Receives the decompressed text */
+farchar *plaintext;             /* Text to be compressed */
+
+/*
+** Bind to the global settings and name the error context.
+*/
+huffcfg=&global_huffstruct;
+errorcontext="CPU:Huffman";
+
+/*
+** Four allocations follow: the plaintext, a compression buffer
+** and a decompression buffer (all three arraysize bytes -- the
+** compressed form is presumed to need no more room than the
+** original), and finally room for 512 Huffman tree nodes.
+** When an allocation fails the error is reported, the buffers
+** obtained so far are released and ErrorExit() is called.
+*/
+plaintext=(farchar *)AllocateMemory(huffcfg->arraysize,&systemerror);
+if(systemerror!=0)
+{
+	ReportError(errorcontext,systemerror);
+	ErrorExit();
+}
+
+comparray=(farchar *)AllocateMemory(huffcfg->arraysize,&systemerror);
+if(systemerror!=0)
+{
+	ReportError(errorcontext,systemerror);
+	FreeMemory(plaintext,&systemerror);
+	ErrorExit();
+}
+
+decomparray=(farchar *)AllocateMemory(huffcfg->arraysize,&systemerror);
+if(systemerror!=0)
+{
+	ReportError(errorcontext,systemerror);
+	FreeMemory(plaintext,&systemerror);
+	FreeMemory(comparray,&systemerror);
+	ErrorExit();
+}
+
+hufftree=(huff_node *)AllocateMemory(sizeof(huff_node) * 512,
+	&systemerror);
+if(systemerror!=0)
+{
+	ReportError(errorcontext,systemerror);
+	FreeMemory(plaintext,&systemerror);
+	FreeMemory(comparray,&systemerror);
+	FreeMemory(decomparray,&systemerror);
+	ErrorExit();
+}
+
+/*
+** Fill the plaintext with words from the word catalog so that
+** there is something worth compressing.  The generator is
+** re-seeded first so that things repeat from run to run
+** (added by Uwe F. Mayer).  The last byte of the buffer is a
+** terminating NUL.
+*/
+randnum((int32)13);
+create_text_block(plaintext,huffcfg->arraysize-1,(ushort)500);
+plaintext[huffcfg->arraysize-1L]='\0';
+plaintextlen=huffcfg->arraysize;
+
+/*
+** First time through (adjust==0) the loop count has to be
+** calibrated: start at 100 and step by 10 until one timed call
+** runs longer than global_min_ticks or the MAXHUFFLOOPS ceiling
+** is reached.
+*/
+if(huffcfg->adjust==0)
+{
+	huffcfg->loops=100L;
+	while(huffcfg->loops<MAXHUFFLOOPS)
+	{
+		if(DoHuffIteration(plaintext,
+			comparray,
+			decomparray,
+		  huffcfg->arraysize,
+		  huffcfg->loops,
+		  hufftree)>global_min_ticks)
+			break;
+		huffcfg->loops+=10L;
+	}
+}
+
+/*
+** The timed run proper: keep calling the iteration routine until
+** the requested number of seconds has been accumulated.
+*/
+ticks=0L;
+iterations=(double)0.0;
+do {
+	ticks+=DoHuffIteration(plaintext,
+		comparray,
+		decomparray,
+		huffcfg->arraysize,
+		huffcfg->loops,
+		hufftree);
+	iterations+=(double)huffcfg->loops;
+} while(TicksToSecs(ticks)<huffcfg->request_secs);
+
+/*
+** Release everything and store the result.
+*/
+FreeMemory((farvoid *)plaintext,&systemerror);
+FreeMemory((farvoid *)comparray,&systemerror);
+FreeMemory((farvoid *)decomparray,&systemerror);
+FreeMemory((farvoid *)hufftree,&systemerror);
+huffcfg->iterspersec=iterations / TicksToFracSecs(ticks);
+
+/*
+** Calibration is a one-time affair; flag it as done.
+*/
+if(huffcfg->adjust==0)
+	huffcfg->adjust=1;
+
+}
+
+/*********************
+** create_text_line **
+**********************
+** Create a random line of text, stored at *dt.  The line may be
+** no more than nchars long.
+*/
+static void create_text_line(farchar *dt,
+			long nchars)
+{
+long written;           /* Characters placed in the line so far */
+long chunk;             /* Characters to copy for the current word */
+long wordlen;           /* Length of the word sitting in myword */
+char myword[40];        /* Scratch copy of the word plus its blank */
+farchar *wordptr;       /* Word picked from the catalog */
+
+written=0;
+for(;;)
+{
+	/*
+	** Fetch a random catalog word, terminator included.
+	*/
+	wordptr=wordcatarray[abs_randwc((int32)WORDCATSIZE)];
+	MoveMemory((farvoid *)myword,
+		(farvoid *)wordptr,
+		(unsigned long)strlen(wordptr)+1);
+
+	/*
+	** The terminator gives way to a blank, so the piece to copy
+	** is the word plus one separator.
+	*/
+	wordlen=(long)strlen(myword);
+	myword[wordlen]=' ';
+	chunk=wordlen+1;
+
+	/*
+	** Clip the piece if it would run past the end of the line.
+	*/
+	if((chunk+written)>nchars)
+		chunk=nchars-written;
+
+	/*
+	** Append it and stop once the line is full.
+	*/
+	MoveMemory((farvoid *)dt,(farvoid *)myword,(unsigned long)chunk);
+	written+=chunk;
+	dt+=chunk;
+	if(written>=nchars)
+		break;
+}
+
+return;
+}
+
+/**********************
+** create_text_block **
+***********************
+** Build a block of text randomly loaded with words.  The words
+** come from the wordcatalog (which must be loaded before you
+** call this).
+** *tb points to the memory where the text is to be built.
+** tblen is the # of bytes to put into the text block
+** maxlinlen is the maximum length of any line (line end indicated
+**  by a newline character, '\n').
+*/
+static void create_text_block(farchar *tb,
+			ulong tblen,
+			ushort maxlinlen)
+{
+ulong bytessofar;       /* # of bytes written so far */
+ulong linelen;          /* Length of the current line, newline included */
+/* Fills tb[0..tblen-1] with random-length lines of words, each ending in '\n'. */
+bytessofar=0L;
+while(bytessofar<tblen) {       /* Tested up front: tblen==0 writes nothing */
+
+/*
+** Pick a random length for a line and fill the line.
+** Make sure the line can fit (haven't exceeded tblen) and also
+** make sure you leave room to append the newline.
+*/
+linelen=abs_randwc(maxlinlen-6)+6;
+if((linelen+bytessofar)>tblen)
+	linelen=tblen-bytessofar;       /* Clip the last line to what is left */
+
+if(linelen>1)
+{
+	create_text_line(tb,linelen);
+}
+tb+=linelen-1;          /* Step to the last byte of the line... */
+*tb++='\n';             /* ...and store the newline there */
+
+bytessofar+=linelen;
+
+}
+
+}
+
+/********************
+** DoHuffIteration **
+*********************
+** Perform the huffman benchmark.  This routine
+**  (a) Builds the huffman tree
+**  (b) Compresses the text
+**  (c) Decompresses the text and verifies correct decompression
+*/
+static ulong DoHuffIteration(farchar *plaintext,
+	farchar *comparray,
+	farchar *decomparray,
+	ulong arraysize,
+	ulong nloops,
+	huff_node *hufftree)
+{
+int i;                          /* Index */
+long j;                         /* Bigger index */
+int root;                       /* Index of the huffman tree root */
+float lowfreq1, lowfreq2;       /* Two lowest frequencies found */
+int lowidx1, lowidx2;           /* Indexes of low freq. elements */
+long bitoffset;                 /* Bit offset into compressed array */
+long textoffset;                /* Char offset into text */
+long maxbitoffset;              /* Holds limit of bit offset */
+long bitstringlen;              /* Length of bitstring */
+int c;                          /* Character from plaintext */
+char bitstring[30];             /* Holds bitstring; NOTE(review): no bounds check */
+ulong elapsed;                  /* For stopwatch */
+#ifdef DEBUG
+int status=0;
+#endif
+
+/*
+** Start the stopwatch
+*/
+elapsed=StartStopwatch();
+
+/*
+** Do everything for nloops
+*/
+while(nloops--)
+{
+
+/*
+** Calculate the frequency of each byte value. Store the
+** results in what will become the "leaves" of the
+** Huffman tree.  Interior nodes will be built in those
+** nodes greater than node #255.
+*/
+for(i=0;i<256;i++)
+{
+	hufftree[i].freq=(float)0.0;
+	hufftree[i].c=(unsigned char)i;
+}
+
+for(j=0;j<arraysize;j++)        /* Index as unsigned char: plain char may be signed */
+	hufftree[(int)(unsigned char)plaintext[j]].freq+=(float)1.0;
+
+for(i=0;i<256;i++)
+	if(hufftree[i].freq != (float)0.0)
+		hufftree[i].freq/=(float)arraysize;
+
+/* Reset the second half of the tree. Otherwise the loop below that
+** compares the frequencies up to index 512 makes no sense. Some
+** systems automatically zero out memory upon allocation, others (like
+** for example DEC Unix) do not. Depending on this the loop below gets
+** different data and different run times. On our alpha the data that
+** was arbitrarily assigned led to an underflow error at runtime. We
+** use that zeroed-out bits are in fact 0 as a float.
+** Uwe F. Mayer */
+bzero((char *)&(hufftree[256]),sizeof(huff_node)*256);
+/*
+** Build the huffman tree.  First clear all the parent
+** pointers and left/right pointers.  Also, discard all
+** nodes that have a frequency of true 0.  */
+for(i=0;i<512;i++)
+{       if(hufftree[i].freq==(float)0.0)
+		hufftree[i].parent=EXCLUDED;
+	else
+		hufftree[i].parent=hufftree[i].left=hufftree[i].right=-1;
+}
+
+/*
+** Go through the tree, repeatedly joining the two parentless
+** nodes of lowest frequency under a new node.
+*/
+root=255;                       /* Starting root node-1 */
+while(1)
+{
+	lowfreq1=(float)2.0; lowfreq2=(float)2.0;
+	lowidx1=-1; lowidx2=-1;
+	/*
+	** Find first lowest frequency.
+	*/
+	for(i=0;i<=root;i++)
+		if(hufftree[i].parent<0)
+			if(hufftree[i].freq<lowfreq1)
+			{       lowfreq1=hufftree[i].freq;
+				lowidx1=i;
+			}
+
+	/*
+	** Did we find a lowest value?  If not, the
+	** tree is done.
+	*/
+	if(lowidx1==-1) break;
+
+	/*
+	** Find next lowest frequency
+	*/
+	for(i=0;i<=root;i++)
+		if((hufftree[i].parent<0) && (i!=lowidx1))
+			if(hufftree[i].freq<lowfreq2)
+			{       lowfreq2=hufftree[i].freq;
+				lowidx2=i;
+			}
+
+	/*
+	** If we could only find one item, then that
+	** item is surely the root, and (as above) the
+	** tree is done.
+	*/
+	if(lowidx2==-1) break;
+
+	/*
+	** Attach the two new nodes to the current root, and
+	** advance the current root.
+	*/
+	root++;                 /* New root */
+	hufftree[lowidx1].parent=root;
+	hufftree[lowidx2].parent=root;
+	hufftree[root].freq=lowfreq1+lowfreq2;
+	hufftree[root].left=lowidx1;
+	hufftree[root].right=lowidx2;
+	hufftree[root].parent=-2;       /* Show root */
+}
+
+/*
+** Huffman tree built...compress the plaintext
+*/
+bitoffset=0L;                           /* Initialize bit offset */
+for(i=0;i<arraysize;i++)
+{
+	c=(int)(unsigned char)plaintext[i];  /* Fetch character as 0..255 */
+	/*
+	** Build a bit string for byte c, walking from leaf to root
+	*/
+	bitstringlen=0;
+	while(hufftree[c].parent!=-2)
+	{       if(hufftree[hufftree[c].parent].left==c)
+			bitstring[bitstringlen]='0';
+		else
+			bitstring[bitstringlen]='1';
+		c=hufftree[c].parent;
+		bitstringlen++;
+	}
+
+	/*
+	** Step backwards through the bit string, setting
+	** bits in the compressed array as you go.
+	*/
+	while(bitstringlen--)
+	{       SetCompBit((u8 *)comparray,(u32)bitoffset,bitstring[bitstringlen]);
+		bitoffset++;
+	}
+}
+
+/*
+** Compression done.  Perform de-compression.
+*/
+maxbitoffset=bitoffset;
+bitoffset=0;
+textoffset=0;
+do {
+	i=root;
+	while(hufftree[i].left!=-1)     /* Descend until a leaf is reached */
+	{       if(GetCompBit((u8 *)comparray,(u32)bitoffset)==0)
+			i=hufftree[i].left;
+		else
+			i=hufftree[i].right;
+		bitoffset++;
+	}
+	decomparray[textoffset]=hufftree[i].c;
+
+#ifdef DEBUG
+	if((unsigned char)hufftree[i].c != (unsigned char)plaintext[textoffset])
+	{
+		/* Show error */
+		printf("Error at textoffset %ld\n",textoffset);
+		status=1;
+	}
+#endif
+	textoffset++;
+} while(bitoffset<maxbitoffset);
+
+}       /* End the big while(nloops--) from above */
+
+/*
+** All done
+*/
+#ifdef DEBUG
+  if (status==0) printf("Huffman: OK\n");
+#endif
+return(StopStopwatch(elapsed));
+}
+
+/***************
+** SetCompBit **
+****************
+** Set a bit in the compression array.  The value of the
+** bit is set according to char bitchar.
+*/
+static void SetCompBit(u8 *comparray,
+		u32 bitoffset,
+		char bitchar)
+{
+u8 *cell;               /* Byte that holds the addressed bit */
+int mask;               /* Single-bit mask inside that byte */
+
+/*
+** Eight bits per byte: the upper part of the offset selects
+** the byte, the low three bits select the bit within it
+** (bit 0 being the least significant).
+*/
+cell=comparray+(bitoffset/8);
+mask=1<<(int)(bitoffset&7);
+
+/*
+** A '1' character raises the bit; any other character lowers it.
+*/
+if(bitchar!='1')
+	*cell&=~mask;
+else
+	*cell|=mask;
+
+}
+
+/***************
+** GetCompBit **
+****************
+** Return the bit value of a bit in the compression array.
+** Returns 0 if the bit is clear, nonzero otherwise.
+*/
+static int GetCompBit(u8 *comparray,
+		u32 bitoffset)
+{
+u8 cell;                /* Byte that holds the addressed bit */
+int mask;               /* Single-bit mask inside that byte */
+
+/*
+** Eight bits per byte: the upper part of the offset selects
+** the byte, the low three bits select the bit within it
+** (bit 0 being the least significant).
+*/
+cell=comparray[bitoffset/8];
+mask=1<<(int)(bitoffset&7);
+
+/* 0 when the bit is clear, the mask value when it is set. */
+return(cell & mask);
+}
+
+/********************************
+** BACK PROPAGATION NEURAL NET **
+*********************************
+** This code is a modified version of the code
+** that was submitted to BYTE Magazine by
+** Maureen Caudill.  It accompanied an article
+** that I CANNOT NOW RECALL.
+** The author's original heading/comment was
+** as follows:
+**
+**  Backpropagation Network
+**  Written by Maureen Caudill
+**  in Think C 4.0 on a Macintosh
+**
+**  (c) Maureen Caudill 1988-1991
+**  This network will accept 5x7 input patterns
+**  and produce 8 bit output patterns.
+**  The source code may be copied or modified without restriction,
+**  but no fee may be charged for its use.
+**
+** ++++++++++++++
+** I have modified the code so that it will work
+** on systems other than a Macintosh -- RG
+*/
+
+/***********
+** DoNNet **
+************
+** Perform the neural net benchmark.
+** Note that this benchmark is one of the few that
+** requires an input file.  That file is "NNET.DAT" and
+** should be in the local directory (from which the
+** benchmark program is launched).
+*/
+void DoNNET(void)
+{
+NNetStruct *nnetcfg;            /* Shortcut to the global NNET settings */
+char *errorcontext;             /* Error tag (set but not consulted here) */
+ulong ticks;                    /* Stopwatch ticks accumulated so far */
+double iterations;              /* Loops executed while timing */
+
+/*
+** Outline:
+**   1. seed the generator and read the pattern data;
+**   2. calibrate the loop count if that has not been done yet;
+**   3. run the timed test and record iterations per second.
+*/
+
+/*
+** Bind to the global settings and name the error context.
+*/
+nnetcfg=&global_nnetstruct;
+errorcontext="CPU:NNET";
+
+/*
+** The net is initialized from the random number generator and
+** the results are sensitive to that initial state, so the
+** generator is re-seeded here and again ahead of every call to
+** DoNNetIteration() below.
+*/
+randnum((int32)3);
+
+/*
+** Load the input and output patterns.  They do not change once
+** loaded, so this is done a single time up front.  A nonzero
+** status from read_data_file() ends the run.
+*/
+if(read_data_file()!=0)
+	ErrorExit();
+
+/*
+** First time through (adjust==0) the loop count has to be
+** calibrated: start at 1 and step by 1 until one timed call
+** runs longer than global_min_ticks or the MAXNNETLOOPS ceiling
+** is reached.
+*/
+if(nnetcfg->adjust==0)
+{
+	nnetcfg->loops=1L;
+	while(nnetcfg->loops<MAXNNETLOOPS)
+	{
+		randnum((int32)3);
+		if(DoNNetIteration(nnetcfg->loops)
+			>global_min_ticks)
+			break;
+		nnetcfg->loops++;
+	}
+}
+
+/*
+** The timed run proper: keep calling the iteration routine until
+** the requested number of seconds has been accumulated.  The
+** generator is re-seeded before each call.
+*/
+ticks=0L;
+iterations=(double)0.0;
+do {
+	randnum((int32)3);
+	ticks+=DoNNetIteration(nnetcfg->loops);
+	iterations+=(double)nnetcfg->loops;
+} while(TicksToSecs(ticks)<nnetcfg->request_secs);
+
+/*
+** Store the result: loops executed per second of measured
+** time.
+*/
+nnetcfg->iterspersec=iterations / TicksToFracSecs(ticks);
+
+/*
+** Calibration is a one-time affair; flag it as done.
+*/
+if(nnetcfg->adjust==0)
+	nnetcfg->adjust=1;
+
+return;
+}
+
+/********************
+** DoNNetIteration **
+*********************
+** Do a single iteration of the neural net benchmark.
+** By iteration, we mean a "learning" pass.
+*/
+static ulong DoNNetIteration(ulong nloops)
+{
+ulong mark;             /* Stopwatch reading at start */
+int p;                  /* Pattern being presented */
+
+/*
+** One stopwatch interval spans all nloops learning cycles (weight
+** randomization and change zeroing included) to cut clock jitter.
+*/
+mark=StartStopwatch();
+for(;nloops!=0;nloops--)
+{
+	randomize_wts();
+	zero_changes();
+	iteration_count=1;
+	learned=F;
+	numpasses=0;
+
+	/* Present every pattern, then test; repeat while still F. */
+	do {
+		p=0;
+		while(p<numpats)
+		{
+			worst_error=0.0;        /* cleared ahead of every pattern */
+			move_wt_changes();      /* last changes become momentum */
+			do_forward_pass(p);
+			do_back_pass(p);
+			iteration_count++;
+			p++;
+		}
+		numpasses++;
+		learned=check_out_error();
+	} while(learned==F);
+#ifdef DEBUG
+	printf("Learned in %d passes\n",numpasses);
+#endif
+}
+return(StopStopwatch(mark));
+}
+
+/*************************
+** do_mid_forward(patt) **
+**************************
+** Process the middle layer's forward pass
+** The activation of middle layer's neurode is the weighted
+** sum of the inputs from the input pattern, with sigmoid
+** function applied to the inputs.
+**/
+static void  do_mid_forward(int patt)
+{
+double  acc;            /* Weighted input sum for one neurode */
+int     m, k;           /* Middle-layer and input indexes */
+
+m=0;
+while(m<MID_SIZE)
+{
+	/* Weighted sum of the pattern's inputs, in index order. */
+	acc=0.0;
+	for(k=0;k<IN_SIZE;k++)
+		acc+=mid_wts[m][k]*in_pats[patt][k];
+
+	/* Squash through the sigmoid f(x) = 1/(1+exp(-x)). */
+	mid_out[m]=1.0/(1.0+exp(-acc));
+	m++;
+}
+
+return;
+}
+
+/*********************
+** do_out_forward() **
+**********************
+** process the forward pass through the output layer
+** The activation of the output layer is the weighted sum of
+** the inputs (outputs from middle layer), modified by the
+** sigmoid function.
+**/
+static void  do_out_forward()
+{
+double acc;             /* Weighted input sum for one neurode */
+int o, m;               /* Output-layer and middle-layer indexes */
+
+o=0;
+while(o<OUT_SIZE)
+{
+	/*
+	** Weighted sum of the middle layer's outputs, taken in
+	** index order.
+	*/
+	acc=0.0;
+	for(m=0;m<MID_SIZE;m++)
+		acc+=out_wts[o][m]*mid_out[m];
+
+	/* Squash through the sigmoid f(x) = 1/(1+exp(-x)). */
+	out_out[o]=1.0/(1.0+exp(-acc));
+	o++;
+}
+
+return;
+}
+
+/*************************
+** display_output(patt) **
+**************************
+** Display the actual output vs. the desired output of the
+** network.
+** Once the training is complete, and the "learned" flag set
+** to TRUE, then display_output sends its output to both
+** the screen and to a text output file.
+**
+** NOTE: This routine has been disabled in the benchmark
+** version. -- RG
+**/
+/*
+void  display_output(int patt)
+{
+int             i;
+
+	fprintf(outfile,"\n Iteration # %d",iteration_count);
+	fprintf(outfile,"\n Desired Output:  ");
+
+	for (i=0; i<OUT_SIZE; i++)
+	{
+		fprintf(outfile,"%6.3f  ",out_pats[patt][i]);
+	}
+	fprintf(outfile,"\n Actual Output:   ");
+
+	for (i=0; i<OUT_SIZE; i++)
+	{
+		fprintf(outfile,"%6.3f  ",out_out[i]);
+	}
+	fprintf(outfile,"\n");
+	return;
+}
+*/
+
+/**********************
+** do_forward_pass() **
+***********************
+** control function for the forward pass through the network
+** NOTE: I have disabled the call to display_output() in
+**  the benchmark version -- RG.
+**/
+static void  do_forward_pass(int patt)
+{
+/* Middle layer first: its outputs feed the output layer. */
+do_mid_forward(patt);
+do_out_forward();
+/* display_output(patt) is deliberately not called in the benchmark version. */
+}
+
+/***********************
+** do_out_error(patt) **
+************************
+** out_error[n] = out_pats[patt][n] - out_out[n] (Desired - Actual).  Also
+** stores mean |error| in avg_out_error[patt], max |error| in tot_out_error[patt].
+**/
+static void do_out_error(int patt)
+{
+int neurode;
+double error,tot_error, sum;    /* tot_error: largest |error| so far; sum: running sum of |error| */
+
+tot_error = 0.0;
+sum = 0.0;
+for (neurode=0; neurode<OUT_SIZE; neurode++)
+{
+	out_error[neurode] = out_pats[patt][neurode] - out_out[neurode];
+	/*
+	** while we're here, also compute magnitude
+	** of total error and worst error in this pass.
+	** We use these to decide if we are done yet.
+	*/
+	error = out_error[neurode];
+	if (error <0.0)
+	{
+		sum += -error;
+		if (-error > tot_error)
+			tot_error = -error; /* worst error this pattern */
+	}
+	else
+	{
+		sum += error;
+		if (error > tot_error)
+			tot_error = error; /* worst error this pattern */
+	}
+}
+avg_out_error[patt] = sum/OUT_SIZE;
+tot_out_error[patt] = tot_error;
+return;
+}
+
+/***********************
+** worst_pass_error() **
+************************
+** Set worst_error = max tot_out_error[i], average_error = mean avg_out_error[i]
+**/
+static void  worst_pass_error()
+{
+double error,sum;
+
+int i;
+
+error = 0.0;
+sum = 0.0;
+for (i=0; i<numpats; i++)
+{
+	if (tot_out_error[i] > error) error = tot_out_error[i];
+	sum += avg_out_error[i];
+}
+worst_error = error;
+average_error = sum/numpats;    /* NOTE(review): no guard here for numpats == 0 */
+return;
+}
+
+/*******************
+** do_mid_error() **
+********************
+** Back-propagate the output errors to the middle layer:
+**   mid_error[m] = mid_out[m]*(1-mid_out[m]) * sum_i out_wts[i][m]*out_error[i]
+** The first factor is the derivative of the sigmoid,
+**        f'(x) = f(x)(1 - f(x))
+** where f(x) is the middle layer neurode's output (mid_out[]).
+** Reads out_error[], which do_out_error() fills in.
+**/
+static void do_mid_error()
+{
+double sum;
+int neurode, i;
+
+for (neurode=0; neurode<MID_SIZE; neurode++)
+{
+	sum = 0.0;
+	for (i=0; i<OUT_SIZE; i++)
+		sum += out_wts[i][neurode]*out_error[i];
+
+	/*
+	** apply the derivative of the sigmoid here
+	** Because of the choice of sigmoid f(I), the derivative
+	** of the sigmoid is f'(I) = f(I)(1 - f(I))
+	*/
+	mid_error[neurode] = mid_out[neurode]*(1-mid_out[neurode])*sum;
+}
+return;
+}
+
+/*********************
+** adjust_out_wts() **
+**********************
+** Adjust the weights of the output layer (Delta Rule with momentum):
+**   delta = BETA*out_error[n]*mid_out[w] + ALPHA*out_wt_change[n][w]
+** delta is added to out_wts[n][w] and accumulated in
+** out_wt_cum_change[n][w] for the next pass's momentum term.
+**/
+static void adjust_out_wts()
+{
+int weight, neurode;
+double learn,delta,alph;
+
+learn = BETA;
+alph  = ALPHA;
+for (neurode=0; neurode<OUT_SIZE; neurode++)
+{
+	for (weight=0; weight<MID_SIZE; weight++)
+	{
+		/* standard delta rule */
+		delta = learn * out_error[neurode] * mid_out[weight];
+
+		/* now the momentum term */
+		delta += alph * out_wt_change[neurode][weight];
+		out_wts[neurode][weight] += delta;
+
+		/* keep track of this pass's cum wt changes for next pass's momentum */
+		out_wt_cum_change[neurode][weight] += delta;
+	}
+}
+return;
+}
+
+/*************************
+** adjust_mid_wts(patt) **
+**************************
+** Adjust the middle layer weights for input pattern number patt:
+**   delta = BETA*mid_error[n]*in_pats[patt][w] + ALPHA*mid_wt_change[n][w]
+** added to mid_wts[n][w] and accumulated in mid_wt_cum_change[n][w].
+**/
+static void adjust_mid_wts(int patt)
+{
+int weight, neurode;
+double learn,alph,delta;
+
+learn = BETA;
+alph  = ALPHA;
+for (neurode=0; neurode<MID_SIZE; neurode++)
+{
+	for (weight=0; weight<IN_SIZE; weight++)
+	{
+		/* first the basic delta rule */
+		delta = learn * mid_error[neurode] * in_pats[patt][weight];
+
+		/* with the momentum term */
+		delta += alph * mid_wt_change[neurode][weight];
+		mid_wts[neurode][weight] += delta;
+
+		/* keep track of this pass's cum wt changes for next pass's momentum */
+		mid_wt_cum_change[neurode][weight] += delta;
+	}
+}
+return;
+}
+
+/*******************
+** do_back_pass() **
+********************
+** Backward pass for pattern patt: output error, middle error, then both weight updates.
+**/
+void  do_back_pass(int patt)    /* NOTE(review): not static, unlike the neighbouring helpers */
+{
+
+do_out_error(patt);
+do_mid_error();
+adjust_out_wts();
+adjust_mid_wts(patt);
+
+return;
+}
+
+
+/**********************
+** move_wt_changes() **
+***********************
+** Copy mid_wt_cum_change -> mid_wt_change and out_wt_cum_change ->
+** out_wt_change so the momentum term uses the totals accumulated in the
+** last pass, then zero the *_cum_change accumulators.
+**/
+static void move_wt_changes()
+{
+int i,j;
+
+for (i = 0; i<MID_SIZE; i++)
+	for (j = 0; j<IN_SIZE; j++)
+	{
+		mid_wt_change[i][j] = mid_wt_cum_change[i][j];
+		/*
+		** Zero it out for next pass accumulation.
+		*/
+		mid_wt_cum_change[i][j] = 0.0;
+	}
+
+for (i = 0; i<OUT_SIZE; i++)
+	for (j=0; j<MID_SIZE; j++)
+	{
+		out_wt_change[i][j] = out_wt_cum_change[i][j];
+		out_wt_cum_change[i][j] = 0.0;
+	}
+
+return;
+}
+
+/**********************
+** check_out_error() **
+***********************
+** Decide whether training is finished after a pass over the data.
+** Calls worst_pass_error() to refresh worst_error/average_error, then
+** returns F if worst_error >= STOP, ERR if any tot_out_error[i] >= 16.0
+** (ERR overrides F), and T otherwise.  NOTE(review): this comment used
+** to describe the threshold as MARGIN*OUT_SIZE; the code tests STOP.
+**/
+static int check_out_error()
+{
+int result,i,error;
+
+result  = T;
+error   = F;
+worst_pass_error();     /* identify the worst error in this pass */
+
+/*
+#ifdef DEBUG
+printf("\n Iteration # %d",iteration_count);
+#endif
+*/
+for (i=0; i<numpats; i++)
+{
+/*      printf("\n Error pattern %d:   Worst: %8.3f; Average: %8.3f",
+	  i+1,tot_out_error[i], avg_out_error[i]);
+	fprintf(outfile,
+	 "\n Error pattern %d:   Worst: %8.3f; Average: %8.3f",
+	 i+1,tot_out_error[i]);
+*/
+
+	if (worst_error >= STOP) result = F;    /* same test on every iteration; does not depend on i */
+	if (tot_out_error[i] >= 16.0) error = T;        /* NOTE(review): 16.0 presumably flags a diverged net -- confirm */
+}
+
+if (error == T) result = ERR;
+
+
+#ifdef DEBUG
+/* printf("\n Error this pass thru data:   Worst: %8.3f; Average: %8.3f",
+ worst_error,average_error);
+*/
+/* fprintf(outfile,
+ "\n Error this pass thru data:   Worst: %8.3f; Average: %8.3f",
+  worst_error, average_error); */
+#endif
+
+return(result);
+}
+
+
+/*******************
+** zero_changes() **
+********************
+** Zero mid_wt_change, mid_wt_cum_change, out_wt_change and out_wt_cum_change
+**/
+static void zero_changes()
+{
+int i,j;
+
+for (i = 0; i<MID_SIZE; i++)
+{
+	for (j=0; j<IN_SIZE; j++)
+	{
+		mid_wt_change[i][j] = 0.0;
+		mid_wt_cum_change[i][j] = 0.0;
+	}
+}
+
+for (i = 0; i< OUT_SIZE; i++)
+{
+	for (j=0; j<MID_SIZE; j++)
+	{
+		out_wt_change[i][j] = 0.0;
+		out_wt_cum_change[i][j] = 0.0;
+	}
+}
+return;
+}
+
+
+/********************
+** randomize_wts() **
+*********************
+** Initialize the weights in the middle and output layers to random
+** values built from abs_randwc(100000) (rand() is not used -- RG changed
+** how the random numbers are created).  Assuming abs_randwc(n) yields
+** 0..n-1, middle weights land in -0.25..+0.25.  NOTE(review): the output
+** layer divides by 10000.0, not 100000.0, so its weights span roughly
+** -0.25..+4.75; confirm intent first, a fix alters the trained workload.
+**/
+static void randomize_wts()
+{
+int neurode,i;
+double value;
+
+/*
+** Following not used in benchmark version -- RG
+**
+**        printf("\n Please enter a random number seed (1..32767):  ");
+**        scanf("%d", &i);
+**        srand(i);
+*/
+
+for (neurode = 0; neurode<MID_SIZE; neurode++)
+{
+	for(i=0; i<IN_SIZE; i++)
+	{
+	        /* value=(double)abs_randwc(100000L); */
+		value=(double)abs_randwc((int32)100000);
+		value=value/(double)100000.0 - (double) 0.5;
+		mid_wts[neurode][i] = value/2;
+	}
+}
+for (neurode=0; neurode<OUT_SIZE; neurode++)
+{
+	for(i=0; i<MID_SIZE; i++)
+	{
+	        /* value=(double)abs_randwc(100000L); */
+		value=(double)abs_randwc((int32)100000);
+		value=value/(double)10000.0 - (double) 0.5;
+		out_wts[neurode][i] = value/2;
+	}
+}
+
+return;
+}
+
+
+/*********************
+** read_data_file() **
+**********************
+** Read in the input data file and store the patterns in
+** in_pats and out_pats.
+** The format for the data file is as follows:
+**
+** line#   data expected
+** -----   ------------------------------
+** 1               In-X-size,in-y-size,out-size
+** 2               number of patterns in file
+** 3               1st X row of 1st input pattern
+** 4..             following rows of 1st input pattern
+**                 in-x+2  y-out pattern
+**                                 1st X row of 2nd pattern
+**                 etc.
+**
+** Each row of data is separated by commas or spaces.
+** The data is expected to be ascii text corresponding to
+** either a +1 or a 0.
+**
+** Sample input for a 1-pattern file (The comments to the
+** right may NOT be in the file unless more sophisticated
+** parsing of the input is done.):
+**
+** 5,7,8                      input is 5x7 grid, output is 8 bits
+** 1                          one pattern in file
+** 0,1,1,1,0                  beginning of pattern for "O"
+** 1,0,0,0,1
+** 1,0,0,0,1
+** 1,0,0,0,1
+** 1,0,0,0,1
+** 1,0,0,0,0
+** 0,1,1,1,0
+** 0,1,0,0,1,1,1,1            ASCII code for "O" -- 0100 1111
+**
+** Clearly, this simple scheme can be expanded or enhanced
+** any way you like.
+**
+** Returns -1 if any file error occurred, otherwise 0.
+**/
+static int read_data_file()
+{
+FILE *infile;
+int xinsize,yinsize,youtsize;
+int patt, element, i, row;
+int vals_read;
+int val[8];     /* one scanned line: 5 input values or 8 output values */
+
+infile = fopen(inpath, "r");
+if (infile == NULL)
+{
+	printf("\n CPU:NNET--error in opening file!");
+	return -1 ;
+}
+vals_read =fscanf(infile,"%d  %d  %d",&xinsize,&yinsize,&youtsize);
+if (vals_read != 3)
+{
+	printf("\n CPU:NNET -- Should read 3 items in line one; did read %d",vals_read);
+	goto fail;
+}
+/*
+** Rows are parsed five values at a time into fixed-size arrays, so a
+** header describing a different row width or more rows would overrun in_pats.
+*/
+if (xinsize != IN_X_SIZE || yinsize > IN_Y_SIZE)
+{
+	printf("\n CPU:NNET -- unsupported input size in data file!");
+	goto fail;
+}
+vals_read=fscanf(infile,"%d",&numpats);
+if (vals_read !=1)
+{
+	printf("\n CPU:NNET -- Should read 1 item in line 2; did read %d",vals_read);
+	goto fail;
+}
+if (numpats > MAXPATS)
+	numpats = MAXPATS;
+
+for (patt=0; patt<numpats; patt++)
+{
+	for (row = 0; row<yinsize; row++)
+	{
+		vals_read = fscanf(infile,"%d  %d  %d  %d  %d",
+			&val[0], &val[1], &val[2], &val[3], &val[4]);
+		if (vals_read != 5)
+		{
+			printf ("\n CPU:NNET -- failure in reading input!");
+			goto fail;
+		}
+		element=row*xinsize;
+		for (i=0; i<5; i++)
+			in_pats[patt][element+i] = (double) val[i];
+	}
+	for (i=0;i<IN_SIZE; i++)
+	{       /* clamp the inputs into the range 0.1..0.9 */
+		if (in_pats[patt][i] >= 0.9)
+			in_pats[patt][i] = 0.9;
+		if (in_pats[patt][i] <= 0.1)
+			in_pats[patt][i] = 0.1;
+	}
+	vals_read = fscanf(infile,"%d  %d  %d  %d  %d  %d  %d  %d",
+		&val[0], &val[1], &val[2], &val[3], &val[4], &val[5], &val[6], &val[7]);
+	if (vals_read != 8)
+	{       /* this read used to go unchecked, storing whatever was in the variables */
+		printf ("\n CPU:NNET -- failure in reading input!");
+		goto fail;
+	}
+	for (element=0; element<8; element++)
+		out_pats[patt][element] = (double) val[element];
+}
+
+fclose(infile);
+return(0);
+
+fail:           /* every error after a successful fopen() lands here */
+fclose(infile);
+return -1;
+}
+
+/*********************
+** initialize_net() **
+**********************
+** Do all the initialization stuff before beginning
+*/
+/*
+static int initialize_net()
+{
+int err_code;
+
+randomize_wts();
+zero_changes();
+err_code = read_data_file();
+iteration_count = 1;
+return(err_code);
+}
+*/
+
+/**********************
+** display_mid_wts() **
+***********************
+** Display the weights on the middle layer neurodes
+** NOTE: This routine is not used in the benchmark
+**  test -- RG
+**/
+/* static void display_mid_wts()
+{
+int             neurode, weight, row, col;
+
+fprintf(outfile,"\n Weights of Middle Layer neurodes:");
+
+for (neurode=0; neurode<MID_SIZE; neurode++)
+{
+	fprintf(outfile,"\n  Mid Neurode # %d",neurode);
+	for (row=0; row<IN_Y_SIZE; row++)
+	{
+		fprintf(outfile,"\n ");
+		for (col=0; col<IN_X_SIZE; col++)
+		{
+			weight = IN_X_SIZE * row + col;
+			fprintf(outfile," %8.3f ", mid_wts[neurode][weight]);
+		}
+	}
+}
+return;
+}
+*/
+/**********************
+** display_out_wts() **
+***********************
+** Display the weights on the output layer neurodes
+** NOTE: This code is not used in the benchmark
+**  test -- RG
+*/
+/* void  display_out_wts()
+{
+int             neurode, weight;
+
+	fprintf(outfile,"\n Weights of Output Layer neurodes:");
+
+	for (neurode=0; neurode<OUT_SIZE; neurode++)
+	{
+		fprintf(outfile,"\n  Out Neurode # %d \n",neurode);
+		for (weight=0; weight<MID_SIZE; weight++)
+		{
+			fprintf(outfile," %8.3f ", out_wts[neurode][weight]);
+		}
+	}
+	return;
+}
+*/
+
+/***********************
+**  LU DECOMPOSITION  **
+** (Linear Equations) **
+************************
+** These routines come from "Numerical Recipes in Pascal".
+** Note that, as in the assignment algorithm, though we
+** separately define LUARRAYROWS and LUARRAYCOLS, the two
+** must be the same value (this routine depends on a square
+** matrix).
+*/
+
+/*********
+** DoLU **
+**********
+** Perform the LU decomposition benchmark; stores global_lustruct.iterspersec.
+*/
+void DoLU(void)
+{
+LUStruct *loclustruct;  /* Local pointer to global data */
+char *errorcontext;
+int systemerror;
+fardouble *a;
+fardouble *b;
+fardouble *abase;
+fardouble *bbase;
+LUdblptr ptra;
+int n;
+int i;
+ulong accumtime;
+double iterations;
+
+/*
+** Link to global data
+*/
+loclustruct=&global_lustruct;
+
+/*
+** Set error context.
+*/
+errorcontext="FPU:LU";
+
+/*
+** Our first step is to build a "solvable" problem.  This
+** will become the "seed" set that all others will be
+** derived from (i.e., we'll simply copy these arrays into the
+** others).  NOTE(review): systemerror is not checked for a, b, LUtempvv.
+*/
+a=(fardouble *)AllocateMemory(sizeof(double) * LUARRAYCOLS * LUARRAYROWS,
+		&systemerror);
+b=(fardouble *)AllocateMemory(sizeof(double) * LUARRAYROWS,
+		&systemerror);
+n=LUARRAYROWS;
+
+/*
+** We need to allocate a temp vector that is used by the LU
+** algorithm.  This removes the allocation routine from the
+** timing.
+*/
+LUtempvv=(fardouble *)AllocateMemory(sizeof(double)*LUARRAYROWS,
+	&systemerror);
+
+/*
+** Build a problem to be solved.
+*/
+ptra.ptrs.p=a;                  /* Gotta coerce linear array to 2D array */
+build_problem(*ptra.ptrs.ap,n,b);
+
+/*
+** Now that we have a problem built, see if we need to do
+** auto-adjust.  If so, repeatedly call the DoLUIteration routine,
+** increasing the number of solutions per iteration as you go.
+*/
+if(loclustruct->adjust==0)
+{
+	loclustruct->numarrays=0;
+	for(i=1;i<=MAXLUARRAYS;i++)
+	{
+		abase=(fardouble *)AllocateMemory(sizeof(double) *
+			LUARRAYCOLS*LUARRAYROWS*(i+1),&systemerror);
+		if(systemerror)
+		{       ReportError(errorcontext,systemerror);
+			LUFreeMem(a,b,(fardouble *)NULL,(fardouble *)NULL);
+			ErrorExit();
+		}
+		bbase=(fardouble *)AllocateMemory(sizeof(double) *
+			LUARRAYROWS*(i+1),&systemerror);
+		if(systemerror)
+		{       ReportError(errorcontext,systemerror);
+			LUFreeMem(a,b,abase,(fardouble *)NULL);
+			ErrorExit();
+		}
+		if(DoLUIteration(a,b,abase,bbase,i)>global_min_ticks)
+		{       loclustruct->numarrays=i;
+			break;
+		}
+		/*
+		** Not enough arrays...free them all and try again
+		*/
+		FreeMemory((farvoid *)abase,&systemerror);
+		FreeMemory((farvoid *)bbase,&systemerror);
+	}
+	/*
+	** Were we able to do it?  If not, abase/bbase were already freed above.
+	*/
+	if(loclustruct->numarrays==0)
+	{       printf("FPU:LU -- Array limit reached\n");
+		LUFreeMem(a,b,(fardouble *)NULL,(fardouble *)NULL);     /* avoid a double free */
+		ErrorExit();
+	}
+}
+else
+{       /*
+	** Don't need to adjust -- just allocate the proper
+	** number of arrays and proceed.
+	*/
+	abase=(fardouble *)AllocateMemory(sizeof(double) *
+		LUARRAYCOLS*LUARRAYROWS*loclustruct->numarrays,
+		&systemerror);
+	if(systemerror)
+	{       ReportError(errorcontext,systemerror);
+		LUFreeMem(a,b,(fardouble *)NULL,(fardouble *)NULL);
+		ErrorExit();
+	}
+	bbase=(fardouble *)AllocateMemory(sizeof(double) *
+		LUARRAYROWS*loclustruct->numarrays,&systemerror);
+	if(systemerror)
+	{
+		ReportError(errorcontext,systemerror);
+		LUFreeMem(a,b,abase,(fardouble *)NULL);
+		ErrorExit();
+	}
+}
+/*
+** All's well if we get here.  Do the test.
+*/
+accumtime=0L;
+iterations=(double)0.0;
+
+do {
+	accumtime+=DoLUIteration(a,b,abase,bbase,
+		loclustruct->numarrays);
+	iterations+=(double)loclustruct->numarrays;
+} while(TicksToSecs(accumtime)<loclustruct->request_secs);
+
+/*
+** Clean up, calculate results, and go home.  Be sure to
+** show that we don't have to rerun adjustment code.
+*/
+loclustruct->iterspersec=iterations / TicksToFracSecs(accumtime);
+
+if(loclustruct->adjust==0)
+	loclustruct->adjust=1;
+
+LUFreeMem(a,b,abase,bbase);
+return;
+}
+
+/**************
+** LUFreeMem **
+***************
+** Free a, b and the global LUtempvv; free abase/bbase only when non-NULL.
+*/
+static void LUFreeMem(fardouble *a, fardouble *b,
+			fardouble *abase,fardouble *bbase)
+{
+int systemerror;        /* receives FreeMemory's error code; never inspected */
+
+FreeMemory((farvoid *)a,&systemerror);
+FreeMemory((farvoid *)b,&systemerror);
+FreeMemory((farvoid *)LUtempvv,&systemerror);
+
+if(abase!=(fardouble *)NULL) FreeMemory((farvoid *)abase,&systemerror);
+if(bbase!=(fardouble *)NULL) FreeMemory((farvoid *)bbase,&systemerror);
+return;
+}
+
+/******************
+** DoLUIteration **
+*******************
+** Copy the seed system (a, b) into each of the numarrays slots of
+** abase/bbase (before the stopwatch starts), then time lusolve() over
+** all slots.  Returns the value StopStopwatch() reports.
+*/
+static ulong DoLUIteration(fardouble *a,fardouble *b,
+		fardouble *abase, fardouble *bbase,
+		ulong numarrays)
+{
+fardouble *locabase;
+fardouble *locbbase;
+LUdblptr ptra;  /* For converting ptr to 2D array */
+ulong elapsed;
+ulong j,i;              /* Indexes */
+
+
+/*
+** Move the seed arrays (a & b) into the destination
+** arrays;
+*/
+for(j=0;j<numarrays;j++)
+{       locabase=abase+j*LUARRAYROWS*LUARRAYCOLS;
+	locbbase=bbase+j*LUARRAYROWS;
+	for(i=0;i<LUARRAYROWS*LUARRAYCOLS;i++)
+		*(locabase+i)=*(a+i);
+	for(i=0;i<LUARRAYROWS;i++)
+		*(locbbase+i)=*(b+i);
+}
+
+/*
+** Do test...begin timing.
+*/
+elapsed=StartStopwatch();
+for(i=0;i<numarrays;i++)
+{       locabase=abase+i*LUARRAYROWS*LUARRAYCOLS;
+	locbbase=bbase+i*LUARRAYROWS;
+	ptra.ptrs.p=locabase;
+	lusolve(*ptra.ptrs.ap,LUARRAYROWS,locbbase);    /* singular/ok return value is ignored */
+}
+
+return(StopStopwatch(elapsed));
+}
+
+/******************
+** build_problem **
+*******************
+** Constructs a solvable set of linear equations.  It starts from a
+** diagonal matrix whose diagonal entries are abs_randwc(1000)+1 (so
+** not a true identity matrix) and a random right-hand side b[], then
+** "scrambles" them 8*n times by adding one randomly-selected row to
+** another (or subtracting it, depending on the order of the two rows).
+** The row-scaling step of the original design is commented out below.
+** The generator is first reset with randnum(13).
+*/
+static void build_problem(double a[][LUARRAYCOLS],
+		int n,
+		double b[LUARRAYROWS])
+{
+long i,j,k,k1;  /* Indexes */
+double rcon;     /* Random constant */
+
+/*
+** Reset random number generator
+*/
+/* randnum(13L); */
+randnum((int32)13);
+
+/*
+** Build an identity matrix.
+** We'll also use this as a chance to load the solution
+** vector.
+*/
+for(i=0;i<n;i++)
+{       /* b[i]=(double)(abs_randwc(100L)+1L); */
+	b[i]=(double)(abs_randwc((int32)100)+(int32)1);
+	for(j=0;j<n;j++)
+		if(i==j)
+		        /* a[i][j]=(double)(abs_randwc(1000L)+1L); */
+			a[i][j]=(double)(abs_randwc((int32)1000)+(int32)1);
+		else
+			a[i][j]=(double)0.0;
+}
+
+#ifdef DEBUG
+printf("Problem:\n");
+for(i=0;i<n;i++)
+{
+/*
+	for(j=0;j<n;j++)
+		printf("%6.2f ",a[i][j]);
+*/
+	printf("%.0f/%.0f=%.2f\t",b[i],a[i][i],b[i]/a[i][i]);
+/*
+        printf("\n");
+*/
+}
+#endif
+
+/*
+** Scramble.  Do this 8n times.  See comment above for
+** a description of the scrambling process.
+*/
+
+for(i=0;i<8*n;i++)
+{
+	/*
+	** Pick a row and a random constant.  Multiply
+	** all elements in the row by the constant.
+	*/
+ /*       k=abs_randwc((long)n);
+	rcon=(double)(abs_randwc(20L)+1L);
+	for(j=0;j<n;j++)
+		a[k][j]=a[k][j]*rcon;
+	b[k]=b[k]*rcon;
+*/
+	/*
+	** Pick two random rows and add second to
+	** first.  Note that we also occasionally multiply
+	** by minus 1 so that we get a subtraction operation.
+	*/
+        /* k=abs_randwc((long)n); */
+        /* k1=abs_randwc((long)n); */
+	k=abs_randwc((int32)n);
+	k1=abs_randwc((int32)n);
+	if(k!=k1)
+	{
+		if(k<k1) rcon=(double)1.0;
+			else rcon=(double)-1.0;
+		for(j=0;j<n;j++)
+			a[k][j]+=a[k1][j]*rcon;;
+		b[k]+=b[k1]*rcon;
+	}
+}
+
+return;
+}
+
+
+/***********
+** ludcmp **
+************
+** From the procedure of the same name in "Numerical Recipes in Pascal",
+** by Press, Flannery, Teukolsky, and Vetterling.
+** Given an nxn matrix a[], this routine replaces it by the LU
+** decomposition of a rowwise permutation of itself.  a[] and n
+** are input.  a[] is output, modified as follows:
+**   --                       --
+**  |  b(1,1) b(1,2) b(1,3)...  |
+**  |  a(2,1) b(2,2) b(2,3)...  |
+**  |  a(3,1) a(3,2) b(3,3)...  |
+**  |  a(4,1) a(4,2) a(4,3)...  |
+**  |  ...                      |
+**   --                        --
+**
+** Where the b(i,j) elements form the upper triangular matrix of the
+** LU decomposition, and the a(i,j) elements form the lower triangular
+** elements.  The LU decomposition is calculated so that we don't
+** need to store the a(i,i) elements (which would have laid along the
+** diagonal and would have all been 1).
+**
+** indx[] is an output vector that records the row permutation
+** effected by the partial pivoting; d is output as +/-1 depending
+** on whether the number of row interchanges was even or odd,
+** respectively.  The global LUtempvv[] holds the per-row scale factors.
+** Returns 0 if some row of a[] is all zeros (singular), else returns 1.
+*/
+static int ludcmp(double a[][LUARRAYCOLS],
+		int n,
+		int indx[],
+		int *d)
+{
+
+double big;     /* Holds largest element value */
+double sum;     /* Running value of the element being reduced */
+double dum;     /* Holds dummy value */
+int i,j,k;      /* Indexes */
+int imax=0;     /* Holds max index value */
+double tiny;    /* A really small number */
+
+tiny=(double)1.0e-20;
+
+*d=1;           /* No interchanges yet */
+
+for(i=0;i<n;i++)
+{       big=(double)0.0;
+	for(j=0;j<n;j++)
+		if((double)fabs(a[i][j]) > big)
+			big=fabs(a[i][j]);
+	/* Bail out on singular matrix */
+	if(big==(double)0.0) return(0);
+	LUtempvv[i]=1.0/big;    /* Scale factor for row i */
+}
+
+/*
+** Crout's algorithm...loop over columns.
+*/
+for(j=0;j<n;j++)
+{       if(j!=0)
+		for(i=0;i<j;i++)
+		{       sum=a[i][j];
+			if(i!=0)
+				for(k=0;k<i;k++)
+					sum-=(a[i][k]*a[k][j]);
+			a[i][j]=sum;
+		}
+	big=(double)0.0;
+	for(i=j;i<n;i++)
+	{       sum=a[i][j];
+		if(j!=0)
+			for(k=0;k<j;k++)
+				sum-=a[i][k]*a[k][j];
+		a[i][j]=sum;
+		dum=LUtempvv[i]*fabs(sum);
+		if(dum>=big)
+		{       big=dum;
+			imax=i;
+		}
+	}
+	if(j!=imax)             /* Interchange rows if necessary */
+	{       for(k=0;k<n;k++)
+		{       dum=a[imax][k];
+			a[imax][k]=a[j][k];
+			a[j][k]=dum;
+		}
+		*d=-*d;         /* Change parity of d */
+		dum=LUtempvv[imax];
+		LUtempvv[imax]=LUtempvv[j]; /* Don't forget scale factor */
+		LUtempvv[j]=dum;
+	}
+	indx[j]=imax;
+	/*
+	** If the pivot element is zero, the matrix is singular
+	** (at least as far as the precision of the machine
+	** is concerned.)  We'll take the original author's
+	** recommendation and replace 0.0 with "tiny".
+	*/
+	if(a[j][j]==(double)0.0)
+		a[j][j]=tiny;
+
+	if(j!=(n-1))
+	{       dum=1.0/a[j][j];
+		for(i=j+1;i<n;i++)
+			a[i][j]=a[i][j]*dum;
+	}
+}
+
+return(1);
+}
+
+/***********
+** lubksb **
+************
+** Also from "Numerical Recipes in Pascal".
+** This routine solves the set of n linear equations A X = B.
+** Here, a[][] is input, not as the matrix A, but as its
+** LU decomposition, created by the routine ludcmp().
+** Indx[] is input as the permutation vector returned by ludcmp().
+**  b[] is input as the right-hand side and returns the
+** solution vector X.
+** a[], n, and indx are not modified by this routine and
+** can be left in place for different values of b[].
+** This routine takes into account the possibility that b will
+** begin with many zero elements, so it is efficient for use in
+** matrix inversion.
+*/
+static void lubksb( double a[][LUARRAYCOLS],
+		int n,
+		int indx[LUARRAYROWS],
+		double b[LUARRAYROWS])
+{
+
+int i,j;        /* Indexes */
+int ip;         /* "pointer" into indx */
+int ii;
+double sum;
+
+/*
+** ii starts at -1; once it is set to a non-negative value it is
+** the index of the first nonvanishing element of b[].
+** We now do the forward substitution. The only wrinkle
+** is to unscramble the permutation as we go.
+*/
+ii=-1;
+for(i=0;i<n;i++)
+{       ip=indx[i];
+	sum=b[ip];
+	b[ip]=b[i];
+	if(ii!=-1)
+		for(j=ii;j<i;j++)
+			sum=sum-a[i][j]*b[j];
+	else
+		/*
+		** If a nonzero element is encountered, we have
+		** to do the sums in the loop above.
+		*/
+		if(sum!=(double)0.0)
+			ii=i;
+	b[i]=sum;
+}
+/*
+** Do backsubstitution
+*/
+for(i=(n-1);i>=0;i--)
+{
+	sum=b[i];
+	if(i!=(n-1))
+		for(j=(i+1);j<n;j++)
+			sum=sum-a[i][j]*b[j];
+	b[i]=sum/a[i][i];
+}
+return;
+}
+
+/************
+** lusolve **
+*************
+** Solve A x = b with ludcmp() then lubksb(); on success b[] holds x.
+** Original matrix A will be destroyed (ludcmp() decomposes it in place).
+** Returns 0 if ludcmp() reports the matrix as singular, 1 otherwise.
+*/
+static int lusolve(double a[][LUARRAYCOLS],
+		int n,
+		double b[LUARRAYROWS])
+{
+int indx[LUARRAYROWS];  /* row permutation produced by ludcmp() */
+int d;                  /* interchange parity from ludcmp(); not used further */
+#ifdef DEBUG
+int i,j;
+#endif
+
+if(ludcmp(a,n,indx,&d)==0) return(0);
+
+/* Matrix not singular -- proceed */
+lubksb(a,n,indx,b);
+
+#ifdef DEBUG
+printf("Solution:\n");
+for(i=0;i<n;i++)
+{
+  for(j=0;j<n;j++){
+  /*
+    printf("%6.2f ",a[i][j]);
+  */
+  }
+  printf("%6.2f\t",b[i]);
+  /*
+    printf("\n");
+  */
+}
+printf("\n");
+#endif
+
+return(1);
+}
diff --git a/nbench1.h b/nbench1.h
new file mode 100644
index 0000000..13a5907
--- /dev/null
+++ b/nbench1.h
@@ -0,0 +1,428 @@
+/*
+** nbench1.h
+** Header for nbench1.c
+** BYTEmark (tm)
+** BYTE's Native Mode Benchmarks
+** Rick Grehan, BYTE Magazine
+**
+** Creation:
+** Revision: 3/95;10/95
+**
+** DISCLAIMER
+** The source, executable, and documentation files that comprise
+** the BYTEmark benchmarks are made available on an "as is" basis.
+** This means that we at BYTE Magazine have made every reasonable
+** effort to verify that the there are no errors in the source and
+** executable code.  We cannot, however, guarantee that the programs
+** are error-free.  Consequently, McGraw-HIll and BYTE Magazine make
+** no claims in regard to the fitness of the source code, executable
+** code, and documentation of the BYTEmark.
+**  Furthermore, BYTE Magazine, McGraw-Hill, and all employees
+** of McGraw-Hill cannot be held responsible for any damages resulting
+** from the use of this code or the results obtained from using
+** this code.
+*/
+
+/*
+** DEFINES
+*/
+/* #define DEBUG */
+
+/*
+** EXTERNALS
+*/
+extern ulong global_min_ticks;
+
+extern SortStruct global_numsortstruct;
+extern SortStruct global_strsortstruct;
+extern BitOpStruct global_bitopstruct;
+extern EmFloatStruct global_emfloatstruct;
+extern FourierStruct global_fourierstruct;
+extern AssignStruct global_assignstruct;
+extern IDEAStruct global_ideastruct;
+extern HuffStruct global_huffstruct;
+extern NNetStruct global_nnetstruct;
+extern LUStruct global_lustruct;
+
+/* External PROTOTYPES */
+/*extern unsigned long abs_randwc(unsigned long num);*/     /* From MISC */
+/*extern long randnum(long lngval);*/
+extern int32 randwc(int32 num);
+extern u32 abs_randwc(u32 num);
+extern int32 randnum(int32 lngval);
+
+extern farvoid *AllocateMemory(unsigned long nbytes,    /* From SYSSPEC */
+	int *errorcode);
+extern void FreeMemory(farvoid *mempointer,
+	int *errorcode);
+extern void MoveMemory(farvoid *destination,
+		farvoid *source, unsigned long nbytes);
+extern void ReportError(char *context, int errorcode);
+extern void ErrorExit();
+extern unsigned long StartStopwatch();
+extern unsigned long StopStopwatch(unsigned long startticks);
+extern unsigned long TicksToSecs(unsigned long tickamount);
+extern double TicksToFracSecs(unsigned long tickamount);
+
+/*****************
+** NUMERIC SORT **
+*****************/
+
+/*
+** PROTOTYPES
+*/
+void DoNumSort(void);
+static ulong DoNumSortIteration(farlong *arraybase,
+		ulong arraysize,
+		uint numarrays);
+static void LoadNumArrayWithRand(farlong *array,
+		ulong arraysize,
+		uint numarrays);
+static void NumHeapSort(farlong *array,
+		ulong bottom,
+		ulong top);
+static void NumSift(farlong *array,
+		ulong i,
+		ulong j);
+
+
+/****************
+** STRING SORT **
+*****************
+*/
+
+
+/*
+** PROTOTYPES
+*/
+void DoStringSort(void);
+static ulong DoStringSortIteration(faruchar *arraybase,
+		uint numarrays,
+		ulong arraysize);
+static farulong *LoadStringArray(faruchar *strarray,
+		uint numarrays,
+		ulong *strings,
+		ulong arraysize);
+static void stradjust(farulong *optrarray,
+		faruchar *strarray,
+		ulong nstrings,
+		ulong i,
+		uchar l);
+static void StrHeapSort(farulong *optrarray,
+		faruchar *strarray,
+		ulong numstrings,
+		ulong bottom,
+		ulong top);
+static int str_is_less(farulong *optrarray,
+		faruchar *strarray,
+		ulong numstrings,
+		ulong a,
+		ulong b);
+static void strsift(farulong *optrarray,
+		faruchar *strarray,
+		ulong numstrings,
+		ulong i,
+		ulong j);
+
+/************************
+** BITFIELD OPERATIONS **
+*************************
+*/
+
+/*
+** PROTOTYPES
+*/
+void DoBitops(void);
+static ulong DoBitfieldIteration(farulong *bitarraybase,
+		farulong *bitoparraybase,
+		long bitoparraysize,
+		ulong *nbitops);
+static void ToggleBitRun(farulong *bitmap,
+		ulong bit_addr,
+		ulong nbits,
+		uint val);
+static void FlipBitRun(farulong *bitmap,
+		ulong bit_addr,
+		ulong nbits);
+
+/****************************
+** EMULATED FLOATING POINT **
+****************************/
+typedef struct
+{
+	u8 type;        /* Indicates NORMAL, SUBNORMAL, etc. */
+	u8 sign;        /* Mantissa sign */
+	short exp;      /* Signed exponent...no bias */
+	u16 mantissa[INTERNAL_FPF_PRECISION];   /* Mantissa, stored as INTERNAL_FPF_PRECISION u16 words */
+} InternalFPF;
+
+/*
+** PROTOTYPES
+*/
+void DoEmFloat(void);
+
+/*
+** EXTERNALS
+*/
+extern void SetupCPUEmFloatArrays(InternalFPF *abase,
+	InternalFPF *bbase, InternalFPF *cbase,
+	ulong arraysize);
+extern ulong DoEmFloatIteration(InternalFPF *abase,
+	InternalFPF *bbase, InternalFPF *cbase,
+	ulong arraysize, ulong loops);
+
+/*************************
+** FOURIER COEFFICIENTS **
+*************************/
+
+/*
+** PROTOTYPES
+*/
+void DoFourier(void);
+static ulong DoFPUTransIteration(fardouble *abase,
+		fardouble *bbase,
+		ulong arraysize);
+static double TrapezoidIntegrate(double x0,
+		double x1,
+		int nsteps,
+		double omegan,
+		int select);
+static double thefunction(double x,
+		double omegan,
+		int select);
+
+/*************************
+** ASSIGNMENT ALGORITHM **
+*************************/
+
+/*
+** DEFINES
+*/
+
+#define ASSIGNROWS 101L
+#define ASSIGNCOLS 101L
+
+/*
+** TYPEDEFS
+*/
+typedef struct {        /* Lets one address be used as a flat array or as a 2D array */
+	union {
+		long *p;        /* flat view */
+		long (*ap)[ASSIGNROWS][ASSIGNCOLS];     /* same pointer as an ASSIGNROWS x ASSIGNCOLS array */
+	} ptrs;
+} longptr;
+
+/*
+** PROTOTYPES
+*/
+void DoAssign(void);
+static ulong DoAssignIteration(farlong *arraybase,
+		ulong numarrays);
+static void LoadAssignArrayWithRand(farlong *arraybase,
+		ulong numarrays);
+static void LoadAssign(farlong arraybase[][ASSIGNCOLS]);
+static void CopyToAssign(farlong arrayfrom[][ASSIGNCOLS],
+		long arrayto[][ASSIGNCOLS]);
+static void Assignment(farlong arraybase[][ASSIGNCOLS]);
+static void calc_minimum_costs(long tableau[][ASSIGNCOLS]);
+static int first_assignments(long tableau[][ASSIGNCOLS],
+		short assignedtableau[][ASSIGNCOLS]);
+static void second_assignments(long tableau[][ASSIGNCOLS],
+		short assignedtableau[][ASSIGNCOLS]);
+
+/********************
+** IDEA ENCRYPTION **
+********************/
+
+/*
+** DEFINES
+*/
+#define IDEAKEYSIZE 16
+#define IDEABLOCKSIZE 8
+#define ROUNDS 8
+#define KEYLEN (6*ROUNDS+4)
+
+/*
+** MACROS
+*/
+#define low16(x) ((x) & 0x0FFFF)	/* keep only the low 16 bits of x */
+#define MUL(x,y) (x=mul(low16(x),y))	/* x = mul(low16(x), y); x must be an lvalue and appears twice */
+
+
+typedef u16 IDEAkey[KEYLEN];
+
+/*
+** PROTOTYPES
+*/
+void DoIDEA(void);
+static ulong DoIDEAIteration(faruchar *plain1,
+	faruchar *crypt1, faruchar *plain2,
+	ulong arraysize, ulong nloops,
+	IDEAkey Z, IDEAkey DK);
+static u16 mul(register u16 a, register u16 b);
+static u16 inv(u16 x);
+static void en_key_idea(u16 userkey[8], IDEAkey Z);
+static void de_key_idea(IDEAkey Z, IDEAkey DK);
+static void cipher_idea(u16 in[4], u16 out[4], IDEAkey Z);
+
+/************************
+** HUFFMAN COMPRESSION **
+************************/
+
+/*
+** DEFINES
+*/
+#define EXCLUDED 32000L          /* Big positive value */
+
+/*
+** TYPEDEFS
+*/
+typedef struct {
+	uchar c;                /* Byte value */
+	float freq;             /* Frequency */
+	int parent;             /* Parent node */
+	int left;               /* Left pointer = 0 */
+	int right;              /* Right pointer = 1 */
+} huff_node;
+
+/*
+** GLOBALS
+*/
+static huff_node *hufftree;             /* The huffman tree */
+static long plaintextlen;               /* Length of plaintext */
+
+/*
+** PROTOTYPES
+*/
+void DoHuffman(void);	/* no arguments, same as the other Do*() entry points */
+static void create_text_line(farchar *dt,long nchars);
+static void create_text_block(farchar *tb, ulong tblen,
+		ushort maxlinlen);
+static ulong DoHuffIteration(farchar *plaintext,
+	farchar *comparray, farchar *decomparray,
+	ulong arraysize, ulong nloops, huff_node *hufftree);
+static void SetCompBit(u8 *comparray, u32 bitoffset, char bitchar);
+static int GetCompBit(u8 *comparray, u32 bitoffset);
+
+/********************************
+** BACK PROPAGATION NEURAL NET **
+********************************/
+
+/*
+** DEFINES
+*/
+#define T 1                     /* TRUE */
+#define F 0                     /* FALSE */
+#define ERR -1
+#define MAXPATS 10              /* max number of patterns in data file */
+#define IN_X_SIZE 5             /* number of neurodes/row of input layer */
+#define IN_Y_SIZE 7             /* number of neurodes/col of input layer */
+#define IN_SIZE 35              /* equals IN_X_SIZE*IN_Y_SIZE */
+#define MID_SIZE 8              /* number of neurodes in middle layer */
+#define OUT_SIZE 8              /* number of neurodes in output layer */
+#define MARGIN 0.1              /* how near to 1,0 do we have to come to stop? */
+#define BETA 0.09               /* beta learning constant */
+#define ALPHA 0.09              /* momentum term constant */
+#define STOP 0.1                /* when worst_error less than STOP, training is done */
+
+/*
+** GLOBALS
+*/
+double  mid_wts[MID_SIZE][IN_SIZE];     /* middle layer weights */
+double  out_wts[OUT_SIZE][MID_SIZE];    /* output layer weights */
+double  mid_out[MID_SIZE];              /* middle layer output */
+double  out_out[OUT_SIZE];              /* output layer output */
+double  mid_error[MID_SIZE];            /* middle layer errors */
+double  out_error[OUT_SIZE];            /* output layer errors */
+double  mid_wt_change[MID_SIZE][IN_SIZE]; /* storage for last wt change */
+double  out_wt_change[OUT_SIZE][MID_SIZE]; /* storage for last wt change */
+double  in_pats[MAXPATS][IN_SIZE];      /* input patterns */
+double  out_pats[MAXPATS][OUT_SIZE];    /* desired output patterns */
+double  tot_out_error[MAXPATS];         /* measure of whether net is done */
+double  out_wt_cum_change[OUT_SIZE][MID_SIZE]; /* accumulated wt changes */
+double  mid_wt_cum_change[MID_SIZE][IN_SIZE];  /* accumulated wt changes */
+
+double  worst_error; /* worst error each pass through the data */
+double  average_error; /* average error each pass through the data */
+double  avg_out_error[MAXPATS]; /* average error each pattern */
+
+int iteration_count;    /* number of passes thru network so far */
+int numpats;            /* number of patterns in data file */
+int numpasses;          /* number of training passes through data file */
+int learned;            /* flag--if TRUE, network has learned all patterns */
+
+/*
+** The Neural Net test requires an input data file.
+** The name is specified here.
+*/
+char *inpath="NNET.DAT";
+
+/*
+** PROTOTYPES (no-argument routines are spelled (void), not ())
+*/
+void DoNNET(void);
+static ulong DoNNetIteration(ulong nloops);
+static void do_mid_forward(int patt);
+static void do_out_forward(void);
+void display_output(int patt);
+static void do_forward_pass(int patt);
+static void do_out_error(int patt);
+static void worst_pass_error(void);
+static void do_mid_error(void);
+static void adjust_out_wts(void);
+static void adjust_mid_wts(void);
+static void do_back_pass(int patt);
+static void move_wt_changes(void);
+static int check_out_error(void);
+static void zero_changes(void);
+static void randomize_wts(void);
+static int read_data_file(void);
+/* static int initialize_net(); */
+
+/***********************
+**  LU DECOMPOSITION  **
+** (Linear Equations) **
+***********************/
+
+/*
+** DEFINES
+*/
+
+#define LUARRAYROWS 101L
+#define LUARRAYCOLS 101L
+
+/*
+** TYPEDEFS
+*/
+typedef struct
+{       union
+	{       fardouble *p;
+		fardouble (*ap)[][LUARRAYCOLS];
+	} ptrs;
+} LUdblptr;
+
+/*
+** GLOBALS
+*/
+fardouble *LUtempvv;
+
+/*
+** PROTOTYPES
+*/
+void DoLU(void);
+static void LUFreeMem(fardouble *a, fardouble *b,
+	fardouble *abase, fardouble *bbase);
+static ulong DoLUIteration(fardouble *a, fardouble *b,
+	fardouble *abase, fardouble *bbase,
+	ulong numarrays);
+static void build_problem( double a[][LUARRAYCOLS],
+	int n, double b[LUARRAYROWS]);
+static int ludcmp(double a[][LUARRAYCOLS],
+	int n, int indx[], int *d);
+static void lubksb(double a[][LUARRAYCOLS],
+	int n, int indx[LUARRAYROWS],
+	double b[LUARRAYROWS]);
+static int lusolve(double a[][LUARRAYCOLS],
+	int n, double b[LUARRAYROWS]);
+
+
diff --git a/nmglobal.h b/nmglobal.h
new file mode 100644
index 0000000..2b57db5
--- /dev/null
+++ b/nmglobal.h
@@ -0,0 +1,519 @@
+/*
+** nmglobal.h
+** Global definitions for native mode benchmarks.
+**
+** BYTEmark (tm)
+** BYTE's Native Mode Benchmarks
+** Rick Grehan, BYTE Magazine
+**
+** Creation:
+** Revision: 3/95;10/95
+**  10/95 - Added memory array & alignment -- RG
+**
+** DISCLAIMER
+** The source, executable, and documentation files that comprise
+** the BYTEmark benchmarks are made available on an "as is" basis.
+** This means that we at BYTE Magazine have made every reasonable
+** effort to verify that the there are no errors in the source and
+** executable code.  We cannot, however, guarantee that the programs
+** are error-free.  Consequently, McGraw-HIll and BYTE Magazine make
+** no claims in regard to the fitness of the source code, executable
+** code, and documentation of the BYTEmark.
+**  Furthermore, BYTE Magazine, McGraw-Hill, and all employees
+** of McGraw-Hill cannot be held responsible for any damages resulting
+** from the use of this code or the results obtained from using
+** this code.
+*/
+
+/* is this a 64 bit architecture? If so, this will define LONG64 */
+#include "pointer.h"
+
+/*
+** SYSTEM DEFINES
+*/
+
+/* +++ MEMORY +++ */
+
+/*
+** You must define ONLY ONE of the following identifiers
+** to specify the mechanism for allocating memory:
+** MALLOCMEM
+** DOS16MEM
+** MACMEM
+*/
+
+/*
+** Define MALLOCMEM to use the standard malloc() call for
+** memory.  This is the default for most systems.
+*/
+#define MALLOCMEM
+
+/*
+** Define DOS16MEM if you're running in the old 16-bit segmented
+** model.  This enables some fruity memory management routines
+** required for that model.  NOT defining this assumes that
+** you're running in an environment that allows malloc() to
+** get > 64K chunks of memory.
+*/
+/* #define DOS16MEM */
+
+/* Define MACMEM to use the Mac's NewPtr call to allocate
+** memory (instead of malloc()).
+*/
+/* #define MACMEM */
+
+/* +++ TIMING +++ */
+/*
+** You must define ONLY ONE of the following identifiers to pick
+** the timing routine used.
+**  CLOCKWCPS
+**  CLOCKWCT
+**  MACTIMEMGR
+**  WIN31TIMER
+*/
+
+/*
+** Define CLOCKWCPS if you are using the clock() routine and the
+** constant used as the divisor to determine seconds is
+** CLOCKS_PER_SEC.  This is the default in most cases.
+*/
+#define CLOCKWCPS
+
+/*
+** Define CLOCKWCT if you are using the clock() routine and the
+** constant used as the divisor to determine seconds is CLK_TCK
+*/
+/* #define CLOCKWCT */
+
+/*
+** Define MACTIMEMGR to use the Mac Time manager routines.
+** You'll need to be running at least system 6.0.3 or
+** better...extended time manager is recommended (system 7 or
+** better).
+*/
+/* #define MACTIMEMGR */
+
+/*
+** Define WIN31TIMER to use the timing routines in TOOLHELP.DLL.
+** Gets accuracy down to the millisecond.
+*/
+/* #define WIN31TIMER */
+
+/* +++ MISCELLANEOUS +++ */
+
+/*
+** Define DOS16 if you'll be compiling under DOS in 16-bit
+** (non DOS-extended) mode.  This will enable proper definitions
+** for the far*** typedefs
+*/
+/* #define DOS16 */
+
+/*
+** Define MAC if you're compiling on a Macintosh.  This
+** does a number of things:
+**  includes unix.h
+**  Incorporates code to mimic the command line via either
+**      the console library (Symantec/Think) or the SIOUX
+**      library (Code Warrior).
+*/
+/* #define MAC */
+
+/*
+** Define LONG64 if your compiler emits 64-bit longs.
+** This is typically true of Alpha compilers on Unix
+** systems...though, who knows, this may change in the
+** future. I MOVED THIS DEFINITION INTO THE FILE pointer.h. DO NOT
+** DEFINE IT HERE. IT WILL AUTOMATICALLY BE DEFINED IF NECESSARY.
+** Uwe F. Mayer, Dec 15, 1996, Nov 15, 1997
+*/
+/* #define LONG64 */
+
+/*
+** Define MACCWPROF if you are profiling on the Mac using
+** Code Warrior.  This enables code that turns off the
+** profiler in the event of an error exit.
+*/
+/* #define MACCWPROF */
+
+#ifdef MAC
+#include <unix.h>
+#endif
+
+/*
+** ERROR CODES
+*/
+#define ERROR_MEMORY    1
+#define ERROR_MEMARRAY_FULL 2
+#define ERROR_MEMARRAY_NFOUND 3
+#define ERROR_FILECREATE 10
+#define ERROR_FILEREAD 11
+#define ERROR_FILEWRITE 12
+#define ERROR_FILEOPEN 13
+#define ERROR_FILESEEK 14
+
+/*
+** MINIMUM_TICKS
+**
+** This sets the default number of minimum ticks.
+** It can, of course, be overridden by the input
+** command file.
+** This ultimately gets loaded into the variable
+** global_min_ticks, which specifies the minimum
+** number of ticks that must take place between
+** a StartStopwatch() and StopStopwatch() call.
+** The idea is to reduce error buildup.
+*/
+#define MINIMUM_TICKS 60
+
+/*
+** MINIMUM_SECONDS
+**
+** Minimum number of seconds to run each test.
+*/
+#define MINIMUM_SECONDS 5
+
+/*
+** MAXPOSLONG
+**
+** This is the maximum positive long.
+*/
+#ifdef LONG64
+#define MAXPOSLONG 0x7FFFFFFFFFFFFFFFL
+#else
+#define MAXPOSLONG 0x7FFFFFFFL
+#endif
+
+/*
+** OTHER DEFINES
+*/
+#ifndef MAC
+#define TRUE    1
+#define FALSE   0
+#endif
+
+/*
+** Memory array size.  Used in SYSSPEC for keeping track
+** of re-aligned memory.
+*/
+#define MEM_ARRAY_SIZE 20
+
+/*
+** TYPEDEFS
+*/
+#define ulong unsigned long
+#define uchar unsigned char
+#define uint unsigned int
+#define ushort unsigned short
+/*
+typedef unsigned char uchar;
+typedef unsigned int uint;
+typedef unsigned short ushort;
+typedef unsigned long ulong;
+*/
+/*
+** The 'farxxx' typedefs were added in deference to DOS, which
+** requires far pointers to handle some of the bigger
+** memory structures.  Other systems will simply
+** map 'farxxx' to 'xxx'
+*/
+#ifdef DOS16
+typedef void huge farvoid;
+typedef double huge fardouble;
+typedef long huge farlong;
+typedef unsigned long huge farulong;
+typedef char huge farchar;
+typedef unsigned char huge faruchar;
+
+#else
+
+typedef void farvoid;
+typedef double fardouble;
+typedef long farlong;
+typedef unsigned long farulong;
+typedef char farchar;
+typedef unsigned char faruchar;
+
+#endif
+
+/*
+** The following typedefs are used when element size
+** is critical.  You'll have to alter these for
+** your specific platform/compiler.
+*/
+typedef unsigned char u8;       /* Unsigned 8-bits */
+typedef unsigned short u16;     /* Unsigned 16 bits */
+#ifdef LONG64
+typedef unsigned int u32;       /* Unsigned 32 bits */
+typedef int int32;              /* Signed 32 bit integer */
+#else
+typedef unsigned long u32;      /* Unsigned 32 bits */
+typedef long int32;              /* Signed 32 bit integer */
+#endif
+
+/*****************
+** NUMERIC SORT **
+*****************/
+/*
+** DEFINES
+*/
+
+/*
+** The following constant, NUMNUMARRAYS (no, it is not a
+** Peter Sellers joke) is the maximum number of arrays
+** that can be built by the numeric sorting benchmark
+** before it gives up.  This maximum is dependent on the
+** amount of memory in the system.
+*/
+/*#define NUMNUMARRAYS    1000*/
+#define NUMNUMARRAYS    10000
+
+/*
+** The following constant NUMARRAYSIZE determines the
+** default # of elements in each numeric array.  Ordinarily
+** this is something you shouldn't fool with, though as
+** with most of the constants here, it is adjustable.
+*/
+#define NUMARRAYSIZE    8111L
+
+
+/*
+** TYPEDEFS
+*/
+typedef struct {
+        int adjust;             /* Set adjust code */
+        ulong request_secs;     /* # of seconds requested */
+        double sortspersec;     /* # of sort iterations per sec */
+        ushort numarrays;       /* # of arrays */
+        ulong arraysize;        /* # of elements in array */
+} SortStruct;
+
+/****************
+** STRING SORT **
+*****************
+** Note: The string sort benchmark uses the same structure to
+** communicate parameters as does the numeric sort benchmark.
+** (i.e., SortStruct...see above.)
+*/
+
+/*
+** DEFINES
+*/
+/*
+** The following constant STRINGARRAYSIZE determines
+** the default # of bytes allocated to each string array.
+** Though the actual size can be pre-set from the command
+** file, this constant should be left unchanged.
+*/
+#define STRINGARRAYSIZE 8111L
+
+/************************
+** BITFIELD OPERATIONS **
+*************************
+*/
+
+/*
+** DEFINES
+*/
+
+/*
+** Following field sets the size of the bitfield array (in longs).
+*/
+#ifdef LONG64
+#define BITFARRAYSIZE 16384L
+#else
+#define BITFARRAYSIZE 32768L
+#endif
+
+/*
+** TYPEDEFS
+*/
+typedef struct {
+        int adjust;             /* Set adjust code */
+        ulong request_secs;     /* # of seconds requested */
+        double bitopspersec;    /* # of bitfield ops per sec */
+        ulong bitoparraysize;           /* Total # of bitfield ops */
+        ulong bitfieldarraysize;        /* Bit field array size */
+} BitOpStruct;
+
+/****************************
+** EMULATED FLOATING POINT **
+****************************/
+/*
+** DEFINES
+*/
+#define INTERNAL_FPF_PRECISION 4
+
+/*
+** The following constant is the maximum number of loops
+** of the emulated floating point test that the system
+** will allow before flagging an error.  This is not a
+** critical constant, and can be altered if your system is
+** a real barn-burner.
+*/
+/*#define CPUEMFLOATLOOPMAX 50000L*/
+#define CPUEMFLOATLOOPMAX 500000L
+
+/*
+** Set size of array
+*/
+#define EMFARRAYSIZE 3000L
+
+/*
+** TYPEDEFS
+*/
+typedef struct {
+        int adjust;             /* Set adjust code */
+        ulong request_secs;     /* # of seconds requested */
+        ulong arraysize;        /* Size of array */
+        ulong loops;            /* Loops per iterations */
+        double emflops;         /* Results */
+} EmFloatStruct;
+
+/*************************
+** FOURIER COEFFICIENTS **
+*************************/
+
+/*
+** TYPEDEFS
+*/
+typedef struct {
+        int adjust;             /* Set adjust code */
+        ulong request_secs;     /* # of requested seconds */
+        ulong arraysize;        /* Size of coeff. arrays */
+        double fflops;          /* Results */
+} FourierStruct;
+
+/*************************
+** ASSIGNMENT ALGORITHM **
+*************************/
+
+/*
+** TYPEDEFS
+*/
+typedef struct {
+        int adjust;             /* Set adjust code */
+        ulong request_secs;     /* Requested # of seconds */
+        ulong numarrays;        /* # of arrays */
+        double iterspersec;     /* Results */
+} AssignStruct;
+
+/********************
+** IDEA ENCRYPTION **
+********************/
+
+/*
+** DEFINES
+*/
+/* Following constant defines the max number of loops the
+** system will attempt. Keeps things from going off into the
+** weeds. */
+/*#define MAXIDEALOOPS 50000L*/
+#define MAXIDEALOOPS 500000L
+
+/*
+** Following constant sets the size of the arrays.
+** NOTE: For the IDEA algorithm to work properly, this
+**  number MUST be some multiple of 8.
+*/
+#define IDEAARRAYSIZE 4000L
+
+/*
+** TYPEDEFS
+*/
+typedef struct {
+        int adjust;             /* Set adjust code */
+        ulong request_secs;     /* Requested # of seconds */
+        ulong arraysize;        /* Size of array */
+        ulong loops;            /* # of times to convert */
+        double iterspersec;     /* Results */
+} IDEAStruct;
+
+
+/************************
+** HUFFMAN COMPRESSION **
+************************/
+
+/*
+** DEFINES
+*/
+/*
+** MAXHUFFLOOPS
+**
+** This constant specifies the maximum number of Huffman
+** compression loops the system will try for.  This keeps
+** the test from going off into the weeds.  This is not
+** a critical constant, and can be increased if your
+** system is a real barn-burner.
+*/
+/*#define MAXHUFFLOOPS 50000L*/
+#define MAXHUFFLOOPS 500000L
+
+/*
+** Following constant sets the size of the arrays to
+** be compressed/uncompressed.
+*/
+#define HUFFARRAYSIZE 5000L
+
+/*
+** TYPEDEFS
+*/
+
+typedef struct {
+        int adjust;             /* Set adjust code */
+        ulong request_secs;     /* Requested # of seconds */
+        ulong arraysize;        /* Size of array */
+        ulong loops;            /* # of times to compress/decompress */
+        double iterspersec;     /* Results */
+} HuffStruct;
+
+/********************************
+** BACK PROPAGATION NEURAL NET **
+********************************/
+
+/*
+**  MAXNNETLOOPS
+**
+** This constant sets the max number of loops through the neural
+** net that the system will attempt before giving up.  This
+** is not a critical constant.  You can alter it if your system
+** has sufficient horsepower.
+*/
+/*#define MAXNNETLOOPS  50000L*/
+#define MAXNNETLOOPS  500000L
+
+/*
+** TYPEDEFS
+*/
+typedef struct {
+        int adjust;             /* Set adjust code */
+        ulong request_secs;     /* Requested # of seconds */
+        ulong loops;            /* # of times to learn */
+        double iterspersec;     /* Results */
+} NNetStruct;
+
+/***********************
+**  LU DECOMPOSITION  **
+** (Linear Equations) **
+***********************/
+
+/*
+** MAXLUARRAYS
+**
+** This sets the upper limit on the number of arrays
+** that the benchmark will attempt to build before
+** flagging an error.  It is not a critical constant, and
+** may be increased if your system has the horsepower.
+*/
+/*#define MAXLUARRAYS 1000*/
+#define MAXLUARRAYS 10000
+
+/*
+** TYPEDEFS
+*/
+typedef struct {
+        int adjust;             /* Set adjust code */
+        ulong request_secs;     /* Requested # of seconds */
+        ulong numarrays;        /* # of arrays */
+        double iterspersec;     /* Results */
+} LUStruct;
+
diff --git a/pointer.c b/pointer.c
new file mode 100644
index 0000000..f4de577
--- /dev/null
+++ b/pointer.c
@@ -0,0 +1,6 @@
+#include <stdio.h>
+int main(void){ /* print sizeof(long) as a bare decimal, no newline */
+ printf("%d",(int)(sizeof(long)));
+ return 0;
+}
+
diff --git a/sysinfo.c.example b/sysinfo.c.example
new file mode 100644
index 0000000..db650f0
--- /dev/null
+++ b/sysinfo.c.example
@@ -0,0 +1,10 @@
+sprintf(buffer,"**System used for compilation:\n");
+output_string(buffer);
+sprintf(buffer,"**Linux mimi 2.0.31 #5 Thu Oct 23 10:02:08 CDT 1997 i486\n");
+output_string(buffer);
+sprintf(buffer,"**C compiler: gcc version 2.7.2.3\n");
+output_string(buffer);
+sprintf(buffer,"**libc: libc.so.5.4.38\n");
+output_string(buffer);
+sprintf(buffer,"**Date of compilation: Thu Nov 20 10:04:43 CST 1997\n");
+output_string(buffer);
diff --git a/sysinfo.c.template b/sysinfo.c.template
new file mode 100644
index 0000000..c1a986c
--- /dev/null
+++ b/sysinfo.c.template
@@ -0,0 +1,10 @@
+sprintf(buffer,"**System used for compilation:\n");
+output_string(buffer);
+sprintf(buffer,"**%SYSTEM%\n");
+output_string(buffer);
+sprintf(buffer,"**C compiler: %CCVERSION%\n");
+output_string(buffer);
+sprintf(buffer,"**libc: %LIBCVERSION%\n");
+output_string(buffer);
+sprintf(buffer,"**Date of compilation: %DATE%\n");
+output_string(buffer);
diff --git a/sysinfo.sh b/sysinfo.sh
new file mode 100755
index 0000000..57754fe
--- /dev/null
+++ b/sysinfo.sh
@@ -0,0 +1,78 @@
+#!/bin/sh
+
+# the arguments of this script are the compiler name and flags
+
+# SunOS chicken-and-egg problem: ucb's test program does not handle
+# -L like the other test programs, so look for another implementation.
+# Candidates are probed in order: /bin/test, then /usr/bin/test.
+if test -x /bin/test; then
+    TEST=/bin/test
+elif test -x /usr/bin/test; then
+    TEST=/usr/bin/test
+else
+    # neither binary is executable: fall back to plain "test"
+    # and cross your fingers that it's not like ucb test
+    TEST=test
+fi
+# from here on $TEST names the test implementation to use
+
+compiler=`echo $* | sed -e 's/-static//g' -e 's/-Bstatic//g'`
+if $TEST `basename $1` = "gcc" && ($compiler -v) >/dev/null 2>&1 ; then
+# Cygwin writes more than one line with "version" in it
+    gccversion=`$compiler -v 2>&1 | sed -e "/version/!d" | tail -n 1`
+else
+    gccversion="$1"
+fi
+
+libcversion=""
+if ($* hello.c -o hello) >/dev/null 2>&1; then
+  ldd_output=`(ldd hello) 2>&1`
+  libcversion=`echo $ldd_output | sed -e 's/.*static.*/static/' \
+				      -e 's/.*not a dynamic.*/static/'`
+  if $TEST "$libcversion" = "static" ; then
+    if ($compiler hello.c -o hello) >/dev/null 2>&1; then
+      if (ldd hello) >/dev/null 2>/dev/null; then
+        libcversion=`(ldd hello) 2>&1`
+        libcversion=`echo $libcversion | sed -e '/libc/!d'\
+			-e 's/^[ 	]*//' \
+			-e 's/.*=>[ 	][ 	]*\([^ 	]*\).*/\1/'`
+	# remember the current directory
+      	current=`pwd`
+      	while $TEST -L "$libcversion" && ! $TEST "$libcversion" = "" ; do
+      	  libcitself=`basename $libcversion`
+      	  libpath=`echo $libcversion | sed -e "s/$libcitself$//"`
+      	  if $TEST -d "$libpath" ; then
+      	    cd $libpath
+      	  fi
+      	  if ls $libcitself >/dev/null 2>/dev/null ; then
+      	    libcversion=`ls -l $libcitself | \
+			   sed -e 's/.*->[ 	][ 	]*\(.*\)$/\1/'`
+      	  else
+      	    # something must have gone wrong, let's bail out
+      	    libcversion=""
+      	  fi
+      	done
+      	# return to the current directory
+      	cd $current
+      fi
+    fi
+  else
+    libcversion=""
+  fi
+fi
+
+rm -f sysinfo.crm sysinfoc.c hello
+
+# this bombs out on Ultrix which expect "cut -d"
+
+compsystem=`uname -a | cut -b 1-78`
+compdate=`date|cut -b1-55`
+
+# let's hope that ctrl-c is not part of any string here
+# this also will barf later if " is in any of the strings
+
+for i in sysinfo.c sysinfoc.c ; do
+ sed -e "s%CCVERSION%$gccversion" -e "s%LIBCVERSION%$libcversion"\
+     -e "s%SYSTEM%$compsystem" -e "s%DATE%$compdate"\
+   ${i}.template > $i
+done
diff --git a/sysinfoc.c.example b/sysinfoc.c.example
new file mode 100644
index 0000000..7da71ac
--- /dev/null
+++ b/sysinfoc.c.example
@@ -0,0 +1,4 @@
+sprintf(buffer,"C compiler          : gcc version 2.7.2.3\n");
+output_string(buffer);
+sprintf(buffer,"libc                : libc.so.5.4.38\n");
+output_string(buffer);
diff --git a/sysinfoc.c.template b/sysinfoc.c.template
new file mode 100644
index 0000000..922a5de
--- /dev/null
+++ b/sysinfoc.c.template
@@ -0,0 +1,4 @@
+sprintf(buffer,"C compiler          : %CCVERSION%\n");
+output_string(buffer);
+sprintf(buffer,"libc                : %LIBCVERSION%\n");
+output_string(buffer);
diff --git a/sysspec.c b/sysspec.c
new file mode 100644
index 0000000..a97010d
--- /dev/null
+++ b/sysspec.c
@@ -0,0 +1,884 @@
+
+/*
+** sysspec.c
+** System-specific routines.
+**
+** BYTEmark (tm)
+** BYTE's Native Mode Benchmarks
+** Rick Grehan, BYTE Magazine
+**
+** Creation:
+** Revision: 3/95;10/95
+**
+** DISCLAIMER
+** The source, executable, and documentation files that comprise
+** the BYTEmark benchmarks are made available on an "as is" basis.
+** This means that we at BYTE Magazine have made every reasonable
+** effort to verify that the there are no errors in the source and
+** executable code.  We cannot, however, guarantee that the programs
+** are error-free.  Consequently, McGraw-HIll and BYTE Magazine make
+** no claims in regard to the fitness of the source code, executable
+** code, and documentation of the BYTEmark.
+**  Furthermore, BYTE Magazine, McGraw-Hill, and all employees
+** of McGraw-Hill cannot be held responsible for any damages resulting
+** from the use of this code or the results obtained from using
+** this code.
+*/
+
+/***********************************
+**    SYSTEM-SPECIFIC ROUTINES    **
+************************************
+**
+** These are the routines that provide functions that are
+** system-specific.  If the benchmarks are to be ported
+** to new hardware/new O.S., this is the first place to
+** start.
+*/
+#include "sysspec.h"
+
+#ifdef DOS16
+#include <io.h>
+#include <fcntl.h>
+#include <sys\stat.h>
+#endif
+/*********************************
+**  MEMORY MANAGEMENT ROUTINES  **
+*********************************/
+
+
+/****************************
+** AllocateMemory
+** This routine returns a void pointer to a memory
+** block.  The size of the memory block is given in bytes
+** as the first argument.  This routine also returns an
+** error code in the second argument.
+** 10/95 Update:
+**  Added an associative array for memory alignment reasons.
+**  mem_array[2][MEM_ARRAY_SIZE]
+**   mem_array[0][n] = Actual address (from malloc)
+**   mem_array[1][n] = Aligned address
+** Currently, mem_array[][] is only used if you use malloc;
+**  it is not used for the 16-bit DOS and MAC versions.
+*/
+farvoid *AllocateMemory(unsigned long nbytes,   /* # of bytes to alloc */
+		int *errorcode)                 /* Returned error code */
+{
+#ifdef DOS16MEM
+union REGS registers;
+unsigned short nparas;            /* # of paragraphs */
+
+/*
+** Set # of paragraphs to nbytes/16 +1.  The +1 is a
+** slop factor.
+*/
+nparas=(unsigned short)(nbytes/16L) + 1;
+
+/*
+** Set incoming registers.
+*/
+registers.h.ah=0x48;            /* Allocate memory */
+registers.x.bx=nparas;          /* # of paragraphs */
+
+
+intdos(&registers,&registers);  /* Call DOS */
+
+/*
+** See if things succeeded.
+*/
+if(registers.x.cflag)
+{       printf("error: %d Lgst: %d\n",registers.x.ax,registers.x.bx);
+	    *errorcode=ERROR_MEMORY;
+	return((farvoid *)NULL);
+}
+
+/*
+** Create a farvoid pointer to return.
+*/
+*errorcode=0;
+return((farvoid *)MK_FP(registers.x.ax,0));
+
+#endif
+
+#ifdef MACMEM
+/*
+** For MAC CodeWarrior, we'll use the MacOS NewPtr call
+*/
+farvoid *returnval;
+returnval=(farvoid *)NewPtr((Size)nbytes);
+if(returnval==(farvoid *)NULL)
+	*errorcode=ERROR_MEMORY;
+else
+	*errorcode=0;
+return(returnval);
+#endif
+
+#ifdef MALLOCMEM
+/*
+** Everyone else: malloc() nbytes plus 2*global_align bytes of
+** slack, so an adjusted address still leaves nbytes usable.
+** The (true, adjusted) pair is recorded with AddMemArray().
+*/
+farvoid *returnval;             /* Return value */
+ulong true_addr;		/* True address */
+ulong adj_addr;			/* Adjusted address */
+
+returnval=(farvoid *)malloc((size_t)(nbytes+2L*(long)global_align));
+if(returnval==(farvoid *)NULL)
+{	*errorcode=ERROR_MEMORY;	/* nothing to align or record */
+	return((farvoid *)NULL);
+}
+*errorcode=0;
+/*
+** Check for alignment
+*/
+adj_addr=true_addr=(ulong)returnval;
+if(global_align==0)
+{	/* No adjustment requested: record the block as-is */
+	if(AddMemArray(true_addr, adj_addr))
+		*errorcode=ERROR_MEMARRAY_FULL;
+	return(returnval);
+}
+
+if(global_align==1)
+{	/* Alignment of 1 means: force an odd address */
+        if(true_addr%2==0) adj_addr++;
+}
+else
+{	/* Round up to global_align, but step off any 2*global_align multiple */
+	while(adj_addr%global_align!=0) ++adj_addr;
+	if(adj_addr%(global_align*2)==0) adj_addr+=global_align;
+}
+returnval=(void *)adj_addr;
+if(AddMemArray(true_addr,adj_addr))
+	*errorcode=ERROR_MEMARRAY_FULL;
+return(returnval);
+#endif
+
+}
+
+
+/****************************
+** FreeMemory
+** This is the reverse of AllocateMemory.  The memory
+** block passed in is freed.  Should an error occur,
+** that error is returned in errorcode.
+*/
+void FreeMemory(farvoid *mempointer,    /* Pointer to memory block */
+		int *errorcode)         /* Returned error code (0 if ok) */
+{
+#ifdef DOS16MEM
+/*
+** 16-bit DOS VERSION!!
+*/
+unsigned int segment;
+unsigned int offset;
+union REGS registers;
+struct SREGS sregisters;
+
+/*
+** First get the segment/offset of the farvoid pointer.
+*/
+segment=FP_SEG(mempointer);
+offset=FP_OFF(mempointer);
+
+/*
+** Align the segment properly.  For as long as offset >= 16,
+** subtract 16 from offset and add 1 to segment.
+*/
+while(offset>=16)
+{       offset-=16;
+	segment++;
+}
+
+/*
+** Build the call to DOS
+*/
+registers.h.ah=0x49;            /* Free memory */
+sregisters.es=segment;
+
+intdosx(&registers,&registers,&sregisters);
+
+/*
+** Check for error
+*/
+if(registers.x.cflag)
+{       *errorcode=ERROR_MEMORY;
+	return;
+}
+
+*errorcode=0;
+return;
+#endif
+
+#ifdef MACMEM
+DisposPtr((Ptr)mempointer);     /* This branch always reports success */
+*errorcode=0;
+return;
+#endif
+
+#ifdef MALLOCMEM
+ulong adj_addr, true_addr;
+
+/* Locate item in memory array */
+adj_addr=(ulong)mempointer;
+if(RemoveMemArray(adj_addr, &true_addr))
+{	*errorcode=ERROR_MEMARRAY_NFOUND;	/* Address was never recorded */
+	return;
+}
+mempointer=(void *)true_addr;   /* free() the true address, not the adjusted one */
+free(mempointer);
+*errorcode=0;
+return;
+#endif
+}
+
+/****************************
+** MoveMemory
+** Copies nbytes bytes from source to destination; the two
+** regions may overlap.  Everywhere except 16-bit DOS this
+** is a plain memmove().
+*/
+void MoveMemory( farvoid *destination,  /* Where the bytes go */
+		farvoid *source,        /* Where the bytes come from */
+		unsigned long nbytes)   /* How many bytes to move */
+{
+#ifndef DOS16MEM
+
+/* Flat memory model: the library routine copes with overlap */
+memmove(destination, source, nbytes);
+
+#else
+
+/* +++16-bit DOS VERSION+++ */
+	FarDOSmemmove( destination, source, nbytes);
+
+#endif
+}
+
+#ifdef DOS16MEM
+
+/****************************
+** FarDOSmemmove
+** memmove() replacement for 16-bit DOS far/huge pointers: copies
+** nbytes in pieces of at most 65535 bytes and tolerates overlap.
+*/
+void FarDOSmemmove(farvoid *destination,        /* Destination pointer */
+		farvoid *source,        /* Source pointer */
+		unsigned long nbytes)   /* # of bytes to move */
+{
+unsigned char huge *uchsource;  /* Temp source */
+unsigned char huge *uchdest;    /* Temp destination */
+unsigned long saddr;            /* Source "true" address */
+unsigned long daddr;            /* Destination "true" address */
+
+
+/*
+** Get unsigned char pointer equivalents
+*/
+uchsource=(unsigned char huge *)source;
+uchdest=(unsigned char huge *)destination;
+
+/*
+** Calculate true address of source and destination and compare.
+** Widen to long BEFORE multiplying: segment*16 overflows 16 bits.
+*/
+saddr=(unsigned long)FP_SEG(source)*16UL + (unsigned long)FP_OFF(source);
+daddr=(unsigned long)FP_SEG(destination)*16UL + (unsigned long)FP_OFF(destination);
+
+if(saddr > daddr)
+{
+	/*
+	** Source is greater than destination.
+	** Use a series of standard move operations.
+	** We'll move 65535 bytes at a time.
+	*/
+	while(nbytes>=65535L)
+	{       _fmemmove((farvoid *)uchdest,
+			(farvoid *)uchsource,
+			(size_t) 65535);
+		uchsource+=65535;       /* Advance pointers */
+		uchdest+=65535;
+		nbytes-=65535;
+	}
+
+	/*
+	** Move remaining bytes
+	*/
+	if(nbytes!=0L)
+		_fmemmove((farvoid *)uchdest,
+			(farvoid *)uchsource,
+			(size_t)(nbytes & 0xFFFF));
+
+}
+else
+{
+	/*
+	** Destination is greater than (or equal to) source.
+	** Advance pointers to the end of their
+	** respective blocks.
+	*/
+	uchsource+=nbytes;
+	uchdest+=nbytes;
+
+	/*
+	** Again, move 65535 bytes at a time.  However,
+	** "back" the pointers up before doing the
+	** move.
+	*/
+	while(nbytes>=65535L)
+	{
+		uchsource-=65535;
+		uchdest-=65535;
+		_fmemmove((farvoid *)uchdest,
+			(farvoid *)uchsource,
+			(size_t) 65535);
+		nbytes-=65535;
+	}
+
+	/*
+	** Move remaining bytes.
+	*/
+	if(nbytes!=0L)
+	{       uchsource-=nbytes;
+		uchdest-=nbytes;
+		_fmemmove((farvoid *)uchdest,
+			(farvoid *)uchsource,
+			(size_t)(nbytes & 0xFFFF));
+	}
+}
+return;
+}
+#endif
+
+/***********************************
+** MEMORY ARRAY HANDLING ROUTINES **
+***********************************/
+/****************************
+** InitMemArray
+** Initialize the memory array.  This simply amounts to
+** setting mem_array_ents to zero, indicating that there
+** isn't anything in the memory array.
+*/
+void InitMemArray(void)
+{
+/* An entry count of zero marks the memory array as empty. */
+mem_array_ents = 0;
+}
+
+/***************************
+** AddMemArray
+** Add a pair of items to the memory array.
+**  true_addr is the true address (mem_array[0][n])
+**  adj_addr is the adjusted address (mem_array[1][n])
+** Returns 0 if ok
+** -1 if not enough room
+*/
+int AddMemArray(ulong true_addr,
+		ulong adj_addr)
+{
+int slot=mem_array_ents;        /* Next free column in the table */
+if(slot>=MEM_ARRAY_SIZE)
+	return(-1);             /* Table is full */
+mem_array[0][slot]=true_addr;   /* Row 0 holds the true address */
+mem_array[1][slot]=adj_addr;    /* Row 1 holds the adjusted address */
+mem_array_ents=slot+1;
+return(0);
+}
+
+/*************************
+** RemoveMemArray
+** Given an adjusted address value (mem_array[1][n]), locate
+** the entry and remove it from the mem_array.
+** Also returns the associated true address.
+** Returns 0 if ok
+** -1 if not found.
+*/
+int RemoveMemArray(ulong adj_addr,ulong *true_addr)
+{
+int idx,k;
+
+/* Scan the adjusted-address row for a match. */
+for(idx=0;idx<mem_array_ents;idx++)
+{
+	if(mem_array[1][idx]!=adj_addr)
+		continue;       /* Not this one..keep looking */
+	*true_addr=mem_array[0][idx];   /* Hand back the paired true address */
+	/* Close the gap by sliding every later pair down one slot. */
+	for(k=idx;k+1<mem_array_ents;k++)
+	{       mem_array[0][k]=mem_array[0][k+1];
+		mem_array[1][k]=mem_array[1][k+1];
+	}
+	mem_array_ents--;
+	return(0);
+}
+
+/* No entry carries that adjusted address. */
+return(-1);
+}
+
+/**********************************
+**    FILE HANDLING ROUTINES     **
+**********************************/
+
+/****************************
+** CreateFile
+** Creates the named file (in the current directory unless the
+** name carries a path), truncating it to length 0 if it exists.
+** The file is NOT left open.  *errorcode is set to 0 on
+** success or ERROR_FILECREATE if the file could not be
+** created.
+*/
+void CreateFile(char *filename,
+		int *errorcode)
+{
+
+#ifdef DOS16
+/*
+** DOS VERSION!!
+*/
+int fhandle;            /* File handle used internally */
+
+fhandle=open(filename,O_CREAT | O_TRUNC, S_IREAD | S_IWRITE);
+
+if(fhandle==-1)
+{       *errorcode=ERROR_FILECREATE;
+	return;         /* Open failed: there is no handle to close */
+}
+
+/*
+** Since all we're doing here is creating the file,
+** go ahead and close it.
+*/
+close(fhandle);
+*errorcode=0;
+return;
+#endif
+
+#ifdef LINUX
+FILE *fhandle;            /* File handle used internally */
+
+fhandle=fopen(filename,"w");
+
+if(fhandle==NULL)
+{       *errorcode=ERROR_FILECREATE;
+	return;         /* Never hand a NULL stream to fclose */
+}
+
+/*
+** Since all we're doing here is creating the file,
+** go ahead and close it.
+*/
+fclose(fhandle);
+*errorcode=0;
+return;
+#endif
+}
+
+/****************************
+** bmOpenFile
+** Opens the file given by fname, returning its handle.
+** If an error occurs, returns its code in errorcode.
+** The file is opened in read-write exclusive mode.
+*/
+#ifdef DOS16
+/*
+** DOS VERSION!!
+*/
+
+int bmOpenFile(char *fname,       /* File name */
+	int *errorcode)         /* Error code returned */
+{
+
+int fd;                 /* Handle passed back to the caller */
+
+/* Binary read/write open; no O_CREAT, so the file must exist. */
+fd=open(fname,O_BINARY | O_RDWR, S_IREAD | S_IWRITE);
+
+/* A handle of -1 means the open failed. */
+*errorcode=(fd==-1) ? ERROR_FILEOPEN : 0;
+
+/* The handle is returned as-is, so it is -1 on failure. */
+return(fd);
+}
+#endif
+
+
+#ifdef LINUX
+
+FILE *bmOpenFile(char *fname,       /* File name */
+	    int *errorcode)         /* Error code returned */
+{
+FILE *stream;           /* Stream passed back to the caller */
+
+/*
+** NOTE(review): "w+" truncates an existing file (and creates a
+** missing one), unlike the DOS16 variant's O_RDWR open -- confirm.
+*/
+stream=fopen(fname,"w+");
+*errorcode=(stream==NULL) ? ERROR_FILEOPEN : 0;
+
+/* NULL is passed straight through when the open failed. */
+return(stream);
+}
+#endif
+
+
+/****************************
+** CloseFile
+** Closes the file identified by fhandle.
+** A more innocuous routine there never was.
+*/
+#ifdef DOS16
+/*
+** DOS VERSION!!!
+*/
+void CloseFile(int fhandle,             /* File handle */
+		int *errorcode)         /* Returned error code */
+{
+/* close()'s own result is ignored; success is always reported. */
+(void)close(fhandle);
+
+*errorcode=0;
+}
+#endif
+#ifdef LINUX
+void CloseFile(FILE *fhandle,             /* File handle */
+		int *errorcode)         /* Returned error code */
+{
+/* fclose()'s own result is ignored; success is always reported. */
+(void)fclose(fhandle);
+*errorcode=0;
+}
+#endif
+
+/****************************
+** readfile
+** Read bytes from an opened file.  This routine
+** is a combination seek-and-read.
+** Note that this routine expects the offset to be from
+** the beginning of the file.
+*/
+#ifdef DOS16
+/*
+** DOS VERSION!!
+*/
+
+void readfile(int fhandle,              /* File handle */
+	unsigned long offset,           /* Offset into file */
+	unsigned long nbytes,           /* # of bytes to read */
+	void *buffer,                   /* Buffer to read into */
+	int *errorcode)                 /* Returned error code */
+{
+
+unsigned count;                         /* Byte count handed to read */
+
+/*
+** Presume success.
+*/
+*errorcode=0;
+
+/*
+** Position the file at the requested absolute offset; a
+** failed seek is reported without attempting the read.
+*/
+if(lseek(fhandle,(long)offset,SEEK_SET)==-1L)
+{       *errorcode=ERROR_FILESEEK;
+	return;
+}
+
+/*
+** Only the low 16 bits of nbytes are honored, so a single
+** call transfers at most 65535 bytes.  Only an outright
+** failure (-1) is reported; a short read is not detected.
+*/
+count=(unsigned)(nbytes & 0xFFFF);
+if(read(fhandle,buffer,count)==-1)
+	*errorcode=ERROR_FILEREAD;
+return;
+}
+#endif
+#ifdef LINUX
+void readfile(FILE *fhandle,            /* File handle */
+	unsigned long offset,           /* Offset into file */
+	unsigned long nbytes,           /* # of bytes to read */
+	void *buffer,                   /* Buffer to read into */
+	int *errorcode)                 /* Returned error code */
+{
+
+size_t wanted;                          /* Byte count asked of fread */
+
+/*
+** Presume success.
+*/
+*errorcode=0;
+
+/*
+** Position the stream at the requested absolute offset; a
+** failed seek is reported without attempting the read.
+*/
+if(fseek(fhandle,(long)offset,SEEK_SET)==-1)
+{       *errorcode=ERROR_FILESEEK;
+	return;
+}
+
+/*
+** Only the low 16 bits of nbytes are honored, so a single
+** call transfers at most 65535 bytes.  Anything short of the
+** full count (including end-of-file) is reported as an error.
+*/
+wanted=(size_t)(nbytes & 0xFFFF);
+
+if(fread(buffer,(size_t)1,wanted,fhandle)!=wanted)
+	*errorcode=ERROR_FILEREAD;
+
+return;
+}
+#endif
+
+/****************************
+** writefile
+** writes bytes to an opened file.  This routine is
+** a combination seek-and-write.
+** Note that this routine expects the offset to be from
+** the beginning of the file.
+*/
+#ifdef DOS16
+/*
+** DOS VERSION!!
+*/
+
+void writefile(int fhandle,             /* File handle */
+	unsigned long offset,           /* Offset into file */
+	unsigned long nbytes,           /* # of bytes to write */
+	void *buffer,                   /* Buffer to write from */
+	int *errorcode)                 /* Returned error code */
+{
+
+unsigned count;                         /* Byte count handed to write */
+
+/*
+** Presume success.
+*/
+*errorcode=0;
+
+/*
+** Position the file at the requested absolute offset; a
+** failed seek is reported without attempting the write.
+*/
+if(lseek(fhandle,(long)offset,SEEK_SET)==-1L)
+{       *errorcode=ERROR_FILESEEK;
+	return;
+}
+
+/*
+** Only the low 16 bits of nbytes are honored, so a single
+** call transfers at most 65535 bytes.  Only an outright
+** failure (-1) is reported; a short write is not detected.
+*/
+count=(unsigned)(nbytes & 0xFFFF);
+if(write(fhandle,buffer,count)==-1)
+	*errorcode=ERROR_FILEWRITE;
+return;
+}
+#endif
+
+#ifdef LINUX
+
+void writefile(FILE *fhandle,           /* File handle */
+	unsigned long offset,           /* Offset into file */
+	unsigned long nbytes,           /* # of bytes to write */
+	void *buffer,                   /* Buffer to write from */
+	int *errorcode)                 /* Returned error code */
+{
+
+long newoffset;                         /* Result of fseek */
+size_t nelems;                          /* Expected return code from write */
+size_t writecode;                       /* Actual return code from write */
+
+/*
+** Presume success.
+*/
+*errorcode=0;
+
+/*
+** Seek to the proper offset.
+*/
+newoffset=fseek(fhandle,(long)offset,SEEK_SET);
+if(newoffset==-1L)
+{       *errorcode=ERROR_FILESEEK;
+	return;
+}
+
+/*
+** Do the write (at most 65535 bytes: nbytes is masked to 16 bits).
+** fwrite returning fewer items than requested is the failure case.
+*/
+nelems=(size_t)(nbytes & 0xFFFF);
+writecode=fwrite(buffer,(size_t)1,nelems,fhandle);
+if(writecode!=nelems)
+	*errorcode=ERROR_FILEWRITE;
+return;
+}
+#endif
+
+
+/********************************
+**   ERROR HANDLING ROUTINES   **
+********************************/
+
+/****************************
+** ReportError
+** Report error message condition.
+*/
+void ReportError(char *errorcontext,    /* Error context string */
+		int errorcode)          /* Error code number */
+{
+/*
+** Both pieces go to stdout.  First a banner plus the
+** caller-supplied context string, each on its own line...
+*/
+printf("ERROR CONDITION\nContext: %s\n",errorcontext);
+
+/*
+** ...then the numeric code.  Note that no trailing newline
+** is written after the code.
+*/
+printf("Code: %d",errorcode);
+return;
+}
+
+/****************************
+** ErrorExit
+** Performs an exit from an error condition.
+*/
+void ErrorExit()
+{
+/* Terminates the program via exit(); control never returns. */
+/*
+** For profiling on the Mac with MetroWerks -- 11/17/94 RG
+** Have to do this to turn off profiler.
+*/
+#ifdef MACCWPROF
+#if __profile__
+ProfilerTerm();
+#endif
+#endif
+
+/*
+** FOR NOW...SIMPLE EXIT (exit status passed is 1)
+*/
+exit(1);
+}
+
+/*****************************
+**    STOPWATCH ROUTINES    **
+*****************************/
+
+/****************************
+** StartStopwatch
+** Starts a software stopwatch.  Returns the first value of
+** the stopwatch in ticks.
+*/
+unsigned long StartStopwatch()
+{
+#if defined(MACTIMEMGR)
+/*
+** Mac Code Warrior: hand myTMTask to the Time Manager
+** (InsTime, then PrimeTime).  What we return in this case
+** is really a dummy value.
+*/
+InsTime((QElemPtr)&myTMTask);
+PrimeTime((QElemPtr)&myTMTask,-MacHSTdelay);
+return((unsigned long)1);
+#elif defined(WIN31TIMER)
+/*
+** Win 3.x timer returns a DWORD, which we coax into a long.
+*/
+_Call16(lpfn,"p",&win31tinfo);
+return((unsigned long)win31tinfo.dwmsSinceStart);
+#else
+/* Everybody else: the current clock() reading. */
+return((unsigned long)clock());
+#endif
+}
+
+/****************************
+** StopStopwatch
+** Stops the software stopwatch.  Expects as an input argument
+** the stopwatch start time.
+*/
+unsigned long StopStopwatch(unsigned long startticks)
+{
+#if defined(MACTIMEMGR)
+/*
+** Mac Code Warrior: startticks is ignored.  The value
+** returned is in microseconds.
+*/
+RmvTime((QElemPtr)&myTMTask);
+return((unsigned long)(MacHSTdelay+myTMTask.tmCount-MacHSTohead));
+#elif defined(WIN31TIMER)
+/* Elapsed = current timer reading minus the caller's start value. */
+_Call16(lpfn,"p",&win31tinfo);
+return((unsigned long)win31tinfo.dwmsSinceStart-startticks);
+#else
+/* Elapsed = current clock() reading minus the caller's start value. */
+return((unsigned long)clock()-startticks);
+#endif
+}
+
+/****************************
+** TicksToSecs
+** Converts ticks to seconds.  Converts ticks to integer
+** seconds, discarding any fractional amount.
+*/
+unsigned long TicksToSecs(unsigned long tickamount)
+{
+/*
+** Presumably the build defines exactly one of the timing
+** macros below; the first one found (in this order) is used.
+** The result is truncated to whole seconds.
+*/
+#if defined(CLOCKWCT)
+/* Ticks are scaled by CLK_TCK */
+return((unsigned long)(tickamount/CLK_TCK));
+#elif defined(MACTIMEMGR)
+/* +++ MAC time manager version (using timer in microseconds) +++ */
+return((unsigned long)(tickamount/1000000));
+#elif defined(CLOCKWCPS)
+/* Everybody else: ticks are scaled by CLOCKS_PER_SEC */
+return((unsigned long)(tickamount/CLOCKS_PER_SEC));
+#elif defined(WIN31TIMER)
+/* NOTE(review): dividing by 1000 treats a tick as 1 ms -- confirm */
+return((unsigned long)(tickamount/1000L));
+#endif
+
+}
+
+/****************************
+** TicksToFracSecs
+** Converts ticks to fractional seconds.  In other words,
+** this returns the exact conversion from ticks to
+** seconds.
+*/
+double TicksToFracSecs(unsigned long tickamount)
+{
+/*
+** Presumably the build defines exactly one of the timing
+** macros below; the first one found (in this order) is used.
+** Both operands are converted to double before dividing.
+*/
+#if defined(CLOCKWCT)
+/* Ticks are scaled by CLK_TCK */
+return((double)tickamount/(double)CLK_TCK);
+#elif defined(MACTIMEMGR)
+/* +++ MAC time manager version +++ */
+return((double)tickamount/(double)1000000);
+#elif defined(CLOCKWCPS)
+/* Everybody else: ticks are scaled by CLOCKS_PER_SEC */
+return((double)tickamount/(double)CLOCKS_PER_SEC);
+#elif defined(WIN31TIMER)
+/* NOTE(review): dividing by 1000 treats a tick as 1 ms -- confirm */
+return((double)tickamount/(double)1000);
+#endif
+}
+
diff --git a/sysspec.h b/sysspec.h
new file mode 100644
index 0000000..ba57a96
--- /dev/null
+++ b/sysspec.h
@@ -0,0 +1,168 @@
+/*
+** sysspec.h
+** Header file for sysspec.c
+** BYTEmark (tm)
+** BYTE's Native Mode Benchmarks
+** Rick Grehan, BYTE Magazine
+**
+** Creation:
+** Revision: 3/95
+**
+** DISCLAIMER
+** The source, executable, and documentation files that comprise
+** the BYTEmark benchmarks are made available on an "as is" basis.
+** This means that we at BYTE Magazine have made every reasonable
+** effort to verify that the there are no errors in the source and
+** executable code.  We cannot, however, guarantee that the programs
+** are error-free.  Consequently, McGraw-HIll and BYTE Magazine make
+** no claims in regard to the fitness of the source code, executable
+** code, and documentation of the BYTEmark.
+**  Furthermore, BYTE Magazine, McGraw-Hill, and all employees
+** of McGraw-Hill cannot be held responsible for any damages resulting
+** from the use of this code or the results obtained from using
+** this code.
+*/
+
+/*
+** Standard includes
+*/
+#include <stdlib.h>
+#include <stdio.h>
+#include <time.h>
+#include <string.h>
+
+#include "nmglobal.h"
+
+#if !defined(MAC) && !defined(OSX)
+#include <malloc.h>
+#endif
+
+
+/*
+** System-specific includes
+*/
+
+#ifdef DOS16MEM
+#include "dos.h"
+#endif
+
+/* #include "time.h"
+#include "io.h"
+#include "fcntl.h"
+#include "sys\stat.h" */
+/* Removed for MSVC++
+#include "alloc.h"
+*/
+
+/*
+** MAC Time Manager routines (from Code Warrior)
+*/
+#ifdef MACTIMEMGR
+#include <memory.h>
+#include <lowmem.h>
+#include <Types.h>
+#include <Timer.h>
+extern struct TMTask myTMTask;
+extern long MacHSTdelay,MacHSTohead;
+#endif
+
+/*
+** Windows 3.1 timer state (NOTE(review): defined here, not extern)
+*/
+#ifdef WIN31TIMER
+#include <windows.h>
+#include <toolhelp.h>
+TIMERINFO win31tinfo;   /* Passed to the _Call16 timer call; dwmsSinceStart is read back */
+HANDLE hThlp;           /* Not referenced by the stopwatch routines */
+FARPROC lpfn;           /* Routine the stopwatch code invokes through _Call16 */
+#endif
+
+/**************
+** EXTERNALS **
+**************/
+extern ulong mem_array[2][MEM_ARRAY_SIZE];
+extern int mem_array_ents;
+extern int global_align;
+
+/****************************
+**   FUNCTION PROTOTYPES   **
+****************************/
+
+farvoid *AllocateMemory(unsigned long nbytes,
+                int *errorcode);
+
+void FreeMemory(farvoid *mempointer,
+                int *errorcode);
+
+void MoveMemory( farvoid *destination,
+                farvoid *source,
+                unsigned long nbytes);
+/* DOS16MEM builds only: memmove equivalent for far-pointer arrays */
+#ifdef DOS16MEM
+void FarDOSmemmove(farvoid *destination,
+                farvoid *source,
+                unsigned long nbytes);
+#endif
+/* Mark the memory array as empty (entry count = 0) */
+void InitMemArray(void);
+/* Append a true/adjusted address pair; 0 if ok, -1 if no room */
+int AddMemArray(ulong true_addr, ulong adj_addr);
+/* Remove the pair matching adj_addr; 0 if ok, -1 if not found */
+int RemoveMemArray(ulong adj_addr,ulong *true_addr);
+/* Print the error context string and the numeric error code */
+void ReportError(char *context, int errorcode);
+/* Exit the program with status 1 */
+void ErrorExit();
+/* Create (or truncate to length 0) a file without leaving it open */
+void CreateFile(char *filename,
+                int *errorcode);
+/* File routines taking integer file handles (DOS16 builds) */
+#ifdef DOS16
+int bmOpenFile(char *fname,
+                int *errorcode);
+
+void CloseFile(int fhandle,
+                int *errorcode);
+
+void readfile(int fhandle,
+                unsigned long offset,
+                unsigned long nbytes,
+                void *buffer,
+                int *errorcode);
+
+void writefile(int fhandle,
+                unsigned long offset,
+                unsigned long nbytes,
+                void *buffer,
+                int *errorcode);
+#endif
+/* File routines taking stdio FILE pointers (LINUX builds) */
+#ifdef LINUX
+FILE *bmOpenFile(char *fname,
+                int *errorcode);
+
+void CloseFile(FILE *fhandle,
+                int *errorcode);
+
+void readfile(FILE *fhandle,
+                unsigned long offset,
+                unsigned long nbytes,
+                void *buffer,
+                int *errorcode);
+
+void writefile(FILE *fhandle,
+                unsigned long offset,
+                unsigned long nbytes,
+                void *buffer,
+                int *errorcode);
+
+#endif
+/* Start a timing run; returns the starting tick value */
+unsigned long StartStopwatch();
+/* End a timing run begun at startticks; returns the elapsed ticks */
+unsigned long StopStopwatch(unsigned long startticks);
+/* Convert ticks to whole seconds (fraction discarded) */
+unsigned long TicksToSecs(unsigned long tickamount);
+/* Convert ticks to seconds, keeping the fraction */
+double TicksToFracSecs(unsigned long tickamount);
+
diff --git a/wordcat.h b/wordcat.h
new file mode 100644
index 0000000..9f18b42
--- /dev/null
+++ b/wordcat.h
@@ -0,0 +1,81 @@
+/*
+** wordcat.h
+** Word catalog
+** BYTEmark (tm)
+** BYTE's Native Mode Benchmarks
+** Rick Grehan, BYTE Magazine
+**
+** Creation:
+** Revision: 3/95
+**
+** DISCLAIMER
+** The source, executable, and documentation files that comprise
+** the BYTEmark benchmarks are made available on an "as is" basis.
+** This means that we at BYTE Magazine have made every reasonable
+** effort to verify that the there are no errors in the source and
+** executable code.  We cannot, however, guarantee that the programs
+** are error-free.  Consequently, McGraw-HIll and BYTE Magazine make
+** no claims in regard to the fitness of the source code, executable
+** code, and documentation of the BYTEmark.
+**  Furthermore, BYTE Magazine, McGraw-Hill, and all employees
+** of McGraw-Hill cannot be held responsible for any damages resulting
+** from the use of this code or the results obtained from using
+** this code.
+*/
+
+/*
+** Word catalog: WORDCATSIZE sample words ("that" is listed twice)
+*/
+#define WORDCATSIZE 50
+
+char *wordcatarray[WORDCATSIZE] =
+{	"Hello",
+	"He",
+	"Him",
+	"the",
+	"this",
+	"that",
+	"though",
+	"rough",
+	"cough",
+	"obviously",
+	"But",
+	"but",
+	"bye",
+	"begin",
+	"beginning",
+	"beginnings",
+	"of",
+	"our",
+	"ourselves",
+	"yourselves",
+	"to",
+	"together",
+	"togetherness",
+	"from",
+	"either",
+	"I",
+	"A",
+	"return",
+	"However",
+	"that",
+	"example",
+	"yet",
+	"quickly",
+	"all",
+	"if",
+	"were",
+	"includes",
+	"always",
+	"never",
+	"not",
+	"small",
+	"returns",
+	"set",
+	"basic",
+	"Entered",
+	"with",
+	"used",
+	"shown",
+	"you",
+	"know" };
-- 
cgit v1.2.3