33 files changed, 12872 insertions, 0 deletions
diff --git a/COM.DAT b/COM.DAT
new file mode 100644
index 0000000..8dee49c
--- /dev/null
+++ b/COM.DAT
@@ -0,0 +1,11 @@
+ALLSTATS=T
+DONUMSORT=T
+DOSTRINGSORT=T
+DOBITFIELD=T
+DOEMF=T
+DOFOUR=T
+DOASSIGN=T
+DOIDEA=T
+DOHUFF=T
+DONNET=T
+DOLU=T
diff --git a/Changes b/Changes
new file mode 100644
index 0000000..111d8bd
--- /dev/null
+++ b/Changes
@@ -0,0 +1,42 @@
+This is about BYTE's beta version of the native-algorithm benchmark
+
+December 16, 1996:
+
+The source for DOS is obtainable at http://www.byte.com/bmark/bmark.htm
+Linux adaptation written by Uwe F. Mayer <mayer@tux.org>
+
+February 7, 1997:
+
+added -DSOLARIS flag to support solaris
+
+November 11, 1997:
+
+added index split suggested by Andrew D. Balsa
+re-baselined to a Linux machine
+added checking of CPU-type at run-time (cpuinfo.c)
+increased maximal number of loops in some tests
+removed -DSOLARIS flag, works now automatically (this also removed the
+  compiler warnings about redefined types and leads to a 20% faster
+  code for "Bitfield" if compiled with -funroll-loops!)
+
+November 13-19, 1997:
+
+changed debugging information
+changed random number generator to be always 32 bits even on 64 bit OSs
+added data resets to Bitfield and Huffman
+created this Changes file
+added debug code for Bitfield
+
+December 6, 1997:
+
+got rid of cpuinfo.c
+added a RESULTS file
+
+December 7, 1997:
+
+fixed the statistical analysis used to compute the confidence coefficient
+fixed a bug in the DEBUG routine of "Assignment"
+
+December 11, 1997:
+added some entries to RESULTS
+
diff --git a/Makefile b/Makefile
new file mode 100644
index 0000000..5045c77
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,153 @@
+# Makefile for nbench, December 11, 1997, Uwe F. Mayer <mayer@tux.org>
+# Updated February 18, 2003
+
+default: nbench
+.PHONY: default	# "default" names a command, not a file to be built
+##########################################################################
+#   If you are using gcc-2.7.2.3 or earlier:
+#   The optimizer of gcc has a bug and in general you should not specify
+#   -funroll-loops together with -O (or -O2, -O3, etc.)
+#   This bug is supposed to be fixed with release 2.8 of gcc.
+#
+#   This bug does NOT seem to have an effect on the correct compilation
+#   of this benchmark suite on my Linux box. However, it leads to
+#   the dreaded "internal compiler error" message on our alpha
+#   running DEC Unix 4.0b. The Linux-binary that was used to obtain
+#   the baseline results was nevertheless compiled with
+#   CFLAGS = -s -static -Wall -O3 -fomit-frame-pointer -funroll-loops
+#
+# You should leave -static in the CFLAGS so that your sysinfo can be
+# compiled into the executable.
+
+CC = gcc
+
+# generic options for gcc
+CFLAGS = -s -static -Wall -O3
+
+# if your gcc lets you do it, then try this one
+#CFLAGS = -s -static -Wall -O3 -fomit-frame-pointer -funroll-loops
+
+# for gcc on an older Pentium type processor you can try the following
+#CFLAGS = -s -static -O3 -fomit-frame-pointer -Wall -m486 \
+#	-fforce-addr -fforce-mem -falign-loops=2 -falign-functions=2 \
+#	-falign-jumps=2 -funroll-loops
+
+# for a newer gcc on a newer Pentium type processor you can try the following
+#CFLAGS = -s -static -O3 -fomit-frame-pointer -Wall -march=i686 \
+#	-fforce-addr -fforce-mem -falign-loops=2 -falign-functions=2 \
+#	-falign-jumps=2 -funroll-loops
+
+# for a newer gcc on an Athlon XP type processor you can try the following
+#CFLAGS = -s -static -O3 -fomit-frame-pointer -Wall -march=athlon-xp \
+#	-fforce-addr -fforce-mem -falign-loops=2 -falign-functions=2 \
+#	-falign-jumps=2 -funroll-loops
+
+# For debugging using gcc
+#CFLAGS = -g -O3 -Wall -DDEBUG
+
+##########################################################################
+# For Linux machines with more than one binary format.
+# The default binary format: ELF or a.out, depending on your system.
+MACHINE=
+# a.out code for linux on an elf machine
+#MACHINE= -bi486-linuxaout
+# elf code for linux on an a.out machine
+#MACHINE= -bi486-linuxelf
+# if you want a different compiler version and different binaries, for example
+#MACHINE= -V2.7.2 -bi486-linuxaout
+
+##########################################################################
+# Read the file README.nonlinux if you are not using Linux
+
+# for DEC Unix using cc you can try
+#CC = cc
+#CFLAGS = -O3
+#LINKFLAGS = -s -non_shared
+
+# for SunOS using cc
+#CC = cc
+#CFLAGS = -O3 -s
+
+# for DEC Ultrix using cc
+#CC = cc
+#CFLAGS = -O2
+#LINKFLAGS = -s
+
+# for a Mac with OS X and the Darwin environment
+#CC = cc
+#CFLAGS = -O3 -DOSX
+
+# For debugging using cc
+#CC = cc
+#CFLAGS = -g -DDEBUG
+
+##########################################################################
+# If your system does not understand the system command "uname -s -r"
+# then uncomment the following line
+
+# NO_UNAME= -DNO_UNAME
+
+##########################################################################
+# For any Unix flavor you need -DLINUX
+# You also need -DLINUX to get the new indices
+
+DEFINES= -DLINUX $(NO_UNAME)
+
+##########################################################################
+# For LINUX-like systems with gcc
+# Both generated files share one rule header. make treats this exactly like
+# two independent rules with the same recipe, so sysinfo.sh may still be
+# invoked once per target.
+sysinfo.c sysinfoc.c: Makefile
+	./sysinfo.sh $(CC) $(MACHINE) $(DEFINES) $(CFLAGS)
+
+##########################################################################
+# For non-LINUX systems
+# Edit the files sysinfo.c and sysinfoc.c to include your system information
+# and take sysinfo.c and sysinfoc.c out of the dependencies for nbench0.o
+
+hardware.o: hardware.c hardware.h Makefile
+	$(CC) $(MACHINE) $(DEFINES) \
+		$(CFLAGS) -c hardware.c
+
+nbench0.o: nbench0.c nbench0.h nmglobal.h pointer.h hardware.h \
+	   sysinfo.c sysinfoc.c Makefile
+	$(CC) $(MACHINE) $(DEFINES) \
+		$(CFLAGS) -c nbench0.c
+
+emfloat.o: emfloat.c emfloat.h nmglobal.h pointer.h Makefile
+	$(CC) $(MACHINE) $(DEFINES) \
+		$(CFLAGS) -c emfloat.c
+
+pointer.h: pointer.c Makefile	# empty if ./pointer prints 4, else defines LONG64
+	$(CC) $(MACHINE) $(DEFINES) $(CFLAGS)\
+		-o pointer pointer.c
+	rm -f pointer.h
+	if [ "4" = `./pointer` ] ; then touch pointer.h ;\
+	else echo "#define LONG64" >pointer.h ; fi
+
+misc.o: misc.c misc.h Makefile
+	$(CC) $(MACHINE) $(DEFINES) \
+		$(CFLAGS) -c misc.c
+
+nbench1.o: nbench1.c nbench1.h wordcat.h nmglobal.h pointer.h Makefile
+	$(CC) $(MACHINE) $(DEFINES) \
+		$(CFLAGS) -c nbench1.c
+
+sysspec.o: sysspec.c sysspec.h nmglobal.h pointer.h Makefile
+	$(CC) $(MACHINE) $(DEFINES) \
+		$(CFLAGS) -c sysspec.c
+
+nbench: emfloat.o misc.o nbench0.o nbench1.o sysspec.o hardware.o
+	$(CC) $(MACHINE) $(DEFINES) $(CFLAGS) $(LINKFLAGS) \
+		emfloat.o misc.o nbench0.o nbench1.o sysspec.o hardware.o \
+		-o $@ -lm
+
+##########################################################################
+.PHONY: clean mrproper	# housekeeping commands, never files to be built
+clean:
+	- /bin/rm -f *.o *~ \#* core a.out hello sysinfo.c sysinfoc.c \
+		 bug pointer pointer.h debugbit.dat
+
+mrproper: clean
+	- /bin/rm -f nbench
diff --git a/NNET.DAT b/NNET.DAT
new file mode 100644
index 0000000..5711730
--- /dev/null
+++ b/NNET.DAT
@@ -0,0 +1,210 @@
+5  7  8 
+26
+0  0  1  0  0
+0  1  0  1  0
+1  0  0  0  1
+1  0  0  0  1
+1  1  1  1  1
+1  0  0  0  1
+1  0  0  0  1
+0  1  0  0  0  0  0  1
+1  1  1  1  0
+1  0  0  0  1
+1  0  0  0  1
+1  1  1  1  0
+1  0  0  0  1
+1  0  0  0  1
+1  1  1  1  0
+0  1  0  0  0  0  1  0
+0  1  1  1  0
+1  0  0  0  1
+1  0  0  0  0
+1  0  0  0  0
+1  0  0  0  0
+1  0  0  0  1
+0  1  1  1  0
+0  1  0  0  0  0  1  1
+1  1  1  1  0
+1  0  0  0  1
+1  0  0  0  1
+1  0  0  0  1
+1  0  0  0  1
+1  0  0  0  1
+1  1  1  1  0
+0  1  0  0  0  1  0  0
+1  1  1  1  1
+1  0  0  0  0
+1  0  0  0  0
+1  1  1  0  0
+1  0  0  0  0
+1  0  0  0  0
+1  1  1  1  1
+0  1  0  0  0  1  0  1
+1  1  1  1  1
+1  0  0  0  0
+1  0  0  0  0
+1  1  1  0  0
+1  0  0  0  0
+1  0  0  0  0
+1  0  0  0  0
+0  1  0  0  0  1  1  0
+0  1  1  1  0
+1  0  0  0  1
+1  0  0  0  0
+1  0  0  0  0
+1  0  0  1  1
+1  0  0  0  1
+0  1  1  1  0
+0  1  0  0  0  1  1  1
+1  0  0  0  1
+1  0  0  0  1
+1  0  0  0  1
+1  1  1  1  1
+1  0  0  0  1
+1  0  0  0  1
+1  0  0  0  1
+0  1  0  0  1  0  0  0
+0  1  1  1  0
+0  0  1  0  0
+0  0  1  0  0
+0  0  1  0  0
+0  0  1  0  0
+0  0  1  0  0
+0  1  1  1  0
+0  1  0  0  1  0  0  1
+0  0  0  0  1
+0  0  0  0  1
+0  0  0  0  1
+0  0  0  0  1
+1  0  0  0  1
+1  0  0  0  1
+0  1  1  1  0
+0  1  0  0  1  0  1  0
+1  0  0  0  1
+1  0  0  1  0
+1  0  1  0  0
+1  1  0  0  0
+1  0  1  0  0
+1  0  0  1  0
+1  0  0  0  1
+0  1  0  0  1  0  1  1
+1  0  0  0  0
+1  0  0  0  0
+1  0  0  0  0
+1  0  0  0  0
+1  0  0  0  0
+1  0  0  0  0
+1  1  1  1  1
+0  1  0  0  1  1  0  0
+1  0  0  0  1
+1  1  0  1  1
+1  0  1  0  1
+1  0  1  0  1
+1  0  0  0  1
+1  0  0  0  1
+1  0  0  0  1
+0  1  0  0  1  1  0  1
+1  0  0  0  1
+1  1  0  0  1
+1  0  1  0  1
+1  0  1  0  1
+1  0  1  0  1
+1  0  0  1  1
+1  0  0  0  1
+0  1  0  0  1  1  1  0
+0  1  1  1  0
+1  0  0  0  1
+1  0  0  0  1
+1  0  0  0  1
+1  0  0  0  1
+1  0  0  0  1
+0  1  1  1  0
+0  1  0  0  1  1  1  1
+1  1  1  1  0
+1  0  0  0  1
+1  0  0  0  1
+1  1  1  1  0
+1  0  0  0  0
+1  0  0  0  0
+1  0  0  0  0
+0  1  0  1  0  0  0  0
+0  1  1  1  0
+1  0  0  0  1
+1  0  0  0  1
+1  0  0  0  1
+1  0  1  0  1
+1  0  0  1  1
+0  1  1  1  1
+0  1  0  1  0  0  0  1
+1  1  1  1  0  
+1  0  0  0  1
+1  0  0  0  1
+1  1  1  1  0
+1  0  1  0  0
+1  0  0  1  0
+1  0  0  0  1
+0  1  0  1  0  0  1  0
+0  1  1  1  1
+1  0  0  0  0
+1  0  0  0  0
+0  1  1  1  0
+0  0  0  0  1
+0  0  0  0  1
+1  1  1  1  0
+0  1  0  1  0  0  1  1
+1  1  1  1  1
+0  0  1  0  0
+0  0  1  0  0
+0  0  1  0  0
+0  0  1  0  0
+0  0  1  0  0
+0  0  1  0  0
+0  1  0  1  0  1  0  0
+1  0  0  0  1
+1  0  0  0  1
+1  0  0  0  1
+1  0  0  0  1
+1  0  0  0  1
+1  0  0  0  1
+0  1  1  1  0
+0  1  0  1  0  1  0  1
+1  0  0  0  1
+1  0  0  0  1
+0  1  0  1  0
+0  1  0  1  0
+0  1  0  1  0
+0  1  0  1  0
+0  0  1  0  0
+0  1  0  1  0  1  1  0
+1  0  0  0  1
+1  0  0  0  1
+1  0  0  0  1
+1  0  1  0  1
+1  0  1  0  1
+1  0  1  0  1
+0  1  0  1  0
+0  1  0  1  0  1  1  1
+1  0  0  0  1
+0  1  0  1  0
+0  1  0  1  0
+0  0  1  0  0
+0  1  0  1  0
+0  1  0  1  0
+1  0  0  0  1
+0  1  0  1  1  0  0  0
+1  0  0  0  1
+0  1  0  1  0
+0  1  0  1  0
+0  0  1  0  0
+0  0  1  0  0
+0  0  1  0  0
+0  0  1  0  0
+0  1  0  1  1  0  0  1
+1  1  1  1  1
+0  0  0  1  0
+0  0  0  1  0
+0  0  1  0  0
+0  1  0  0  0
+0  1  0  0  0
+1  1  1  1  1
+0  1  0  1  1  0  1  0
diff --git a/README b/README
new file mode 100644
index 0000000..6863d46
--- /dev/null
+++ b/README
@@ -0,0 +1,66 @@
+February 18, 2003
+-----------------
+Bug-fix release.
+
+December 9, 1997
+----------------
+This release is based on beta release 2 of BYTE Magazine's BYTEmark
+benchmark program (previously known as BYTE's Native Mode
+Benchmarks). This document covers the Native Mode (a.k.a. Algorithm
+Level) tests; benchmarks designed to expose the capabilities of a
+system's CPU, FPU, and memory system.
+
+Running a "make" will create the binary if all goes well. It is called
+"nbench" and performs a suite of 10 tests and compares the results to
+a Dell Pentium 90 with 16 MB RAM and 256 KB L2 cache running MSDOS and
+compiling with the Watcom 10.0 C/C++ compiler. If you define -DLINUX
+during compilation (the default) then you also get a comparison to an
+AMD K6/233 with 32 MB RAM and 512 KB L2-cache running Linux 2.0.32 and
+using a binary which was compiled with GNU gcc version 2.7.2.3 and GNU
+libc-5.4.38.
+
+For more verbose output specify -v as an argument.
+
+The primary web site is: http://www.tux.org/~mayer/linux/bmark.html
+
+The port to Linux/Unix was done by Uwe F. Mayer <mayer@tux.org>.
+
+The index-split was done by Andrew D. Balsa, and reflects the
+realization that memory management is important in CPU design. The
+original tests have been left alone, however, the tests NUMERIC SORT,
+FP EMULATION, IDEA, and HUFFMAN now constitute the integer-arithmetic
+focused benchmark index, while the tests STRING SORT, BITFIELD, and
+ASSIGNMENT make up the new memory index.
+
+The algorithms were not changed from the source which was obtained
+from the BYTE web site at http://www.byte.com/bmark/bmark.htm on
+December 14, 1996.  However, the source was modified to better work
+with 64-bit machines (in particular the random number generator was
+modified to always work with 32 bit, no matter what kind of hardware
+you run it on). Furthermore, for some of the algorithms additional
+resets of the data were added to increase the consistency across
+different hardware. Some extra debugging code was added, which has no
+impact on normal runs.
+
+In case there is uneven system load due to other processes while this
+benchmark suite executes, it might take longer to run than on an
+unloaded system. This is because the benchmark does some statistical
+analysis to make sure that the reported results are statistically
+significant, and an increased variation in individual runs requires
+more runs to achieve the required statistical confidence.
+
+This is a single-threaded benchmark and is not designed to measure the
+performance gain on multi-processor machines.
+
+For details and customization read bdoc.txt.
+
+THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
diff --git a/README.motorola b/README.motorola
new file mode 100644
index 0000000..223001b
--- /dev/null
+++ b/README.motorola
@@ -0,0 +1,29 @@
+The information in this file is old and no longer valid. It seems that
+the GNU C library has caught up with Motorola's libmoto, and now
+performance is just as good (or better) without libmoto. I'll include
+the old notice for historical reasons only. Currently libmoto is
+available at ftp://ftp.mcg.mot.com/pub/SPS/PowerPC/software/mklinux/libmoto/,
+but this is subject to change and not under my control.
+
+February 18, 2003
+Uwe F. Mayer
+
+---------------------------------------------------------------------------
+
+If you have a Motorola CPU or equivalent:
+
+When linked with the 'libmoto' (floating point library from Motorola)
+the results you obtain are much better. (FPU index of 0.896 versus
+1.910 in one example.)
+
+The Motorola math library is currently available at:
+http://www.mot.com/SPS/PowerPC/support/rsw_customer_support/mklinux/libmoto/libmoto_reg_mkdev.html
+
+If you have a Motorola CPU and you submit a result then please let me
+know whether you used libmoto or not. Please read the file README.submit.
+
+I do not have a Motorola CPU, and I can't help you with installing the
+library either.
+
+December 3, 1997
+Uwe F. Mayer
+\ No newline at end of file
diff --git a/README.nonlinux b/README.nonlinux
new file mode 100644
index 0000000..641fe09
--- /dev/null
+++ b/README.nonlinux
@@ -0,0 +1,50 @@
+December 3, 1997
+================
+
+DEC Unix 4.0 or DEC OSF1 and gcc
+--------------------------------
+Compiles cleanly if you don't use -funroll-loops with gcc-2.7.2.3 or earlier
+
+DEC UNIX 4.0 or DEC OSF1 and cc
+-------------------------------
+CC = cc
+CFLAGS = -O3
+LINKFLAGS = -s -non_shared
+
+Compiles cleanly.
+
+SunOS and gcc
+-------------
+Compiles cleanly
+
+SunOS and cc
+------------
+CC = cc
+CFLAGS = -O3 -s
+
+Compiles with one warning during compilation of nbench1.c
+
+"/usr/ucbinclude/strings.h", line 48: warning: identifier redeclared: strlen
+        current : function() returning int
+        previous: function() returning uint : "/usr/include/string.h", line 98
+
+HP-UX and gcc
+-------------
+Compiles with one warning during compilation of sysspec.c
+
+In file included from /usr/local/lib/gcc-lib/hppa1.1-hp-hpux9.05/2.7.2.1/include/malloc.h:9,
+                 from sysspec.h:37,
+                 from sysspec.c:37:
+/usr/local/lib/gcc-lib/hppa1.1-hp-hpux9.05/2.7.2.1/include/sys/types.h:117: warning: empty declaration
+/usr/local/lib/gcc-lib/hppa1.1-hp-hpux9.05/2.7.2.1/include/sys/types.h:118: warning: empty declaration
+
+DEC Ultrix and cc
+-----------------
+CC = cc
+CFLAGS = -O2
+LINKFLAGS = -s
+
+Compiles with a warning about the correct usage of cut when running sysinfo.sh
+cut: Usage: cut [-s] [-d<char>] {-c<list> | -f<list>} file ...
+cut: Usage: cut [-s] [-d<char>] {-c<list> | -f<list>} file ...
+
diff --git a/README.submit b/README.submit
new file mode 100644
index 0000000..0dd3138
--- /dev/null
+++ b/README.submit
@@ -0,0 +1,33 @@
+I plan on posting a digest of results in case people mail me any.
+The URL will be linked to
+
+http://www.tux.org/~mayer/linux/bmark.html
+
+If you want to submit, then run the benchmark (use your own
+compilation, I don't care with what flags or compiler, but I want all
+numbers from a single benchmark run) and fill in the template as given
+in the example below:
+
+CPU                             : AMD 5x86P75 (486DX4/133MHz)
+L2 CACHE                        : 256 KB
+OS                              : Linux 2.0.32
+C COMPILER                      : gcc 2.7.2.3
+LIBC                            : libc-5.4.38
+Pentium 90 INTEGER INDEX        : 1.051
+Pentium 90 FLOATING-POINT INDEX : 0.450
+AMD K6/233 MEMORY INDEX         : 0.337
+AMD K6/233 INTEGER INDEX        : 0.238
+AMD K6/233 FLOATING-POINT INDEX : 0.230
+
+Any other format is fine as long as it contains the same info (write
+"unknown" or "?" for data you don't know). For example, you could just
+cut the summary from the output of nbench and mail it together with
+cache, CPU, and OS info in case it is not already present. Please do
+not email me the complete output of nbench, or any other unnecessarily
+long email, as this just eats up my hard-disk space.  However, long
+collections of results are of course welcome.
+
+Send your result to mayer@tux.org
+
+Uwe F. Mayer
+February 18, 2003
diff --git a/RESULTS b/RESULTS
new file mode 100644
index 0000000..ccf2336
--- /dev/null
+++ b/RESULTS
@@ -0,0 +1,138 @@
+December 7, 1997
+
+This file contains a few results so you may compare your machine.
+If you read this much after December 1997 then the results herein
+are probably obsolete.
+
+For a longer and hopefully more up-to-date list of results consult
+http://www.tux.org/~mayer/linux/bmark.html
+This web site, however, currently lists the old Pentium 90 indices!
+
+The indices below are with respect to the new AMD K6/233 baseline.
+
+OS                  : DEC Ultrix 4.4
+C compiler          : cc
+libc                : unknown version
+CPU                 : mips R6000
+L2 cache            : ?
+MEMORY INDEX        : 0.029
+INTEGER INDEX       : 0.046
+FLOATING-POINT INDEX: 0.077
+
+OS                  : LINUX 2.0.31
+C compiler          : gcc version 2.7.2.3
+libc                : libc.so.5.4.38
+CPU                 : Intel 486DX2/66 MHz
+L2 cache            : 256 KB
+MEMORY INDEX        : 0.098
+INTEGER INDEX       : 0.141
+FLOATING-POINT INDEX: 0.116
+
+OS                  : LINUX 2.0.32
+C compiler          : gcc version 2.7.2.3
+libc                : libc.so.5.4.38
+CPU                 : AMD 5x86P75 (486DX4/133MHz)
+L2 cache            : 256 KB
+MEMORY INDEX        : 0.234
+INTEGER INDEX       : 0.286
+FLOATING-POINT INDEX: 0.249
+
+OS                  : OSF1 V3.2 214
+C compiler          : cc
+libc                : unknown version
+CPU                 : 21064 alpha (DEC 3000 MODEL 300, year 1993)
+L2 cache            : 256 KB
+MEMORY INDEX        : 0.358
+INTEGER INDEX       : 0.362
+FLOATING-POINT INDEX: 0.656
+
+OS                  : HP-UX A.09.05
+C compiler          : gcc version 2.7.2.1
+libc                : unknown version
+CPU                 : 9000/715
+L2 cache            : ?
+MEMORY INDEX        : 0.208
+INTEGER INDEX       : 0.369
+FLOATING-POINT INDEX: 0.516
+
+OS                  : LINUX 2.0.31
+C compiler          : gcc version 2.7.2.3
+libc                : libc.so.5.4.38
+CPU                 : Intel Pentium 133 MHz
+L2 cache            : 512 KB
+MEMORY INDEX        : 0.383
+INTEGER INDEX       : 0.444
+FLOATING-POINT INDEX: 0.632
+
+OS                  : SunOS 5.5.1
+C compiler          : cc
+libc                : unknown version
+CPU                 : SUN-Ultra-Enterprise-2 sparc
+L2 cache            : ?
+MEMORY INDEX        : 0.417
+INTEGER INDEX       : 0.546
+FLOATING-POINT INDEX: 1.028
+
+OS                  : LINUX 2.0.29
+C compiler          : gcc version 2.7.2.3
+libc                : libc.so.5.4.38
+CPU                 : Cyrix 6x86L PR200+ (at 2 x 75 = 150 MHz)
+L2 cache            : 256 KB
+MEMORY INDEX        : 0.666
+INTEGER INDEX       : 0.599
+FLOATING-POINT INDEX: 0.508
+
+OS                  : LINUX 2.0.31
+C compiler          : gcc version 2.7.2.3
+libc                : libc.so.5.4.38
+CPU                 : Intel Pentium MMX 200 MHz
+L2 cache            : 512 KB
+MEMORY INDEX        : 0.601
+INTEGER INDEX       : 0.636
+FLOATING-POINT INDEX: 0.970
+
+OS                  : LINUX 2.0.31
+C compiler          : gcc version 2.7.2.3
+libc                : libc.so.5.4.38
+CPU                 : Intel 686 PentiumPro 200 MHz
+L2 cache            : 256 KB (internal)
+MEMORY INDEX        : 0.699
+INTEGER INDEX       : 0.732
+FLOATING-POINT INDEX: 1.140
+
+OS                  : LINUX 2.0.29
+C compiler          : gcc version 2.7.2.3
+libc                : libc.so.5.4.38
+CPU                 : Cyrix 6x86MX PR233 (at 2.5 x 75 = 187.5 MHz)
+L2 cache            : 512 KB
+MEMORY INDEX        : 0.861
+INTEGER INDEX       : 0.773
+FLOATING-POINT INDEX: 0.730
+
+OS                  : LINUX 2.0.32
+C compiler          : gcc version 2.7.2.3
+libc                : libc.so.5.4.38
+CPU                 : AMD K6/233
+L2 cache            : 512 KB
+MEMORY INDEX        : 1.000
+INTEGER INDEX       : 1.000
+FLOATING-POINT INDEX: 1.000
+
+OS                  : LINUX 2.0.31
+C compiler          : gcc version 2.7.2.3
+libc                : libc.so.5.4.38
+CPU                 : Intel 686 Pentium II 300 MHz
+L2 cache            : 512 KB
+MEMORY INDEX        : 1.255
+INTEGER INDEX       : 1.093
+FLOATING-POINT INDEX: 1.842
+
+OS                  : DEC UNIX 4.0b 564
+C compiler          : cc
+libc                : unknown version
+CPU                 : 21164 Alpha 300 MHz (dual CPU)
+L2 cache            : 96 KB
+L3 cache            : 4 MB per CPU
+MEMORY INDEX        : 0.973
+INTEGER INDEX       : 1.124
+FLOATING-POINT INDEX: 3.237
diff --git a/bdoc.txt b/bdoc.txt
new file mode 100644
index 0000000..e557bb0
--- /dev/null
+++ b/bdoc.txt
@@ -0,0 +1,2109 @@
+http://www.byte.com/bmark/bmark.htm
+----------------------------------------------------------------------------
+
+BYTEmark
+
+----------------------------------------------------------------------------
+
+This is release 2 of BYTE Magazine's BYTEmark benchmark program (previously
+known as BYTE's Native Mode Benchmarks). This document covers the Native
+Mode (a.k.a. Algorithm Level) tests; benchmarks designed to expose the
+capabilities of a system's CPU, FPU, and memory system. Another group of
+benchmarks within the BYTEmark suite includes the Application Simulation
+Benchmarks. They are detailed in a separate document. [NOTE: The
+documentation for the Application simulation benchmarks should appear before
+the end of March, 95. -- RG].
+
+The Tests
+
+The Native Mode portion of the BYTEmark consists of a number of well-known
+algorithms; some BYTE has used before in earlier versions of the benchmark,
+others are new. The complete suite consists of 10 tests:
+
+Numeric sort - Sorts an array of 32-bit integers.
+
+String sort - Sorts an array of strings of arbitrary length.
+
+Bitfield - Executes a variety of bit manipulation functions.
+
+Emulated floating-point - A small software floating-point package.
+
+Fourier coefficients - A numerical analysis routine for calculating series
+approximations of waveforms.
+
+Assignment algorithm - A well-known task allocation algorithm.
+
+Huffman compression - A well-known text and graphics compression algorithm.
+
+IDEA encryption - A relatively new block cipher algorithm.
+
+Neural Net - A small but functional back-propagation network simulator.
+
+LU Decomposition - A robust algorithm for solving linear equations.
+
+A more complete description of each test can be found in later sections of
+this document.
+
+BYTE built the BYTEmark with the multiplatform world foremost in mind. There
+were, of course, other considerations that we kept high on the list:
+
+Real-world algorithms. The algorithms should actually do something. Previous
+benchmarks often moved gobs of bytes from one point to another, added or
+subtracted piles and piles of numbers, or (in some cases) actually executed
+NOP instructions. We should not belittle those tests of yesterday, they had
+their place. However, we think it better that tests be based on activities
+that are more complex in nature.
+
+Easy to port. All the benchmarks are written in "vanilla" ANSI C. This
+provides us with the best chance of moving them quickly and accurately to
+new processors and operating systems as they appear. It also simplifies
+maintenance.
+
+This means that as new 64-bit (and, perhaps, 128-bit) processors appear, the
+benchmarks can test them as soon as a compiler is available.
+
+Comprehensive. The algorithms were derived from a variety of sources. Some
+are routines that BYTE had been using for some time. Others are routines
+derived from well-known texts in the computer science world. Furthermore,
+the algorithms differ in structure. Some simply "walk" sequentially through
+one-dimensional arrays. Others build and manipulate two-dimensional arrays.
+Finally, some benchmarks are "integer" tests, while others exercise the
+floating-point coprocessor (if one is available).
+
+Scalable. We wanted these benchmarks to be useful across as wide a variety
+of systems as possible. We also wanted to give them a lifetime beyond the
+next wave of new processors.
+
+To that end, we incorporated "dynamic workload adjustment." A complete
+description of this appears in a later section. In a nutshell, this allows
+the tests to "expand or contract" depending on the capabilities of the
+system under test, all the while providing consistent results so that fair
+and accurate comparisons are possible.
+
+Honesty In Advertising
+
+We'd be lying if we said that the BYTEmark was all the benchmarking that
+anyone would ever need to run on a system. It would be equally inaccurate to
+suggest that the tests are completely free of inadequacies. There are many
+things the tests do not do, there are shortcomings, and there are problems.
+
+BYTE will continue to improve the BYTEmark. The source code is freely
+available, and we encourage vendors and users to examine the routines and
+provide us with their feedback. In this way, we assure fairness,
+comprehensiveness, and accuracy.
+
+Still, as we mentioned, there are some shortcomings. Here are those we
+consider the most significant. Keep them in mind as you examine the results
+of the benchmarks now and in the future.
+
+At the mercy of C compilers. Being written in ANSI C, the benchmark program
+is highly portable. This is a reflection of the "world we live in." If this
+were a one-processor world, we might stand a chance at hand-crafting a
+benchmark in assembly language. (At one time, that's exactly what BYTE did.)
+Not today, no way.
+
+The upshot is that the benchmarks must be compiled. For broadest coverage,
+we selected ANSI C. And when they're compiled, the resulting executable's
+performance can be highly dependent on the capabilities of the C compiler.
+Today's benchmark results can be blown out of the water tomorrow if someone
+new enters the scene with an optimizing strategy that outperforms existing
+competition.
+
+This concern is not easily waved off. It will require you to keep careful
+track of compiler version and optimization switches. As BYTE builds its
+database of benchmark results, version number and switch setting will become
+an integral part of that data. This will be true for published information
+as well, so that you can make comparisons fairly and accurately. BYTE will
+control the distribution of test results so that all relevant compiler
+information is attached to the data.
+
+As a faint justification -- for those who think this situation results in
+"polluted" tests -- we should point out that we are in the same boat as all
+the other developers (at least, all those using C compilers -- and that's
+quite a sizeable group). If the only C compilers for a given system happen
+to be poor ones, everyone suffers. It's a fact that a given platform's
+ultimate potential depends as much on the development software available as
+on the technical achievements of the hardware design.
+
+It's just CPU and FPU. It's very tempting to try to capture the performance
+of a machine in a single number. That has never been possible -- though it's
+been tried a lot -- and the gap between that ideal and reality will forever
+widen.
+
+These benchmarks are meant to expose the theoretical upper limit of the CPU,
+FPU, and memory architecture of a system. They cannot measure video, disk,
+or network throughput (those are the domains of a different set of
+benchmarks). You should, therefore, use the results of these tests as part,
+not all, of any evaluation of a system.
+
+Single threaded. Currently, each benchmark test uses only a single execution
+thread. It's unlikely that you'll find any modern operating system that does
+not have some multitasking component. How a system "scales" as more tasks
+are run simultaneously is an effect that the current benchmarks cannot
+explore.
+
+BYTE is working on a future version of the tests that will solve this
+problem.
+
+The tests are synthetic. This quite reasonable argument is based on the fact
+that people don't run benchmarks for a living, they run applications.
+Consequently, the only true measure of a system is how well it performs
+whatever applications you will be running. This, in fact, is the philosophy
+behind the BAPCo benchmarks.
+
+This is not a point with which we would disagree. BYTE regularly makes use
+of a variety of application benchmarks. None of this suggests, however, that
+the BYTEmark benchmarks serve no purpose.
+
+BYTEmark's results should be used as predictors. They can be moved to a new
+platform long before native applications will be ported. The BYTEmark
+benchmarks will therefore provide an early look at the potential of the
+machine. Additionally, the BYTEmark permits you to "home in" on an aspect of
+the overall architecture. How well does the system perform when executing
+floating-point computations? Does its memory architecture help or hinder the
+management of memory buffers that may fall on arbitrary address boundaries?
+How does the cache work with a program whose memory access favors moving
+randomly through memory as opposed to moving sequentially through memory?
+
+The answers to these questions can give you a good idea of how well a system
+would support a particular class of applications. Only a synthetic benchmark
+can give the narrow view necessary to find the answers.
+
+Dynamic Workloads
+
+Our long history of benchmarking has taught us one thing above all others:
+Tomorrow's system will go faster than today's by an amount exceeding your
+wildest guess -- and then some. Dealing with this can become an unending
+race.
+
+It goes like this: You design a benchmark algorithm, you specify its
+parameters (how big the array is, how many loops, etc.), you run it on
+today's latest super-microcomputer, collect your data, and go home. A new
+machine arrives the next day, you run your benchmark, and discover that the
+test executes so quickly that the resolution of the clock routine you're
+using can't keep up with it (i.e., the test is over and done before the
+system clock even has a chance to tick).
+
+If you modify your routine, the figures you collected yesterday are no good.
+If you create a better clock routine by sneaking down into the system
+hardware, you can kiss portability goodbye.
+
+The BYTEmark benchmarks solve this problem by a process we'll refer to as
+"dynamic workload adjustment." In principle, it simply means that if the
+test runs so fast that the system clock can't time it, the benchmark
+increases the test workload -- and keeps increasing it -- until enough time
+is consumed to gather reliable test results.
+
+Here's an example.
+
+The BYTEmark benchmarks perform timing using a "stopwatch" paradigm. The
+routine StartStopwatch() begins timing; StopStopwatch() ends timing and
+reports the elapsed time in clock ticks. Now, "clock ticks" is a value that
+varies from system to system. We'll presume that our test system provides
+1000 clock ticks per second. (We'll also presume that the system actually
+updates its clock 1000 times per second. Surprisingly, some systems don't do
+that. One we know of will tell you that the clock provides 100 ticks per
+second, but updates the clock in 5- or 6-tick increments. The resolution is
+no better than somewhere around 1/18th of a second.) Here, when we say
+"system" we mean not only the computer system, but the environment provided
+by the C compiler. Interestingly, different C compilers for the same system
+will report different clock ticks per second.
+
+Built into the benchmarks is a global variable called GLOBALMINTICKS. This
+variable is the minimum number of clock ticks that the benchmark will allow
+StopStopwatch() to report.
+
+Suppose you run the Numeric Sort benchmark. The benchmark program will
+construct an array filled with random numbers, call StartStopwatch(), sort
+the array, and call StopStopwatch(). If the time reported in StopStopwatch()
+is less than GLOBALMINTICKS, then the benchmark will build two arrays, and
+try again. If sorting two arrays took less time than GLOBALMINTICKS, the
+process repeats with more arrays.
+
+This goes on until the benchmark makes enough work so that an interval
+between StartStopwatch() and StopStopwatch() exceeds GLOBALMINTICKS. Once
+that happens, the test is actually run, and scores are calculated.
+
+Notice that the benchmark didn't make bigger arrays, it made more arrays.
+That's because the time taken by the sort test does not increase linearly as
+the array grows; it increases in proportion to N*log(N) (where N is the size
+of the array).
+
+This principle is applied to all the benchmark tests. A machine with a less
+accurate clock may be forced to sort more arrays at a time, but the results
+are given in arrays per second. In this way fast machines, slow machines,
+machines with accurate clocks, machines with less accurate clocks, can all
+be tested with the same code.
+
+Confidence Intervals
+
+Another built-in feature of the BYTEmark is a set of statistical-analysis
+routines. Running benchmarks is one thing; the question arises as to how
+many times should a test be run until you know you have a good sampling.
+Also, can you determine whether the test is stable (i.e., do results vary
+widely from one execution of the benchmark to the next)?
+
+The BYTEmark keeps score as follows: Each test (a test being a numeric
+sort, a string sort, etc.) is run five times. These five scores are
+averaged, the standard deviation is determined, and a 95% confidence
+half-interval for the mean is calculated (using Student's
+t-distribution). This tells us that the true average lies -- with a 95%
+probability -- within plus or minus the confidence half-interval of
+the calculated average. If this half-interval is within 5% of the
+calculated average, the benchmarking stops. Otherwise, a new test is
+run and the calculations are repeated with all of the runs done so
+far, including the new one. The benchmark proceeds this way up to a
+total of 30 runs. If the length of the half-interval is still bigger
+than 5% of the calculated average then a warning is issued that the
+results might not be statistically certain before the average is
+displayed.
+
+** Fixed a statistical bug here. Uwe F. Mayer
+
+The upshot is that, for each benchmark test, the true average is -- with a
+95% level of confidence -- within 5% of the average reported. Here, the
+"true average" is the average we would get were we able to run the tests
+over and over again an infinite number of times.
+
+This specification ensures that the calculation of results is controlled;
+that someone running the tests in California will use the same technique for
+determining benchmark results as someone running the tests in New York.
+
+In case there is uneven system load due to other processes while this
+benchmark suite executes, it might take longer to run the benchmark suite
+as compared to a run on an unloaded system. This is because the benchmark does
+some statistical analysis to make sure that the reported results are
+statistically significant (as explained above), and a high variation in
+individual runs requires more runs to achieve the required statistical
+confidence.
+
+*** added the last paragraph, Uwe F. Mayer
+
+Interpreting Results
+
+Of course, running the benchmarks can present you with a boatload of data.
+It can get mystifying, and some of the more esoteric statistical information
+is valuable only to a limited audience. The big question is: What does it
+all mean?
+
+First, we should point out that the BYTEmark reports both "raw" and indexed
+scores for each test. The raw score for a particular test amounts to the
+"iterations per second" of that test. For example, the numeric sort test
+reports as its raw score the number of arrays it was able to sort per
+second.
+
+The indexed score is the raw score of the system under test divided by the
+raw score obtained on the baseline machine. As of this release, the
+baseline machine is a DELL 90 MHz Pentium XPS/90 with 16 MB of RAM and 256K
+of external processor cache. (The compiler used was the Watcom C/C++ 10.0
+compiler; optimizations set to "fastest possible code", 4-byte structure
+alignment, Pentium code generation with Pentium register-based calling. The
+operating system was MSDOS.) The indexed score serves to "normalize" the
+raw scores, reducing their dynamic range and making them easier to
+grasp. Simply put, if your machine has an index score of 2.0 on the numeric
+sort test, it performed that test twice as fast as this 90 MHz Pentium.
+
+If you run all the tests (as you'll see, it is possible to perform "custom
+runs", which execute only a subset of the tests) the BYTEmark will also
+produce two overall index figures: Integer index and Floating-point index.
+The Integer index is the geometric mean of those tests that involve only
+integer processing -- numeric sort, string sort, bitfield, emulated
+floating-point, assignment, Huffman, and IDEA -- while the Floating-point
+index is the geometric mean of those tests that require the floating-point
+coprocessor -- Fourier, neural net, and LU decomposition. You can use these
+scores to get a general feel for the performance of the machine under test
+as compared to the baseline 90 MHz Pentium.
+
+The Linux/Unix port has a second baseline machine: an AMD K6/233 with
+32 MB RAM and 512 KB L2-cache running Linux 2.0.32 and using GNU gcc
+version 2.7.2.3 and libc-5.4.38.  The integer index was split as suggested
+by Andrew D. Balsa <andrewbalsa@usa.net>, and reflects the realization that
+memory management is important in CPU design. The original tests have been
+left alone; however, the geometric mean of the tests NUMERIC SORT, FP
+EMULATION, IDEA, and HUFFMAN now constitutes the integer-arithmetic focused
+benchmark index, while the geometric mean of the tests STRING SORT,
+BITFIELD, and ASSIGNMENT makes up the new memory index. The floating point
+index has been left alone; it is still the geometric mean of FOURIER,
+NEURAL NET, and LU DECOMPOSITION.
+
+*** added the section on Linux, Uwe F. Mayer
+
+What follows is a list of the benchmarks and associated brief remarks that
+describe what the tests do: What they exercise; what a "good" result or a
+"bad" result means. Keep in mind that, in this expanding universe of faster
+processors, bigger caches, more elaborate memory architectures, "good" and
+"bad" are indeed relative terms. A good score on today's hot new processor
+will be a bad score on tomorrow's hot new processor.
+
+These remarks are based on empirical data and profiling that we have done to
+date. (NOTE: The profiling is limited to Intel and Motorola 68K on this
+release. As more data is gathered, we will be refining this section.
+3/14/95--RG)
+
+Benchmark                            Description
+
+Numeric sort                         Generic integer performance.  Should
+                                     exercise non-sequential performance
+                                     of cache (or memory if cache is less
+                                     than 8K).  Moves 32-bit longs at a
+                                     time, so 16-bit processors will be
+                                     at a disadvantage.
+
+
+
+String sort                          Tests memory-move performance.
+                                     Should exercise non-sequential
+                                     performance of cache, with added
+                                     burden that moves are byte-wide and
+                                     can occur on odd address boundaries.
+                                     May tax the performance of
+                                     cell-based processors that must
+                                     perform additional shift operations
+                                     to deal with bytes.
+
+
+
+Bitfield                             Exercises "bit twiddling"
+                                     performance.  Travels through memory
+                                     in a somewhat sequential fashion;
+                                     different from sorts in that data is
+                                     merely altered in place.  If
+                                     properly compiled, takes into
+                                     account 64-bit processors, which
+                                     should see a boost.
+
+
+
+Emulated F.P.                        Past experience has shown this test
+                                     to be a good measurement of overall
+                                     performance.
+
+
+
+Fourier                              Good measure of transcendental and
+                                     trigonometric performance of FPU.
+                                     Little array activity, so this test
+                                     should not be dependent on cache or
+                                     memory architecture.
+
+
+
+Assignment                           The test moves through large integer
+                                     arrays in both row-wise and
+                                     column-wise fashion.  Cache/memory
+                                     with good sequential performance
+                                     should see a boost (memory is
+                                     altered in place -- no moving as in
+                                     a sort operation).   Processing is
+                                     done in 32-bit chunks -- no
+                                     advantage given to 64-bit
+                                     processors.
+
+
+
+Huffman                              A combination of byte operations,
+                                     bit twiddling, and overall integer
+                                     manipulation.  Should be a good
+                                     general measurement.
+
+
+
+IDEA                                 Moves through data sequentially in
+                                     16-bit chunks.  Should provide a
+                                     good indication of raw speed.
+
+
+
+Neural Net                           Small-array floating-point test
+                                     heavily dependent on the exponential
+                                     function; less dependent on overall
+                                     FPU performance.  Small arrays, so
+                                     cache/memory architecture should not
+                                     come into play.
+
+
+
+LU decomposition                     A floating-point test that moves
+                                     through arrays in both row-wise and
+                                     column-wise fashion.  Exercises only
+                                     fundamental math operations (+, -,
+                                     *, /).
+
+The Command File
+
+Purpose
+
+The BYTEmark program allows you to override many of its default parameters
+using a command file. The command file also lets you request statistical
+information, as well as specify an output file to hold the test results for
+later use.
+
+You identify the command file using a command-line argument, e.g.,
+
+C:NBENCH -cCOMFILE.DAT
+
+tells the benchmark program to read from COMFILE.DAT in the current
+directory.
+
+The content of the command file is simply a series of parameter names and
+values, each on a single line. The parameters control internal variables
+that are either global in nature (i.e., they affect all tests in the
+program) or are specific to a given benchmark test.
+
+The parameters are listed in a reference guide that follows, arranged in the
+following groups:
+
+Global Parameters
+
+Numeric Sort
+
+String Sort
+
+Bitfield
+
+Emulated floating-point
+
+Fourier coefficients
+
+Assignment algorithm
+
+IDEA encryption
+
+Huffman compression
+
+Neural net
+
+LU decomposition
+
+As mentioned above, those items listed under "Global Parameters" affect all
+tests; the rest deal with specific benchmarks. There is no required ordering
+to parameters as they appear in the command file. You can specify them in
+any sequence you wish.
+
+You should be judicious in your use of a command file. Some parameters will
+override the "dynamic workload" adjustment that each test performs. Doing
+this completely bypasses the benchmark code that is designed to produce an
+accurate reading from your system clock. Other parameters will alter default
+settings, yielding test results that cannot be compared with published
+benchmark results.
+
+A Sample Command File
+
+Suppose you built a command file that contained the following:
+
+ALLSTATS=T
+
+CUSTOMRUN=T
+
+OUTFILE=D:\DATA.DAT
+
+DONUMSORT=T
+
+DOLU=T
+
+Here's what this file tells the benchmark program:
+
+ALLSTATS=T means that you've requested a "dump" of all the statistics the
+test gathers. This includes not only the standard deviations of tests run,
+it also produces test-specific information such as the number of arrays
+built, the array size, etc.
+
+CUSTOMRUN=T tells the system that this is a custom run. Only tests
+explicitly specified will be executed.
+
+OUTFILE=D:\DATA.DAT will write the output of the benchmark to the file
+DATA.DAT on the root of the D: drive. (If DATA.DAT already exists, output
+will be appended to the file.)
+
+DONUMSORT=T tells the system to run the numeric sort benchmark. (This was
+necessary on account of the CUSTOMRUN=T line, above.)
+
+DOLU=T tells the system to run the LU decomposition benchmark.
+
+Command File Parameters Reference
+
+(NOTE: Altering some global parameters can invalidate results for comparison
+purposes. Those parameters are indicated in the following section by a bold
+asterisk (*). If you alter any parameters so indicated, you may NOT publish
+the resulting data as BYTEmark scores.)
+
+Global Parameters
+
+GLOBALMINTICKS=<n>
+
+This overrides the default global_min_ticks value (defined in NBENCH1.H).
+The global_min_ticks value is defined as the minimum number of clock ticks
+per iteration of a particular benchmark. For example, if global_min_ticks is
+set to 100 and the numeric sort benchmark is run, each iteration MUST take
+at least 100 ticks, or the system will expand the work-per-iteration.
+
+MINSECONDS=<n>
+
+Sets the minimum number of seconds any particular test will run. This has
+the effect of controlling the number of repetitions done. Default: 5.
+
+ALLSTATS=<T|F>
+
+Set this flag to T for a "dump" of all statistics. The information displayed
+varies from test to test. Default: F.
+
+OUTFILE=<path>
+
+Specifies that output should go to the specified output file. Any test
+results and statistical data displayed on-screen will also be written to the
+file. If the file does not exist, it will be created; otherwise, new output
+will be appended to an existing file. This allows you to "capture" several
+runs into a single file for later review.
+
+Note: the path should not appear in quotes. For example, something like the
+following would work: OUTFILE=C:\BENCH\DUMP.DAT
+
+CUSTOMRUN=<T|F>
+
+Set this flag to T for a custom run. A "custom run" means that the program
+will run only the benchmark tests that you explicitly specify. So, use this
+flag to run a subset of the tests. Default: F.
+
+Numeric Sort
+
+DONUMSORT=<T|F>
+
+Indicates whether to do the numeric sort. Default is T, unless this is a
+custom run (CUSTOMRUN=T), in which case default is F.
+
+NUMNUMARRAYS=<n>
+
+Indicates the number of numeric arrays the system will build. Setting this
+value will override the program's "dynamic workload" adjustment for this
+test.*
+
+NUMARRAYSIZE=<n>
+
+Indicates the number of elements in each numeric array. Default is 8001
+entries. (NOTE: Altering this value will invalidate the test for comparison
+purposes. The performance of the numeric sort test is not related to the
+array size as a linear function; i.e., an array twice as big will not take
+twice as long. The relationship involves a logarithmic function.)*
+
+NUMMINSECONDS=<n>
+
+Overrides MINSECONDS for the numeric sort test.
+
+String Sort
+
+DOSTRINGSORT=<T|F>
+
+Indicates whether to do the string sort. Default is T, unless this is a
+custom run (CUSTOMRUN=T), in which case the default is F.
+
+STRARRAYSIZE=<n>
+
+Sets the size of the string array. Default is 8111. (NOTE: Altering this
+value will invalidate the test for comparison purposes. The performance of
+the string sort test is not related to the array size as a linear function;
+i.e., an array twice as big will not take twice as long. The relationship
+involves a logarithmic function.)*
+
+NUMSTRARRAYS=<n>
+
+Sets the number of string arrays that will be created to run the test.
+Setting this value will override the program's "dynamic workload" adjustment
+for this test.*
+
+STRMINSECONDS=<n>
+
+Overrides MINSECONDS for the string sort test.
+
+Bitfield
+
+DOBITFIELD=<T|F>
+
+Indicates whether to do the bitfield test. Default is T, unless this is a
+custom run (CUSTOMRUN=T), in which case the default is F.
+
+NUMBITOPS=<n>
+
+Sets the number of bitfield operations that will be performed. Setting this
+value will override the program's "dynamic workload" adjustment for this
+test.*
+
+BITFIELDSIZE=<n>
+
+Sets the number of 32-bit elements in the bitfield arrays. The default value
+is dependent on the size of a long as defined by the current compiler. For a
+typical compiler that defines a long to be 32 bits, the default is 32768.
+(NOTE: Altering this parameter will invalidate test results for comparison
+purposes.)*
+
+BITMINSECONDS=<n>
+
+Overrides MINSECONDS for the bitfield test.
+
+Emulated floating-point
+
+DOEMF=<T|F>
+
+Indicates whether to do the emulated floating-point test. Default is T,
+unless this is a custom run (CUSTOMRUN=T), in which case the default is F.
+
+EMFARRAYSIZE=<n>
+
+Sets the size (number of elements) of the emulated floating-point benchmark.
+Default is 3000. The test builds three arrays, each of equal size. This
+parameter sets the number of elements for EACH array. (NOTE: Altering this
+parameter will invalidate test results for comparison purposes.)*
+
+EMFLOOPS=<n>
+
+Sets the number of loops per iteration of the floating-point test. Setting
+this value will override the program's "dynamic workload" adjustment for
+this test.*
+
+EMFMINSECONDS=<n>
+
+Overrides MINSECONDS for the emulated floating-point test.
+
+Fourier coefficients
+
+DOFOUR=<T|F>
+
+Indicates whether to do the Fourier test. Default is T, unless this is a
+custom run (CUSTOMRUN=T), in which case the default is F.
+
+FOURASIZE=<n>
+
+Sets the size of the array for the Fourier test. This sets the number of
+coefficients the test will derive. NOTE: Specifying this value will override
+the system's "dynamic workload" adjustment for this test, and may make the
+results invalid for comparison purposes.*
+
+FOURMINSECONDS=<n>
+
+Overrides MINSECONDS for the Fourier test.
+
+Assignment Algorithm
+
+DOASSIGN=<T|F>
+
+Indicates whether to do the assignment algorithm test. Default is T, unless
+this is a custom run (CUSTOMRUN=T), in which case the default is F.
+
+ASSIGNARRAYS=<n>
+
+Indicates the number of arrays that will be built for the test. Specifying
+this value will override the system's "dynamic workload" adjustment for this
+test. (NOTE: The size of the arrays in the assignment algorithm is fixed at
+101 x 101. Altering the array size requires adjusting global constants and
+recompiling; to do so, however, would invalidate test results.)*
+
+ASSIGNMINSECONDS=<n>
+
+Overrides MINSECONDS for the assignment algorithm test.
+
+IDEA encryption
+
+DOIDEA=<T|F>
+
+Indicates whether to do the IDEA encryption test. Default is T, unless this
+is a custom run (CUSTOMRUN=T), in which case the default is F.
+
+IDEAARRAYSIZE=<n>
+
+Sets the size of the plain-text character array that will be encrypted by the
+test. Default is 4000. The benchmark actually builds 3 arrays: 1st
+plain-text, encrypted version, and 2nd plain-text. The 2nd plain-text array is
+the destination for the decryption process [part of the test]. All arrays
+are set to the same size. (NOTE: Specifying this value will invalidate test
+results for comparison purposes.)*
+
+IDEALOOPS=<n>
+
+Indicates the number of loops in the IDEA test. Specifying this value will
+override the system's "dynamic workload" adjustment for this test.*
+
+IDEAMINSECONDS=<n>
+
+Overrides MINSECONDS for the IDEA test.
+
+Huffman compression
+
+DOHUFF=<T|F>
+
+Indicates whether to do the Huffman test. Default is T, unless this is a
+custom run (CUSTOMRUN=T), in which case the default is F.
+
+HUFFARRAYSIZE=<n>
+
+Sets the size of the string buffer that will be compressed using the Huffman
+test. The default is 5000. (NOTE: Altering this value will invalidate test
+results for comparison purposes.)*
+
+HUFFLOOPS=<n>
+
+Sets the number of loops in the Huffman test. Specifying this value will
+override the system's "dynamic workload" adjustment for this test.*
+
+HUFFMINSECONDS=<n>
+
+Overrides MINSECONDS for the Huffman test.
+
+Neural net
+
+DONNET=<T|F>
+
+Indicates whether to do the Neural Net test. Default is T, unless this is a
+custom run (CUSTOMRUN=T), in which case the default is F.
+
+NNETLOOPS=<n>
+
+Sets the number of loops in the Neural Net test. NOTE: Altering this value
+overrides the benchmark's "dynamic workload" adjustment algorithm, and may
+invalidate the results for comparison purposes.*
+
+NNETMINSECONDS=<n>
+
+Overrides MINSECONDS for the Neural Net test.
+
+LU decomposition
+
+DOLU=<T|F>
+
+Indicates whether to do the LU decomposition test. Default is T, unless this
+is a custom run (CUSTOMRUN=T), in which case the default is F.
+
+LUNUMARRAYS=<n>
+
+Sets the number of arrays in each iteration of the LU decomposition test.
+Specifying this value will override the system's "dynamic workload"
+adjustment for this test.*
+
+LUMINSECONDS=<n>
+
+Overrides MINSECONDS for the LU decomposition test.
+
+Numeric Sort
+
+Description
+
+This benchmark is designed to explore how well the system sorts a numeric
+array. In this case, a numeric array is a one-dimensional collection of
+signed, 32-bit integers. The actual sorting is performed by a heapsort
+algorithm (see the text box following for a description of the heapsort
+algorithm).
+
+It's probably unnecessary to point out (but we'll do it anyway) that sorting
+is a fundamental operation in computer application software. You'll likely
+find sorting routines nestled deep inside a variety of applications;
+everything from database systems to operating-system kernels.
+
+The numeric sort benchmark reports the number of arrays it was able to sort
+per second. The array size is set by a global constant (it can be overridden
+by the command file -- see below).
+
+Analysis
+
+Optimized 486 code: Profiling of the numeric sort benchmark using Watcom's
+profiler (Watcom C/C++ 10.0) indicates that the algorithm spends most of its
+time in the numsift() function (specifically, about 90% of the benchmark's
+time takes place in numsift()). Within numsift(), two if statements dominate
+time spent:
+
+if(array[k]<array[k+1L]) and if(array[i]<array[k])
+
+Both statements involve indexes into arrays, so it's likely the processor is
+spending a lot of time resolving the array references. (Though both
+statements involve "less-than" comparisons, we doubt that much time is
+consumed in performing the signed compare operation.) Though the first
+statement involves array elements that are adjacent to one another, the
+second does not. In fact, the second statement will probably involve
+elements that are far apart from one another during early passes through the
+sifting process. We expect that systems whose caching system pre-fetches
+contiguous elements (often in "burst" line fills) will not have any great
+advantage over systems without pre-fetch mechanisms.
+
+Similar results were found when we profiled the numeric sort algorithm under
+the Borland C/C++ compiler.
+
+680x0 Code (Macintosh CodeWarrior): CodeWarrior's profiler is function
+based; consequently, it does not allow for line-by-line analysis as does the
+Watcom compiler's profiler.
+
+However, the CodeWarrior profiler does give us enough information to note
+that NumSift() only accounts for about 28% of the time consumed by the
+benchmark. The outer routine, NumHeapSort() accounts for around 71% of the
+time taken. It will require additional analysis to determine why the two
+compilers -- Watcom and CodeWarrior -- divide the workload so differently. (It
+may have something to do with compiler architecture, or the act of profiling
+the code may produce results that are significantly different than how the
+program runs under normal conditions, though that would lead one to wonder
+what use profilers would be.)
+
+Porting Considerations
+
+The numeric sort routine should represent a trivial porting exercise. It is
+not an overly large benchmark in terms of source code. Additionally, the
+only external routines it calls on are for allocating and releasing memory,
+and managing the stopwatch.
+
+The numeric sort benchmark depends on the following global definitions (note
+that these may be overridden by the command file):
+
+NUMNUMARRAYS -- Sets the upper limit on the number of arrays that the
+benchmark will attempt to build. The numeric sort benchmark creates work for
+itself by requiring the system to sort more and more arrays...not bigger and
+bigger arrays. (The latter case would skew results, because the sorting time
+for heapsort is N log2 N - e.g., doubling the array size does not double the
+sort time.) This constant sets the upper limit to the number of arrays the
+system will build before it signals an error. The default value is 100, and
+may be changed if your system exceeds this limit.
+
+NUMARRAYSIZE - Determines the size of each array built. It has been set to
+8111L and should not be tampered with. The command file entry
+NUMARRAYSIZE=<n> can be used to change this value, but results produced by
+doing this will make your results incompatible with other runs of the
+benchmark (since results will be skewed -- see preceding paragraph).
+
+To test for a correct execution of the numeric sort benchmark, #define the
+DEBUG symbol. This will enable code that verifies that arrays are properly
+sorted. You should run the benchmark program using a command file that has
+only the numeric sort test enabled. If there is an error, the program will
+display "SORT ERROR" (If this happens, it's possible that tons of "SORT
+ERROR" messages will be emitted, so it's best not to redirect output to a
+file), otherwise it will print "Numeric sort: OK" (also quite a few times).
+
+References
+
+Gonnet, G.H. 1984, Handbook of Algorithms and Data Structures (Reading, MA:
+Addison-Wesley).
+
+Knuth, Donald E. 1968, Fundamental Algorithms, vol 1 of The Art of Computer
+Programming (Reading, MA: Addison-Wesley).
+
+Press, William H., Flannery, Brian P., Teukolsky, Saul A., and Vetterling,
+William T. 1989, Numerical Recipes in Pascal (Cambridge: Cambridge
+University Press).
+
+Heapsort
+
+The heapsort algorithm is well-covered in a number of the popular
+computer-science textbooks. In fact, it gets a pat on the back in Numerical
+Recipes (Press et al.), where the authors write:
+
+Heapsort is our favorite sorting routine. It can be recommended
+wholeheartedly for a variety of sorting applications. It is a true
+"in-place" sort, requiring no auxiliary storage.
+
+Heapsort works by building the array into a kind of a queue called a heap.
+You can imagine this heap as being a form of in-memory binary tree. The
+topmost (root) element of the tree is the element that -- were the array
+sorted -- would be the largest element in the array. Sorting takes place by
+first constructing the heap, then pulling the root off the tree, promoting
+the next largest element to the root, pulling it off, and so on. (The
+promotion process is known as "sifting up.")
+
+Heapsort executes in N log2 N time even in its worst case. Unlike some other
+sorting algorithms, it does not benefit from a partially sorted array
+(though Gonnet does refer to a variation of heapsort, called "smoothsort,"
+which does -- see references).
+
+String Sort
+
+Description
+
+This benchmark is designed to gauge how well the system moves bytes around.
+By that we mean, how well the system can copy a string of bytes from one
+location to another; source and destination being aligned to arbitrary
+addresses. (This is unlike the numeric sort array, which moves bytes
+longword-at-a-time.) The strings themselves are built so as to be of random
+length, ranging from no fewer than 4 bytes and no greater than 80 bytes. The
+mixture of random lengths means that processors will be forced to deal with
+strings that begin and end on arbitrary address boundaries.
+
+The string sort benchmark uses the heapsort algorithm; this is the same
+algorithm as is used in the numeric sort benchmark (see the sidebar on the
+heapsort for a detailed description of the algorithm).
+
+Manipulation of the strings is actually handled by two arrays. One array
+holds the strings themselves; the other is a pointers array. Each member of
+the pointers array carries an offset that points into the string array, so
+that the ith pointer carries the offset to the ith string. This allows the
+benchmark to rapidly locate the position of the ith string. (The sorting
+algorithm requires exchanges of items that might be "distant" from one
+another in the array. It's critical that the routine be able to rapidly find
+a string based on its indexed position in the array.)
+
+The string sort benchmark reports the number of string arrays it was able to
+sort per second. The size of the array is set by a global constant.
+
+Analysis
+
+Optimized 486 code (Watcom C/C++ 10.0): Profiling of the string sort
+benchmark indicates that it spends most of its time in the C library routine
+memmove(). Within that routine, most of the execution is consumed by a pair
+of instructions: rep movsw and rep movsd. These are repeated string move --
+word width and repeated string move -- doubleword width, respectively.
+
+This is precisely where we want to see the time spent. It's interesting to
+note that the memmove() of the particular compiler/profiler tested (Watcom
+C/C++ 10.0) was "smart" enough to do most of the moving on word or
+doubleword boundaries. The string sort benchmark specifically sets arbitrary
+boundaries, so we'd expect to see lots of byte-wide moves. The "smart"
+memmove() is able to move bytes only when it has to, and does the remainder
+of the work via words and doublewords (which can move more bits at a time).
+
+680x0 Code (Macintosh CodeWarrior): Because CodeWarrior's profiler is
+function based, it is impossible to get an idea of how much time the test
+spends in library routines such as memmove(). Fortunately, as an artifact of
+the early version of the benchmark, the string sort algorithm makes use of
+the MoveMemory() routine in the sysspec.c file (system specific routines).
+This call, on anything other than a 16-bit DOS system, calls memmove()
+directly. Hence, we can get a good approximation of how much time is spent
+moving bytes.
+
+The answer is that nearly 78% of the benchmark's time is consumed by
+MoveMemory(), the rest being taken up by the other routines (the
+str_is_less() routine, which performs string comparisons, takes about 7% of
+the time). As above, we can guess that most of the benchmark's time is
+dependent on the performance of the library's memmove() routine.
+
+Porting Considerations
+
+As with the numeric sort routine, the string sort benchmark should be simple
+to port. Simpler, in fact. The string sort benchmark routine is not
+dependent on any typedef that may change from machine to machine (unless a
+char type is not 8 bits).
+
+The string sort benchmark depends on the following global definitions:
+
+NUMSTRARRAYS - Sets the upper limit on the number of arrays that the
+benchmark will attempt to build. The string sort benchmark creates work for
+itself by requiring the system to sort more and more arrays, not bigger and
+bigger arrays. (See section on Numeric Sort for an explanation.) This
+constant sets the upper limit to the number of arrays the system will build
+before it signals an error. The default value is 100, and may be changed if
+your system exceeds this limit.
+
+STRARRAYSIZE - Sets the default size of the string arrays built. We say
+"arrays" because, as with the numeric sort benchmark, the system adds work
+not by expanding the size of the array, but by adding more arrays. This
+value is set to 8111, and should not be modified, since results would not be
+comparable with other runs of the same benchmark on other machines.
+
+To test for a correct execution of the string sort benchmark, #define
+the DEBUG symbol. This will enable code that verifies the arrays are
+properly sorted. Set up a command file that runs only the string sort,
+and execute the benchmark program. If the routine is operating
+properly, the benchmark will print "String sort: OK"; this message is
+printed quite often. Otherwise, the program will display "SORT ERROR"
+for each pair of strings it finds out of order (which can be really
+often).
+
+References
+
+See the references for the Numeric Sort benchmark.
+
+Bitfield Operations
+
+Description
+
+The purpose of this benchmark is to explore how efficiently the system
+executes operations that deal with "twiddling bits." The test is set up to
+simulate a "bit map"; a data structure used to keep track of storage usage.
+(Don't confuse this meaning of "bitmap" with its use in describing a
+graphics data structure.)
+
+Systems often use bit maps to keep an inventory of memory blocks or (more
+frequently) disk blocks. In the case of a bit map that manages disk usage,
+an operating system will set aside a buffer in memory so that each bit in
+that buffer corresponds to a block on the disk drive. A 0 bit means that the
+corresponding block is free; a 1 bit means the block is in use. Whenever a
+file requests a new block of disk storage, the operating system searches the
+bit map for the first 0 bit, sets the bit (to indicate that the block is now
+spoken for), and returns the number of the corresponding disk block to the
+requesting file.
+
+These types of operations are precisely what this test simulates. A block of
+memory is allocated for the bit map. Another block of memory is
+allocated, and set up to hold a series of "bit map commands". Each bitmap
+command tells the simulation to do 1 of 3 things:
+
+1) Clear a series of consecutive bits,
+
+2) Set a series of consecutive bits, or
+
+3) Complement (1->0 and 0->1) a series of consecutive bits.
+
+The bit map command block is loaded with a set of random bit map commands
+(each command covers a random number of bits), and the simulation routine steps
+sequentially through the command block, grabbing a command and executing it.
+
+The bitfield benchmark reports the number of bits it was able to operate on
+per second. The size of the bit map is constant; the bitfield operations
+array is adjusted based on the capabilities of the processor. (See the
+section describing the auto-adjust feature of the benchmarks.)
+
+Analysis
+
+Optimized 486 code: Using the Watcom C/C++ 10.0 profiler, the Bitfield
+benchmark appears to spend all of its time in two routines: ToggleBitRun()
+(74% of the time) and DoBitFieldIteration() (24% of the time). We say
+"appears" because this is misleading, as we will explain.
+
+First, it is important to recall that the test performs one of three
+operations for each run of bits (see above). The routine ToggleBitRun()
+handles two of those three operations: setting a run of bits and clearing a
+run of bits. An if() statement inside ToggleBitRun() decides which of the
+two operations is performed. (Speed freaks will quite rightly point out that
+this slows the entire algorithm. ToggleBitRun() is called by a switch()
+statement which has already decided whether bits should be set or cleared;
+it's a waste of time to have ToggleBitRun() have to make that decision yet
+again.)
+
+DoBitFieldIteration() is the "outer" routine that calls ToggleBitRun().
+DoBitFieldIteration() also calls FlipBitRun(). This latter routine is the
+one that performs the third bitfield operation: complementing a run of bits.
+FlipBitRun() gets no "air time" at all (while DoBitFieldIteration() gets
+24% of the time) simply because the compiler's optimizer recognizes that
+FlipBitRun() is only called by DoBitFieldIteration(), and is called only
+once. Consequently, the optimizer moves FlipBitRun() "inline", i.e., into
+DoBitFieldIteration(). This removes an unnecessary call/return cycle (and is
+probably part of the reason why the FlipBitRun() code gets 24% of the
+algorithm's time, instead of something closer to 30% of its time.)
+
+Within the routines, those lines of code that actually do the shifting, the
+and operations, and the or operations, consume time evenly. This should make
+for a good test of a processor's "bit twiddling" capabilities.
+
+680x0 Code (Macintosh CodeWarrior): The CodeWarrior profiler is function
+based. Consequently, it is impossible to produce a profile of machine
+instruction execution time. We can, however, get a good picture of how the
+algorithm divides its time among the various functions.
+
+Unlike the 486 compiler, the CodeWarrior compiler did not appear to collapse
+the FlipBitRun() routine into the outer DoBitFieldIteration() routine. (We
+don't know this for certain, of course. It's possible that the compiler
+would have done this had we not been profiling.)
+
+In any case, the times spent in the two "core" routines of the bitfield test
+are shown below:
+
+FlipBitRun() - 18031.2 microsecs (called 509 times)
+
+ToggleBitRun() - 50770.6 microsecs (called 1031 times)
+
+In terms of total time, FlipBitRun() takes about 35% of the time (it gets
+about 33% of the calls). Remember, ToggleBitRun() is a single routine that
+is called both to set and clear bits. Hence, ToggleBitRun() is called twice
+as often as FlipBitRun().
+
+We can conclude that time spent setting bits to 1, setting bits to 0, and
+changing the state of bits, is about equal; the load is balanced close to
+what we'd expect it to be, based on the structure of the algorithm.
+
+Porting Considerations
+
+The bitfield operations benchmark is dependent on the size of the long
+datatype. On most systems, this is 32 bits. However, on some of the newer
+RISC chips, a long can be 64 bits long. If your system does use 64-bit
+longs, you'll need to #define the symbol LONG64.
+
+If you are unsure of the size of a long in your system (some C compiler
+manuals make it difficult to discover), simply place an ALLSTATS=T line in
+the command file and run the benchmarks. This will cause the benchmark
+program to display (among other things) the size of the data types int,
+short, and long in bytes.
+
+BITFARRAYSIZE - Sets the number of longs in the bit map array. This number
+is fixed, and should not be altered. The bitfield test adjusts itself by
+adding more bitfield commands (see above), not by creating a larger bit map.
+
+Currently, there is no code added to test for correct execution. If you are
+concerned that your port was incorrect, you'll need to step through your
+favorite debugger and verify execution against the original source code.
+
+** I added a resetting of the random number generator, and a resetting
+** of the bitfield to each loop. Those operations are outside of the
+** timed loop, and should help make the benchmark more consistent.
+** There also is now debugging information available. If you define
+** DEBUG then the program will write a file named "debugbit.dat",
+** which is the contents of the bitfield after the calibration loop of
+** 30 operations. You can compare this file with the file
+** "debugbit.good" that comes with the distribution.
+** Uwe F. Mayer <mayer@tux.edu>
+
+References
+
+None.
+
+Emulated Floating-point
+
+Description
+
+The emulated floating-point benchmark includes routines that are similar to
+those that would be executed whenever a system performs floating-point
+operations in the absence of a coprocessor. In general, this amounts to a
+mixture of integer instructions, including shift operations, integer
+addition and subtraction, and bit testing (among others).
+
+The benchmark itself is remarkably simple. The test builds three
+1-dimensional arrays and loads the first two up with random floating-point
+numbers. The arrays are then partitioned into 4 equal-sized groups, and the
+test proceeds by performing addition, subtraction, multiplication, and
+division -- one operation on each group. (For example, for the addition
+group, an element from the first array is added to the second array and the
+result is placed in the third array.)
+
+Of course, most of the work takes place inside the routines that perform the
+addition, subtraction, multiplication, and division. These routines operate
+on a special data type (referred to as an InternalFPF number) that -- though
+not strictly IEEE compliant -- carries all the necessary data fields to
+support an IEEE-compatible floating-point system. Specifically, an
+InternalFPF number is built up of the following fields:
+
+Type (indicates a NORMAL, SUBNORMAL, etc.)
+
+Mantissa sign
+
+Unbiased, signed 16-bit exponent
+
+4-word (16 bits per word) mantissa.
+
+The emulated floating-point test reports its results in number of loops per
+second (where a "loop" is one pass through the arrays as described above).
+
+Finally, we are aware that this test could be on its way to becoming an
+anachronism. A growing number of systems are appearing that have
+coprocessors built into the main CPU. It's possible that floating-point
+emulation will one day be a thing of the past.
+
+Analysis
+
+Optimized 486 code (Watcom C/C++ 10.0): The algorithm's time is distributed
+across a number of routines. The distribution is:
+
+ShiftMantLeft1() - 60% of the time
+
+ShiftMantRight1() - 17% of the time
+
+DivideInternalFPF() - 14% of the time
+
+MultiplyInternalFPF() - 5% of the time.
+
+The first two routines are similar to one another; both shift bits about in
+a floating-point number's mantissa. It's reasonable that ShiftMantLeft1()
+should take a larger share of the system's time; it is called as part of the
+normalization process that concludes every emulated addition, subtraction,
+multiplication, and division.
+
+680x0 Code (Macintosh CodeWarrior): CodeWarrior's profiler is
+function-based; consequently, it isn't possible to get timing at the machine
+instruction level. However, the output to CodeWarrior's profiler has
+provided insight into the breakdown of time spent in various functions that
+forces us to rethink our 486 code analysis.
+
+Analyzing what goes on inside the emulated floating-point tests is a tough
+one to call because some of the routines that are part of the test are
+called by the function that builds the arrays. Consequently, a quick look at
+the profiler's output can be misleading; it's not obvious how much time a
+particular routine is spending in the test and how much time that same
+routine is spending setting up the test (an operation that does not get
+timed).
+
+Specifically, the routine that loads up the arrays with test data calls
+LongToInternalFPF() and DivideInternalFPF(). LongToInternalFPF() makes one
+call to normalize() if the number is not a true zero. In turn, normalize()
+makes an indeterminate number of calls to ShiftMantLeft1(), depending on the
+structure of the mantissa being normalized.
+
+What's worse, DivideInternalFPF() makes all sorts of calls to all kinds of
+important low-level routines such as Sub16Bits() and ShiftMantLeft1().
+Untangling the wiring of which routine is being called as part of the test,
+and which is being called as part of the setup could probably be done with
+the computer equivalent of detective work and spelunking, but in the
+interest of time we'll opt for approximation.
+
+Here's a breakdown of some of the important routines and their times:
+
+AddSubInternalFPF() - 1003.9 microsecs (called 9024 times)
+
+MultiplyInternalFPF() - 20143 microsecs (called 5610 times)
+
+DivideInternalFPF() - 18820.9 microsecs (called 3366 times).
+
+The 3366 calls to DivideInternalFPF() are timed calls, not setup calls --
+the profiler at least gives outputs of separate calls made to the same
+routine, so we can determine which call is being made by the benchmark, and
+which is being made by the setup routine. It turns out that the setup
+routine calls DivideInternalFPF() 30,000 times.
+
+Notice that though addition/subtraction are called most often,
+multiplication next, then finally division; the time spent in each is the
+reverse. Division takes the most time, then multiplication, finally
+addition/subtraction. (There's probably some universal truth lurking here
+somewhere, but we haven't found it yet.)
+
+Other routines, and their breakdown:
+
+Add16Bits() - 115.3 microsecs
+
+ShiftMantRight1() - 574.2 microsecs
+
+Sub16Bits() - 1762 microsecs
+
+StickyShiftRightMant() - 40.4 microsecs
+
+ShiftMantLeft1() - 17486.1 microsecs
+
+The times for the last three routines are suspect, since they are called by
+DivideInternalFPF(), and a large portion of their time could be part of the
+setup process. This is what leads us to question the results obtained in the
+486 analysis, since it, too, is unable to determine precisely who is calling
+whom.
+
+Porting Considerations
+
+Earlier versions of this benchmark were extremely sensitive to porting;
+particularly to the "endianism" of the target system. We have tried to
+eliminate many of these problems. The test is nonetheless more "sensitive"
+to porting than most others.
+
+Pay close attention to the following defines and typedefs. They can be found
+in the files EMFLOAT.H, NMGLOBAL.H, and NBENCH1.H:
+
+u8 - Stands for unsigned, 8-bit. Usually defined to be unsigned char.
+
+u16 - Stands for unsigned, 16-bit. Usually defined to be unsigned short.
+
+u32 - Stands for unsigned, 32-bit. Usually defined to be unsigned long.
+
+INTERNAL_FPF_PRECISION - Indicates the number of elements in the mantissa of
+an InternalFPF number. Should be set to 4.
+
+The exponent field of an InternalFPF number is of type short. It should be
+set to whatever minimal data type can hold a signed, 16-bit number.
+
+Other global definitions you will want to be aware of:
+
+CPUEMFLOATLOOPMAX - Sets the maximum number of loops the benchmark will
+attempt before flagging an error. Each execution of a loop in the emulated
+floating-point test is "non-destructive," since the test takes factors from
+two arrays, operates on the factors, and places the result in a third array.
+Consequently, the test makes more work for itself by increasing the number
+of times it passes through the arrays (# of loops). If the system exceeds
+the limit set by CPUEMFLOATLOOPMAX, it will signal an error.
+
+This value may be altered to suit your system; it will not affect the
+benchmark results (unless you reduce it so much the system can never
+generate enough loops to produce a good test run).
+
+EMFARRAYSIZE - Sets the size of the arrays to be used in the test. This
+value is the number of entries (InternalFPF numbers) per array. Currently,
+the number is fixed at 3000, and should not be altered.
+
+Currently, there is no means of testing correct execution of the benchmark
+other than via debugger. There are routines available to decode the internal
+floating point format and print out the numbers, but no formal correctness
+test has been constructed. (This should be available soon. -- 3/14/95 RG)
+
+** It now prints out the operations of 8 of the entries used in the
+** test. Assuming you leave EMFARRAYSIZE at 3000, your results should
+** look like the ones below. The number in front of the colon is the
+** index of the entry.
+**  
+**  	 2: (-1.1160E   0) + (-4.5159E   0) = -5.6320E   0
+**  	 6: (-4.4507E  -1) - (-8.2050E  -1) = +3.7543E  -1
+**  	10: (+1.2465E   0) * (+7.4667E  -1) = +9.3075E  -1
+**  	14: (-1.2781E   0) / (-1.7367E   0) = +7.3596E  -1
+**    2986: (-7.0390E   0) * (-2.0752E   0) = +1.4607E   1
+**    2990: (+8.3753E  -1) / (+2.3876E   1) = +3.5078E  -2
+**    2994: (-1.1393E   0) + (-1.6080E   1) = -1.7219E   1
+**    2998: (+7.2450E   0) - (-8.2654E  -1) = +8.0716E   0
+**
+** Uwe F. Mayer <mayer@tux.edu>
+
+References
+
+Microprocessor Programming for Computer Hobbyists, Neill Graham, Tab Books,
+Blue Ridge Summit, PA, 1977.
+
+Apple Numerics Manual, Second edition, Apple Computer, Addison-Wesley
+Publishing Co., Reading, MA, 1988.
+
+Fourier Series
+
+Description
+
+This is a floating-point benchmark designed primarily to exercise the
+trigonometric and transcendental functions of the system. It calculates the
+first n Fourier coefficients of the function (x+1)^x on the interval [0,2]. In
+this case, the function (x+1)^x is being treated as a cyclic waveform with a
+period of 2.
+
+The Fourier coefficients, when applied as factors to a properly constructed
+series of sine and cosine functions, allow you to approximate the original
+waveform. (In fact, if you can calculate all the Fourier coefficients --
+there'll be an infinite number -- you can reconstruct the waveform exactly).
+You have to calculate the coefficients via integration, and the algorithm
+does this using a simple trapezoidal rule for its numeric integration
+function.
+
+The upshot of all this is that it provides an exercise for the
+floating-point routines that calculate sine, cosine, and raising a number to
+a power. There are also some floating-point multiplications, divisions,
+additions, and subtractions mixed in.
+
+The benchmark reports its results as the number of coefficients calculated
+per second.
+
+As an additional note, we should point out that the performance of this
+benchmark is heavily dependent on how well-built the compiler's math library
+is. We have seen at least two cases where recompilation with new (and
+improved!) math libraries have resulted in two-fold and five-fold
+performance improvements. (Apparently, when a compiler gets moved to a new
+platform, the trigonometric and transcendental functions in the math
+libraries are among the last routines to be "hand optimized" for the new
+platform.) About all we can say about this is that whenever you run this
+test, verify that you have the latest and greatest math libraries.
+
+Analysis
+
+Optimized 486 code: The benchmark partitions its time almost evenly among
+the modules pow387, exp386, and trig387; giving between 25% and 28% of its
+time to each. This is based on profiling with the Watcom compiler running
+under Windows NT. These modules hold the routines that handle raising a
+number to a power and performing trigonometric (sine and cosine)
+calculations. For example, within trig387, time was nearly equally divided
+between the routine that calculates sine and the routine that calculates
+cosine.
+
+The remaining time (between 17% and 18%) was spent in the balance of the
+test. We noticed that most of that time occurred in the routine
+thefunction(). This is at the heart of the numerical integration routine the
+benchmark uses.
+
+Consequently, this benchmark should be a good test of the exponential and
+trigonometric capabilities of a processor. (Note that we recognize that the
+performance also depends on how well the compiler's math library is built.)
+
+680x0 Code (Macintosh CodeWarrior): The CodeWarrior profiler is function
+based, therefore it is impossible to get performance results for individual
+machine instructions. The CodeWarrior compiler is also unable to tell us how
+much time is spent within a given library routine; we can't see how much
+time gets spent executing the sin(), cos(), or pow() functions (which,
+unfortunately, was the whole idea behind the benchmark).
+
+About all we can glean from the results is that thefunction() takes about
+74% of the time in the test (this is where the heavy math calculations take
+place) while trapezoidintegrate() accounts for about 26% of the time on its
+own.
+
+Porting Considerations
+
+Necessarily, this benchmark is at the mercy of the efficiency of the
+floating-point support provided by whatever compiler you are using. It is
+recommended that, if you are doing the port yourself, you contact the
+designers of the compiler, and discuss with them what optimization switches
+should be set to produce the fastest code. (This sounds simple; usually it's
+not. Some systems let you decide between speed and true IEEE compliance.)
+
+As far as global definitions go, this benchmark is happily free of them. All
+the math is done using double data types. We have noticed that, on some Unix
+systems, you must be careful to include the correct math libraries.
+Typically, you'll discover this at link time.
+
+To test for correct execution of the benchmark: It's unlikely you'll need to
+do this, since the algorithm is so cut-and-dried. Furthermore, there are no
+explicit provisions made to verify the correctness. You can, however, either
+dip into your favorite debugger, or alter the code to print out the contents
+of the abase (which holds the A[i] terms) and bbase (which holds the B[i]
+terms) arrays as they are being filled (see routine DoFPUTransIteration).
+** This is exactly what I have done, it now prints out A[i] and B[i] data.
+** Uwe F. Mayer <mayer@tux.edu>
+Run the benchmark with a command file set to execute only the Fourier test,
+and examine the contents of the arrays. The first 100 are listed below.
+
+A[i]=
+   2.84 1.05 0.274 0.0824 0.0102 -0.024 -0.0426 -0.0536 -0.0605 -0.065
+-0.0679 -0.0698 -0.0709 -0.0715 -0.0717 -0.0715 -0.0711 -0.0704
+-0.0696 -0.0685 -0.0674 -0.0661 -0.0647 -0.0632 -0.0615 -0.0598 -0.058
+-0.0561 -0.0542 -0.0521 -0.0501 -0.0479 -0.0457 -0.0434 -0.0411
+-0.0387 -0.0363 -0.0338 -0.0313 -0.0288 -0.0262 -0.0236 -0.0209
+-0.0183 -0.0156 -0.0129 -0.0102 -0.00744 -0.0047 -0.00196 0.000794
+0.00355 0.0063 0.00905 0.0118 0.0145 0.0172 0.0199 0.0226 0.0253
+0.0279 0.0305 0.0331 0.0357 0.0382 0.0407 0.0431 0.0455 0.0479 0.0502
+0.0525 0.0547 0.0569 0.059 0.061 0.063 0.0649 0.0668 0.0686 0.0703
+0.072 0.0736 0.0751 0.0765 0.0779 0.0792 0.0804 0.0816 0.0826 0.0836
+0.0845 0.0853 0.0861 0.0867 0.0873 0.0877 0.0881 0.0884 0.0887 0.0888
+
+B[i]= 
+(undefined) -1.88 -1.16 -0.806 -0.61 -0.487 -0.402 -0.34 -0.293 -0.255
+-0.224 -0.199 -0.177 -0.158 -0.141 -0.126 -0.113 -0.101 -0.0901
+-0.0802 -0.071 -0.0625 -0.0546 -0.0473 -0.0404 -0.034 -0.0279 -0.0222
+-0.0168 -0.0117 -0.00693 -0.00238 0.00193 0.00601 0.00988 0.0135 0.017
+0.0203 0.0234 0.0263 0.0291 0.0317 0.0341 0.0364 0.0385 0.0405 0.0424
+0.0441 0.0457 0.0471 0.0484 0.0496 0.0507 0.0516 0.0525 0.0532 0.0538
+0.0543 0.0546 0.0549 0.055 0.0551 0.055 0.0549 0.0546 0.0543 0.0538
+0.0533 0.0527 0.052 0.0512 0.0503 0.0493 0.0483 0.0472 0.046 0.0447
+0.0434 0.042 0.0405 0.039 0.0374 0.0358 0.0341 0.0323 0.0305 0.0287
+0.0268 0.0249 0.023 0.021 0.019 0.0169 0.0149 0.0128 0.0107 0.00857
+0.00644 0.0043 0.00215
+
+Note that there is no B[0] coefficient. If the above numbers are in the
+arrays shown, you can feel pretty confident that the benchmark is working
+properly.
+
+References
+
+Engineering and Scientific Computations in Pascal, Lawrence P. Huelsman,
+Harper & Row, New York, 1986.
+
+Assignment Algorithm
+
+Description
+
+This test is built on an algorithm with direct application to the business
+world. The assignment algorithm solves the following problem: Say you have X
+machines and Y jobs. Any of the machines can do any of the jobs; however, the
+machines are sufficiently different so that the cost of doing a particular
+job can vary depending on what machine does it. Furthermore, the jobs are
+sufficiently different that the cost varies depending on which job a given
+machine does. You therefore construct a matrix; machines are the rows, jobs
+are the columns, and the [i,j] element of the array is the cost of doing the
+jth job on the ith machine. How can you assign the jobs so that the cost of
+completing them all is minimal? (This also assumes that one machine does one
+job.)
+
+Did you get that?
+
+The assignment algorithm benchmark is largely a test of how well the
+processor handles problems built around array manipulation. It is not a
+floating-point test; the "cost matrix" built by the algorithm is simply a 2D
+array of long integers. This benchmark considers an iteration to be a run of
+the assignment algorithm on a 101 x 101-element matrix. It reports its
+results in iterations per second.
+
+Analysis
+
+Optimized 486 code (Watcom C/C++ 10.0): There are numerous loops within the
+assignment algorithm. The development system we were using (Watcom C/C++
+10.0) appears to have a fine time unrolling many of them. Consequently, it
+is difficult to pin down the execution impact of single lines (as in, for
+example, the numeric sort benchmark).
+
+On the level of functions, the benchmark spends around 70% of its time in
+the routine first_assignments(). This is where a) lone zeros in rows and
+columns are found and selected, and b) a choice is made between duplicate
+zeros. Around 23% of the time is spent in the second_assignments() routine
+where (if first_assignments() fails) the matrix is partitioned into smaller
+submatrices.
+
+Overall, we did a tally of instruction mix execution. The approximate
+breakdowns are:
+
+move - 38%
+
+conditional jump - 12%
+
+unconditional jump - 11%
+
+comparison - 14%
+
+math/logical/shift - 24%
+
+Many of the move instructions that appeared to consume the most time
+were referencing items on the local stack frame. This required an
+indirect reference through EBP, plus a constant offset to resolve the
+address.
+
+This should be a good exercise of a cache, since operations in the
+first_assignments() routine require both row-wise and column-wise movement
+through the array. Note that the routine could be made more "severe" by
+changing the assignedtableau[][] array to an array of unsigned char --
+forcing fetches on byte boundaries.
+
+680x0 Code (CodeWarrior): The CodeWarrior profiler is function-based.
+Consequently, it's not possible to determine what's going on at the machine
+instruction level. We can, however, get a good idea of how much time the
+algorithm spends in each routine. The important routines are broken down as
+follows:
+
+calc_minimum_costs() - approximately 0.3% of the time
+
+(250 microsecs)
+
+first_assignments() - approximately 79% of the time
+
+(96284.6 microsecs)
+
+second_assignments() - approximately 19% of the time
+
+(22758 microsecs)
+
+These times are approximate; some time is spent in the Assignment() routine
+itself.
+
+These figures are reasonably close to those of the 486, at least in terms of
+the mixture of time spent in a particular routine. Hence, this should still
+be a good test of system cache (as described in the preceding section),
+given the behavior of the first_assignments() routine.
+
+Porting Considerations
+
+The assignment algorithm test is purely an integer benchmark, and requires
+no special data types that might be affected by ports to different
+architectures. There are only two global constants that affect the
+algorithm:
+
+ASSIGNROWS and ASSIGNCOLS - These set the size of the assignment array. Both
+are defined to be 101 (so, the array that is benchmarked is a
+101 x 101-element array of longs). These values should not be altered.
+
+To test for correct execution of the benchmark: #define the symbol DEBUG,
+recompile, set up a command file that executes only the assignment
+algorithm, and run the benchmark. (You may want to pipe the output through a
+paging filter, like the more program.) The act of defining DEBUG will enable
+a section of code that displays the assigned columns on a per-row basis. If
+the benchmark is working properly, the numbers to be displayed
+should be:
+
+R000: 056 R001: 066 R002: 052 R003: 065 R004: 043 R005: 023 R006: 016
+R007: 077 R008: 095 R009: 004 R010: 064 R011: 076 R012: 078 R013: 091
+R014: 013 R015: 029 R016: 044 R017: 014 R018: 041 R019: 042 R020: 020
+R021: 071 R022: 024 R023: 017 R024: 055 R025: 040 R026: 070 R027: 025
+R028: 031 R029: 019 R030: 073 R031: 002 R032: 047 R033: 009 R034: 035
+R035: 045 R036: 005 R037: 063 R038: 081 R039: 039 R040: 087 R041: 008
+R042: 053 R043: 093 R044: 049 R045: 092 R046: 061 R047: 046 R048: 026
+R049: 034 R050: 088 R051: 000 R052: 028 R053: 018 R054: 072 R055: 021
+R056: 037 R057: 082 R058: 006 R059: 058 R060: 096 R061: 068 R062: 069
+R063: 054 R064: 057 R065: 086 R066: 097 R067: 084 R068: 099 R069: 051
+R070: 098 R071: 003 R072: 074 R073: 062 R074: 080 R075: 033 R076: 011
+R077: 094 R078: 012 R079: 050 R080: 010 R081: 038 R082: 089 R083: 059
+R084: 022 R085: 079 R086: 015 R087: 007 R088: 075 R089: 083 R090: 060
+R091: 048 R092: 032 R093: 067 R094: 001 R095: 030 R096: 027 R097: 085
+R098: 090 R099: 036 R100: 100
+
+These are the column choices for each row made by the algorithm. If
+you see these numbers displayed, the algorithm is working correctly.
+
+*** The original debugging information was incorrect, as it not only
+*** displayed the chosen columns, but also displayed eliminated columns.
+*** Changed to show all 101 entries. Uwe F. Mayer <mayer@tux.edu>
+
+References
+
+Quantitative Decision Making for Business, Gordon, Pressman, and Cohn,
+Prentice-Hall, Englewood Cliffs, NJ, 1990.
+
+Quantitative Decision Making, Guiseppi A. Forgionne, Wadsworth Publishing
+Co., California, 1986.
+
+Huffman Compression
+
+Description
+
+This is a compression algorithm that -- while helpful for some time as a
+text compression technique -- has since fallen out of fashion on account of
+the superior performance by algorithms such as LZW compression. It is,
+however, still used in some graphics file formats in one form or another.
+
+The benchmark consists of three parts:
+
+Building a "Huffman Tree" (explained below),
+
+Compression, and
+
+Decompression.
+
+A "Huffman Tree" is a special data structure that guides the compression and
+decompression processes. If you were to diagram one, it would look like a
+large binary tree (i.e., two branches per each node). Describing its
+function in detail is beyond the scope of this paper (see the references for
+more information). We should, however, point out that the tree is built from
+the "bottom up"; and the procedure for constructing it requires that the
+algorithm scan the uncompressed buffer, building a frequency table for all
+the characters appearing in the buffer. (This version of the Huffman
+algorithm compresses byte-at-a-time, though there's no reason why the same
+principle could not be applied to tokens larger than one byte.)
+
+Once the tree is built, text compression is relatively straightforward. The
+algorithm fetches a character from the uncompressed buffer, navigates the
+tree based on the character's value, and produces a bit stream that is
+concatenated to the compressed buffer. Decompression is the reverse of that
+process. (We recognize that we are simplifying the algorithm. Again, we
+recommend you check the references.)
+
+The Huffman Compression benchmark considers an iteration to be the three
+operations described above, performed on an uncompressed text buffer of 5000
+bytes. It reports its results in iterations per second.
+
+Analysis
+
+Optimized 486 code (Watcom C/C++ 10.0): The Huffman compression algorithm --
+tree building, compression, and decompression -- is written as a single,
+large routine: DoHuffIteration(). All the benchmark's time is spent within
+that routine.
+
+Components of DoHuffIteration() that consume the most time are those that
+perform the compression and decompression.
+
+The code for performing the compression spends most of its time (accounting
+for about 13%) constructing the bit string for a character that is being
+compressed. It does this by seeking up the tree from a leaf, emitting 1's
+and 0's in the process, until it reaches the root. The stream of 1's and 0's
+is loaded into a character array; the algorithm then walks "backward"
+through the array, setting (or clearing) bits in the compression buffer as
+it goes.
+
+Similarly, the decompression portion takes about 12% of the time as the
+algorithm pulls bits out of the compressed buffer -- using them to navigate
+the Huffman tree -- and reconstructs the original text.
+
+680x0 Code (Macintosh CodeWarrior): CodeWarrior's profiler is function
+based. Consequently, it's impossible to get performance scores for
+individual machine instructions. Furthermore, as mentioned above, the
+Huffman compression algorithm is written as a monolithic routine. This makes
+the results from the CodeWarrior profiler all the more sparse.
+
+We can at least point out that the lowmost routines (GetCompBit() and
+SetCompBit()) that read and write individual bits, though called nearly 13
+million times each, account for only 0.7% and 0.3% of the total time,
+respectively.
+
+Porting Considerations
+
+The Huffman algorithm relies on no special data types. It should port
+readily. Global constants of interest include:
+
+EXCLUDED - This is a large, positive value. Currently it is set to 32000,
+and should be left alone. Basically, this is a token that the system uses to
+indicate an excluded character (one that does not appear in the plain-text).
+It is set to a ridiculously high value that will never appear in the
+pointers of the tree during normal construction.
+
+MAXHUFFLOOPS - This is another one of those "governor" constants. The
+Huffman benchmark creates more work for itself by doing multiple
+compression/decompression loops. This constant sets the maximum number of
+loops it will attempt per iteration before it gives up. Currently, it is set
+to 50000. Though it is unlikely you'll ever need to modify this value, you
+can increase it if your machine is too fast for the adjustment algorithm. Do
+not reduce the number.
+
+HUFFARRAYSIZE - This value sets the size of the plain-text array to be
+compressed. You can override this value with the command file to see how
+well your machine performs for larger or smaller arrays. The subsequent
+results, however, are invalid for comparison with other systems.
+
+To test for correct execution of the benchmark: #define the symbol DEBUG,
+recompile, build a command file that executes only the Huffman compression
+algorithm, and run the benchmark. Defining DEBUG will enable a section of
+code that verifies the decompression as it takes place (i.e., the routine
+compares -- character at a time -- the uncompressed data with the original
+plain-text). If there's an error, the program will repeatedly display: "Error
+at textoffset xxx". 
+
+** If everything is correct it will emit quite a few "Huffman: OK" messages.
+**
+** I added a resetting of the random number generator, outside of the
+** timed loop, and a resetting of the Huffman tree, inside of the
+** timed loop. That should help to make the benchmark more consistent.
+** The program did originally only reset half of the tree, which led
+** to runtime errors on some systems. The effect on the benchmark
+** should be negligible, and in fact comes out as being of the order
+** of less than 1% on my test system.
+** Uwe F. Mayer <mayer@tux.edu>
+
+References
+
+Data Compression: Methods and Theory, James A. Storer, Computer Science
+Press, Rockville, MD, 1988.
+
+An Introduction to Text Processing, Peter D. Smith, MIT Press, Cambridge,
+MA, 1990.
+
+IDEA Encryption
+
+Description
+
+This is another benchmark based on a "higher-level" algorithm;
+"higher-level" in the sense that it is more complex than a sort or a search
+operation.
+
+Security -- and, therefore, cryptography -- are becoming increasingly
+important issues in the computer realm. It's likely that more and more
+machines will be running routines like the IDEA encryption algorithm. (IDEA
+is an acronym for the International Data Encryption Algorithm.)
+
+A good description of the algorithm (and, in fact, the reference we used to
+create the source code for the test) can be found in Bruce Schneier's
+exhaustive exploration of encryption, "Applied Cryptography" (see
+references). To quote Mr. Schneier: "In my opinion, it [IDEA] is the best
+and most secure block algorithm available to the public at this time."
+
+IDEA is a symmetrical, block cipher algorithm. Symmetrical means that the
+same routine used to encrypt the data also decrypts the data. A block cipher
+works on the plain-text (the message to be encrypted) in fixed, discrete
+chunks. In the case of IDEA, the algorithm encrypts and decrypts 64 bits at
+a time.
+
+As pointed out in Schneier's book, there are three operations that the IDEA
+uses to do its work:
+
+XOR (exclusive-or)
+
+Addition modulo 2^16 (ignoring overflow)
+
+Multiplication modulo 2^16+1 (ignoring overflow).
+
+IDEA requires a key of 128 bits. However, keys and blocks are further
+subdivided into 16-bit chunks, so that any given operation within the IDEA
+encryption is performed on 16-bit quantities. (This is one of the many
+advantages of the algorithm: it is efficient even on 16-bit processors.)
+
+The IDEA benchmark considers an "iteration" to be an encryption and
+decryption of a buffer of 4000 bytes. The test actually builds 3 buffers:
+The first to hold the original plain-text, the second to hold the encrypted
+text, and the third to hold the decrypted text (the contents of which should
+match that of the first buffer). It reports its results in iterations per
+second.
+
+Analysis
+
+Optimized 486 code: The algorithm actually spends most of its time (nearly
+75%) within the mul() routine, which performs the multiplication modulo
+2^16+1. This is a super-simple routine, consisting primarily of if
+statements, shifts, and additions.
+
+The remaining time (around 24%) is spent in the balance of the cipher_idea()
+routine. (Note that cipher_idea() calls the mul() routine frequently; so,
+the 24% is comprised of the other lines of cipher_idea()). cipher_idea() is
+littered with simple pointer-fetch-and-increment operations, some addition,
+and some exclusive-or operations.
+
+Note that IDEA's exercise of system capabilities probably doesn't extend
+beyond testing simple integer math operations. Since the buffer size is set
+to 4000 bytes, the test will run entirely in processor cache on most
+systems. Even the cache won't get a heavy "internal" workout, since the
+algorithm proceeds sequentially through each buffer from lower to higher
+addresses.
+
+680x0 code (Macintosh CodeWarrior): CodeWarrior's profiler is function
+based; consequently, it is impossible to determine execution profiles for
+individual machine instructions. We can, however, get an idea of how much
+time is spent in each routine.
+
+As with Huffman compression, the IDEA algorithm is written monolithically --
+a single, large routine does most of the work. However, a special
+multiplication routine, mul(), is frequently called within each
+encryption/decryption iteration (see above).
+
+In this instance, the results for the 68K system diverge widely from those
+of the 486 system. The CodeWarrior profiler shows the mul() routine as
+taking only 4% of the total time in the benchmark, even though it is called
+over 20 million times. The outer routine is called 600,000 times, and
+accounts for about 96% of the program's total time.
+
+Porting Considerations
+
+Since IDEA does its work in 16-bit units, it is particularly important that
+u16 be defined to whatever datatype provides an unsigned 16-bit integer on
+the test platform. Usually, unsigned short works for this. (You can verify
+the size of a short by running the benchmarks with a command file that
+includes ALLSTATS=T as one of the commands. This will cause the benchmark
+program to display a message that tells the size of the int, short, and long
+data-types in bytes.)
+
+Also, the mul() routine in IDEA requires the u32 datatype to define an
+unsigned 32-bit integer. In most cases, unsigned long works.
+
+To test for correct execution of the benchmark: #define the symbol DEBUG,
+recompile, build a command file that executes only the IDEA algorithm, and
+run the benchmark. Defining DEBUG will enable a section of code that
+compares the original plain-text with the output of the test. (Remember, the
+benchmark performs both encryption and decryption.) If the algorithm has
+failed, the output will not match the input, and you'll see "IDEA Error"
+messages all over your display.
+
+References
+
+Applied Cryptography: Protocols, Algorithms, and Source Code in C, Bruce
+Schneier, John Wiley & Sons, Inc., New York, 1994.
+
+Neural Net
+
+Description
+
+The Neural Net simulation benchmark is based on a simple back-propagation
+neural network presented by Maureen Caudill as part of a BYTE article that
+appeared in the October, 1991 issue (see "Expert Networks" in that issue).
+The network involved is a simple 3-layer (input neurodes, middle-layer
+neurodes, and output neurodes) network that accepts a number of 5 x 7 input
+patterns and produces a single 8-bit output pattern.
+
+The test involves sending the network an input pattern that is the 5 x 7
+"image" of a character (1's and 0's -- 1's representing lit pixels, 0's
+representing unlit pixels), and teaching it the 8-bit ASCII code for the
+character.
+
+A thorough description of how the back propagation algorithm works is beyond
+the scope of this paper. We recommend you search through the references
+given at the end of this paper, particularly Ms. Caudill's article, for
+detailed discussion. In brief, the benchmark is primarily an exercise in
+floating-point operations, with some frequent use of the exp() function. It
+also performs a great deal of array references, though the arrays in use are
+well under 300 elements each (and less than 100 in most cases).
+
+The Neural Net benchmark considers an iteration to be a single learning
+cycle. (A "learning cycle" is defined as the time it takes the network to be
+able to associate all input patterns to the correct output patterns within a
+specified tolerance.) It reports its results in iterations per second.
+
+Analysis
+
+Optimized 486 code: The forward pass of the network (i.e., calculating
+outputs from inputs) utilizes a sigmoid function. This function has, at its
+heart, a call to the exp() library routine. A small but non-negligible
+amount of time is spent in that function (a little over 5% for the 486
+system we tested).
+
+The learning portion of the network benchmark depends on the derivative of
+the sigmoid function, which turns out to require only multiplications and
+subtractions. Consequently, each learning pass exercises only simple
+floating-point operations.
+
+If we divide the time spent in the test into two parts -- forward pass and
+backward pass (the latter being the learning pass) -- then the test appears
+to spend the greatest part of its time in the learning phase. In fact, most
+time is spent in the adjust_mid_wts() routine. This is the part of the
+routine that alters the weights on the middle layer neurodes. (It accounts
+for over 40% of the benchmark's time.)
+
+680x0 Code (Macintosh CodeWarrior): Though CodeWarrior's profiler is
+function based, the neural net benchmark is highly modular. We can therefore
+get a good breakdown of routine usage:
+
+worst_pass_error() - 304 microsecs (called 4680 times)
+
+adjust_mid_wts() - 83277 microsecs (called 46800 times)
+
+adjust_out_wts() - 17394 microsecs (called 46800 times)
+
+do_mid_error() - 11512 microsecs (called 46800 times)
+
+do_out_error() - 3002 microsecs (called 46800 times)
+
+do_mid_forward() - 49559 microsecs (called 46800 times)
+
+do_out_forward() - 20634 microsecs (called 46800 times)
+
+Again, most time was spent in adjust_mid_wts() (as on the 486), accounting
+for almost twice as much time as do_mid_forward().
+
+Porting Considerations
+
+The Neural Net benchmark is not dependent on any special data types. There
+are a number of global variables and arrays that should not be altered in
+any way. Most importantly, the #defines found in NBENCH1.H under the Neural
+Net section should not be changed. These control not only the number of
+neurodes in each layer; they also include constants that govern the learning
+processes.
+
+Other globals to be aware of:
+
+MAXNNETLOOPS - This constant simply sets the upper limit on the number of
+training loops the test will permit per iteration. The Neural Net benchmark
+adjusts its workload by re-teaching itself over and over (each time it
+begins a new training session, the network is "cleared" -- loaded with
+random values). It is unlikely you will ever need to modify this constant.
+
+inpath - This string pointer is set to the path from which the neural net's
+input data is read. It is currently hardwired to "NNET.DAT". You shouldn't
+have to change this name, unless your file system requires directory
+information as part of the path.
+
+Note that the Neural Net benchmark is the only test that requires an
+external data file. The contents of the file are listed in an attachment to
+this paper. You should use the attachment to reconstruct the file should it
+become lost or corrupted. Any changes to the file will invalidate the test
+results.
+
+To test for correct execution of the benchmark: #define the symbol DEBUG,
+recompile, build a command file that executes only the Neural Net test, and
+run the benchmark. Defining DEBUG will enable a section of code that
+displays how many passes through the learning process were required for the
+net to learn. It should learn in 780 passes.
+
+References
+
+"Expert Networks," Maureen Caudill, BYTE Magazine, October, 1991.
+
+Simulating Neural Networks, Norbert Hoffmann, Verlag Vieweg, Wiesbaden,
+1994.
+
+Signal and Image Processing with Neural Networks, Timothy Masters, John
+Wiley and Sons, New York, 1994.
+
+Introduction to Neural Networks, Jeannette Stanley, California Scientific
+Software, CA, 1989.
+
+LU Decomposition
+
+Description
+
+LU Decomposition is an algorithm that can be used as the heart of a program
+for solving linear equations. Suppose you have a matrix A. LU Decomposition
+determines the matrices L and U such that
+
+L . U = A
+
+where L is a lower triangular matrix and U is an upper triangular matrix. (A
+lower triangular matrix has nonzero elements only on the main diagonal and
+below. An upper triangular matrix has nonzero elements only on the main
+diagonal and above.)
+
+Without going into the mathematical details too deeply, having the L and U
+matrices makes the solution of linear equations (i.e., equations of the form
+A . x = b) quite easy. It turns out that you can also use LU decomposition
+to determine matrix inverses and determinants.
+
+The algorithm used in the benchmarks was derived from Numerical Recipes in
+Pascal (there is a C version of the book, which we did not have on hand), a
+book we heartily recommend to anyone serious about mathematical and
+scientific computing. The authors are approving of LU decomposition as a
+means of solving linear equations, pointing out that their version (which
+makes use of what we would have to call "Crout's method with partial
+implicit pivoting") is a factor of 3 better than one of their Gauss-Jordan
+routines, a factor of 1.5 better than another. They go on to demonstrate the
+use of LU decomposition for iterative improvement of linear equation
+solutions.
+
+The benchmark begins by creating a "solvable" linear system. This is easily
+done by loading up the column vector b with random integers, then
+initializing A with an identity matrix. The equations are then "scrambled"
+by either multiplying a row by a constant, or adding one row to another. The
+scrambled matrices are handed to the LU algorithm.
+
+The LU Decomposition benchmark considers a single iteration to be the
+solution of one set of equations (the size of A is fixed at 101 x 101
+elements). It reports its results in iterations per second.
+
+Analysis
+
+Optimized 486 code (Watcom C/C++ 10.0): The entire algorithm consists of two
+parts: the LU decomposition itself, and the back substitution algorithm that
+builds the solution vector. The majority of the algorithm's time takes place
+within the former; the algorithm that builds the L and U matrices (this
+takes place in routine ludcmp()).
+
+Within ludcmp(), there are two extremely tight for loops forming the heart
+of Crout's algorithm that consume the majority of the time. The loops are
+"tight" in that they each consist of only one line of code; in both cases,
+the line of code is a "multiply and accumulate" operation (actually, it's
+sort of a multiply and de-accumulate, since the result of the multiplication
+is subtracted, not added).
+
+In both cases, the items multiplied are elements from the A array; and one
+factor's row index is varying more rapidly, while another factor's column
+index is varying more rapidly.
+
+Note that this is a good overall test of floating-point operations within
+matrices. Most of the math is floating-point; primarily additions,
+subtractions, and multiplications (only a few divisions).
+
+680x0 Code (Macintosh CodeWarrior): CodeWarrior's profiler is function
+based. It is therefore impossible to determine execution profiles at the
+machine-code level. The profiler does, however, allow us to determine how
+much time the benchmark spends in each routine. This breakdown is as
+follows:
+
+lusolve() - 3.4 microsecs (about 0% of the time)
+
+lubksb() - 1198 microsec (about 2% of the time)
+
+ludcmp() - 63171 microsec (about 91% of the time)
+
+The above percentages are for the whole program. Consequently, as a portion
+of actual benchmark time, the amount attributed to each will be slightly
+larger (though the proportions will remain the same).
+
+Since ludcmp() performs the actual LU decomposition, this is exactly where
+we'd want the benchmark to spend its time. The lubksb() routine calls
+ludcmp(), using the resulting matrix to "back-solve" the linear equation.
+
+Porting Considerations
+
+The LU Decomposition routine requires no special data types, and is immune
+to byte ordering. It does make use of a typedef (LUdblptr) that includes an
+embedded union; this allows the benchmark to "coerce" a pointer to double
+into a pointer to a 2D array of double. This arrangement has not caused
+problems with the compilers we have tested to date.
+
+Other constants and globals to be aware of:
+
+LUARRAYROWS and LUARRAYCOLS - These constants set the size of the
+coefficient matrix, A. They cannot be altered by command file. In fact, you
+shouldn't alter them at all, or your results will be invalid. Currently,
+they are both set to 101.
+
+MAXLUARRAYS - This is another "governor" constant. The algorithm performs
+dynamic workload adjustment by building more and more arrays to solve per
+timing round. This sets the maximum upper limit of arrays that it will
+build. Currently, it is set to 1000, which should be more than enough for
+the reasonable future (1000 arrays of 101 x 101 floating-point doubles would
+require somewhere around 80 megabytes of RAM -- and that's not counting the
+column vectors).
+
+To test for correct execution of the benchmark: Currently, there is no
+simple technique for doing this. You can, however, either use your favorite
+debugger (or embed a printf() statement) at the conclusion of the lubksb()
+routine. When this routine concludes, the array b will hold the solution
+vector. These items will be stored as floating-point doubles, and the first
+14 are (with rounding):
+
+46 20 23 22 85 86 97 95 8 89 75 67 6 86
+
+If you find these numbers as the first 14 in the array b[], then you're
+virtually guaranteed that the algorithm is working correctly.
+
+*** The above is not correct, as the initial matrix is not the identity,
+*** but a matrix with random nonzero entries on the diagonal (they have
+*** altered the algorithm since they wrote the documentation).
+*** I changed the output of the debugging routine, it now prints first
+*** what the array b should hold (as righthand side divided by diagonal
+*** entry), and then it prints what the array b does hold after the
+*** decomposition has been done to compute the solution of the system. If
+*** you get the same, then fine.
+*** And, by the way, my original right hand sides are
+***  46  23  85  97   8  75   6  81  88  76   6  84  31  53   2 ...
+*** and the diagonal entries are
+*** 520 922 186 495  89 267 786 571 175 600 738 321 897 541 859 ...
+*** You notice that one has every other number of the original sequence.
+*** This is due to BYTE's change of the algorithm, as they now also use the
+*** random number generator to generate the diagonal elements.
+*** Here is the complete set of data:
+*** 46/520=0.09  23/922=0.02  85/186=0.46   97/495=0.20  8/89=0.09
+*** 75/267=0.28  6/786=0.01   81/571=0.14   88/175=0.50  76/600=0.13
+*** 6/738=0.01   84/321=0.26  31/897=0.03   53/541=0.10  2/859=0.00
+*** 86/92=0.93   51/121=0.42  29/248=0.12   51/789=0.06  84/6=14.00
+*** 21/180=0.12  33/48=0.69   2/899=0.00    12/820=0.01  69/372=0.19
+*** 59/809=0.07  74/18=4.11   40/788=0.05   39/56=0.70   86/91=0.95
+*** 33/878=0.04  82/165=0.50  42/561=0.07   8/274=0.03   84/694=0.12
+*** 32/352=0.09  25/969=0.03  59/816=0.07   33/112=0.29  5/125=0.04
+*** 89/740=0.12  7/223=0.03   54/994=0.05   33/80=0.41   55/676=0.08
+*** 6/524=0.01   36/544=0.07  21/160=0.13   58/596=0.10  15/717=0.02
+*** 84/311=0.27  98/530=0.18  46/713=0.06   41/233=0.18  73/640=0.11
+*** 40/343=0.12  72/586=0.12  100/965=0.10  59/764=0.08  37/866=0.04
+*** 27/682=0.04  3/652=0.00   41/352=0.12   87/786=0.11  45/79=0.57
+*** 83/761=0.11  41/817=0.05  46/209=0.22   78/930=0.08  85/210=0.40
+*** 80/756=0.11  18/931=0.02  30/669=0.04   47/127=0.37  85/891=0.10
+*** 66/364=0.18  83/955=0.09  58/637=0.09   58/778=0.07  82/288=0.28
+*** 42/540=0.08  76/290=0.26  59/36=1.64    29/463=0.06  63/476=0.13
+*** 6/340=0.02   73/341=0.21  59/737=0.08   81/492=0.16  98/443=0.22
+*** 58/32=1.81   53/562=0.09  54/263=0.21   46/367=0.13  58/390=0.15
+*** 96/845=0.11  30/746=0.04  2/687=0.00    28/849=0.03  84/180=0.47
+*** 85/382=0.22
+*** Uwe F. Mayer <mayer@tux.edu>
+
+References
+
+Numerical Recipes in Pascal: The Art of Scientific Computing, Press,
+Flannery, Teukolsky, Vetterling, Cambridge University Press, New York, 1989.
diff --git a/debugbit.good.gz b/debugbit.good.gz
new file mode 100644
index 0000000..fdc893e
--- /dev/null
+++ b/debugbit.good.gz
diff --git a/emfloat.c b/emfloat.c
new file mode 100644
index 0000000..5e73890
--- /dev/null
+++ b/emfloat.c
@@ -0,0 +1,1343 @@
+/*
+** emfloat.c
+** Source for emulated floating-point routines.
+** BYTEmark (tm)
+** BYTE's Native Mode Benchmarks
+** Rick Grehan, BYTE Magazine.
+**
+** Created:
+** Last update: 3/95
+**
+** DISCLAIMER
+** The source, executable, and documentation files that comprise
+** the BYTEmark benchmarks are made available on an "as is" basis.
+** This means that we at BYTE Magazine have made every reasonable
+** effort to verify that the there are no errors in the source and
+** executable code.  We cannot, however, guarantee that the programs
+** are error-free.  Consequently, McGraw-HIll and BYTE Magazine make
+** no claims in regard to the fitness of the source code, executable
+** code, and documentation of the BYTEmark.
+**  Furthermore, BYTE Magazine, McGraw-Hill, and all employees
+** of McGraw-Hill cannot be held responsible for any damages resulting
+** from the use of this code or the results obtained from using
+** this code.
+*/
+
+
+#include <stdio.h>
+#include <string.h>
+#include "nmglobal.h"
+#include "emfloat.h"
+
+/*
+** Floating-point emulator.
+** These routines are only "sort of" IEEE-compliant.  All work is
+** done using an internal representation.  Also, the routines do
+** not check for many of the exceptions that might occur.
+** Still, the external formats produced are IEEE-compatible,
+** with the restriction that they presume a little-endian machine
+** (though the endianness will not affect the performance).
+**
+** Some code here was based on work done by Steve Snelgrove of
+** Orem, UT.  Other code comes from routines presented in
+** the long-ago book: "Microprocessor Programming for
+** Computer Hobbyists" by Neill Graham.
+*/
+
+/**************************
+** SetupCPUEmFloatArrays **
+***************************
+** Set up the arrays that will be used in the emulated
+** floating-point tests.
+** Each abase[i] and bbase[i] is loaded with a quotient of
+** random integers from randwc(); our int32-to-floating point
+** routine converts them and DivideInternalFPF() divides them.
+** NOTE: We really don't need the pointer to cbase...cbase
+** is not touched here and is overwritten in the benchmark.
+*/
+void SetupCPUEmFloatArrays(InternalFPF *abase,
+                InternalFPF *bbase,
+                InternalFPF *cbase,
+                ulong arraysize)
+{
+ulong i;                        /* Array index */
+InternalFPF locFPF1,locFPF2;    /* Dividend and divisor scratch */
+/*
+** Reset random number generator so things repeat. Inserted by Uwe F. Mayer.
+*/
+extern int32 randnum(int32 lngval);
+randnum((int32)13);
+
+for(i=0;i<arraysize;i++)
+{       /* One dividend is shared by abase[i] and bbase[i]... */
+        Int32ToInternalFPF(randwc((int32)50000),&locFPF1);
+ /* ...each gets its own divisor (the +1 presumably avoids 0) */
+        Int32ToInternalFPF(randwc((int32)50000)+(int32)1,&locFPF2);
+        DivideInternalFPF(&locFPF1,&locFPF2,abase+i);
+ /* Fresh divisor for bbase[i]; the dividend locFPF1 is reused */
+        Int32ToInternalFPF(randwc((int32)50000)+(int32)1,&locFPF2);
+        DivideInternalFPF(&locFPF1,&locFPF2,bbase+i);
+}
+return;
+}
+
+/***********************
+** DoEmFloatIteration **
+************************
+** Perform an iteration of the emulated floating-point
+** benchmark: `loops' passes over the arrays, each storing its
+** results in cbase.  Returns the value of StopStopwatch().
+*/
+ulong DoEmFloatIteration(InternalFPF *abase,
+                InternalFPF *bbase,
+                InternalFPF *cbase,
+                ulong arraysize, ulong loops)
+{
+ulong elapsed;          /* For the stopwatch */
+static uchar jtable[16] = {0,0,0,0,1,1,1,1,2,2,2,2,2,3,3,3};
+ulong i;                /* Array index */
+#ifdef DEBUG
+int number_of_loops;    /* Value `loops' has during the first pass */
+#endif
+/*
+** Begin timing
+*/
+elapsed=StartStopwatch();
+#ifdef DEBUG
+number_of_loops=loops-1; /* the index of the first loop we run */
+#endif
+
+/*
+** Each pass picks the operation for element i from
+** jtable[i % 16], giving the following ratios:
+**   4 adds, 4 subtracts, 5 multiplies, 3 divides
+** (adds and subtracts being nearly the same operation)
+*/
+while(loops--)
+{
+        for(i=0;i<arraysize;i++)
+                switch(jtable[i % 16])
+                {
+                        case 0: /* Add */
+                                AddSubInternalFPF(0,abase+i,
+                                  bbase+i,
+                                  cbase+i);
+                                break;
+                        case 1: /* Subtract */
+                                AddSubInternalFPF(1,abase+i,
+                                  bbase+i,
+                                  cbase+i);
+                                break;
+                        case 2: /* Multiply */
+                                MultiplyInternalFPF(abase+i,
+                                  bbase+i,
+                                  cbase+i);
+                                break;
+                        case 3: /* Divide */
+                                DivideInternalFPF(abase+i,
+                                  bbase+i,
+                                  cbase+i);
+                                break;
+                }
+#ifdef DEBUG
+{
+  ulong j[8];   /* we test 8 entries */
+  int k;        /* index into j[] */
+  ulong i;      /* shadows the outer i inside this block */
+  char buffer[1024];    /* text form of one operand or operator */
+  if (number_of_loops==loops) /* the first loop */
+    {
+      j[0]=(ulong)2;    /* four entries near the start... */
+      j[1]=(ulong)6;
+      j[2]=(ulong)10;
+      j[3]=(ulong)14;
+      j[4]=(ulong)(arraysize-14);       /* ...and four near the end */
+      j[5]=(ulong)(arraysize-10);
+      j[6]=(ulong)(arraysize-6);
+      j[7]=(ulong)(arraysize-2);
+      for(k=0;k<8;k++){
+	i=j[k];
+	InternalFPFToString(buffer,abase+i);
+	printf("%6ld: (%s) ",i,buffer);
+	switch(jtable[i % 16])  /* same operator choice as above */
+	  {
+	  case 0: strcpy(buffer,"+"); break;
+	  case 1: strcpy(buffer,"-"); break;
+	  case 2: strcpy(buffer,"*"); break;
+	  case 3: strcpy(buffer,"/"); break;
+	  }
+	printf("%s ",buffer);
+	InternalFPFToString(buffer,bbase+i);
+	printf("(%s) = ",buffer);
+	InternalFPFToString(buffer,cbase+i);
+	printf("%s\n",buffer);
+      }
+    }
+}
+#endif
+}
+return(StopStopwatch(elapsed));
+}
+
+/***********************
+** SetInternalFPFZero **
+************************
+** Turn *dest into a zero of the given sign: type IFPF_IS_ZERO,
+** exponent MIN_EXP, and every mantissa word cleared.
+*/
+static void SetInternalFPFZero(InternalFPF *dest,
+                        uchar sign)
+{
+u16 *word=dest->mantissa;                       /* Mantissa cursor */
+u16 *const end=word+INTERNAL_FPF_PRECISION;     /* One past last word */
+while(word<end)
+        *word++=0;
+dest->exp=MIN_EXP;
+dest->sign=sign;
+dest->type=IFPF_IS_ZERO;
+return;
+}
+
+/***************************
+** SetInternalFPFInfinity **
+****************************
+** Turn *dest into an infinity of the requested sign: the type
+** becomes IFPF_IS_INFINITY, the exponent MIN_EXP and every
+** mantissa word zero.
+*/
+static void SetInternalFPFInfinity(InternalFPF *dest,
+                        uchar sign)
+{
+int word=INTERNAL_FPF_PRECISION;        /* Words left to clear */
+
+while(word-->0)
+        dest->mantissa[word]=0;
+dest->exp=MIN_EXP;
+dest->sign=sign;
+dest->type=IFPF_IS_INFINITY;
+return;
+}
+
+/**********************
+** SetInternalFPFNaN **
+***********************
+** Load *dest with the package's NaN (not a number) pattern:
+** sign 1, exponent MAX_EXP, and a mantissa whose only set bit
+** is 0x4000 in the leading word (80x87-style mantissa bits).
+*/
+static void SetInternalFPFNaN(InternalFPF *dest)
+{
+int word;       /* Mantissa word index */
+
+for(word=INTERNAL_FPF_PRECISION-1;word>0;word--)
+        dest->mantissa[word]=0;
+dest->mantissa[0]=0x4000;
+dest->sign=1;
+dest->exp=MAX_EXP;
+dest->type=IFPF_IS_NAN;
+
+return;
+}
+
+/*******************
+** IsMantissaZero **
+********************
+** Test whether the INTERNAL_FPF_PRECISION-word mantissa that
+** mant points at is entirely zero.
+** Returns 1 when every word is 0, and 0 otherwise.
+*/
+static int IsMantissaZero(u16 *mant)
+{
+const u16 *word=mant;                           /* Cursor */
+const u16 *const end=mant+INTERNAL_FPF_PRECISION;
+
+/* Bail out at the first word that has any bit set. */
+for(;word<end;word++)
+        if(*word!=0)
+                return(0);
+return(1);
+}
+
+/**************
+** Add16Bits **
+***************
+** Compute b + c + *carry.  Low 16 bits go to *a; bit 16 to *carry.
+*/
+static void Add16Bits(u16 *carry,
+                u16 *a,
+                u16 b,
+                u16 c)
+{
+u32 sum;                /* Wide enough to hold the carry-out */
+
+/*
+** Sum all three inputs in the wider type so that the
+** carry-out shows up as bit 16 instead of being lost.
+*/
+sum=(u32)b+(u32)c+(u32)*carry;
+
+*carry=(u16)((sum >> 16) & 1);          /* Bit 16 is the new carry */
+*a=(u16)(sum & 0xFFFF);                 /* Low half is the result */
+
+return;
+}
+
+/**************
+** Sub16Bits **
+***************
+** Compute b - c - *borrow.  Low 16 bits go to *a; bit 16 to *borrow.
+*/
+static void Sub16Bits(u16 *borrow,
+                u16 *a,
+                u16 b,
+                u16 c)
+{
+u32 diff;               /* A result below zero wraps and sets bit 16 */
+
+diff=(u32)b-(u32)c-(u32)*borrow;
+
+*borrow=(u16)((diff >> 16) & 1);        /* Bit 16 is the new borrow */
+*a=(u16)(diff & 0xFFFF);                /* Low half is the result */
+
+return;
+}
+
+/*******************
+** ShiftMantLeft1 **
+********************
+** Shift the whole mantissa left by one bit, working from the last
+** (least significant) word to the first.  A nonzero *carry enters
+** at bit 0; on return *carry is the old top bit (0 or 0x8000).
+*/
+static void ShiftMantLeft1(u16 *carry,
+                        u16 *mantissa)
+{
+int word;       /* Index of the word being shifted */
+u16 top_bit;    /* Bit 15 of that word before the shift */
+u16 shifted;    /* The word after shifting */
+
+word=INTERNAL_FPF_PRECISION;
+while(word-->0)
+{       top_bit=(u16)(mantissa[word] & 0x8000); /* Outgoing carry */
+        shifted=(u16)(mantissa[word] << 1);     /* Do the shift */
+        if(*carry!=0)
+                shifted|=1;             /* Bring in incoming carry */
+        *carry=top_bit;                 /* Pass carry up the chain */
+        mantissa[word]=shifted;         /* Store shifted value */
+}
+return;
+}
+
+/********************
+** ShiftMantRight1 **
+*********************
+** Shift the whole mantissa right by one bit, first word to last.  A
+** nonzero *carry enters at bit 15; *carry returns the dropped bit.
+*/
+static void ShiftMantRight1(u16 *carry,
+                        u16 *mantissa)
+{
+u16 *word;      /* Word currently being shifted */
+u16 *end;       /* One past the last mantissa word */
+u16 low_bit;    /* Bit 0 of the word before the shift */
+
+end=mantissa+INTERNAL_FPF_PRECISION;
+for(word=mantissa;word!=end;word++)
+{       low_bit=(u16)(*word & 1);       /* Outgoing carry */
+        if(*carry!=0)
+                *word=(u16)((*word >> 1) | 0x8000);
+        else
+                *word=(u16)(*word >> 1);
+        *carry=low_bit;                 /* Pass carry down the chain */
+}
+return;
+}
+
+
+/*************************
+** StickyShiftRightMant **
+**************************
+** Shift ptr's mantissa right by amount bits with a "sticky bit":
+** whenever a 1 is shifted out of the least significant bit, the
+** least significant bit is set to 1.  Zeros are left untouched.
+*/
+static void StickyShiftRightMant(InternalFPF *ptr,
+                        int amount)
+{
+u16 *const lsw=ptr->mantissa+(INTERNAL_FPF_PRECISION-1); /* Last word */
+u16 *word;      /* Cursor used when clearing */
+u16 dropped;    /* Bit shifted out by one step */
+
+/* Don't bother shifting a zero */
+if(ptr->type==IFPF_IS_ZERO)
+        return;
+
+/*
+** A shift by at least the full mantissa width pushes every
+** bit out of existence: the answer is just the sticky bit,
+** i.e. an all-zero mantissa whose lowmost bit is 1.
+*/
+if(amount>=INTERNAL_FPF_PRECISION * 16)
+{
+        for(word=ptr->mantissa;word!=lsw;word++)
+                *word=0;
+        *lsw=1;
+        return;
+}
+/* Otherwise shift bit by bit; each dropped 1 "sticks" in the LSB */
+for(;amount>0;amount--)
+{
+        dropped=0;
+        ShiftMantRight1(&dropped,ptr->mantissa);
+        if(dropped)
+                *lsw |= 1;
+}
+return;
+}
+
+
+/**************************************************
+**         POST ARITHMETIC PROCESSING            **
+**  (NORMALIZE, ROUND, OVERFLOW, AND UNDERFLOW)  **
+**************************************************/
+
+/**************
+** normalize **
+***************
+** Slide the mantissa of *ptr left until its top bit is set,
+** lowering the exponent by one for each bit position moved.
+*/
+static void normalize(InternalFPF *ptr)
+{
+u16     shift_in;       /* Always 0: zeros enter at the bottom */
+
+/*
+** The value is unchanged as long as every one-bit left shift
+** of the significand is paid for with an exponent decrement.
+** NOTE: an all-zero mantissa would never leave this loop.
+*/
+while (!(ptr->mantissa[0] & 0x8000))
+{
+        ptr->exp--;
+        shift_in = 0;
+        ShiftMantLeft1(&shift_in, ptr->mantissa);
+}
+return;
+}
+
+/****************
+** denormalize **
+*****************
+** Denormalize an internal-representation number.  If its
+** exponent is below minimum_exponent, the mantissa is shifted
+** right (with a sticky bit) until the exponent reaches it; if
+** that would shift out every bit, the number becomes zero.
+*/
+static void denormalize(InternalFPF *ptr,
+                int minimum_exponent)
+{
+long shortfall;         /* How far exp lies below the minimum */
+
+if (IsMantissaZero(ptr->mantissa))
+        printf("Error:  zero significand in denormalize\n");
+
+/*
+** Nothing to do unless the exponent is under the minimum,
+** i.e. unless the number is subnormal.
+*/
+shortfall = minimum_exponent-ptr->exp;
+if (shortfall <= 0)
+        return;
+if (shortfall >= (INTERNAL_FPF_PRECISION * 16))
+{
+        /* Underflow: every mantissa bit would be shifted out */
+        SetInternalFPFZero(ptr, ptr->sign);
+        return;
+}
+
+/*
+** Raise the exponent to the minimum and shift the mantissa
+** right by the same number of bits, keeping a sticky bit.
+*/
+ptr->exp+=shortfall;
+StickyShiftRightMant(ptr, shortfall);
+return;
+}
+
+
+/*********************
+** RoundInternalFPF **
+**********************
+** Round an internal-representation number.
+** The kind of rounding we do here is simplest...referred to as
+** "chop".  "Extraneous" rightmost bits are simply hacked off.
+*/
+void RoundInternalFPF(InternalFPF *ptr)
+{
+/* int i; */
+
+/* Only normal and subnormal numbers are touched */
+if (ptr->type == IFPF_IS_NORMAL ||
+        ptr->type == IFPF_IS_SUBNORMAL)
+{
+        denormalize(ptr, MIN_EXP);      /* may turn *ptr into a zero */
+        if (ptr->type != IFPF_IS_ZERO)
+        {
+
+                /* clear the extraneous bits: low 3 bits of word 3 */
+                ptr->mantissa[3] &= 0xfff8;
+/*              for (i=4; i<INTERNAL_FPF_PRECISION; i++)
+                {
+                        ptr->mantissa[i] = 0;
+                }
+*/
+                /*
+                ** Check for overflow (disabled, see the note below)
+                */
+/*              Does not do anything as ptr->exp is a short and MAX_EXP=37268
+		if (ptr->exp > MAX_EXP)
+                {
+                        SetInternalFPFInfinity(ptr, ptr->sign);
+                }
+*/
+        }
+}
+return;
+}
+
+/*******************************************************
+**  ARITHMETIC OPERATIONS ON INTERNAL REPRESENTATION  **
+*******************************************************/
+
+/***************
+** choose_nan **
+****************
+** Called by routines that are forced to perform math on
+** a pair of NaN's.  This routine "selects" which NaN is
+** to be returned and copies it into z; x and y are only read.
+*/
+static void choose_nan(InternalFPF *x,
+                InternalFPF *y,
+                InternalFPF *z,
+                int intel_flag)
+{
+int i;
+
+/*
+** Compare the two mantissas, most significant word first, and
+** return the larger.  Note that we will be emulating
+** an 80387 in this operation.
+*/
+for (i=0; i<INTERNAL_FPF_PRECISION; i++)
+{
+        if (x->mantissa[i] > y->mantissa[i])
+        {
+                memmove((void *)z,(void *)x,sizeof(InternalFPF));
+                return;
+        }
+        if (x->mantissa[i] < y->mantissa[i])
+        {
+                memmove((void *)z,(void *)y,sizeof(InternalFPF));
+                return;
+        }
+}
+
+/*
+** They are equal
+*/
+if (!intel_flag)
+        /* flag clear (multiply and divide pass 0): take x */
+        memmove((void *)z,(void *)x,sizeof(InternalFPF));
+else
+        /* flag set (add/subtract passes 1): take y */
+        memmove((void *)z,(void *)y,sizeof(InternalFPF));
+return;
+}
+
+
+/**********************
+** AddSubInternalFPF **
+***********************
+** Adding or subtracting internal-representation numbers.
+** operation is 0 to compute x+y and 1 to compute x-y.  The
+** result is rounded and returned in z; x and y are only read.
+*/
+static void AddSubInternalFPF(uchar operation,
+                InternalFPF *x,
+                InternalFPF *y,
+                InternalFPF *z)
+{
+int exponent_difference;
+u16 borrow;
+u16 carry;
+int i;
+InternalFPF locx,locy;  /* Needed since we alter them */
+
+/*
+** Following big switch statement handles the
+** various combinations of operand types.
+*/
+switch ((x->type * IFPF_TYPE_COUNT) + y->type)
+{
+case ZERO_ZERO:
+        memmove((void *)z,(void *)x,sizeof(InternalFPF));
+        if (x->sign ^ y->sign ^ operation)
+        {
+                z->sign = 0; /* positive */
+        }
+        break;
+
+case NAN_ZERO:
+case NAN_SUBNORMAL:
+case NAN_NORMAL:
+case NAN_INFINITY:
+case SUBNORMAL_ZERO:
+case NORMAL_ZERO:
+case INFINITY_ZERO:
+case INFINITY_SUBNORMAL:
+case INFINITY_NORMAL:
+        memmove((void *)z,(void *)x,sizeof(InternalFPF));  /* z = x */
+        break;
+
+
+case ZERO_NAN:
+case SUBNORMAL_NAN:
+case NORMAL_NAN:
+case INFINITY_NAN:
+        memmove((void *)z,(void *)y,sizeof(InternalFPF));  /* z = y */
+        break;
+
+case ZERO_SUBNORMAL:
+case ZERO_NORMAL:
+case ZERO_INFINITY:
+case SUBNORMAL_INFINITY:
+case NORMAL_INFINITY:
+        memmove((void *)z,(void *)y,sizeof(InternalFPF));  /* z = y */
+        z->sign ^= operation;   /* subtracting y flips its sign */
+        break;
+
+case SUBNORMAL_SUBNORMAL:
+case SUBNORMAL_NORMAL:
+case NORMAL_SUBNORMAL:
+case NORMAL_NORMAL:
+        /*
+        ** Copy x and y to locals, since we may have
+        ** to alter them.
+        */
+        memmove((void *)&locx,(void *)x,sizeof(InternalFPF));
+        memmove((void *)&locy,(void *)y,sizeof(InternalFPF));
+
+        /* compute sum/difference */
+        exponent_difference = locx.exp-locy.exp;
+        if (exponent_difference == 0)
+        {
+                /*
+                ** locx.exp == locy.exp
+                ** so, no shifting required
+                */
+                if (locx.type == IFPF_IS_SUBNORMAL ||
+                  locy.type == IFPF_IS_SUBNORMAL)
+                        z->type = IFPF_IS_SUBNORMAL;
+                else
+                        z->type = IFPF_IS_NORMAL;
+
+                /*
+                ** Assume that locx.mantissa > locy.mantissa
+                */
+                z->sign = locx.sign;
+                z->exp= locx.exp;
+        }
+        else
+                if (exponent_difference > 0)
+                {
+                        /*
+                        ** locx.exp > locy.exp
+                        */
+                        StickyShiftRightMant(&locy,
+                                 exponent_difference);
+                        z->type = locx.type;
+                        z->sign = locx.sign;
+                        z->exp = locx.exp;
+                }
+                else    /* if (exponent_difference < 0) */
+                {
+                        /*
+                        ** locx.exp < locy.exp
+                        */
+                        StickyShiftRightMant(&locx,
+                                -exponent_difference);
+                        z->type = locy.type;
+                        z->sign = locy.sign ^ operation;
+                        z->exp = locy.exp;
+                }
+
+                if (locx.sign ^ locy.sign ^ operation)
+                {
+                        /*
+                        ** Signs are different, subtract mantissas
+                        */
+                        borrow = 0;
+                        for (i=(INTERNAL_FPF_PRECISION-1); i>=0; i--)
+                                Sub16Bits(&borrow,
+                                        &z->mantissa[i],
+                                        locx.mantissa[i],
+                                        locy.mantissa[i]);
+
+                        if (borrow)
+                        {
+                                /* The y->mantissa was larger than the
+                                ** x->mantissa leaving a negative
+                                ** result.  Change the result back to
+                                ** an unsigned number and flip the
+                                ** sign flag.
+                                */
+                                z->sign = locy.sign ^ operation;
+                                borrow = 0;
+                                for (i=(INTERNAL_FPF_PRECISION-1); i>=0; i--)
+                                {
+                                        Sub16Bits(&borrow,
+                                                &z->mantissa[i],
+                                                0,
+                                                z->mantissa[i]);
+                                }
+                        }
+                        else
+                        {
+                                /* The assumption made above
+                                ** (i.e. x->mantissa >= y->mantissa)
+                                ** was correct.  Therefore, do nothing.
+                                ** z->sign = x->sign;
+                                */
+                        }
+
+                        if (IsMantissaZero(z->mantissa))
+                        {
+                                z->type = IFPF_IS_ZERO;
+                                z->sign = 0; /* positive */
+                        }
+                        else
+                                if (locx.type == IFPF_IS_NORMAL ||
+                                         locy.type == IFPF_IS_NORMAL)
+                                {
+                                        normalize(z);
+                                }
+                }
+                else
+                {
+                        /* signs are the same, add mantissas */
+                        carry = 0;
+                        for (i=(INTERNAL_FPF_PRECISION-1); i>=0; i--)
+                        {
+                                Add16Bits(&carry,
+                                        &z->mantissa[i],
+                                        locx.mantissa[i],
+                                        locy.mantissa[i]);
+                        }
+
+                        if (carry)
+                        {
+                                z->exp++;       /* sum overflowed the mantissa */
+                                carry=0;
+                                ShiftMantRight1(&carry,z->mantissa);
+                                z->mantissa[0] |= 0x8000;  /* restore carried-out bit */
+                                z->type = IFPF_IS_NORMAL;
+                        }
+                        else
+                                if (z->mantissa[0] & 0x8000)
+                                        z->type = IFPF_IS_NORMAL;
+        }
+        break;
+
+case INFINITY_INFINITY:
+        SetInternalFPFNaN(z);
+        break;
+
+case NAN_NAN:
+        choose_nan(x, y, z, 1);
+        break;
+}
+
+/*
+** All the math is done; time to round.
+*/
+RoundInternalFPF(z);
+return;
+}
+
+
+/************************
+** MultiplyInternalFPF **
+*************************
+** Two internal-representation numbers x and y are multiplied; the
+** rounded result is returned in z.  x and y are only read.
+*/
+static void MultiplyInternalFPF(InternalFPF *x,
+                        InternalFPF *y,
+                        InternalFPF *z)
+{
+int i;
+int j;
+u16 carry;
+u16 extra_bits[INTERNAL_FPF_PRECISION];
+InternalFPF locy;       /* Needed since this will be altered */
+/*
+** As in the preceding function, this large switch
+** statement selects among the many combinations
+** of operands.
+*/
+switch ((x->type * IFPF_TYPE_COUNT) + y->type)
+{
+case INFINITY_SUBNORMAL:
+case INFINITY_NORMAL:
+case INFINITY_INFINITY:
+case ZERO_ZERO:
+case ZERO_SUBNORMAL:
+case ZERO_NORMAL:
+        memmove((void *)z,(void *)x,sizeof(InternalFPF));  /* z = x */
+        z->sign ^= y->sign;
+        break;
+
+case SUBNORMAL_INFINITY:
+case NORMAL_INFINITY:
+case SUBNORMAL_ZERO:
+case NORMAL_ZERO:
+        memmove((void *)z,(void *)y,sizeof(InternalFPF));  /* z = y */
+        z->sign ^= x->sign;
+        break;
+
+case ZERO_INFINITY:
+case INFINITY_ZERO:
+        SetInternalFPFNaN(z);
+        break;
+
+case NAN_ZERO:
+case NAN_SUBNORMAL:
+case NAN_NORMAL:
+case NAN_INFINITY:
+        memmove((void *)z,(void *)x,sizeof(InternalFPF));  /* z = x */
+        break;
+
+case ZERO_NAN:
+case SUBNORMAL_NAN:
+case NORMAL_NAN:
+case INFINITY_NAN:
+        memmove((void *)z,(void *)y,sizeof(InternalFPF));  /* z = y */
+        break;
+
+
+case SUBNORMAL_SUBNORMAL:
+case SUBNORMAL_NORMAL:
+case NORMAL_SUBNORMAL:
+case NORMAL_NORMAL:
+        /*
+        ** Make a local copy of the y number, since we will be
+        ** altering it in the process of multiplying.
+        */
+        memmove((void *)&locy,(void *)y,sizeof(InternalFPF));
+
+        /* Unnormal zero argument?  Stop here: with an all-zero
+        ** product the normalization loop below would never end.
+        */
+        if (IsMantissaZero(x->mantissa) || IsMantissaZero(y->mantissa))
+        {       SetInternalFPFInfinity(z, 0); break; }
+
+        /*
+        ** Initialize the result
+        */
+        if (x->type == IFPF_IS_SUBNORMAL ||
+            y->type == IFPF_IS_SUBNORMAL)
+                z->type = IFPF_IS_SUBNORMAL;
+        else
+                z->type = IFPF_IS_NORMAL;
+
+        z->sign = x->sign ^ y->sign;
+        z->exp = x->exp + y->exp ;
+        for (i=0; i<INTERNAL_FPF_PRECISION; i++)
+        {
+                z->mantissa[i] = 0;
+                extra_bits[i] = 0;
+        }
+
+        for (i=0; i<(INTERNAL_FPF_PRECISION*16); i++)
+        {
+                /*
+                ** Get rightmost bit of the multiplier
+                */
+                carry = 0;
+                ShiftMantRight1(&carry, locy.mantissa);
+                if (carry)
+                {
+                        /*
+                        ** Add the multiplicand to the product
+                        */
+                        carry = 0;
+                        for (j=(INTERNAL_FPF_PRECISION-1); j>=0; j--)
+                                Add16Bits(&carry,
+                                        &z->mantissa[j],
+                                        z->mantissa[j],
+                                        x->mantissa[j]);
+                }
+                else
+                {
+                        carry = 0;
+                }
+
+                /*
+                ** Shift the product right.  Overflow bits get
+                ** shifted into extra_bits.  We'll use it later
+                ** to help with the "sticky" bit.
+                */
+                ShiftMantRight1(&carry, z->mantissa);
+                ShiftMantRight1(&carry, extra_bits);
+        }
+
+        /*
+        ** Normalize
+        ** Note that we use a "special" normalization routine
+        ** because we need to use the extra bits. (These are
+        ** bits that may have been shifted off the bottom that
+        ** we want to reclaim...if we can.)
+        */
+        while ((z->mantissa[0] & 0x8000) == 0)
+        {
+                carry = 0;
+                ShiftMantLeft1(&carry, extra_bits);
+                ShiftMantLeft1(&carry, z->mantissa);
+                z->exp--;
+        }
+
+        /*
+        ** Set the sticky bit if any bits set in extra bits.
+        */
+        if (!IsMantissaZero(extra_bits))
+        {
+                z->mantissa[INTERNAL_FPF_PRECISION-1] |= 1;
+        }
+        break;
+
+case NAN_NAN:
+        choose_nan(x, y, z, 0);
+        break;
+}
+
+/*
+** All math done...do rounding.
+*/
+RoundInternalFPF(z);
+return;
+}
+
+
+/**********************
+** DivideInternalFPF **
+***********************
+** Divide internal FPF number x by y.  Return rounded result in z.
+*/
+static void DivideInternalFPF(InternalFPF *x,
+                        InternalFPF *y,
+                        InternalFPF *z)
+{
+int i;
+int j;
+u16 carry;
+u16 extra_bits[INTERNAL_FPF_PRECISION];
+InternalFPF locx;       /* Local for x number */
+
+/*
+** As with preceding function, the following switch
+** statement selects among the various possible
+** operands.
+*/
+switch ((x->type * IFPF_TYPE_COUNT) + y->type)
+{
+case ZERO_ZERO:
+case INFINITY_INFINITY:
+        SetInternalFPFNaN(z);
+        break;
+
+case ZERO_SUBNORMAL:
+case ZERO_NORMAL:
+        if (IsMantissaZero(y->mantissa))
+        {
+                SetInternalFPFNaN(z);
+                break;
+        }
+        /* y is nonzero: fall through to the zero result */
+case ZERO_INFINITY:
+case SUBNORMAL_INFINITY:
+case NORMAL_INFINITY:
+        SetInternalFPFZero(z, x->sign ^ y->sign);
+        break;
+
+case SUBNORMAL_ZERO:
+case NORMAL_ZERO:
+        if (IsMantissaZero(x->mantissa))
+        {
+                SetInternalFPFNaN(z);
+                break;
+        }
+        /* x is nonzero: fall through to the infinite result */
+case INFINITY_ZERO:
+case INFINITY_SUBNORMAL:
+case INFINITY_NORMAL:
+        SetInternalFPFInfinity(z, 0);
+        z->sign = x->sign ^ y->sign;
+        break;
+
+case NAN_ZERO:
+case NAN_SUBNORMAL:
+case NAN_NORMAL:
+case NAN_INFINITY:
+        memmove((void *)z,(void *)x,sizeof(InternalFPF));  /* z = x */
+        break;
+
+case ZERO_NAN:
+case SUBNORMAL_NAN:
+case NORMAL_NAN:
+case INFINITY_NAN:
+        memmove((void *)z,(void *)y,sizeof(InternalFPF));  /* z = y */
+        break;
+
+case SUBNORMAL_SUBNORMAL:
+case NORMAL_SUBNORMAL:
+case SUBNORMAL_NORMAL:
+case NORMAL_NORMAL:
+        /*
+        ** Make local copy of x number, since we'll be
+        ** altering it in the process of dividing.
+        */
+        memmove((void *)&locx,(void *)x,sizeof(InternalFPF));
+
+        /*
+        ** Check for unnormal zero arguments
+        */
+        if (IsMantissaZero(locx.mantissa))
+        {
+                if (IsMantissaZero(y->mantissa))
+                        SetInternalFPFNaN(z);
+                else
+                        SetInternalFPFZero(z, 0);
+                break;
+        }
+        if (IsMantissaZero(y->mantissa))
+        {
+                SetInternalFPFInfinity(z, 0);
+                break;
+        }
+
+        /*
+        ** Initialize the result
+        */
+        z->type = x->type;
+        z->sign = x->sign ^ y->sign;
+        z->exp = x->exp - y->exp +
+                        ((INTERNAL_FPF_PRECISION * 16 * 2));
+        for (i=0; i<INTERNAL_FPF_PRECISION; i++)
+        {
+                z->mantissa[i] = 0;
+                extra_bits[i] = 0;
+        }
+
+        while ((z->mantissa[0] & 0x8000) == 0)
+        {
+                carry = 0;
+                ShiftMantLeft1(&carry, locx.mantissa);
+                ShiftMantLeft1(&carry, extra_bits);
+
+                /*
+                ** Time to subtract yet?
+                */
+                if (carry == 0)
+                        for (j=0; j<INTERNAL_FPF_PRECISION; j++)
+                        {
+                                if (y->mantissa[j] > extra_bits[j])
+                                {
+                                        carry = 0;
+                                        goto no_subtract;
+                                }
+                                if (y->mantissa[j] < extra_bits[j])
+                                        break;
+                        }
+                /*
+                ** Divisor (y) <= dividend (x), subtract
+                */
+                carry = 0;
+                for (j=(INTERNAL_FPF_PRECISION-1); j>=0; j--)
+                        Sub16Bits(&carry,
+                                &extra_bits[j],
+                                extra_bits[j],
+                                y->mantissa[j]);
+                carry = 1;      /* 1 shifted into quotient */
+        no_subtract:
+                ShiftMantLeft1(&carry, z->mantissa);
+                z->exp--;
+        }
+        break;
+
+case NAN_NAN:
+        choose_nan(x, y, z, 0);
+        break;
+}
+
+/*
+** Math complete...do rounding
+*/
+RoundInternalFPF(z);
+}
+
+/**********************
+** LongToInternalFPF **
+** Int32ToInternalFPF **
+***********************
+** Convert a signed 32-bit integer into an internal FPF number.
+*/
+/* (Known as LongToInternalFPF before the switch to int32.) */
+static void Int32ToInternalFPF(int32 mylong,
+                InternalFPF *dest)
+{
+int word;       /* Mantissa word index */
+
+/*
+** Split the value into a sign flag and a magnitude.  (Since
+** the parameter is an int32 rather than a long, no special
+** care is needed on machines whose long is 64 bits wide.)
+*/
+if(mylong>=(int32)0)
+        dest->sign=0;
+else
+{       dest->sign=1;
+        mylong=(int32)0-mylong;
+}
+
+/*
+** Start from an all-zero mantissa.
+*/
+for(word=INTERNAL_FPF_PRECISION-1;word>=0;word--)
+        dest->mantissa[word]=0;
+
+/*
+** A zero input becomes a true zero (type IFPF_IS_ZERO with
+** exponent 0) and we go home.
+*/
+if(mylong==0)
+{       dest->type=IFPF_IS_ZERO;
+        dest->exp=0;
+        return;
+}
+
+/*
+** Not a true zero.  Treat the magnitude as a 32-bit mantissa:
+** the high half goes in word 0, the low half in word 1, and
+** the exponent starts at 32 (internal FPFs have no bias).
+** Normalizing then slides the mantissa bits into place and
+** sets up the exponent properly.
+*/
+dest->type=IFPF_IS_NORMAL;
+dest->exp=32;
+
+dest->mantissa[0]=(u16)((mylong >> 16) & 0xFFFFL);
+dest->mantissa[1]=(u16)(mylong & 0xFFFFL);
+
+normalize(dest);
+return;
+}
+
+#ifdef DEBUG
+/************************
+** InternalFPFToString **
+*************************
+** FOR DEBUG PURPOSES
+** This routine converts an internal floating point representation
+** number to a string.  Used in debugging the package.
+** Returns length of converted number.
+** NOTE: dest must point to a buffer big enough to hold the
+**  result.  Also, this routine does append a null (an effect
+**  of using the sprintf() function).  It also returns
+**  a length count.
+** NOTE: This routine returns 5 significant digits.  That's
+**  about all I feel safe with, given the method of
+**  conversion.  It should be more than enough for programmers
+**  to determine whether the package is properly ported.
+*/
+static int InternalFPFToString(char *dest,
+                InternalFPF *src)
+{
+InternalFPF locFPFNum;          /* Local for src (will be altered) */
+InternalFPF IFPF10;             /* Floating-point 10, later the addend */
+InternalFPF IFPFComp;           /* For doing comparisons */
+int msign;                      /* Holding for mantissa sign */
+int expcount;                   /* Exponent counter */
+int ccount;                     /* Character counter */
+int i,j,k;                      /* Index */
+u16 carryaccum;                 /* Carry accumulator */
+u16 mycarry;                    /* Local for carry */
+
+/*
+** Check first for the simple things...NaN, Infinity, Zero.
+** If found, copy the proper string (null included) and go home.
+*/
+switch(src->type)
+{
+        case IFPF_IS_NAN:
+                memcpy(dest,"NaN",4);
+                return(3);
+
+        case IFPF_IS_INFINITY:
+                if(src->sign==0)
+                        memcpy(dest,"+Inf",5);
+                else
+                        memcpy(dest,"-Inf",5);
+                return(4);
+
+        case IFPF_IS_ZERO:
+                if(src->sign==0)
+                        memcpy(dest,"+0",3);
+                else
+                        memcpy(dest,"-0",3);
+                return(2);
+}
+
+/*
+** Move the internal number into our local holding area, since
+** we'll be altering it to print it out.
+*/
+memcpy((void *)&locFPFNum,(void *)src,sizeof(InternalFPF));
+
+/*
+** Set up a floating-point 10...which we'll use a lot in a minute.
+*/
+/* LongToInternalFPF(10L,&IFPF10); */
+Int32ToInternalFPF((int32)10,&IFPF10);
+
+/*
+** Save the mantissa sign and make it positive.
+*/
+msign=src->sign;
+
+/* Clear the sign on the local copy only; src must stay untouched */
+locFPFNum.sign=0;
+
+expcount=0;             /* Init exponent counter */
+
+/*
+** As long as the number is less than 10, multiply it by 10.
+** For each multiplication, decrement a counter so we can keep
+** track of the exponent.  (This leaves the number >= 10; the
+** loop that follows brings it back below 10.)
+*/
+
+while(1)
+{       AddSubInternalFPF(1,&locFPFNum,&IFPF10,&IFPFComp);
+        if(IFPFComp.sign==0) break;
+        MultiplyInternalFPF(&locFPFNum,&IFPF10,&IFPFComp);
+        expcount--;
+        memcpy((void *)&locFPFNum,(void *)&IFPFComp,sizeof(InternalFPF));
+}
+/*
+** Do the reverse of the above.  As long as the number is
+** greater than or equal to 10, divide it by 10.  Increment the
+** exponent counter for each division.
+*/
+
+while(1)
+{
+        AddSubInternalFPF(1,&locFPFNum,&IFPF10,&IFPFComp);
+        if(IFPFComp.sign!=0) break;
+        DivideInternalFPF(&locFPFNum,&IFPF10,&IFPFComp);
+        expcount++;
+        memcpy((void *)&locFPFNum,(void *)&IFPFComp,sizeof(InternalFPF));
+}
+
+/*
+** About time to start storing things.  First, store the
+** mantissa sign.
+*/
+ccount=1;               /* Init character counter */
+if(msign==0)
+        *dest++='+';
+else
+        *dest++='-';
+
+/*
+** At this point we know that the number is in the range
+** 10 > n >=1.  We need to "strip digits" out of the
+** mantissa.  We do this by treating the mantissa as
+** an integer and multiplying by 10. (Not a floating-point
+** 10, but an integer 10.  Since this is debug code and we
+** couldn't care less about speed, we'll do it the stupid
+** way and simply add the number to itself 9 times.)
+** Anything that makes it to the left of the implied binary point
+** gets stripped off and emitted.  We'll do this for
+** 5 significant digits (which should be enough to
+** verify things).
+*/
+/*
+** Re-position radix point
+*/
+carryaccum=0;
+while(locFPFNum.exp>0)
+{
+        mycarry=0;
+        ShiftMantLeft1(&mycarry,locFPFNum.mantissa);
+        carryaccum=(carryaccum<<1);
+        if(mycarry) carryaccum++;
+        locFPFNum.exp--;
+}
+
+while(locFPFNum.exp<0)
+{
+        mycarry=0;
+        ShiftMantRight1(&mycarry,locFPFNum.mantissa);
+        locFPFNum.exp++;
+}
+
+for(i=0;i<6;i++)
+        if(i==1)
+        {       /* Emit decimal point */
+                *dest++='.';
+                ccount++;
+        }
+        else
+        {       /* Emit a digit */
+                *dest++=('0'+carryaccum);
+                ccount++;
+                /* IFPF10 is reused here as a copy of the addend */
+                carryaccum=0;
+                memcpy((void *)&IFPF10,
+                        (void *)&locFPFNum,
+                        sizeof(InternalFPF));
+
+                /* Do multiply via repeated adds */
+                for(j=0;j<9;j++)
+                {
+                        mycarry=0;
+                        for(k=(INTERNAL_FPF_PRECISION-1);k>=0;k--)
+                                Add16Bits(&mycarry,&(IFPFComp.mantissa[k]),
+                                        locFPFNum.mantissa[k],
+                                        IFPF10.mantissa[k]);
+                        carryaccum+=mycarry ? 1 : 0;
+                        memcpy((void *)&locFPFNum,
+                                (void *)&IFPFComp,
+                                sizeof(InternalFPF));
+                }
+        }
+
+/*
+** Now move the 'E', the exponent sign, and the exponent
+** into the string.
+*/
+*dest++='E';
+ccount++;               /* The 'E' is part of the returned length */
+/* sprintf is supposed to return an integer, but it caused problems on SunOS
+ * with the native cc. Hence we force it.
+ * Uwe F. Mayer
+ */
+ccount+=(int)sprintf(dest,"%4d",expcount);
+
+/*
+** All done, go home.
+*/
+return(ccount);
+
+}
+
+#endif
diff --git a/emfloat.h b/emfloat.h
new file mode 100644
index 0000000..41cc6d9
--- /dev/null
+++ b/emfloat.h
@@ -0,0 +1,154 @@
+
+/*
+** emfloat.h
+** Header for emfloat.c
+**
+** BYTEmark (tm)
+** BYTE Magazine's Native Mode benchmarks
+** Rick Grehan, BYTE Magazine
+**
+** Create:
+** Revision: 3/95
+**
+** DISCLAIMER
+** The source, executable, and documentation files that comprise
+** the BYTEmark benchmarks are made available on an "as is" basis.
+** This means that we at BYTE Magazine have made every reasonable
+** effort to verify that there are no errors in the source and
+** executable code.  We cannot, however, guarantee that the programs
+** are error-free.  Consequently, McGraw-Hill and BYTE Magazine make
+** no claims in regard to the fitness of the source code, executable
+** code, and documentation of the BYTEmark.
+**  Furthermore, BYTE Magazine, McGraw-Hill, and all employees
+** of McGraw-Hill cannot be held responsible for any damages resulting
+** from the use of this code or the results obtained from using
+** this code.
+*/
+
+#include <stdio.h>
+
+/* Is this a 64 bit architecture? If so, this will define LONG64 */
+/* Uwe F. Mayer 15 November 1997                                 */
+#include "pointer.h"
+
+/*
+** DEFINES
+*/
+#define u8 unsigned char
+#define u16 unsigned short
+#ifdef LONG64
+#define u32 unsigned int
+#else
+#define u32 unsigned long
+#endif
+#define uchar unsigned char
+#define ulong unsigned long
+
+#define MAX_EXP 32767L
+#define MIN_EXP (-32767L)
+
+#define IFPF_IS_ZERO 0
+#define IFPF_IS_SUBNORMAL 1
+#define IFPF_IS_NORMAL 2
+#define IFPF_IS_INFINITY 3
+#define IFPF_IS_NAN 4
+#define IFPF_TYPE_COUNT 5
+
+#define ZERO_ZERO                       0
+#define ZERO_SUBNORMAL                  1
+#define ZERO_NORMAL                     2
+#define ZERO_INFINITY                   3
+#define ZERO_NAN                        4
+
+#define SUBNORMAL_ZERO                  5
+#define SUBNORMAL_SUBNORMAL             6
+#define SUBNORMAL_NORMAL                7
+#define SUBNORMAL_INFINITY              8
+#define SUBNORMAL_NAN                   9
+
+#define NORMAL_ZERO                     10
+#define NORMAL_SUBNORMAL                11
+#define NORMAL_NORMAL                   12
+#define NORMAL_INFINITY                 13
+#define NORMAL_NAN                      14
+
+#define INFINITY_ZERO                   15
+#define INFINITY_SUBNORMAL              16
+#define INFINITY_NORMAL                 17
+#define INFINITY_INFINITY               18
+#define INFINITY_NAN                    19
+
+#define NAN_ZERO                        20
+#define NAN_SUBNORMAL                   21
+#define NAN_NORMAL                      22
+#define NAN_INFINITY                    23
+#define NAN_NAN                         24
+#define OPERAND_ZERO                    0
+#define OPERAND_SUBNORMAL               1
+#define OPERAND_NORMAL                  2
+#define OPERAND_INFINITY                3
+#define OPERAND_NAN                     4
+
+/*
+** Following already defined in NMGLOBAL.H
+**
+#define INTERNAL_FPF_PRECISION 4
+*/
+
+/*
+** TYPEDEFS
+*/
+
+typedef struct
+{
+        u8 type;        /* One of the IFPF_IS_* codes (NORMAL, SUBNORMAL, etc.) */
+        u8 sign;        /* Mantissa sign: 0 is positive, non-zero is negative */
+        short exp;      /* Signed exponent...no bias */
+        u16 mantissa[INTERNAL_FPF_PRECISION]; /* 16-bit words, [0] most significant */
+} InternalFPF;
+
+/*
+** PROTOTYPES
+*/
+void SetupCPUEmFloatArrays(InternalFPF *abase,
+        InternalFPF *bbase, InternalFPF *cbase, ulong arraysize);
+ulong DoEmFloatIteration(InternalFPF *abase,
+        InternalFPF *bbase, InternalFPF *cbase,
+        ulong arraysize, ulong loops);
+static void SetInternalFPFZero(InternalFPF *dest,
+                        uchar sign);
+static void SetInternalFPFInfinity(InternalFPF *dest,
+                        uchar sign);
+static void SetInternalFPFNaN(InternalFPF *dest);
+static int IsMantissaZero(u16 *mant);
+static void Add16Bits(u16 *carry,u16 *a,u16 b,u16 c);
+static void Sub16Bits(u16 *borrow,u16 *a,u16 b,u16 c);
+static void ShiftMantLeft1(u16 *carry,u16 *mantissa);
+static void ShiftMantRight1(u16 *carry,u16 *mantissa);
+static void StickyShiftRightMant(InternalFPF *ptr,int amount);
+static void normalize(InternalFPF *ptr);
+static void denormalize(InternalFPF *ptr,int minimum_exponent);
+void RoundInternalFPF(InternalFPF *ptr);
+static void choose_nan(InternalFPF *x,InternalFPF *y,InternalFPF *z,
+                int intel_flag);
+static void AddSubInternalFPF(uchar operation,InternalFPF *x,
+                InternalFPF *y,InternalFPF *z);
+static void MultiplyInternalFPF(InternalFPF *x,InternalFPF *y,
+                        InternalFPF *z);
+static void DivideInternalFPF(InternalFPF *x,InternalFPF *y, 
+                        InternalFPF *z);
+/* static void LongToInternalFPF(long mylong, */
+static void Int32ToInternalFPF(int32 mylong,
+                InternalFPF *dest);
+#ifdef DEBUG
+static int InternalFPFToString(char *dest,
+                InternalFPF *src);
+#endif
+
+/*
+** EXTERNALS
+*/
+extern ulong StartStopwatch();
+extern ulong StopStopwatch(ulong elapsed);
+/* extern long randwc(long num); */
+extern int32 randwc(int32 num);
diff --git a/hardware b/hardware
new file mode 100755
index 0000000..6fb3293
--- /dev/null
+++ b/hardware
diff --git a/hardware.c b/hardware.c
new file mode 100644
index 0000000..4838b2f
--- /dev/null
+++ b/hardware.c
@@ -0,0 +1,202 @@
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+
+#define BUF_SIZ 1024
+
+/******************
+** output_string **
+*******************
+** Displays a string on the screen.  Also, if the flag
+** write_to_file is set, outputs the string to the output file.
+** Note, this routine presumes that you've included a carriage
+** return at the end of the buffer.
+*/
+static void output_string(const char *buffer, const int write_to_file,
+                          FILE *global_ofile){
+  /* Always echo to the screen; mirror to the file only when asked to. */
+  fputs(buffer, stdout);
+  if(write_to_file)
+    fputs(buffer, global_ofile);
+}
+
+
+/******************
+** removeNewLine **
+*******************
+** Removes a trailing newline character if present
+*/
+static void removeNewLine(char * s) {
+  size_t len = strlen(s);  /* measured once, before anything is changed */
+  if(len != 0 && s[len-1] == '\n')
+    s[len-1] = '\0';       /* cut the string just before the newline */
+}
+
+
+/***************
+** runCommand **
+****************
+** Run the system command through a pipe
+** The pointer result must point to a pre-allocated array of at least BUF_SIZ
+*/
+static void runCommand (const char *command, char *result) {
+  FILE *cmd_out = popen(command, "r");
+
+  /* Start from an empty result so that every failure path below
+  ** leaves the caller with a valid, empty string. */
+  result[0] = '\0';
+  if(cmd_out != NULL) {
+    /* Only the first line of the command's output is kept. */
+    if(fgets(result, BUF_SIZ, cmd_out) == NULL) {
+      /* nothing could be read: make sure result is empty */
+      result[0] = '\0';
+    }
+    pclose(cmd_out);
+  }
+  removeNewLine(result);
+}
+
+
+/*********************
+** copyCpuInfoField **
+**********************
+** If line begins with key, copies whatever follows the key and its
+** separators (blanks, tabs, colons) into dest, strips a trailing
+** newline from dest and returns 1.  Otherwise returns 0 and leaves
+** dest alone.  line holds fewer than BUF_SIZ characters, so dest must
+** point to an array of at least BUF_SIZ.
+*/
+static int copyCpuInfoField(const char *line, const char *key, char *dest) {
+  size_t keylen = strlen(key);
+  const char *cp;
+  if(strncmp(line, key, keylen) != 0)
+    return 0;
+  cp = line + keylen;
+  while(*cp == ' ' || *cp == ':' || *cp == '\t')
+    cp++;
+  if(*cp != '\0')
+    strcpy(dest, cp);
+  removeNewLine(dest);
+  return 1;
+}
+
+/******************
+** appendToModel **
+*******************
+** Appends word to the string in model, inserting a blank first when
+** model is not empty.  model is an array of BUF_SIZ characters; text
+** that does not fit is dropped rather than written past its end.
+*/
+static void appendToModel(char *model, const char *word) {
+  size_t used = strlen(model);
+  if(used != 0 && used < BUF_SIZ - 1) {
+    model[used++] = ' ';
+    model[used] = '\0';
+  }
+  strncat(model, word, BUF_SIZ - 1 - used);
+}
+
+/********************
+** readProcCpuInfo **
+*********************
+** Reads and parses /proc/cpuinfo on a Linux system.
+** model receives "<Dual | n CPU> <vendor_id> <model name> <n>MHz", built
+** from whichever of these pieces were found; cache receives the value of
+** the "cache size" line.  Both stay empty if the file cannot be opened.
+** The pointers must point to pre-allocated arrays of at least BUF_SIZ
+*/
+static void readProcCpuInfo (char *model, char *cache) {
+  FILE * info;
+  int cpus = 0;
+  char buffer[BUF_SIZ];
+  char vendor_id[BUF_SIZ];
+  char model_name[BUF_SIZ];
+  char cpu_MHz[BUF_SIZ];
+  int i;
+  float f;
+
+  vendor_id[0] = model_name[0] = cpu_MHz[0] = model[0] = cache[0] = '\0';
+  info = fopen("/proc/cpuinfo", "r");
+  if(info == NULL)
+    return;
+
+  while(NULL != fgets(buffer, BUF_SIZ, info)){
+    if(! strncmp(buffer, "processor", 9)) {
+      cpus++;
+      continue;
+    }
+    /* A line matches at most one key; stop at the first that does.
+    ** A later non-empty value for the same key replaces the earlier one. */
+    (void)(copyCpuInfoField(buffer, "vendor_id", vendor_id)
+        || copyCpuInfoField(buffer, "model name", model_name)
+        || copyCpuInfoField(buffer, "cpu MHz", cpu_MHz)
+        || copyCpuInfoField(buffer, "cache size", cache));
+  }
+  fclose(info);
+
+  if(cpus>1) {
+    if (cpus==2) {
+      strcpy(model, "Dual");
+    } else {
+      sprintf(model, "%d CPU", cpus);
+    }
+  }
+  if(vendor_id[0] != '\0')
+    appendToModel(model, vendor_id);
+  if(model_name[0] != '\0')
+    appendToModel(model, model_name);
+  if(cpu_MHz[0] != '\0'){
+    /* Show the clock as a whole number: add one half, then truncate. */
+    f = atof(cpu_MHz);
+    i = (int)(f+0.5f);
+    sprintf(cpu_MHz, "%dMHz", i);
+    appendToModel(model, cpu_MHz);
+  }
+}
+
+
+/*************
+** hardware **
+**************
+** Runs the system command "uname -s -r"
+** Reads /proc/cpuinfo if on a linux system
+** Writes output
+*/
+void hardware(const int write_to_file, FILE *global_ofile) {
+  char buffer[BUF_SIZ];
+  char os[BUF_SIZ];
+  char model[BUF_SIZ];
+  char cache[BUF_SIZ];
+#ifdef NO_UNAME
+  os[0] = '\0';
+#else
+  runCommand("uname -s -r", os);
+#endif
+  if(NULL != strstr(os, "Linux")) {
+    readProcCpuInfo (model, cache);
+  } else {
+    model[0] = '\0';
+    cache[0] = '\0';
+  }
+  /* %.*s caps each value so that label, value and newline fit in buffer */
+  sprintf(buffer, "CPU                 : %.*s\n", BUF_SIZ-32, model);
+  output_string(buffer, write_to_file, global_ofile);
+  sprintf(buffer, "L2 Cache            : %.*s\n", BUF_SIZ-32, cache);
+  output_string(buffer, write_to_file, global_ofile);
+  sprintf(buffer, "OS                  : %.*s\n", BUF_SIZ-32, os);
+  output_string(buffer, write_to_file, global_ofile);
+}
+
+
+/************************
+** main for hardware.c **
+*************************
+** For testing of code only
+** Should be commented out
+*/
+/*
+int main(int argc, char * argv[]) {
+  hardware(0, NULL);
+  return 0;
+}
+*/
diff --git a/hardware.h b/hardware.h
new file mode 100644
index 0000000..2a07934
--- /dev/null
+++ b/hardware.h
@@ -0,0 +1,2 @@
+extern
+void hardware(const int write_to_file, FILE *global_ofile);
diff --git a/hello.c b/hello.c
new file mode 100644
index 0000000..c664483
--- /dev/null
+++ b/hello.c
@@ -0,0 +1,2 @@
+#include <stdio.h>
+int main (void) { puts("hello."); return 0; } /* prints "hello." and a newline */
diff --git a/misc.c b/misc.c
new file mode 100644
index 0000000..a5144e4
--- /dev/null
+++ b/misc.c
@@ -0,0 +1,120 @@
+
+/*
+** misc.c
+** BYTEmark (tm)
+** BYTE's Native Mode Benchmarks
+** Rick Grehan, BYTE Magazine
+** DISCLAIMER
+** The source, executable, and documentation files that comprise
+** the BYTEmark benchmarks are made available on an "as is" basis.
+** This means that we at BYTE Magazine have made every reasonable
+** effort to verify that there are no errors in the source and
+** executable code.  We cannot, however, guarantee that the programs
+** are error-free.  Consequently, McGraw-Hill and BYTE Magazine make
+** no claims in regard to the fitness of the source code, executable
+** code, and documentation of the BYTEmark.
+**  Furthermore, BYTE Magazine, McGraw-Hill, and all employees
+** of McGraw-Hill cannot be held responsible for any damages resulting
+** from the use of this code or the results obtained from using
+** this code.
+*/
+
+#include <stdio.h>
+#include "misc.h"
+
+/***********************************************************
+**     MISCELLANEOUS BUT OTHERWISE NECESSARY ROUTINES     **
+***********************************************************/
+
+/****************************
+** RANDOM NUMBER GENERATOR **
+*****************************
+** This is a second-order linear congruential random number
+** generator.  Its advantage is (of course) that it can be
+** seeded and will thus produce repeatable sequences of
+** random numbers.
+*/
+
+/****************************
+*         randwc()          *
+*****************************
+** Returns signed long random modulo num.
+*/
+/*
+long randwc(long num)
+{
+	return(randnum(0L)%num);
+}
+*/
+/*
+** Returns signed 32-bit random modulo num.
+*/
+int32 randwc(int32 num)
+{	int32 next=randnum((int32)0);	/* 0 asks for the next member */
+	return(next%num);		/* num must not be 0 */
+}
+
+/***************************
+**      abs_randwc()      **
+****************************
+** Same as randwc(), only this routine returns only
+** positive numbers.
+*/
+/*
+unsigned long abs_randwc(unsigned long num)
+{
+long temp;
+
+temp=randwc(num);
+if(temp<0) temp=0L-temp;
+
+return((unsigned long)temp);
+}
+*/
+u32 abs_randwc(u32 num)
+{
+/* Draw a signed value, then fold a negative result onto its */
+/* positive counterpart before handing it back as unsigned.  */
+int32 drawn=randwc(num);
+
+if(drawn<0) return((u32)((int32)0-drawn));
+return((u32)drawn);
+}
+
+/****************************
+*        randnum()          *
+*****************************
+** Second order linear congruential generator.
+** Constants suggested by J. G. Skellam.
+** If lngval==0, returns next member of sequence.
+**    lngval!=0, restarts generator, then returns its first member.
+*/
+/*
+long randnum(long lngval)
+{
+	register long interm;
+	static long randw[2] = { 13L , 117L };
+
+	if (lngval!=0L)
+	{	randw[0]=13L; randw[1]=117L; }
+
+	interm=(randw[0]*254754L+randw[1]*529562L)%999563L;
+	randw[1]=randw[0];
+	randw[0]=interm;
+	return(interm);
+}
+*/
+int32 randnum(int32 lngval)
+{
+	register int32 interm;
+	static int32 randw[2] = { (int32)13 , (int32)117 };
+	u32 mixed;	/* products formed unsigned: they wrap, no signed overflow */
+	if (lngval!=(int32)0)	/* non-zero argument: reseed, then draw as usual */
+	{	randw[0]=(int32)13; randw[1]=(int32)117; }
+	mixed=(u32)randw[0]*(u32)254754+(u32)randw[1]*(u32)529562;
+	interm=(int32)mixed%(int32)999563;	/* same low 32 bits as before */
+	randw[1]=randw[0];
+	randw[0]=interm;
+	return(interm);
+}
+
diff --git a/misc.h b/misc.h
new file mode 100644
index 0000000..0f9bc13
--- /dev/null
+++ b/misc.h
@@ -0,0 +1,41 @@
+/*
+** misc.h
+** Header for misc.c
+** BYTEmark (tm)
+** BYTE's Native Mode Benchmarks
+** Rick Grehan, BYTE Magazine
+**
+** Creation:
+** Revision: 3/95
+**
+** DISCLAIMER
+** The source, executable, and documentation files that comprise
+** the BYTEmark benchmarks are made available on an "as is" basis.
+** This means that we at BYTE Magazine have made every reasonable
+** effort to verify that there are no errors in the source and
+** executable code.  We cannot, however, guarantee that the programs
+** are error-free.  Consequently, McGraw-Hill and BYTE Magazine make
+** no claims in regard to the fitness of the source code, executable
+** code, and documentation of the BYTEmark.
+**  Furthermore, BYTE Magazine, McGraw-Hill, and all employees
+** of McGraw-Hill cannot be held responsible for any damages resulting
+** from the use of this code or the results obtained from using
+** this code.
+*/
+
+/************************
+** FUNCTION PROTOTYPES **
+************************/
+
+/*
+long randwc(long num);
+unsigned long abs_randwc(unsigned long num);
+long randnum(long lngval);
+*/
+
+#include "nmglobal.h"
+int32 randwc(int32 num);
+u32 abs_randwc(u32 num);
+int32 randnum(int32 lngval);
+
+
diff --git a/nbench0.c b/nbench0.c
new file mode 100644
index 0000000..784b501
--- /dev/null
+++ b/nbench0.c
@@ -0,0 +1,1174 @@
+
+/*
+** nbench0.c
+*/
+
+/*******************************************
+**             BYTEmark (tm)              **
+** BYTE MAGAZINE'S NATIVE MODE BENCHMARKS **
+**           FOR CPU/FPU                  **
+**             ver 2.0                    **
+**       Rick Grehan, BYTE Magazine       **
+********************************************
+** NOTE: These benchmarks do NOT check for the presence
+** of an FPU.  You have to find that out manually.
+**
+** REVISION HISTORY FOR BENCHMARKS
+**  9/94 -- First beta. --RG
+**  12/94 -- Bug discovered in some of the integer routines
+**    (IDEA, Huffman,...).  Routines were not accurately counting
+**    the number of loops.  Fixed. --RG (Thanks to Steve A.)
+**  12/94 -- Added routines to calculate and display index
+**    values. Indexes based on DELL XPS 90 (90 MHz Pentium).
+**  1/95 -- Added Mac time manager routines for more accurate
+**    timing on Macintosh (said to be good to 20 usecs) -- RG
+**  1/95 -- Re-did all the #defines so they made more
+**    sense.  See NMGLOBAL.H -- RG
+**  3/95 -- Fixed memory leak in LU decomposition.  Did not
+**    invalidate previous results, just made it easier to run.--RG
+**  3/95 -- Added TOOLHELP.DLL timing routine to Windows timer. --RG
+**  10/95 -- Added memory array & alignment; moved memory
+**      allocation out of LU Decomposition -- RG
+**
+** DISCLAIMER
+** The source, executable, and documentation files that comprise
+** the BYTEmark benchmarks are made available on an "as is" basis.
+** This means that we at BYTE Magazine have made every reasonable
+** effort to verify that there are no errors in the source and
+** executable code.  We cannot, however, guarantee that the programs
+** are error-free.  Consequently, McGraw-Hill and BYTE Magazine make
+** no claims in regard to the fitness of the source code, executable
+** code, and documentation of the BYTEmark.
+**  Furthermore, BYTE Magazine, McGraw-Hill, and all employees
+** of McGraw-Hill cannot be held responsible for any damages resulting
+** from the use of this code or the results obtained from using
+** this code.
+*/
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <ctype.h>
+#include <string.h>
+#include <time.h>
+#include <math.h>
+#include "nmglobal.h"
+#include "nbench0.h"
+#include "hardware.h"
+
+/*************
+**** main ****
+*************/
+#ifdef MAC
+void main(void)
+#else
+int main(int argc, char *argv[])
+#endif
+{
+int i;                  /* Index */
+time_t time_and_date;   /* Calendar time of this run */
+struct tm *loctime;     /* time_and_date converted to local time */
+double bmean;           /* Benchmark mean */
+double bstdev;          /* Benchmark stdev */
+double lx_memindex;     /* Linux memory index (mainly integer operations)*/
+double lx_intindex;     /* Linux integer index */
+double lx_fpindex;      /* Linux floating-point index */
+double intindex;        /* Integer index */
+double fpindex;         /* Floating-point index */
+ulong bnumrun;          /* # of runs */
+
+#ifdef MAC
+        MaxApplZone();
+#endif
+
+#ifdef MACTIMEMGR
+/* Set up high res timer */
+MacHSTdelay=600*1000*1000;      /* Delay is 10 minutes */
+
+memset((char *)&myTMTask,0,sizeof(TMTask));
+
+/* Prime and remove the task, calculating overhead */
+PrimeTime((QElemPtr)&myTMTask,-MacHSTdelay);
+RmvTime((QElemPtr)&myTMTask);
+MacHSTohead=MacHSTdelay+myTMTask.tmCount;
+#endif
+
+#ifdef WIN31TIMER
+/* Set up the size of the timer info structure */
+win31tinfo.dwSize=(DWORD)sizeof(TIMERINFO);
+/* Load library */
+if((hThlp=LoadLibrary("TOOLHELP.DLL"))<32)
+{       printf("Error loading TOOLHELP\n");
+        exit(0);
+}
+if(!(lpfn=GetProcAddress(hThlp,"TimerCount")))
+{       printf("TOOLHELP error\n");
+        exit(0);
+}
+#endif
+
+/*
+** Set global parameters to default.
+*/
+global_min_ticks=MINIMUM_TICKS;
+global_min_seconds=MINIMUM_SECONDS;
+global_allstats=0;
+global_custrun=0;
+global_align=8;
+write_to_file=0;
+lx_memindex=(double)1.0;        /* set for geometric mean computations */
+lx_intindex=(double)1.0;
+lx_fpindex=(double)1.0;
+intindex=(double)1.0;
+fpindex=(double)1.0;
+mem_array_ents=0;               /* Nothing in mem array */
+
+/*
+** We presume all tests will be run unless told
+** otherwise
+*/
+for(i=0;i<NUMTESTS;i++)
+        tests_to_do[i]=1;
+
+/*
+** Initialize test data structures to default
+** values.
+*/
+set_request_secs();     /* Set all request_secs fields */
+global_numsortstruct.adjust=0;
+global_numsortstruct.arraysize=NUMARRAYSIZE;
+
+global_strsortstruct.adjust=0;
+global_strsortstruct.arraysize=STRINGARRAYSIZE;
+
+global_bitopstruct.adjust=0;
+global_bitopstruct.bitfieldarraysize=BITFARRAYSIZE;
+
+global_emfloatstruct.adjust=0;
+global_emfloatstruct.arraysize=EMFARRAYSIZE;
+
+global_fourierstruct.adjust=0;
+
+global_assignstruct.adjust=0;
+
+global_ideastruct.adjust=0;
+global_ideastruct.arraysize=IDEAARRAYSIZE;
+
+global_huffstruct.adjust=0;
+global_huffstruct.arraysize=HUFFARRAYSIZE;
+
+global_nnetstruct.adjust=0;
+
+global_lustruct.adjust=0;
+
+/*
+** For Macintosh -- read the command line.
+*/
+#ifdef MAC
+UCommandLine();
+#endif
+
+/*
+** Handle any command-line arguments.
+*/
+if(argc>1)
+        for(i=1;i<argc;i++)
+                if(parse_arg(argv[i])==-1)
+                {       display_help(argv[0]);
+                        exit(0);        /* Stop at the first rejected argument */
+                }
+/*
+** Output header
+*/
+#ifdef LINUX
+output_string("\nBYTEmark* Native Mode Benchmark ver. 2 (10/95)\n");
+output_string("Index-split by Andrew D. Balsa (11/97)\n");
+output_string("Linux/Unix* port by Uwe F. Mayer (12/96,11/97)\n");
+#else
+output_string("BBBBBB   YYY   Y  TTTTTTT  EEEEEEE\n");
+output_string("BBB   B  YYY   Y    TTT    EEE\n");
+output_string("BBB   B  YYY   Y    TTT    EEE\n");
+output_string("BBBBBB    YYY Y     TTT    EEEEEEE\n");
+output_string("BBB   B    YYY      TTT    EEE\n");
+output_string("BBB   B    YYY      TTT    EEE\n");
+output_string("BBBBBB     YYY      TTT    EEEEEEE\n\n");
+output_string("\nBYTEmark (tm) Native Mode Benchmark ver. 2 (10/95)\n");
+#endif
+/*
+** See if the user wants all stats.  Output heading info
+** if so.
+*/
+if(global_allstats)
+{
+                output_string("\n");
+                output_string("============================== ALL STATISTICS ===============================\n");
+        time(&time_and_date);
+        loctime=localtime(&time_and_date);
+        sprintf(buffer,"**Date and time of benchmark run: %s",asctime(loctime));
+        output_string(buffer);
+        sprintf(buffer,"**Sizeof: char:%u short:%u int:%u long:%u u8:%u u16:%u u32:%u int32:%u\n",
+                (unsigned int)sizeof(char),
+                (unsigned int)sizeof(short),
+                (unsigned int)sizeof(int),
+                (unsigned int)sizeof(long),
+                (unsigned int)sizeof(u8),
+                (unsigned int)sizeof(u16),
+                (unsigned int)sizeof(u32),
+                (unsigned int)sizeof(int32));
+        output_string(buffer);
+#ifdef LINUX
+#include "sysinfo.c" /* file contents are spliced into this function body here */
+#else
+        sprintf(buffer,"**%s\n",sysname);
+        output_string(buffer);
+        sprintf(buffer,"**%s\n",compilername);
+        output_string(buffer);
+        sprintf(buffer,"**%s\n",compilerversion);
+        output_string(buffer);
+#endif
+                output_string("=============================================================================\n");
+}
+
+/*
+** Execute the tests.
+*/
+#ifdef LINUX
+output_string("\nTEST                : Iterations/sec.  : Old Index   : New Index\n");
+output_string("                    :                  : Pentium 90* : AMD K6/233*\n");
+output_string("--------------------:------------------:-------------:------------\n");
+#endif
+
+for(i=0;i<NUMTESTS;i++)
+{
+        if(tests_to_do[i])
+        {       sprintf(buffer,"%s    :",ftestnames[i]);
+                                output_string(buffer);
+                if (0!=bench_with_confidence(i,
+                        &bmean,
+                        &bstdev,
+                        &bnumrun)){
+		  output_string("\n** WARNING: The current test result is NOT 95 % statistically certain.\n");
+		  output_string("** WARNING: The variation among the individual results is too large.\n");
+		  output_string("                    :");
+		}
+#ifdef LINUX
+                sprintf(buffer," %15.5g  :  %9.2f  :  %9.2f\n",
+                        bmean,bmean/bindex[i],bmean/lx_bindex[i]);
+#else
+		sprintf(buffer,"  Iterations/sec.: %13.2f  Index: %6.2f\n",
+                        bmean,bmean/bindex[i]);
+#endif
+                output_string(buffer);
+		/*
+		** Gather integer or FP indexes (running products; roots are taken at the end)
+		*/
+		if((i==4)||(i==8)||(i==9)){
+		  /* FP index */
+		  fpindex=fpindex*(bmean/bindex[i]);
+		  /* Linux FP index */
+		  lx_fpindex=lx_fpindex*(bmean/lx_bindex[i]);
+		}
+		else{
+		  /* Integer index */
+		  intindex=intindex*(bmean/bindex[i]);
+		  if((i==0)||(i==3)||(i==6)||(i==7))
+		    /* Linux integer index */
+		    lx_intindex=lx_intindex*(bmean/lx_bindex[i]);
+		  else
+		    /* Linux memory index */
+		    lx_memindex=lx_memindex*(bmean/lx_bindex[i]);
+		}
+
+                if(global_allstats)
+                {
+                        sprintf(buffer,"  Absolute standard deviation: %g\n",bstdev);
+                        output_string(buffer);
+			if (bmean>(double)1e-100){
+			  /* avoid division by zero */
+			  sprintf(buffer,"  Relative standard deviation: %g %%\n",
+				  (double)100*bstdev/bmean);
+			  output_string(buffer);
+			}
+                        sprintf(buffer,"  Number of runs: %lu\n",bnumrun);
+                        output_string(buffer);
+                        show_stats(i);
+                        sprintf(buffer,"Done with %s\n\n",ftestnames[i]);
+                        output_string(buffer);
+                }
+        }
+}
+/* printf("...done...\n"); */
+
+/*
+** Output the total indexes (only when global_custrun is still 0)
+*/
+if(global_custrun==0)
+{
+        output_string("==========================ORIGINAL BYTEMARK RESULTS==========================\n");
+        sprintf(buffer,"INTEGER INDEX       : %.3f\n",
+                       pow(intindex,(double).142857)); /* exponent is approximately 1/7 */
+        output_string(buffer);
+        sprintf(buffer,"FLOATING-POINT INDEX: %.3f\n",
+                        pow(fpindex,(double).33333)); /* exponent is approximately 1/3 */
+        output_string(buffer);
+        output_string("Baseline (MSDOS*)   : Pentium* 90, 256 KB L2-cache, Watcom* compiler 10.0\n");
+#ifdef LINUX
+        output_string("==============================LINUX DATA BELOW===============================\n");
+	hardware(write_to_file, global_ofile);
+#include "sysinfoc.c"
+        sprintf(buffer,"MEMORY INDEX        : %.3f\n",
+                       pow(lx_memindex,(double).3333333333));
+        output_string(buffer);
+        sprintf(buffer,"INTEGER INDEX       : %.3f\n",
+                       pow(lx_intindex,(double).25));
+        output_string(buffer);
+        sprintf(buffer,"FLOATING-POINT INDEX: %.3f\n",
+                        pow(lx_fpindex,(double).3333333333));
+        output_string(buffer);
+        output_string("Baseline (LINUX)    : AMD K6/233*, 512 KB L2-cache, gcc 2.7.2.3, libc-5.4.38\n");
+#endif
+output_string("* Trademarks are property of their respective holder.\n");
+}
+
+exit(0);
+}
+
+/**************
+** parse_arg **
+***************
+** Parse one command-line argument of the form "-<letter>[text]".
+** The option letter is case-insensitive.  Returns 0 if the argument
+** was handled, -1 if it is malformed or unknown (or is "-?").
+*/
+static int parse_arg(char *argptr)
+{
+int i;          /* Index */
+FILE *cfile;    /* Command file identifier */
+
+/*
+** First character has got to be a hyphen.
+*/
+if(*argptr++!='-') return(-1);
+
+/*
+** The option letter picks the action.  Go through unsigned char:
+** toupper() is undefined for negative values other than EOF.
+*/
+switch(toupper((int)(unsigned char)*argptr++))
+{
+        case '?':       return(-1);     /* Will display help */
+
+        case 'V': global_allstats=1; return(0); /* verbose mode */
+
+        case 'C':                       /* Command file name */
+                /*
+                ** Try the name exactly as typed first (case-sensitive
+                ** file systems); if that fails fall back to the
+                ** historical behavior of upper-casing the name.
+                */
+                cfile=fopen(argptr,"r");
+                for(i=0;cfile==(FILE *)NULL && argptr[i]!='\0';i++)
+                        argptr[i]=(char)toupper((int)(unsigned char)argptr[i]);
+                if(cfile==(FILE *)NULL)
+                        cfile=fopen(argptr,"r");
+                if(cfile==(FILE *)NULL)
+                {       printf("**Error opening file: %s\n",argptr);
+                        return(-1);
+                }
+                read_comfile(cfile);    /* Read commands */
+                fclose(cfile);
+                break;
+        default:
+                return(-1);
+}
+return(0);
+}
+
+/*******************
+** display_help() **
+********************
+** Print the usage summary for the program named by progname,
+** then terminate the process with exit status 0 (never returns).
+*/
+void display_help(char *progname)
+{
+        printf("Usage: %s [-v] [-c<FILE>]\n",progname);
+        fputs(" -v = verbose\n",stdout);
+        fputs(" -c = input parameters thru command file <FILE>\n",stdout);
+        exit(0);
+}
+
+
+/*****************
+** read_comfile **
+******************
+** Read "NAME=VALUE" lines from the already-open command file cfile
+** and store each value in the matching global parameter.  Malformed
+** lines and unknown names are reported and skipped.
+*/
+static void read_comfile(FILE *cfile)
+{
+char inbuf[40];
+char *eptr;             /* Offset to "=" sign */
+int i;                  /* Index */
+
+/*
+** Sit in a big loop, reading a line from the file at each
+** pass.  Terminate on EOF.
+*/
+while(fgets(inbuf,39,cfile)!=(char *)NULL)
+{
+        /* Strip the line terminator (LF or CR/LF), and only that: the last
+        ** line of a file may lack one and must not lose a character */
+        inbuf[strcspn(inbuf,"\r\n")]='\0';
+
+        /*
+        ** Parse up to the "=" sign.  If we don't find an
+        ** "=", then flag an error.
+        */
+        if((eptr=strchr(inbuf,(int)'='))==(char *)NULL)
+        {       printf("**COMMAND FILE ERROR at LINE:\n %s\n",
+                        inbuf);
+                goto skipswitch;        /* A GOTO!!!! */
+        }
+
+        /*
+        ** Split at the "=" and upper-case the name so it can be matched
+        ** against paramnames[], scanning from index MAXPARAM down to 0.
+        ** NOTE(review): assumes paramnames[MAXPARAM] is a valid entry -- confirm.
+        */
+        *eptr++='\0';
+        strtoupper((char *)&inbuf[0]);
+        i=MAXPARAM;
+        do {
+                if(strcmp(inbuf,paramnames[i])==0)
+                        break;
+        } while(--i>=0);
+
+        if(i<0)
+        {       printf("**COMMAND FILE ERROR -- UNKNOWN PARAM: %s\n",
+                        inbuf);
+                goto skipswitch;
+        }
+
+        /*
+        ** eptr now points at the value assigned to the parameter;
+        ** convert and store it according to which parameter matched.
+        */
+        switch(i)
+        {
+                case PF_GMTICKS:        /* GLOBALMINTICKS */
+                        global_min_ticks=(ulong)atol(eptr);
+                        break;
+
+                case PF_MINSECONDS:     /* MINSECONDS */
+                        global_min_seconds=(ulong)atol(eptr);
+                        set_request_secs();
+                        break;
+
+                case PF_ALLSTATS:       /* ALLSTATS */
+                        global_allstats=getflag(eptr);
+                        break;
+
+                case PF_OUTFILE:        /* OUTFILE */
+                        strcpy(global_ofile_name,eptr);
+                        global_ofile=fopen(global_ofile_name,"a");
+                        /*
+                        ** The output file is opened for appending.
+                        */
+                        if(global_ofile==(FILE *)NULL)
+                        {       printf("**Error opening output file: %s\n",
+                                        global_ofile_name);
+                                ErrorExit();
+                        }
+                        write_to_file=-1;
+                        break;
+
+                case PF_CUSTOMRUN:      /* CUSTOMRUN */
+                        global_custrun=getflag(eptr);
+                        for(i=0;i<NUMTESTS;i++)
+                                tests_to_do[i]=1-global_custrun;
+                        break;
+
+                case PF_DONUM:          /* DONUMSORT */
+                        tests_to_do[TF_NUMSORT]=getflag(eptr);
+                        break;
+
+                case PF_NUMNUMA:        /* NUMNUMARRAYS */
+                        global_numsortstruct.numarrays=
+                                (ushort)atoi(eptr);
+                        global_numsortstruct.adjust=1;
+                        break;
+
+                case PF_NUMASIZE:       /* NUMARRAYSIZE */
+                        global_numsortstruct.arraysize=
+                                (ulong)atol(eptr);
+                        break;
+
+                case PF_NUMMINS:        /* NUMMINSECONDS */
+                        global_numsortstruct.request_secs=
+                                (ulong)atol(eptr);
+                        break;
+
+                case PF_DOSTR:          /* DOSTRINGSORT */
+                        tests_to_do[TF_SSORT]=getflag(eptr);
+                        break;
+
+                case PF_STRASIZE:       /* STRARRAYSIZE */
+                        global_strsortstruct.arraysize=
+                                (ulong)atol(eptr);
+                        break;
+
+                case PF_NUMSTRA:        /* NUMSTRARRAYS */
+                        global_strsortstruct.numarrays=
+                                (ushort)atoi(eptr);
+                        global_strsortstruct.adjust=1;
+                        break;
+
+                case PF_STRMINS:        /* STRMINSECONDS */
+                        global_strsortstruct.request_secs=
+                                (ulong)atol(eptr);
+                        break;
+
+                case PF_DOBITF: /* DOBITFIELD */
+                        tests_to_do[TF_BITOP]=getflag(eptr);
+                        break;
+
+                case PF_NUMBITOPS:      /* NUMBITOPS */
+                        global_bitopstruct.bitoparraysize=
+                                (ulong)atol(eptr);
+                        global_bitopstruct.adjust=1;
+                        break;
+
+                case PF_BITFSIZE:       /* BITFIELDSIZE */
+                        global_bitopstruct.bitfieldarraysize=
+                                (ulong)atol(eptr);
+                        break;
+
+                case PF_BITMINS:        /* BITMINSECONDS */
+                        global_bitopstruct.request_secs=
+                                (ulong)atol(eptr);
+                        break;
+
+                case PF_DOEMF:          /* DOEMF */
+                        tests_to_do[TF_FPEMU]=getflag(eptr);
+                        break;
+
+                case PF_EMFASIZE:       /* EMFARRAYSIZE */
+                        global_emfloatstruct.arraysize=
+                                (ulong)atol(eptr);
+                        break;
+
+                case PF_EMFLOOPS:       /* EMFLOOPS */
+                        global_emfloatstruct.loops=
+                                (ulong)atol(eptr);
+                        break;
+
+                case PF_EMFMINS:        /* EMFMINSECOND */
+                        global_emfloatstruct.request_secs=
+                                (ulong)atol(eptr);
+                        break;
+
+                case PF_DOFOUR: /* DOFOUR */
+                        tests_to_do[TF_FFPU]=getflag(eptr);
+                        break;
+
+                case PF_FOURASIZE:      /* FOURASIZE */
+                        global_fourierstruct.arraysize=
+                                (ulong)atol(eptr);
+                        global_fourierstruct.adjust=1;
+                        break;
+
+                case PF_FOURMINS:       /* FOURMINSECONDS */
+                        global_fourierstruct.request_secs=
+                                (ulong)atol(eptr);
+                        break;
+
+                case PF_DOASSIGN:       /* DOASSIGN */
+                        tests_to_do[TF_ASSIGN]=getflag(eptr);
+                        break;
+
+                case PF_AARRAYS:        /* ASSIGNARRAYS */
+                        global_assignstruct.numarrays=
+                                (ulong)atol(eptr);
+                        break;
+
+                case PF_ASSIGNMINS:     /* ASSIGNMINSECONDS */
+                        global_assignstruct.request_secs=
+                                (ulong)atol(eptr);
+                        break;
+
+                case PF_DOIDEA: /* DOIDEA */
+                        tests_to_do[TF_IDEA]=getflag(eptr);
+                        break;
+
+                case PF_IDEAASIZE:      /* IDEAARRAYSIZE */
+                        global_ideastruct.arraysize=
+                                (ulong)atol(eptr);
+                        break;
+
+                case PF_IDEALOOPS:      /* IDEALOOPS */
+                        global_ideastruct.loops=
+                                (ulong)atol(eptr);
+                        break;
+
+                case PF_IDEAMINS:       /* IDEAMINSECONDS */
+                        global_ideastruct.request_secs=
+                                (ulong)atol(eptr);
+                        break;
+
+                case PF_DOHUFF: /* DOHUFF */
+                        tests_to_do[TF_HUFF]=getflag(eptr);
+                        break;
+
+                case PF_HUFFASIZE:      /* HUFFARRAYSIZE */
+                        global_huffstruct.arraysize=
+                                (ulong)atol(eptr);
+                        break;
+
+                case PF_HUFFLOOPS:      /* HUFFLOOPS */
+                        global_huffstruct.loops=
+                                (ulong)atol(eptr);
+                        global_huffstruct.adjust=1;
+                        break;
+
+                case PF_HUFFMINS:       /* HUFFMINSECONDS */
+                        global_huffstruct.request_secs=
+                                (ulong)atol(eptr);
+                        break;
+
+                case PF_DONNET: /* DONNET */
+                        tests_to_do[TF_NNET]=getflag(eptr);
+                        break;
+
+                case PF_NNETLOOPS:      /* NNETLOOPS */
+                        global_nnetstruct.loops=
+                                (ulong)atol(eptr);
+                        global_nnetstruct.adjust=1;
+                        break;
+
+                case PF_NNETMINS:       /* NNETMINSECONDS */
+                        global_nnetstruct.request_secs=
+                                (ulong)atol(eptr);
+                        break;
+
+                case PF_DOLU:           /* DOLU */
+                        tests_to_do[TF_LU]=getflag(eptr);
+                        break;
+
+                case PF_LUNARRAYS:      /* LUNUMARRAYS */
+                        global_lustruct.numarrays=
+                                (ulong)atol(eptr);
+                        global_lustruct.adjust=1;
+                        break;
+
+                case PF_LUMINS: /* LUMINSECONDS */
+                        global_lustruct.request_secs=
+                                (ulong)atol(eptr);
+                        break;
+
+                case PF_ALIGN:          /* ALIGN */
+                        global_align=atoi(eptr);
+                        break;
+        }
+skipswitch:
+        continue;
+}       /* End while */
+
+return;
+}
+
+/************
+** getflag **
+*************
+** Return 1 if the string at cptr starts with 'T' or 't'; 0 otherwise.
+*/
+static int getflag(char *cptr)
+{
+int first=toupper((int)*cptr);  /* Leading character, case-folded */
+return(first=='T' ? 1 : 0);
+}
+
+/***************
+** strtoupper **
+****************
+** Convert a null-terminated string to upper case, in place.
+** Characters that have no upper-case form are left unchanged;
+** an empty string is left untouched.
+*/
+static void strtoupper(char *s)
+{
+/*
+** Test for the terminator BEFORE converting, so that an empty string
+** is left alone; a do/while would step over its terminator and keep
+** modifying whatever bytes follow.  Conversion and increment are kept
+** apart because "*s++=(char)toupper((int)*s)" modifies and reads s
+** without a sequence point in between (it did not work on THINK C).
+** The unsigned char cast keeps toupper() defined for 8-bit characters.
+*/
+for(;*s!=(char)'\0';s++)
+        *s=(char)toupper((int)(unsigned char)*s);
+return;
+}
+
+/*********************
+** set_request_secs **
+**********************
+** Copy the current value of global_min_seconds into the
+** "request_secs" member of every benchmark's global structure.
+** read_comfile() calls this after it has stored a new
+** MINSECONDS value in global_min_seconds.
+*/
+static void set_request_secs(void)
+{
+/* The ten assignments are independent of one another. */
+global_lustruct.request_secs=global_min_seconds;
+global_nnetstruct.request_secs=global_min_seconds;
+global_huffstruct.request_secs=global_min_seconds;
+global_ideastruct.request_secs=global_min_seconds;
+global_assignstruct.request_secs=global_min_seconds;
+global_fourierstruct.request_secs=global_min_seconds;
+global_emfloatstruct.request_secs=global_min_seconds;
+global_bitopstruct.request_secs=global_min_seconds;
+global_strsortstruct.request_secs=global_min_seconds;
+global_numsortstruct.request_secs=global_min_seconds;
+
+return;
+}
+
+
+/**************************
+** bench_with_confidence **
+***************************
+** Run benchmark fid repeatedly until the 95% confidence half-interval
+** of the collected scores is at most 5% of their mean.
+**
+** At least 5 and at most 30 runs are made.  Scores are only ever
+** added, never replaced: exchanging data points depending on what
+** has already been observed would violate the independence of the
+** observations that the statistical theory relies on.  We simply do
+** more runs and hope for a sample big enough that things stabilize.
+** (Uwe F. Mayer)
+**
+** Returns 0 if the criterion was met, -1 if it still was not met
+** after 30 runs; either way *mean and *stdev describe all the runs
+** made and *numtries counts them.  Also returns -1 (outputs not
+** meaningful) should calc_confidence() report an error.
+*/
+static int bench_with_confidence(int fid,       /* Function id */
+        double *mean,                   /* Mean of scores */
+        double *stdev,                  /* Standard deviation */
+        ulong *numtries)                /* # of attempts */
+{
+double myscores[30];            /* Need at least 5 scores, use at most 30 */
+double c_half_interval;         /* Confidence half interval */
+int i;                          /* Index */
+/* double newscore; */          /* For improving confidence interval */
+
+/*
+** Get first 5 scores.  Then begin confidence testing.
+*/
+for (i=0;i<5;i++)
+{       (*funcpointer[fid])();
+        myscores[i]=getscore(fid);
+#ifdef DEBUG
+        printf("score # %d = %g\n", i, myscores[i]);
+#endif
+}
+*numtries=5;            /* Show 5 attempts */
+
+/*
+** The system allows a maximum of 30 tries before it gives
+** up.  Since we've done 5 already, we'll allow 25 more.
+*/
+
+/*
+** Enter loop to test for confidence criteria.
+*/
+while(1)
+{
+        /*
+        ** Calculate confidence. Should always return 0.
+        */
+        if (0!=calc_confidence(myscores,
+                *numtries,
+                &c_half_interval,
+                mean,
+                stdev)) return(-1);
+
+        /*
+        ** Is the length of the half interval 5% or less of mean?
+        ** If so, we can go home.  Otherwise, we have to continue.
+        */
+        if(c_half_interval/ (*mean) <= (double)0.05)
+                break;
+
+#ifdef OLDCODE
+#undef OLDCODE
+#endif
+#ifdef OLDCODE
+/* this code is no longer valid, we now do not replace but add new scores */
+/* Uwe F. Mayer */
+              /*
+              ** Go get a new score and see if it
+              ** improves existing scores.
+              */
+              do {
+                      if(*numtries==10)
+                              return(-1);
+                      (*funcpointer[fid])();
+                      *numtries+=1;
+                      newscore=getscore(fid);
+              } while(seek_confidence(myscores,&newscore,
+                      &c_half_interval,mean,stdev)==0);
+#endif
+        /* We now simply add a new test run and hope that the runs
+           finally stabilize, Uwe F. Mayer */
+        if(*numtries==30) return(-1);
+        (*funcpointer[fid])();
+        myscores[*numtries]=getscore(fid);
+#ifdef DEBUG
+        printf("score # %lu = %g\n", *numtries, myscores[*numtries]);
+#endif
+        *numtries+=1;
+}
+
+return(0);
+}
+
+#ifdef OLDCODE
+/* this procedure is no longer needed, Uwe F. Mayer */
+  /********************
+  ** seek_confidence **
+  *********************
+  ** Dead code, only compiled if OLDCODE is defined.  Tries the new
+  ** score in place of each of the 5 scores in turn and keeps the
+  ** substitution that lowers the sample standard deviation most.
+  ** Return 0 if no substitution helps (scores unchanged), else -1.
+  ** NOTE(review): calc_confidence() is called here without the
+  ** num_scores argument it takes now, and the statistics left in the
+  ** output arguments are those of the last trial, not of the kept
+  ** substitution -- fix both before re-enabling.
+  */
+  static int seek_confidence( double scores[5],
+  		double *newscore,
+  		double *c_half_interval,
+  		double *smean,
+  		double *sdev)
+  {
+  double sdev_to_beat;    /* Original sdev to be beaten */
+  double temp;            /* For doing a swap */
+  int is_beaten;          /* Indicates original was beaten */
+  int i;                  /* Index */
+
+  /*
+  ** First calculate original standard deviation
+  */
+  calc_confidence(scores,c_half_interval,smean,sdev);
+  sdev_to_beat=*sdev;
+  is_beaten=-1;
+
+  /*
+  ** Try to beat original score.  We'll come out of this
+  ** loop with a flag.
+  */
+  for(i=0;i<5;i++)
+  {
+  	temp=scores[i];
+  	scores[i]=*newscore;
+  	calc_confidence(scores,c_half_interval,smean,sdev);
+  	scores[i]=temp;
+  	if(sdev_to_beat>*sdev)
+  	{       is_beaten=i;
+  		sdev_to_beat=*sdev;
+  	}
+  }
+
+  if(is_beaten!=-1)
+  {       scores[is_beaten]=*newscore;
+  	return(-1);
+  }
+  return(0);
+  }
+#endif
+
+/********************
+** calc_confidence **
+*********************
+** Given num_scores scores (2 to 30 of them), calculate the sample
+** mean, the sample standard deviation and the half-width of the
+** two-sided 95% confidence interval for the mean, based on the
+** Student t distribution.
+** Returns 0 on success, -1 (after printing a message) if
+** num_scores is out of range; the outputs are then left untouched.
+*/
+static int calc_confidence(double scores[], /* Array of scores */
+                int num_scores,             /* number of scores in array */
+                double *c_half_interval,    /* Confidence half-int */
+                double *smean,              /* Standard mean */
+                double *sdev)               /* Sample stand dev */
+{
+/* Two-sided 95% critical values of the Student t distribution,
+   indexed by degrees of freedom (1 to 29). The value at 0 is bogus,
+   as there is no value for zero degrees of freedom. */
+static const double student_t[30]={0.0 , 12.706 , 4.303 , 3.182 , 2.776 , 2.571 ,
+                             2.447 , 2.365 , 2.306 , 2.262 , 2.228 ,
+                             2.201 , 2.179 , 2.160 , 2.145 , 2.131 ,
+                             2.120 , 2.110 , 2.101 , 2.093 , 2.086 ,
+                             2.080 , 2.074 , 2.069 , 2.064 , 2.060 ,
+                             2.056 , 2.052 , 2.048 , 2.045 };
+int i;          /* Index */
+if ((num_scores<2) || (num_scores>30)) {
+  output_string("Internal error: calc_confidence called with an illegal number of scores\n");
+  return(-1);
+}
+/*
+** First calculate mean.
+*/
+*smean=(double)0.0;
+for(i=0;i<num_scores;i++){
+  *smean+=scores[i];
+}
+*smean/=(double)num_scores;
+
+/* Get standard deviation (divisor num_scores-1, i.e. sample form) */
+*sdev=(double)0.0;
+for(i=0;i<num_scores;i++) {
+  *sdev+=(scores[i]-(*smean))*(scores[i]-(*smean));
+}
+*sdev/=(double)(num_scores-1);
+*sdev=sqrt(*sdev);
+
+/* Now calculate the length of the confidence half-interval.  For a
+** confidence level of 95% our confidence coefficient gives us a
+** multiplying factor of the upper .025 quantile of a t distribution
+** with num_scores-1 degrees of freedom, and dividing by sqrt(number of
+** observations). See any introduction to statistics.
+*/
+*c_half_interval=student_t[num_scores-1] * (*sdev) / sqrt((double)num_scores);
+return(0);
+}
+
+/*************
+** getscore **
+**************
+** Fetch the latest result recorded for the benchmark numbered fid.
+*/
+static double getscore(int fid)
+{
+double score=(double)0.0;       /* Stays 0.0 for an unknown fid */
+/*
+** Each test keeps its result in its own global structure;
+** pick the member that belongs to fid and convert it to double.
+*/
+switch(fid)
+{
+        case TF_NUMSORT:
+                score=global_numsortstruct.sortspersec; break;
+        case TF_SSORT:
+                score=global_strsortstruct.sortspersec; break;
+        case TF_BITOP:
+                score=global_bitopstruct.bitopspersec; break;
+        case TF_FPEMU:
+                score=global_emfloatstruct.emflops; break;
+        case TF_FFPU:
+                score=global_fourierstruct.fflops; break;
+        case TF_ASSIGN:
+                score=global_assignstruct.iterspersec; break;
+        case TF_IDEA:
+                score=global_ideastruct.iterspersec; break;
+        case TF_HUFF:
+                score=global_huffstruct.iterspersec; break;
+        case TF_NNET:
+                score=global_nnetstruct.iterspersec; break;
+        case TF_LU:
+                score=global_lustruct.iterspersec; break;
+}
+return(score);
+}
+
+/******************
+** output_string **
+*******************
+** Write buffer to the screen (stdout) and, when the write_to_file
+** flag is nonzero, also to the output file global_ofile.
+** Nothing is appended: the caller supplies any newline it wants
+** as part of buffer.
+*/
+static void output_string(char *buffer)
+{
+
+fputs(buffer,stdout);
+if(write_to_file)
+        fputs(buffer,global_ofile);
+return;
+}
+
+/***************
+** show_stats **
+****************
+** Output (via output_string) the run parameters of the benchmark
+** identified by bid; an unknown id outputs nothing.
+*/
+static void show_stats (int bid)
+{
+char buffer[80];        /* Display buffer */
+
+switch(bid)
+{
+        case TF_NUMSORT:                /* Numeric sort */
+                sprintf(buffer,"  Number of arrays: %d\n",
+                        global_numsortstruct.numarrays);
+                output_string(buffer);
+                sprintf(buffer,"  Array size: %lu\n",
+                        global_numsortstruct.arraysize);
+                output_string(buffer);
+                break;
+
+        case TF_SSORT:          /* String sort */
+                sprintf(buffer,"  Number of arrays: %d\n",
+                        global_strsortstruct.numarrays);
+                output_string(buffer);
+                sprintf(buffer,"  Array size: %lu\n",
+                        global_strsortstruct.arraysize);
+                output_string(buffer);
+                break;
+
+        case TF_BITOP:          /* Bitmap operation */
+                sprintf(buffer,"  Operations array size: %lu\n",
+                        global_bitopstruct.bitoparraysize);
+                output_string(buffer);
+                sprintf(buffer,"  Bitfield array size: %lu\n",
+                        global_bitopstruct.bitfieldarraysize);
+                output_string(buffer);
+                break;
+
+        case TF_FPEMU:          /* Floating-point emulation */
+                sprintf(buffer,"  Number of loops: %lu\n",
+                        global_emfloatstruct.loops);
+                output_string(buffer);
+                sprintf(buffer,"  Array size: %lu\n",
+                        global_emfloatstruct.arraysize);
+                output_string(buffer);
+                break;
+
+        case TF_FFPU:           /* Fourier test */
+                sprintf(buffer,"  Number of coefficients: %lu\n",
+                        global_fourierstruct.arraysize);
+                output_string(buffer);
+                break;
+
+        case TF_ASSIGN:
+                sprintf(buffer,"  Number of arrays: %lu\n",
+                        global_assignstruct.numarrays);
+                output_string(buffer);
+                break;
+
+        case TF_IDEA:
+                sprintf(buffer,"  Array size: %lu\n",
+                        global_ideastruct.arraysize);
+                output_string(buffer);
+                sprintf(buffer,"  Number of loops: %lu\n",
+                        global_ideastruct.loops);
+                output_string(buffer);
+                break;
+
+        case TF_HUFF:
+                sprintf(buffer,"  Array size: %lu\n",
+                        global_huffstruct.arraysize);
+                output_string(buffer);
+                sprintf(buffer,"  Number of loops: %lu\n",
+                        global_huffstruct.loops);
+                output_string(buffer);
+                break;
+
+        case TF_NNET:
+                sprintf(buffer,"  Number of loops: %lu\n",
+                        global_nnetstruct.loops);
+                output_string(buffer);
+                break;
+
+        case TF_LU:
+                sprintf(buffer,"  Number of arrays: %lu\n",
+                        global_lustruct.numarrays);
+                output_string(buffer);
+                break;
+}
+return;
+}
+
+/*
+** Following code added for Mac stuff, so that we can emulate command
+** lines.
+*/
+
+#ifdef MAC
+
+/*****************
+** UCommandLine **
+******************
+** Prompt for a command line, read it into Uargbuff and let UParse()
+** set up argc and argv from it.  At most 128 characters are read
+** (Uargbuff is expected to hold 129 bytes -- TODO confirm against its
+** declaration); anything beyond that is left unread on stdin.
+*/
+void UCommandLine(void)
+{
+printf("Enter command line\n:");
+if(fgets((char *)Uargbuff,129,stdin)==(char *)NULL) Uargbuff[0]='\0';
+Uargbuff[strcspn((char *)Uargbuff,"\r\n")]='\0'; /* Drop line terminator */
+UParse();
+}
+
+/***********
+** UParse **
+************
+** Parse the pseudo command-line.  This code appeared as part of the
+** Small-C library in Dr. Dobb's ToolBook of C.
+** It expects the following globals:
+** argc = arg count
+** argv = Pointer to array of char pointers
+** Uargbuff = Character array that holds the arguments.  Should be 129 bytes long.
+** Udummy = This is a 2-byte buffer that holds a "*", and acts as the first
+**  argument in the argument list.  This maintains compatibility with other
+**  C's, though it does not provide access to the executable filename.
+** Fields are split in place at white space; once argc reaches 20 further
+** fields are skipped.  Redirection is NOT supported.
+*/
+void UParse(void)
+{
+unsigned char *cursor=Uargbuff; /* Scan position in the raw line */
+
+Udummy[0]='*';                  /* Placeholder for the program name */
+Udummy[1]='\0';
+argv[0]=(char *)Udummy;
+argc=1;
+
+while(*cursor!='\0')
+{
+        if(!isspace(*cursor))
+        {       /* Field start: record it if there is room, then skip it */
+                if(argc<20) argv[argc++]=(char *)cursor;
+                cursor=UField(cursor);
+        }
+        else
+                ++cursor;       /* Skip white space between fields */
+}
+return;
+}
+/***********
+** UField **
+************
+** Terminate the field that starts at ptr; return where to scan next.
+*/
+unsigned char *UField(unsigned char *ptr)
+{
+while(*ptr)
+{       if(isspace(*ptr))
+        {       *ptr='\0';      /* A character, not the pointer NULL */
+                return(++ptr);
+        }
+        ++ptr;
+}
+return(ptr);    /* Hit the end of the line: points at its terminator */
+}
+#endif
diff --git a/nbench0.h b/nbench0.h
new file mode 100644
index 0000000..cef0928
--- /dev/null
+++ b/nbench0.h
@@ -0,0 +1,356 @@
+/*
+** nbench0.h
+** Header for nbench0.c
+** BYTEmark (tm)
+** BYTE's Native Mode Benchmarks
+** Rick Grehan, BYTE Magazine
+**
+** Creation:
+** Revision: 3/95;10/95
+**  10/95 - Added memory array & alignment -- RG
+**
+** DISCLAIMER
+** The source, executable, and documentation files that comprise
+** the BYTEmark benchmarks are made available on an "as is" basis.
+** This means that we at BYTE Magazine have made every reasonable
+** effort to verify that the there are no errors in the source and
+** executable code.  We cannot, however, guarantee that the programs
+** are error-free.  Consequently, McGraw-HIll and BYTE Magazine make
+** no claims in regard to the fitness of the source code, executable
+** code, and documentation of the BYTEmark.
+**  Furthermore, BYTE Magazine, McGraw-Hill, and all employees
+** of McGraw-Hill cannot be held responsible for any damages resulting
+** from the use of this code or the results obtained from using
+** this code.
+*/
+
+/*
+** Descriptive strings for the system and the compiler.  Edit them for
+** each build; the placeholder text says they are printed in the output.
+*/
+char *sysname="You can enter your system description in nbench0.h";
+char *compilername="It then will be printed here after you recompile";
+char *compilerversion="Have a nice day";
+
+/*  Parameter flags.  Each value is the index of the matching entry
+** in the paramnames array which appears below. */
+#define PF_GMTICKS 0            /* GLOBALMINTICKS */
+#define PF_MINSECONDS 1         /* MINSECONDS */
+#define PF_ALLSTATS 2           /* ALLSTATS */
+#define PF_OUTFILE 3            /* OUTFILE */
+#define PF_CUSTOMRUN 4          /* CUSTOMRUN */
+#define PF_DONUM 5              /* DONUMSORT */
+#define PF_NUMNUMA 6            /* NUMNUMARRAYS */
+#define PF_NUMASIZE 7           /* NUMARRAYSIZE */
+#define PF_NUMMINS 8            /* NUMMINSECONDS */
+#define PF_DOSTR 9              /* DOSTRINGSORT */
+#define PF_STRASIZE 10          /* STRARRAYSIZE */
+#define PF_NUMSTRA 11           /* NUMSTRARRAYS */
+#define PF_STRMINS 12           /* STRMINSECONDS */
+#define PF_DOBITF 13            /* DOBITFIELD */
+#define PF_NUMBITOPS 14         /* NUMBITOPS */
+#define PF_BITFSIZE 15          /* BITFIELDSIZE */
+#define PF_BITMINS 16           /* BITMINSECONDS */
+#define PF_DOEMF 17             /* DOEMF */
+#define PF_EMFASIZE 18          /* EMFARRAYSIZE */
+#define PF_EMFLOOPS 19          /* EMFLOOPS */
+#define PF_EMFMINS 20           /* EMFMINSECONDS */
+#define PF_DOFOUR 21            /* DOFOUR */
+#define PF_FOURASIZE 22         /* FOURSIZE */
+#define PF_FOURMINS 23          /* FOURMINSECONDS */
+#define PF_DOASSIGN 24          /* DOASSIGN */
+#define PF_AARRAYS 25           /* ASSIGNARRAYS */
+#define PF_ASSIGNMINS 26        /* ASSIGNMINSECONDS */
+#define PF_DOIDEA 27            /* DOIDEA */
+#define PF_IDEAASIZE 28         /* IDEARRAYSIZE */
+#define PF_IDEALOOPS 29         /* IDEALOOPS */
+#define PF_IDEAMINS 30          /* IDEAMINSECONDS */
+#define PF_DOHUFF 31            /* DOHUFF */
+#define PF_HUFFASIZE 32         /* HUFARRAYSIZE */
+#define PF_HUFFLOOPS 33         /* HUFFLOOPS */
+#define PF_HUFFMINS 34          /* HUFFMINSECONDS */
+#define PF_DONNET 35            /* DONNET */
+#define PF_NNETLOOPS 36         /* NNETLOOPS */
+#define PF_NNETMINS 37          /* NNETMINSECONDS */
+#define PF_DOLU 38              /* DOLU */
+#define PF_LUNARRAYS 39         /* LUNUMARRAYS */
+#define PF_LUMINS 40            /* LUMINSECONDS */
+#define PF_ALIGN 41		        /* ALIGN */
+
+#define MAXPARAM 41             /* Highest PF_xxx value (PF_ALIGN) */
+
+/* Test ids.  Each is the test's position in the per-test arrays below. */
+#define TF_NUMSORT 0            /* Numeric sort */
+#define TF_SSORT 1              /* String sort */
+#define TF_BITOP 2              /* Bitfield */
+#define TF_FPEMU 3              /* FP emulation */
+#define TF_FFPU 4               /* Fourier */
+#define TF_ASSIGN 5             /* Assignment */
+#define TF_IDEA 6               /* IDEA */
+#define TF_HUFF 7               /* Huffman */
+#define TF_NNET 8               /* Neural net */
+#define TF_LU 9                 /* LU decomposition */
+
+#define NUMTESTS 10             /* Number of TF_xxx ids */
+
+/*
+** GLOBALS (defined, not just declared, in this header)
+*/
+
+#define BUF_SIZ 1024            /* Size of buffer[] and global_ofile_name[] */
+
+/*
+** Test names, listed in TF_xxx order
+*/
+char *ftestnames[] = {
+        "NUMERIC SORT    ",
+        "STRING SORT     ",
+        "BITFIELD        ",
+        "FP EMULATION    ",
+        "FOURIER         ",
+        "ASSIGNMENT      ",
+        "IDEA            ",
+        "HUFFMAN         ",
+        "NEURAL NET      ",
+        "LU DECOMPOSITION" };
+
+/*
+** Index baselines, one value per test in the order noted on each
+** line.  Baseline machine is a DELL Pentium XP90, 11/28/94.
+*/
+double bindex[] = {
+    38.993,                     /* Numeric sort */
+    2.238,                      /* String sort */
+    5829704,                    /* Bitfield */
+    2.084,                      /* FP Emulation */
+    879.278,                    /* Fourier */
+    .2628,                      /* Assignment */
+    65.382,                     /* IDEA */
+    36.062,                     /* Huffman */
+    .6225,                      /* Neural Net */
+    19.3031 };                  /* LU Decomposition */
+
+/*
+** Second set of index baselines, one value per test in the order noted
+** on each line.  Baseline is an AMD K6-233, 32MB RAM (60ns SDRAM), 512k
+** L2 cache, Linux kernel 2.0.32, libc-5.4.38, gcc-2.7.2.3; Nov/30/97
+*/
+double lx_bindex[] = {
+      118.73, 	    /* Numeric sort */
+      14.459,	    /* String sort */
+    27910000,	    /* Bitfield */
+      9.0314,	    /* FP Emulation */
+      1565.5,	    /* Fourier */
+      1.0132,	    /* Assignment */
+      220.21,	    /* IDEA */
+      112.93,	    /* Huffman */
+      1.4799,	    /* Neural Net */
+      26.732};      /* LU Decomposition */
+
+/* Parameter names.  Entry i belongs to the PF_xxx flag whose value is i. */
+char *paramnames[]= {
+        "GLOBALMINTICKS",
+        "MINSECONDS",
+        "ALLSTATS",
+        "OUTFILE",
+        "CUSTOMRUN",
+        "DONUMSORT",
+        "NUMNUMARRAYS",
+        "NUMARRAYSIZE",
+        "NUMMINSECONDS",
+        "DOSTRINGSORT",
+        "STRARRAYSIZE",
+        "NUMSTRARRAYS",
+        "STRMINSECONDS",
+        "DOBITFIELD",
+        "NUMBITOPS",
+        "BITFIELDSIZE",
+        "BITMINSECONDS",
+        "DOEMF",
+        "EMFARRAYSIZE",
+        "EMFLOOPS",
+        "EMFMINSECONDS",
+        "DOFOUR",
+        "FOURSIZE",
+        "FOURMINSECONDS",
+        "DOASSIGN",
+        "ASSIGNARRAYS",
+        "ASSIGNMINSECONDS",
+        "DOIDEA",
+        "IDEARRAYSIZE",
+        "IDEALOOPS",
+        "IDEAMINSECONDS",
+        "DOHUFF",
+        "HUFARRAYSIZE",
+        "HUFFLOOPS",
+        "HUFFMINSECONDS",
+        "DONNET",
+        "NNETLOOPS",
+        "NNETMINSECONDS",
+        "DOLU",
+        "LUNUMARRAYS",
+        "LUMINSECONDS",
+	"ALIGN" };
+
+/*
+** Following array is a collection of flags indicating which
+** tests to perform.  It has one entry per test (NUMTESTS).
+*/
+int tests_to_do[NUMTESTS];
+
+/*
+** Buffer for holding output text (BUF_SIZ bytes).
+*/
+char buffer[BUF_SIZ];
+
+/*
+** Global parameters.
+*/
+ulong global_min_ticks;         /* Ticks a calibration run must exceed */
+ulong global_min_seconds;       /* Minimum seconds tests run */
+int global_allstats;            /* Statistics dump flag */
+char global_ofile_name[BUF_SIZ];/* Output file name */
+FILE *global_ofile;             /* Output file */
+int global_custrun;             /* Custom run flag */
+int write_to_file;              /* Write output to file */
+int global_align;		/* Memory alignment */
+
+/*
+** Following global is the memory array.  Its two rows are used to
+** store original and aligned (modified) memory addresses.
+*/
+ulong mem_array[2][MEM_ARRAY_SIZE];
+int mem_array_ents;		/* # of active entries */
+
+/*
+** Following are global structures, one built for
+** each of the tests.  They are listed in TF_xxx order.
+*/
+SortStruct global_numsortstruct;        /* For numeric sort */
+SortStruct global_strsortstruct;        /* For string sort */
+BitOpStruct global_bitopstruct;         /* For bitfield operations */
+EmFloatStruct global_emfloatstruct;     /* For emul. float. point */
+FourierStruct global_fourierstruct;     /* For fourier test */
+AssignStruct global_assignstruct;       /* For assignment algorithm */
+IDEAStruct global_ideastruct;           /* For IDEA encryption */
+HuffStruct global_huffstruct;           /* For Huffman compression */
+NNetStruct global_nnetstruct;           /* For Neural Net */
+LUStruct global_lustruct;               /* For LU decomposition */
+
+/*
+** The following array of untyped struct pointers lets
+** us very rapidly map a test id to its controlling
+** data structure. NOTE: The order must match the "TF_xxx"
+** constants above.
+*/
+void *global_fstruct[] =
+{       (void *)&global_numsortstruct,
+        (void *)&global_strsortstruct,
+        (void *)&global_bitopstruct,
+        (void *)&global_emfloatstruct,
+        (void *)&global_fourierstruct,
+        (void *)&global_assignstruct,
+        (void *)&global_ideastruct,
+        (void *)&global_huffstruct,
+        (void *)&global_nnetstruct,
+        (void *)&global_lustruct };
+
+/*
+** Following globals added to support command line emulation on
+** the Macintosh, which doesn't have command lines.
+*/
+#ifdef MAC
+int argc;                       /* Argument count */
+char *argv[20];                 /* Argument vectors (UParse fills at most 20) */
+
+unsigned char Uargbuff[129];    /* Buffer holding arguments string */
+unsigned char Udummy[2];        /* Holds "*", which UParse uses as argv[0] */
+
+#endif
+
+#ifdef MACTIMEMGR
+#include <Types.h>
+#include <Timer.h>
+/*
+** Timer globals for Mac (only with MACTIMEMGR defined)
+*/
+struct TMTask myTMTask;
+long MacHSTdelay,MacHSTohead;
+
+#endif
+
+/*
+** Following globals are used by the Win 31 timing routines.
+** NOTE: This requires that the w31timer.asm file is
+** included in your project!!
+*/
+#ifdef WIN31TIMER
+#include <windows.h>
+#include <toolhelp.h>
+extern TIMERINFO win31tinfo;
+extern HANDLE hThlp;
+extern FARPROC lpfn;
+#endif
+
+/*
+** PROTOTYPES (static ones are private to the including file)
+*/
+static int parse_arg(char *argptr);
+static void display_help(char *progname);
+static void read_comfile(FILE *cfile);
+static int getflag(char *cptr);
+static void strtoupper(char *s);
+static void set_request_secs(void);
+static int bench_with_confidence(int fid,
+        double *mean, double *stdev, ulong *numtries);
+/* Disabled prototype, left commented out:
+static int seek_confidence(double scores[5],
+        double *newscore, double *c_half_interval,
+        double *smean,double *sdev);
+*/
+static int calc_confidence(double scores[],
+        int num_scores,
+        double *c_half_interval,double *smean,
+        double *sdev);
+static double getscore(int fid);
+static void output_string(char *buffer);
+static void show_stats(int bid);
+
+#ifdef MAC
+void UCommandLine(void);        /* Reads the pseudo command line */
+void UParse(void);              /* Splits it into argc/argv */
+unsigned char *UField(unsigned char *ptr);      /* Ends one field */
+#endif
+
+/*
+** EXTERNAL PROTOTYPES -- one entry point per benchmark
+*/
+extern void DoNumSort(void);    /* From NBENCH1 */
+extern void DoStringSort(void);
+extern void DoBitops(void);
+extern void DoEmFloat(void);
+extern void DoFourier(void);
+extern void DoAssign(void);
+extern void DoIDEA(void);
+extern void DoHuffman(void);
+extern void DoNNET(void);
+extern void DoLU(void);
+
+extern void ErrorExit(void);    /* From SYSSPEC */
+
+/*
+** Array of pointers to the benchmark functions, in TF_xxx order.
+*/
+void (*funcpointer[])(void) =
+{       DoNumSort,
+        DoStringSort,
+        DoBitops,
+        DoEmFloat,
+        DoFourier,
+        DoAssign,
+        DoIDEA,
+        DoHuffman,
+        DoNNET,
+        DoLU };
+
+
diff --git a/nbench1.c b/nbench1.c
new file mode 100644
index 0000000..05c35df
--- /dev/null
+++ b/nbench1.c
@@ -0,0 +1,4445 @@
+
+/*
+** nbench1.c
+*/
+
+/********************************
+**       BYTEmark (tm)         **
+** BYTE NATIVE MODE BENCHMARKS **
+**       VERSION 2             **
+**                             **
+** Included in this source     **
+** file:                       **
+**  Numeric Heapsort           **
+**  String Heapsort            **
+**  Bitfield test              **
+**  Floating point emulation   **
+**  Fourier coefficients       **
+**  Assignment algorithm       **
+**  IDEA Encryption            **
+**  Huffman compression        **
+**  Back prop. neural net      **
+**  LU Decomposition           **
+**    (linear equations)       **
+** ----------                  **
+** Rick Grehan, BYTE Magazine  **
+*********************************
+**
+** BYTEmark (tm)
+** BYTE's Native Mode Benchmarks
+** Rick Grehan, BYTE Magazine
+**
+** Creation:
+** Revision: 3/95;10/95
+**  10/95 - Removed allocation that was taking place inside
+**   the LU Decomposition benchmark. Though it didn't seem to
+**   make a difference on systems we ran it on, it nonetheless
+**   removes an operating system dependency that probably should
+**   not have been there.
+**
+** DISCLAIMER
+** The source, executable, and documentation files that comprise
+** the BYTEmark benchmarks are made available on an "as is" basis.
+** This means that we at BYTE Magazine have made every reasonable
+** effort to verify that the there are no errors in the source and
+** executable code.  We cannot, however, guarantee that the programs
+** are error-free.  Consequently, McGraw-HIll and BYTE Magazine make
+** no claims in regard to the fitness of the source code, executable
+** code, and documentation of the BYTEmark.
+**  Furthermore, BYTE Magazine, McGraw-Hill, and all employees
+** of McGraw-Hill cannot be held responsible for any damages resulting
+** from the use of this code or the results obtained from using
+** this code.
+*/
+
+/*
+** INCLUDES
+*/
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <strings.h>
+#include <math.h>
+#include "nmglobal.h"
+#include "nbench1.h"
+#include "wordcat.h"
+
+#ifdef DEBUG
+static int numsort_status=0;    /* Set to 1 when the DEBUG order check fails */
+static int stringsort_status=0; /* Same, for the string sort */
+#endif
+
+/*********************
+** NUMERIC HEAPSORT **
+**********************
+** This test implements a heapsort algorithm, performed on an
+** array of longs.
+*/
+
+/**************
+** DoNumSort **
+***************
+** Driver for the CPU numeric sort test.  Settings are read from
+** global_numsortstruct and the score is stored in its sortspersec
+** member.
+** The score is the number of arrays sorted per second: timed runs
+** multiplied by arrays per run, divided by elapsed time.  It is not
+** the number of longwords sorted per second.
+** On the first call (adjust is 0) the routine also calibrates
+** numarrays so that a single run lasts longer than global_min_ticks.
+*/
+
+void DoNumSort(void)
+{
+SortStruct *cfg;        /* Settings and result for this test */
+farlong *arraybase;     /* Memory holding every array */
+long accumtime;         /* Ticks spent in timed runs */
+double iterations;      /* Timed runs completed */
+char *errorcontext;     /* Tag used in error reports */
+int systemerror;        /* Status from the memory routines */
+
+/*
+** Attach to the global settings and name the error context.
+*/
+cfg=&global_numsortstruct;
+errorcontext="CPU:Numeric Sort";
+
+/*
+** Calibration always starts from a single array.  When adjust is
+** already set, numarrays is used as it stands.
+*/
+if(cfg->adjust==0)
+	cfg->numarrays=1;
+
+/*
+** Allocate, and while calibrating keep growing numarrays until one
+** trial run takes longer than the permitted minimum.
+*/
+for(;;)
+{
+	/*
+	** Room for numarrays arrays of arraysize longs each
+	*/
+	arraybase=(farlong *)AllocateMemory(sizeof(long) *
+		cfg->numarrays * cfg->arraysize,
+		&systemerror);
+	if(systemerror)
+	{
+		ReportError(errorcontext,systemerror);
+		FreeMemory((farvoid *)arraybase,&systemerror);
+		ErrorExit();
+	}
+
+	/*
+	** Nothing to calibrate: keep this allocation and move on.
+	*/
+	if(cfg->adjust!=0)
+		break;
+
+	/*
+	** Trial run.  Stop growing once it exceeds the minimum
+	** tick count.
+	*/
+	if(DoNumSortIteration(arraybase,
+		cfg->arraysize,
+		cfg->numarrays)>global_min_ticks)
+		break;
+
+	/*
+	** Too quick to time.  Release the memory, add an array
+	** and try again, giving up if numarrays was already
+	** above NUMNUMARRAYS before the increment.
+	*/
+	FreeMemory((farvoid *)arraybase,&systemerror);
+	if(cfg->numarrays++>NUMNUMARRAYS)
+	{
+		printf("CPU:NSORT -- NUMNUMARRAYS hit.\n");
+		ErrorExit();
+	}
+}
+
+/*
+** Timed phase.  Run at least once, then repeat until the
+** accumulated time reaches request_secs.
+*/
+accumtime=0L;
+iterations=(double)0.0;
+
+do {
+	accumtime+=DoNumSortIteration(arraybase,
+		cfg->arraysize,
+		cfg->numarrays);
+	iterations+=(double)1.0;
+} while(TicksToSecs(accumtime)<cfg->request_secs);
+
+/*
+** Finished with the arrays.
+*/
+FreeMemory((farvoid *)arraybase,&systemerror);
+
+/*
+** Score: runs times arrays per run, over elapsed seconds.
+*/
+cfg->sortspersec=iterations *
+	(double)cfg->numarrays / TicksToFracSecs(accumtime);
+
+/*
+** Mark calibration as done so that later calls skip it.
+*/
+if(cfg->adjust==0)
+	cfg->adjust=1;
+
+#ifdef DEBUG
+/* Report a clean run, then clear the flag for the next call */
+if (numsort_status==0) printf("Numeric sort: OK\n");
+numsort_status=0;
+#endif
+return;
+}
+
+/***********************
+** DoNumSortIteration **
+************************
+** Loads the arrays, then times one heapsort of each of them.
+** Returns the ticks elapsed while sorting; loading the
+** arrays happens before the stopwatch is started.
+*/
+static ulong DoNumSortIteration(farlong *arraybase,
+		ulong arraysize,
+		uint numarrays)
+{
+ulong elapsed;          /* Elapsed ticks */
+ulong i;
+/*
+** Load up the arrays with random numbers (not timed)
+*/
+LoadNumArrayWithRand(arraybase,arraysize,numarrays);
+
+/*
+** Start the stopwatch
+*/
+elapsed=StartStopwatch();
+
+/*
+** Sort each array in turn; array i starts at i*arraysize
+*/
+for(i=0;i<numarrays;i++)
+	NumHeapSort(arraybase+i*arraysize,0L,arraysize-1L);
+
+/*
+** Get elapsed time
+*/
+elapsed=StopStopwatch(elapsed);
+#ifdef DEBUG
+{
+	for(i=0;i<arraysize-1;i++)
+	{       /*
+		** Check the first array only: each element
+		** must not be smaller than the one before.
+		*/
+		if(arraybase[i+1]<arraybase[i])
+		{       printf("Sort Error\n");
+			numsort_status=1;
+                        break;
+		}
+	}
+}
+#endif
+
+return(elapsed);
+}
+
+/*************************
+** LoadNumArrayWithRand **
+**************************
+** Fill the first array with values from randnum(), reseeded with 13
+** on every call, then copy it into each of the remaining arrays.
+*/
+static void LoadNumArrayWithRand(farlong *array,     /* Pointer to arrays */
+		ulong arraysize,        /* # of elements in each array */
+		uint numarrays)         /* # of arrays */
+{
+ulong i;                /* Element index; unsigned like arraysize */
+uint n;                 /* Counts the arrays filled so far */
+farlong *darray;        /* Destination array pointer */
+
+randnum((int32)13);     /* Initialize the random number generator */
+
+/*
+** No arrays means no room to write to.  (The former while(--numarrays)
+** loop wrapped around here and ran far past the end of the buffer.)
+*/
+if(numarrays==0) return;
+
+for(i=0;i<arraysize;i++)        /* Load first array with randoms */
+	array[i]=randnum((int32)0);
+
+/*
+** Copy the first array into each of the others, if any.
+*/
+darray=array;
+for(n=1;n<numarrays;n++)
+{       darray+=arraysize;
+	for(i=0;i<arraysize;i++)
+		darray[i]=array[i];
+}
+
+return;
+}
+
+/****************
+** NumHeapSort **
+*****************
+** Heap sorts an array of long integers in place.  top is the
+** index of the last element.  bottom is only used as the index
+** the extraction loop sifts from; the exchange uses array[0].
+*/
+static void NumHeapSort(farlong *array,
+	ulong bottom,           /* Lower bound */
+	ulong top)              /* Upper bound */
+{
+long temp;                      /* Exchange temporary; signed like the data */
+ulong i;                        /* Loop index */
+
+/*
+** First, build a heap in the array (index 0 is sifted below)
+*/
+for(i=(top/2L); i>0; --i)
+	NumSift(array,i,top);
+
+/*
+** Repeatedly extract maximum from heap and place it at the
+** end of the array.  When we get done, we'll have a sorted
+** array.
+*/
+for(i=top; i>0; --i)
+{       NumSift(array,bottom,i);
+	temp=*array;                    /* Perform exchange */
+	*array=*(array+i);
+	*(array+i)=temp;
+}
+return;
+}
+
+/************
+** NumSift **
+*************
+** Sifts array[i] down: while it is smaller than the larger of its
+** children at i+i and i+i+1 (up to j), it is swapped with that child.
+*/
+static void NumSift(farlong *array,     /* Array of numbers */
+	ulong i,                /* Index to sift down from */
+	ulong j)                /* Last index in the heap */
+{
+unsigned long k;                        /* Index of the larger child */
+long temp;                              /* Used for exchange */
+
+while((i+i)<=j)
+{
+	k=i+i;
+	if(k<j)
+		if(array[k]<array[k+1L])
+			++k;
+	if(array[i]<array[k])
+	{
+		temp=array[k];
+		array[k]=array[i];
+		array[i]=temp;
+		i=k;
+	}
+	else
+		i=j+1;          /* Already in place: make the loop end */
+}
+return;
+}
+
+/********************
+** STRING HEAPSORT **
+********************/
+
+/*****************
+** DoStringSort **
+******************
+** Driver for the CPU string sort test.  It takes no arguments:
+** settings are read from global_strsortstruct and the score is
+** stored in its sortspersec member.
+** The score is the number of arrays sorted per second.  On the
+** first call (adjust is 0) numarrays is calibrated as well.
+*/
+void DoStringSort(void)
+{
+
+SortStruct *cfg;                /* Settings and result for this test */
+faruchar *arraybase;            /* Memory holding every string array */
+long accumtime;                 /* Ticks spent in timed runs */
+double iterations;              /* Arrays sorted in timed runs */
+char *errorcontext;             /* Tag used in error reports */
+int systemerror;                /* Status from the memory routines */
+
+/*
+** Attach to the global settings and name the error context.
+*/
+cfg=&global_strsortstruct;
+errorcontext="CPU:String Sort";
+
+/*
+** Calibration always starts from a single array.  When adjust is
+** already set, numarrays is used as it stands.
+*/
+if(cfg->adjust==0)
+	cfg->numarrays=1;
+
+/*
+** Allocate, and while calibrating keep growing numarrays until one
+** trial run takes longer than the permitted minimum.
+*/
+for(;;)
+{
+	/*
+	** Each array gets arraysize bytes plus 100 spare bytes, which
+	** protect memory as strings move around during adjustment.
+	*/
+	arraybase=(faruchar *)AllocateMemory((cfg->arraysize+100L) *
+		(long)cfg->numarrays,&systemerror);
+	if(systemerror)
+	{
+		ReportError(errorcontext,systemerror);
+		ErrorExit();
+	}
+
+	/*
+	** Nothing to calibrate: keep this allocation and move on.
+	*/
+	if(cfg->adjust!=0)
+		break;
+
+	/*
+	** Trial run.  Stop growing once it exceeds the minimum
+	** tick count.
+	*/
+	if(DoStringSortIteration(arraybase,
+		cfg->numarrays,
+		cfg->arraysize)>global_min_ticks)
+		break;
+
+	/*
+	** Too quick to time.  Release the memory, add an array
+	** and try again.
+	*/
+	FreeMemory((farvoid *)arraybase,&systemerror);
+	cfg->numarrays+=1;
+}
+
+/*
+** Timed phase.  Run at least once, then repeat until the
+** accumulated time reaches request_secs.  Every run sorts
+** numarrays arrays, so that many are added to the count.
+*/
+accumtime=0L;
+iterations=(double)0.0;
+
+do {
+	accumtime+=DoStringSortIteration(arraybase,
+				cfg->numarrays,
+				cfg->arraysize);
+	iterations+=(double)cfg->numarrays;
+} while(TicksToSecs(accumtime)<cfg->request_secs);
+
+/* Finished with the arrays */
+FreeMemory((farvoid *)arraybase,&systemerror);
+
+/*
+** Score: arrays sorted, over elapsed seconds.
+*/
+cfg->sortspersec=iterations / (double)TicksToFracSecs(accumtime);
+
+/*
+** Mark calibration as done so that later calls skip it.
+*/
+if(cfg->adjust==0)
+	cfg->adjust=1;
+#ifdef DEBUG
+if (stringsort_status==0) printf("String sort: OK\n");
+stringsort_status=0;
+#endif
+return;
+}
+
+/**************************
+** DoStringSortIteration **
+***************************
+** Executes one iteration of the string sort benchmark and
+** returns the number of ticks spent in the heapsorts.
+** The offset pointer array is built by LoadStringArray()
+** before timing starts and is freed again before returning.
+*/
+static ulong DoStringSortIteration(faruchar *arraybase,
+		uint numarrays,ulong arraysize)
+{
+farulong *optrarray;            /* Offset pointer array */
+unsigned long elapsed;          /* Elapsed ticks */
+unsigned long nstrings;         /* # of strings in array */
+int syserror;                   /* System error code */
+unsigned int i;                 /* Index */
+farulong *tempobase;            /* Temporary offset pointer base */
+faruchar *tempsbase;            /* Temporary string base pointer */
+
+/*
+** Load up the array(s) with random strings (not timed)
+*/
+optrarray=LoadStringArray(arraybase,numarrays,&nstrings,arraysize);
+
+/*
+** Set temp base pointers...they are advanced from one
+** array to the next as the benchmark proceeds.
+*/
+tempobase=optrarray;
+tempsbase=arraybase;
+
+/*
+** Start the stopwatch
+*/
+elapsed=StartStopwatch();
+
+/*
+** Execute heapsorts, one per array
+*/
+for(i=0;i<numarrays;i++)
+{       StrHeapSort(tempobase,tempsbase,nstrings,0L,nstrings-1);
+	tempobase+=nstrings;    /* Advance base pointers */
+	tempsbase+=arraysize+100;       /* Arrays lie arraysize+100 bytes apart */
+}
+
+/*
+** Record elapsed time
+*/
+elapsed=StopStopwatch(elapsed);
+
+#ifdef DEBUG
+{
+	unsigned long i;
+	for(i=0;i<nstrings-1;i++)
+	{       /*
+		** Check the first array only: no string may
+		** compare as less than the one before it.
+		*/
+		if(str_is_less(optrarray,arraybase,nstrings,i+1,i))
+		{       printf("Sort Error\n");
+			stringsort_status=1;
+                        break;
+		}
+	}
+}
+#endif
+
+/*
+** Release the offset pointer array built by
+** LoadStringArray()
+*/
+FreeMemory((farvoid *)optrarray,&syserror);
+
+/*
+** Return elapsed ticks.
+*/
+return(elapsed);
+}
+
+/********************
+** LoadStringArray **
+*********************
+** Build one array of random, length-prefixed strings, copy it into
+** the remaining arrays, and build a matching table of string offsets.
+** Returns the pointer to the offset pointer array, which is allocated
+** here; the caller releases it.  The number of strings in one array
+** is passed back through nstrings.
+*/
+static farulong *LoadStringArray(faruchar *strarray, /* String array */
+	uint numarrays,                 /* # of arrays */
+	ulong *nstrings,                /* # of strings */
+	ulong arraysize)                /* Size of array */
+{
+faruchar *tempsbase;            /* Temporary string base pointer */
+farulong *optrarray;            /* Local for pointer */
+farulong *tempobase;            /* Temporary offset pointer base pointer */
+unsigned long curroffset;       /* Current offset */
+int fullflag;                   /* Indicates full array */
+unsigned char stringlength;     /* Length of string */
+unsigned char i;                /* Index */
+unsigned long j;                /* Another index */
+unsigned int k;                 /* Yet another index */
+unsigned long l;                /* Wide index: compared with ulong bounds */
+int systemerror;                /* For holding error code */
+
+/*
+** Initialize random number generator.
+*/
+/* randnum(13L); */
+randnum((int32)13);
+
+/*
+** Start with no strings.  Initialize our current offset pointer
+** to 0.
+*/
+*nstrings=0L;
+curroffset=0L;
+fullflag=0;
+
+do
+{
+	/*
+	** Pick a random length of 1+abs_randwc(76) bytes (the
+	** old "4 to 80 bytes" note did not match the code).
+	** If the string would not fit in what is left of the
+	** array, shorten it to fit and flag the array as full.
+	*/
+        /* stringlength=(unsigned char)((1+abs_randwc(76L)) & 0xFFL);*/
+	stringlength=(unsigned char)((1+abs_randwc((int32)76)) & 0xFFL);
+	if((unsigned long)stringlength+curroffset+1L>=arraysize)
+	{       stringlength=(unsigned char)((arraysize-curroffset-1L) &
+				0xFF);
+		fullflag=1;     /* Indicates a full */
+	}
+
+	/*
+	** Store length at curroffset and advance current offset.
+	*/
+	*(strarray+curroffset)=stringlength;
+	curroffset++;
+
+	/*
+	** Fill up the rest of the string with random bytes.
+	*/
+	for(i=0;i<stringlength;i++)
+	{       *(strarray+curroffset)=
+		        /* (unsigned char)(abs_randwc((long)0xFE)); */
+			(unsigned char)(abs_randwc((int32)0xFE));
+		curroffset++;
+	}
+
+	/*
+	** Increment the # of strings counter.
+	*/
+	*nstrings+=1L;
+
+} while(fullflag==0);
+
+/*
+** We now have initialized a single full array.  If there
+** is more than one array, copy the original into the
+** others.  Arrays lie arraysize+100 bytes apart.
+*/
+k=1;
+tempsbase=strarray;
+while(k<numarrays)
+{       tempsbase+=arraysize+100;         /* Set base */
+	for(l=0;l<arraysize;l++)
+		tempsbase[l]=strarray[l];
+	k++;
+}
+
+/*
+** Now the array is full, allocate enough space for an
+** offset pointer array (nstrings entries per array).
+*/
+optrarray=(farulong *)AllocateMemory(*nstrings * sizeof(unsigned long) *
+		numarrays,
+		&systemerror);
+if(systemerror)
+{       ReportError("CPU:Stringsort",systemerror);
+	FreeMemory((farvoid *)strarray,&systemerror);
+	ErrorExit();
+}
+
+/*
+** Go through the newly-built string array, building
+** offsets and putting them into the offset pointer
+** array.  Each string occupies its length byte plus data.
+*/
+curroffset=0;
+for(j=0;j<*nstrings;j++)
+{       *(optrarray+j)=curroffset;
+	curroffset+=(unsigned long)(*(strarray+curroffset))+1L;
+}
+
+/*
+** As above, we've made one copy of the offset pointers,
+** so duplicate this array in the remaining ones.
+*/
+k=1;
+tempobase=optrarray;
+while(k<numarrays)
+{       tempobase+=*nstrings;
+	for(l=0;l<*nstrings;l++)
+		tempobase[l]=optrarray[l];
+	k++;
+}
+
+/*
+** All done...go home.  Pass local pointer back.
+*/
+return(optrarray);
+}
+
+/**************
+** stradjust **
+***************
+** Used by the string heap sort.  Call this routine to adjust the
+** string with index i to length l.  The strings that follow it in
+** the array are moved accordingly, their offsets are updated, and
+** the length byte of string i is set to l.
+*/
+static void stradjust(farulong *optrarray,      /* Offset pointer array */
+	faruchar *strarray,                     /* String array */
+	ulong nstrings,                         /* # of strings */
+	ulong i,                                /* Index of string to adjust */
+	uchar l)                                /* New length */
+{
+unsigned long nbytes;           /* # of bytes to move */
+unsigned long j;                /* Index */
+int direction;                  /* Direction indicator */
+unsigned char adjamount;        /* Adjustment amount */
+
+/*
+** direction is the signed change in length: negative when
+** the string shrinks, positive when it grows.  adjamount
+** is the size of that change.
+*/
+direction=(int)l - (int)*(strarray+*(optrarray+i));
+adjamount=(unsigned char)abs(direction);
+
+/*
+** See if the adjustment is being made to the last
+** string in the string array.  If so, we don't have to
+** do anything more than adjust the length field.
+*/
+if(i==(nstrings-1L))
+{       *(strarray+*(optrarray+i))=l;
+	return;
+}
+
+/*
+** Calculate the total # of bytes in string array from
+** string i+1 to the end of the last string.  Whether we're
+** moving "up" or down, this is how many bytes we'll have to move.
+*/
+nbytes=*(optrarray+nstrings-1L) +
+	(unsigned long)*(strarray+*(optrarray+nstrings-1L)) + 1L -
+	*(optrarray+i+1L);
+
+/*
+** Source is the start of string i+1.  Destination is the
+** start of string i plus its new length l (that is "ell",
+** not the digit 1) plus one for the length byte.
+** MoveMemory is expected to cope with the overlap between
+** the two regions.
+*/
+MoveMemory((farvoid *)(strarray+*(optrarray+i)+l+1),
+	(farvoid *)(strarray+*(optrarray+i+1)),
+	(unsigned long)nbytes);
+
+/*
+** We have to adjust the offset pointer array.
+** This covers string i+1 to nstrings-1.
+*/
+for(j=i+1;j<nstrings;j++)
+	if(direction<0)
+		*(optrarray+j)=*(optrarray+j)-adjamount;
+	else
+		*(optrarray+j)=*(optrarray+j)+adjamount;
+
+/*
+** Store the new length and go home.
+*/
+*(strarray+*(optrarray+i))=l;
+return;
+}
+
+/****************
+** StrHeapSort **
+*****************
+** Pass this routine a pointer to an array of unsigned char.
+** The array is presumed to hold strings occupying at most
+** 80 bytes each (this count includes the length byte).
+** This routine also needs a pointer to an array of offsets
+** which represent string locations in the array, and
+** an unsigned long indicating the number of strings
+** in the array.
+*/
+static void StrHeapSort(farulong *optrarray, /* Offset pointers */
+	faruchar *strarray,             /* Strings array */
+	ulong numstrings,               /* # of strings in array */
+	ulong bottom,                   /* Not used in the body; string 0 is the root */
+	ulong top)                      /* Region to sort...top */
+{
+unsigned char temp[80];                 /* Used to exchange elements */
+unsigned char tlen;                     /* Temp to hold length */
+unsigned long i;                        /* Loop index */
+
+
+/*
+** Build a heap in the array (string 0 is sifted below)
+*/
+for(i=(top/2L); i>0; --i)
+	strsift(optrarray,strarray,numstrings,i,top);
+
+/*
+** Repeatedly extract maximum from heap and place it at the
+** end of the array.  When we get done, we'll have a sorted
+** array.
+*/
+for(i=top; i>0; --i)
+{
+	strsift(optrarray,strarray,numstrings,0,i);
+
+	/* temp = string[0], length byte included */
+	tlen=*strarray;
+	MoveMemory((farvoid *)&temp[0], /* Perform exchange */
+		(farvoid *)strarray,
+		(unsigned long)(tlen+1));
+
+
+	/* string[0]=string[i], after resizing slot 0 to fit */
+	tlen=*(strarray+*(optrarray+i));
+	stradjust(optrarray,strarray,numstrings,0,tlen);
+	MoveMemory((farvoid *)strarray,
+		(farvoid *)(strarray+*(optrarray+i)),
+		(unsigned long)(tlen+1));
+
+	/* string[i]=temp, after resizing slot i to fit */
+	tlen=temp[0];
+	stradjust(optrarray,strarray,numstrings,i,tlen);
+	MoveMemory((farvoid *)(strarray+*(optrarray+i)),
+		(farvoid *)&temp[0],
+		(unsigned long)(tlen+1));
+
+}
+return;
+}
+
+/****************
+** str_is_less **
+*****************
+** Pass this function:
+**      1) A pointer to an array of offset pointers
+**      2) A pointer to a string array
+**      3) The number of elements in the string array
+**      4) Offsets to two strings (a & b)
+** This function returns TRUE if string a is < string b.
+*/
+static int str_is_less(farulong *optrarray, /* Offset pointers */
+	faruchar *strarray,                     /* String array */
+	ulong numstrings,                       /* # of strings */
+	ulong a, ulong b)                       /* Offsets */
+{
+/*
+** Ordering predicate used by the string heap sort.
+** a and b are indices into optrarray; the byte offsets of the
+** two strings are *(optrarray+a) and *(optrarray+b).
+** numstrings is not referenced here.
+** Returns TRUE or FALSE.
+*/
+int slen;               /* String length */
+
+/*
+** Determine which string has the minimum length.  Use that
+** to call strncmp().  If they match up to that point, the
+** string with the longer length wins.
+** The length is the first byte of each stored string.
+*/
+slen=(int)*(strarray+*(optrarray+a));
+if(slen > (int)*(strarray+*(optrarray+b)))
+	slen=(int)*(strarray+*(optrarray+b));
+
+/*
+** slen is reused to hold the strncmp() result.
+** NOTE(review): the pointers handed to strncmp() are the same
+** addresses the lengths were read from, so the comparison
+** starts at the length byte rather than at the first
+** character.  This looks unintended, but it defines the
+** ordering the benchmark produces -- confirm before changing.
+*/
+slen=strncmp((char *)(strarray+*(optrarray+a)),
+		(char *)(strarray+*(optrarray+b)),slen);
+
+if(slen==0)
+{
+	/*
+	** They match.  Return true if the length of a
+	** is greater than the length of b.
+	*/
+	if(*(strarray+*(optrarray+a)) >
+		*(strarray+*(optrarray+b)))
+		return(TRUE);
+	return(FALSE);
+}
+
+if(slen<0) return(TRUE);        /* a is strictly less than b */
+
+return(FALSE);                  /* Only other possibility */
+}
+
+/************
+** strsift **
+*************
+** Pass this function:
+**      1) A pointer to an array of offset pointers
+**      2) A pointer to a string array
+**      3) The number of elements in the string array
+**      4) Offset within which to sort.
+** Sift the array within the bounds of those offsets (thus
+** building a heap).
+*/
+static void strsift(farulong *optrarray,        /* Offset pointers */
+	faruchar *strarray,                     /* String array */
+	ulong numstrings,                       /* # of strings */
+	ulong i, ulong j)                       /* Offsets */
+{
+/*
+** Sift string i down through the heap that ends at string j.
+** i and j are string indices (positions in optrarray).
+** The children of node i are taken to be 2*i and 2*i+1, so
+** for i==0 the candidates are strings 0 and 1.
+*/
+unsigned long k;                /* Temporaries */
+unsigned char temp[80];
+unsigned char tlen;             /* For string lengths */
+
+
+while((i+i)<=j)
+{
+	/* Pick the larger of the two children (k or k+1) */
+	k=i+i;
+	if(k<j)
+		if(str_is_less(optrarray,strarray,numstrings,k,k+1L))
+			++k;
+	/* Parent smaller than that child: exchange and descend */
+	if(str_is_less(optrarray,strarray,numstrings,i,k))
+	{
+		/* temp=string[k] (length byte plus characters) */
+		tlen=*(strarray+*(optrarray+k));
+		MoveMemory((farvoid *)&temp[0],
+			(farvoid *)(strarray+*(optrarray+k)),
+			(unsigned long)(tlen+1));
+
+		/*
+		** string[k]=string[i]
+		** stradjust() resizes slot k first; the offsets are
+		** re-read afterwards because it may have moved them.
+		*/
+		tlen=*(strarray+*(optrarray+i));
+		stradjust(optrarray,strarray,numstrings,k,tlen);
+		MoveMemory((farvoid *)(strarray+*(optrarray+k)),
+			(farvoid *)(strarray+*(optrarray+i)),
+			(unsigned long)(tlen+1));
+
+		/* string[i]=temp */
+		tlen=temp[0];
+		stradjust(optrarray,strarray,numstrings,i,tlen);
+		MoveMemory((farvoid *)(strarray+*(optrarray+i)),
+			(farvoid *)&temp[0],
+			(unsigned long)(tlen+1));
+		i=k;
+	}
+	else
+		i=j+1;          /* Heap property holds: force loop exit */
+}
+return;
+}
+
+/************************
+** BITFIELD OPERATIONS **
+*************************/
+
+/*************
+** DoBitops **
+**************
+** Perform the bit operations test portion of the CPU
+** benchmark.  Stores the bit operations per second in
+** global_bitopstruct.bitopspersec.
+*/
+void DoBitops(void)
+{
+/*
+** Driver for the bitfield test.  On the first call (adjust==0)
+** the size of the operations array is grown until one iteration
+** takes more than global_min_ticks; later calls reuse that size.
+** The test is then repeated until request_secs have elapsed and
+** the rate is stored in global_bitopstruct.bitopspersec.
+*/
+BitOpStruct *locbitopstruct;    /* Local bitop structure */
+farulong *bitarraybase;         /* Base of bitmap array */
+farulong *bitoparraybase;       /* Base of bitmap operations array */
+ulong nbitops;                  /* # of bitfield operations */
+ulong accumtime;                /* Accumulated time in ticks */
+double iterations;              /* # of iterations */
+char *errorcontext;             /* Error context string */
+int systemerror;                /* For holding error codes */
+ulong ticks;                    /* Ticks used by one trial iteration;
+				** same type as DoBitfieldIteration()
+				** returns, so nothing is truncated */
+
+/*
+** Link to global structure.
+*/
+locbitopstruct=&global_bitopstruct;
+
+/*
+** Set the error context.
+*/
+errorcontext="CPU:Bitfields";
+
+/*
+** See if we need to run adjustment code.
+*/
+if(locbitopstruct->adjust==0)
+{
+	bitarraybase=(farulong *)AllocateMemory(locbitopstruct->bitfieldarraysize *
+		sizeof(ulong),&systemerror);
+	if(systemerror)
+	{       ReportError(errorcontext,systemerror);
+		ErrorExit();
+	}
+
+	/*
+	** Initialize bitfield operations array to [2,30] elements
+	*/
+	locbitopstruct->bitoparraysize=30L;
+
+	while(1)
+	{
+		/*
+		** Allocate space for operations array
+		** (two words per operation: offset and run length)
+		*/
+		bitoparraybase=(farulong *)AllocateMemory(locbitopstruct->bitoparraysize*2L*
+			sizeof(ulong),
+			&systemerror);
+		if(systemerror)
+		{       ReportError(errorcontext,systemerror);
+			FreeMemory((farvoid *)bitarraybase,&systemerror);
+			ErrorExit();
+		}
+		/*
+		** Do an iteration of the bitmap test.  If the
+		** elapsed time is less than or equal to the permitted
+		** minimum, then de-allocate the array, reallocate a
+		** larger version, and try again.
+		*/
+		ticks=DoBitfieldIteration(bitarraybase,
+					   bitoparraybase,
+					   locbitopstruct->bitoparraysize,
+					   &nbitops);
+#ifdef DEBUG
+#ifdef LINUX
+		if (locbitopstruct->bitoparraysize==30L){
+		  /* this is the first loop, write a debug file */
+		  FILE *file;
+		  unsigned long *running_base; /* same as farulong */
+		  long counter;
+		  file=fopen("debugbit.dat","w");
+		  if (file==NULL){
+		    /* Do not write through a null stream */
+		    printf("\nCould not open debugbit.dat for writing\n");
+		  }
+		  else{
+		    running_base=bitarraybase;
+		    for (counter=0;counter<(long)(locbitopstruct->bitfieldarraysize);counter++){
+#ifdef LONG64
+		      fprintf(file,"%08X",(unsigned int)(*running_base&0xFFFFFFFFL));
+		      fprintf(file,"%08X",(unsigned int)((*running_base>>32)&0xFFFFFFFFL));
+		      if ((counter+1)%4==0) fprintf(file,"\n");
+#else
+		      fprintf(file,"%08lX",*running_base);
+		      if ((counter+1)%8==0) fprintf(file,"\n");
+#endif
+		      running_base=running_base+1;
+		    }
+		    fclose(file);
+		    printf("\nWrote the file debugbit.dat, you may want to compare it to debugbit.good\n");
+		  }
+		}
+#endif
+#endif
+
+		if (ticks>global_min_ticks) break;      /* We're ok...exit */
+
+		FreeMemory((farvoid *)bitoparraybase,&systemerror);
+		locbitopstruct->bitoparraysize+=100L;
+	}
+}
+else
+{
+	/*
+	** Don't need to do self adjustment, just allocate
+	** the array space.
+	*/
+	bitarraybase=(farulong *)AllocateMemory(locbitopstruct->bitfieldarraysize *
+		sizeof(ulong),&systemerror);
+	if(systemerror)
+	{       ReportError(errorcontext,systemerror);
+		ErrorExit();
+	}
+	bitoparraybase=(farulong *)AllocateMemory(locbitopstruct->bitoparraysize*2L*
+		sizeof(ulong),
+		&systemerror);
+	if(systemerror)
+	{       ReportError(errorcontext,systemerror);
+		FreeMemory((farvoid *)bitarraybase,&systemerror);
+		ErrorExit();
+	}
+}
+
+/*
+** All's well if we get here.  Repeatedly perform bitops until the
+** accumulated elapsed time is greater than # of seconds requested.
+*/
+accumtime=0L;
+iterations=(double)0.0;
+do {
+	accumtime+=DoBitfieldIteration(bitarraybase,
+			bitoparraybase,
+			locbitopstruct->bitoparraysize,&nbitops);
+	iterations+=(double)nbitops;
+} while(TicksToSecs(accumtime)<locbitopstruct->request_secs);
+
+/*
+** Clean up, calculate results, and go home.
+** Also, set adjustment flag to show that we don't have
+** to do self adjusting in the future.
+*/
+FreeMemory((farvoid *)bitarraybase,&systemerror);
+FreeMemory((farvoid *)bitoparraybase,&systemerror);
+locbitopstruct->bitopspersec=iterations /TicksToFracSecs(accumtime);
+if(locbitopstruct->adjust==0)
+	locbitopstruct->adjust=1;
+
+return;
+}
+
+/************************
+** DoBitfieldIteration **
+*************************
+** Perform a single iteration of the bitfield benchmark.
+** Return the # of ticks accumulated by the operation.
+*/
+static ulong DoBitfieldIteration(farulong *bitarraybase,
+		farulong *bitoparraybase,
+		long bitoparraysize,
+		ulong *nbitops)
+{
+/*
+** bitarraybase   - the bitmap that is operated on
+** bitoparraybase - receives bitoparraysize (offset, run length)
+**                  pairs, stored as consecutive words
+** bitoparraysize - number of operations to generate and execute
+** nbitops        - out: sum of all generated run lengths
+** Only the execution loop is timed; building the operations
+** array and resetting the bitmap happen before the stopwatch
+** is started.
+*/
+long i;                         /* Index */
+ulong bitoffset;                /* Offset into bitmap */
+ulong elapsed;                  /* Time to execute */
+/*
+** Clear # bitops counter
+*/
+*nbitops=0L;
+
+/*
+** Construct a set of bitmap offsets and run lengths.
+** The offset can be any random number from 0 to the
+** size of the bitmap (in bits).  The run length can
+** be any random number from 1 to the number of bits
+** between the offset and the end of the bitmap.
+** The limit of 262140 used below matches a bitmap of
+** 8192 * 32 bits (262,144 bits).
+** NOTE(review): the bitmap is actually sized by
+** global_bitopstruct.bitfieldarraysize -- confirm it is
+** always at least that large.
+*/
+/*
+** Reset random number generator so things repeat.
+** Also reset the bit array we work on.
+** added by Uwe F. Mayer
+*/
+randnum((int32)13);
+/* Fill every word with the alternating bit pattern 0101... */
+for (i=0;i<global_bitopstruct.bitfieldarraysize;i++)
+{
+#ifdef LONG64
+	*(bitarraybase+i)=(ulong)0x5555555555555555;
+#else
+	*(bitarraybase+i)=(ulong)0x55555555;
+#endif
+}
+/*
+** NOTE(review): same seed call as above with no random draw in
+** between; one of the two looks redundant -- confirm against
+** randnum().
+*/
+randnum((int32)13);
+/* end of addition of code */
+
+for (i=0;i<bitoparraysize;i++)
+{
+	/* First item is offset */
+	/* *(bitoparraybase+i+i)=bitoffset=abs_randwc(262140L); */
+	*(bitoparraybase+i+i)=bitoffset=abs_randwc((int32)262140);
+
+	/* Next item is run length; it is also added to *nbitops */
+	/* *nbitops+=*(bitoparraybase+i+i+1L)=abs_randwc(262140L-bitoffset);*/
+	*nbitops+=*(bitoparraybase+i+i+1L)=abs_randwc((int32)262140-bitoffset);
+}
+
+/*
+** Array of offset and lengths built...do an iteration of
+** the test.
+** Start the stopwatch.
+*/
+elapsed=StartStopwatch();
+
+/*
+** Loop through array of offset/run length pairs.
+** Execute operation based on modulus of index.
+*/
+for(i=0;i<bitoparraysize;i++)
+{
+	switch(i % 3)
+	{
+
+		case 0: /* Set run of bits */
+			ToggleBitRun(bitarraybase,
+				*(bitoparraybase+i+i),
+				*(bitoparraybase+i+i+1),
+				1);
+			break;
+
+		case 1: /* Clear run of bits */
+			ToggleBitRun(bitarraybase,
+				*(bitoparraybase+i+i),
+				*(bitoparraybase+i+i+1),
+				0);
+			break;
+
+		case 2: /* Complement run of bits */
+			FlipBitRun(bitarraybase,
+				*(bitoparraybase+i+i),
+				*(bitoparraybase+i+i+1));
+			break;
+	}
+}
+
+/*
+** Return elapsed time
+*/
+return(StopStopwatch(elapsed));
+}
+
+
+/*****************************
+**     ToggleBitRun          *
+******************************
+** Set or clear a run of nbits starting at
+** bit_addr in bitmap.
+*/
+static void ToggleBitRun(farulong *bitmap, /* Bitmap */
+		ulong bit_addr,         /* Address of bits to set */
+		ulong nbits,            /* # of bits to set/clr */
+		uint val)               /* 1 or 0 */
+{
+/*
+** Walks nbits consecutive bit positions starting at bit_addr.
+** Each position is split into a word index and a bit number
+** within that word; the bit is set when val is nonzero and
+** cleared otherwise.
+*/
+unsigned long bindex;   /* Index into array */
+unsigned long bitnumb;  /* Bit number */
+
+while(nbits--)
+{
+#ifdef LONG64
+	bindex=bit_addr>>6;     /* Index is number /64 */
+	bitnumb=bit_addr % 64;   /* Bit number in word */
+#else
+	bindex=bit_addr>>5;     /* Index is number /32 */
+	bitnumb=bit_addr % 32;  /* bit number in word */
+#endif
+	/*
+	** The mask is built from an unsigned constant: shifting a
+	** signed 1L into the top bit of the word is undefined.
+	*/
+	if(val)
+		bitmap[bindex]|=(1UL<<bitnumb);
+	else
+		bitmap[bindex]&=~(1UL<<bitnumb);
+	bit_addr++;
+}
+return;
+}
+
+/***************
+** FlipBitRun **
+****************
+** Complements a run of bits.
+*/
+static void FlipBitRun(farulong *bitmap,        /* Bit map */
+		ulong bit_addr,                 /* Bit address */
+		ulong nbits)                    /* # of bits to flip */
+{
+/*
+** Walks nbits consecutive bit positions starting at bit_addr
+** and inverts each one.  A position is split into a word index
+** and a bit number within that word.
+*/
+unsigned long bindex;   /* Index into array */
+unsigned long bitnumb;  /* Bit number */
+
+while(nbits--)
+{
+#ifdef LONG64
+	bindex=bit_addr>>6;     /* Index is number /64 */
+	bitnumb=bit_addr % 64;  /* Bit number in longword */
+#else
+	bindex=bit_addr>>5;     /* Index is number /32 */
+	bitnumb=bit_addr % 32;  /* Bit number in longword */
+#endif
+	/*
+	** Unsigned constant: shifting a signed 1L into the top
+	** bit of the word is undefined.
+	*/
+	bitmap[bindex]^=(1UL<<bitnumb);
+	bit_addr++;
+}
+
+return;
+}
+
+/*****************************
+** FLOATING-POINT EMULATION **
+*****************************/
+
+/**************
+** DoEmFloat **
+***************
+** Perform the floating-point emulation routines portion of the
+** CPU benchmark.  Stores the result in global_emfloatstruct.emflops.
+*/
+void DoEmFloat(void)
+{
+/*
+** Driver for the floating-point emulation test.  Three arrays
+** of InternalFPF are allocated and initialised.  On the first
+** call (adjust==0) the loop count is doubled until one
+** iteration takes more than global_min_ticks.  The test is then
+** repeated until request_secs have elapsed and the rate is
+** stored in global_emfloatstruct.emflops.
+*/
+EmFloatStruct *locemfloatstruct;        /* Local structure */
+InternalFPF *abase;             /* Base of A array */
+InternalFPF *bbase;             /* Base of B array */
+InternalFPF *cbase;             /* Base of C array */
+ulong accumtime;                /* Accumulated time in ticks */
+double iterations;              /* # of iterations */
+ulong tickcount;                /* # of ticks */
+char *errorcontext;             /* Error context string pointer */
+int systemerror;                /* For holding error code */
+ulong loops;                    /* # of loops */
+
+/*
+** Link to global structure
+*/
+locemfloatstruct=&global_emfloatstruct;
+
+/*
+** Set the error context
+*/
+errorcontext="CPU:Floating Emulation";
+
+/*
+** Allocate the three work arrays.  On failure, release whatever
+** was already obtained before bailing out.
+*/
+abase=(InternalFPF *)AllocateMemory(locemfloatstruct->arraysize*sizeof(InternalFPF),
+		&systemerror);
+if(systemerror)
+{       ReportError(errorcontext,systemerror);
+	ErrorExit();
+}
+
+bbase=(InternalFPF *)AllocateMemory(locemfloatstruct->arraysize*sizeof(InternalFPF),
+		&systemerror);
+if(systemerror)
+{       ReportError(errorcontext,systemerror);
+	FreeMemory((farvoid *)abase,&systemerror);
+	ErrorExit();
+}
+
+cbase=(InternalFPF *)AllocateMemory(locemfloatstruct->arraysize*sizeof(InternalFPF),
+		&systemerror);
+if(systemerror)
+{       ReportError(errorcontext,systemerror);
+	FreeMemory((farvoid *)abase,&systemerror);
+	FreeMemory((farvoid *)bbase,&systemerror);
+	ErrorExit();
+}
+
+/*
+** Set up the arrays
+*/
+SetupCPUEmFloatArrays(abase,bbase,cbase,locemfloatstruct->arraysize);
+
+/*
+** See if we need to do self-adjusting code.
+*/
+if(locemfloatstruct->adjust==0)
+{
+	/* Zero means "no suitable loop count found yet" */
+	locemfloatstruct->loops=0;
+
+	/*
+	** Do an iteration of the tests.  If the elapsed time is
+	** less than minimum, increase the loop count and try
+	** again.  The count doubles on every attempt.
+	*/
+	for(loops=1;loops<CPUEMFLOATLOOPMAX;loops+=loops)
+	{       tickcount=DoEmFloatIteration(abase,bbase,cbase,
+			locemfloatstruct->arraysize,
+			loops);
+		if(tickcount>global_min_ticks)
+		{       locemfloatstruct->loops=loops;
+			break;
+		}
+	}
+}
+
+/*
+** Verify that self adjustment code worked.
+** The message names the macro that bounds the loop above.
+*/
+if(locemfloatstruct->loops==0)
+{       printf("CPU:EMFPU -- CPUEMFLOATLOOPMAX limit hit\n");
+	FreeMemory((farvoid *)abase,&systemerror);
+	FreeMemory((farvoid *)bbase,&systemerror);
+	FreeMemory((farvoid *)cbase,&systemerror);
+	ErrorExit();
+}
+
+/*
+** All's well if we get here.  Repeatedly perform floating
+** tests until the accumulated time is greater than the
+** # of seconds requested.
+** Each iteration performs arraysize * 3 operations.
+*/
+accumtime=0L;
+iterations=(double)0.0;
+do {
+	accumtime+=DoEmFloatIteration(abase,bbase,cbase,
+			locemfloatstruct->arraysize,
+			locemfloatstruct->loops);
+	iterations+=(double)1.0;
+} while(TicksToSecs(accumtime)<locemfloatstruct->request_secs);
+
+
+/*
+** Clean up, calculate results, and go home.
+** Also, indicate that adjustment is done.
+*/
+FreeMemory((farvoid *)abase,&systemerror);
+FreeMemory((farvoid *)bbase,&systemerror);
+FreeMemory((farvoid *)cbase,&systemerror);
+
+locemfloatstruct->emflops=(iterations*(double)locemfloatstruct->loops)/
+		(double)TicksToFracSecs(accumtime);
+if(locemfloatstruct->adjust==0)
+	locemfloatstruct->adjust=1;
+
+#ifdef DEBUG
+printf("----------------------------------------------------------------------------\n");
+#endif
+return;
+}
+
+/*************************
+** FOURIER COEFFICIENTS **
+*************************/
+
+/**************
+** DoFourier **
+***************
+** Perform the transcendental/trigonometric portion of the
+** benchmark.  This benchmark calculates the first n
+** fourier coefficients of the function (x+1)^x defined
+** on the interval 0,2.
+*/
+void DoFourier(void)
+{
+/*
+** Driver for the Fourier coefficient test.  While calibrating
+** (adjust==0) the coefficient arrays start at 100 elements and
+** grow by 50 until one iteration takes more than
+** global_min_ticks; otherwise the stored arraysize is used as
+** is.  The test is then repeated until request_secs have
+** elapsed and the rate is stored in global_fourierstruct.fflops.
+*/
+FourierStruct *fourier;         /* Shorthand for the global settings */
+fardouble *acoef;               /* Storage for the A[] coefficients */
+fardouble *bcoef;               /* Storage for the B[] coefficients */
+unsigned long totalticks;       /* Ticks accumulated over all passes */
+double ncoeffs;                 /* Coefficients computed so far */
+char *context;                  /* Label handed to ReportError */
+int errcode;                    /* Status from the memory routines */
+int calibrating;                /* Nonzero while self-adjusting */
+
+fourier=&global_fourierstruct;
+context="FPU:Transcendental";
+
+/*
+** A first-time run starts from 100 elements.
+*/
+calibrating=(fourier->adjust==0);
+if(calibrating)
+	fourier->arraysize=100L;
+
+/*
+** Obtain both arrays.  When calibrating, time one iteration and
+** enlarge the arrays until it is long enough; otherwise a single
+** pass through the loop just performs the allocation.
+*/
+for(;;)
+{
+	acoef=(fardouble *)AllocateMemory(fourier->arraysize*sizeof(double),
+			&errcode);
+	if(errcode)
+	{
+		ReportError(context,errcode);
+		ErrorExit();
+	}
+
+	bcoef=(fardouble *)AllocateMemory(fourier->arraysize*sizeof(double),
+			&errcode);
+	if(errcode)
+	{
+		ReportError(context,errcode);
+		FreeMemory((void *)acoef,&errcode);
+		ErrorExit();
+	}
+
+	if(!calibrating)
+		break;          /* Size already known */
+
+	if(DoFPUTransIteration(acoef,bcoef,fourier->arraysize)>global_min_ticks)
+		break;          /* Long enough to measure */
+
+	/*
+	** Too quick: discard the arrays and retry with 50 more
+	** elements.
+	*/
+	FreeMemory((farvoid *)acoef,&errcode);
+	FreeMemory((farvoid *)bcoef,&errcode);
+	fourier->arraysize+=50L;
+}
+
+/*
+** Measurement proper: keep iterating until the requested number
+** of seconds has been used up.  Every pass accounts for
+** 2*arraysize-1 coefficients.
+*/
+totalticks=0L;
+ncoeffs=(double)0.0;
+do {
+	totalticks+=DoFPUTransIteration(acoef,bcoef,fourier->arraysize);
+	ncoeffs+=(double)fourier->arraysize*(double)2.0-(double)1.0;
+} while(TicksToSecs(totalticks)<fourier->request_secs);
+
+/*
+** Release the arrays, record the rate, and remember that the
+** calibration does not have to be repeated.
+*/
+FreeMemory((farvoid *)acoef,&errcode);
+FreeMemory((farvoid *)bcoef,&errcode);
+
+fourier->fflops=ncoeffs/(double)TicksToFracSecs(totalticks);
+
+if(calibrating)
+	fourier->adjust=1;
+}
+
+/************************
+** DoFPUTransIteration **
+*************************
+** Perform an iteration of the FPU Transcendental/trigonometric
+** benchmark.  Here, an iteration consists of calculating the
+** first n fourier coefficients of the function (x+1)^x on
+** the interval 0,2.  n is given by arraysize.
+** NOTE: The # of integration steps is fixed at
+** 200.
+*/
+static ulong DoFPUTransIteration(fardouble *abase,      /* A coeffs. */
+			fardouble *bbase,               /* B coeffs. */
+			ulong arraysize)                /* # of coeffs */
+{
+/*
+** Fills abase[0..arraysize-1] and bbase[1..arraysize-1] and
+** returns the ticks used.  bbase[0] is never written.
+** The whole computation runs with the stopwatch started.
+*/
+double omega;           /* Fundamental frequency */
+unsigned long i;        /* Index */
+unsigned long elapsed;  /* Elapsed time */
+
+/*
+** Start the stopwatch
+*/
+elapsed=StartStopwatch();
+
+/*
+** Calculate the fourier series.  Begin by
+** calculating A[0].
+** select==0 integrates the bare function; the result is halved.
+*/
+
+*abase=TrapezoidIntegrate((double)0.0,
+			(double)2.0,
+			200,
+			(double)0.0,    /* No omega * n needed */
+			0 )/(double)2.0;
+
+/*
+** Calculate the fundamental frequency.
+** ( 2 * pi ) / period...and since the period
+** is 2, omega is simply pi.
+*/
+omega=(double)3.1415926535897932;
+
+for(i=1;i<arraysize;i++)
+{
+
+	/*
+	** Calculate A[i] terms.  Note, once again, that we
+	** can ignore the 2/period term outside the integral
+	** since the period is 2 and the term cancels itself
+	** out.
+	** select==1 requests the cosine-weighted integrand.
+	*/
+	*(abase+i)=TrapezoidIntegrate((double)0.0,
+			(double)2.0,
+			200,
+			omega * (double)i,
+			1);
+
+	/*
+	** Calculate the B[i] terms.
+	** select==2 requests the sine-weighted integrand.
+	*/
+	*(bbase+i)=TrapezoidIntegrate((double)0.0,
+			(double)2.0,
+			200,
+			omega * (double)i,
+			2);
+
+}
+#ifdef DEBUG
+/*
+** Dump the coefficients.  This happens before the stopwatch is
+** stopped, so a DEBUG build includes the printing in its timing.
+*/
+{
+  int i;
+  printf("\nA[i]=\n");
+  for (i=0;i<arraysize;i++) printf("%7.3g ",abase[i]);
+  printf("\nB[i]=\n(undefined) ");
+  for (i=1;i<arraysize;i++) printf("%7.3g ",bbase[i]);
+}
+#endif
+/*
+** All done, stop the stopwatch
+*/
+return(StopStopwatch(elapsed));
+}
+
+/***********************
+** TrapezoidIntegrate **
+************************
+** Perform a simple trapezoid integration on the
+** function (x+1)**x.
+** x0,x1 set the lower and upper bounds of the
+** integration.
+** nsteps indicates # of trapezoidal sections
+** omegan is the fundamental frequency times
+**  the series member #
+** select = 0 for the A[0] term, 1 for cosine terms, and
+**   2 for sine terms.
+** Returns the value.
+*/
+static double TrapezoidIntegrate( double x0,            /* Lower bound */
+			double x1,              /* Upper bound */
+			int nsteps,             /* # of steps */
+			double omegan,          /* omega * n */
+			int select)
+{
+/*
+** Accumulates half the integrand at x0, the integrand at the
+** interior points, and half the integrand at x1, then scales
+** the sum by the step size.  select is passed straight through
+** to thefunction().
+*/
+double x;               /* Independent variable */
+double dx;              /* Stepsize */
+double rvalue;          /* Return value */
+
+
+/*
+** Initialize independent variable
+*/
+x=x0;
+
+/*
+** Calculate stepsize
+*/
+dx=(x1 - x0) / (double)nsteps;
+
+/*
+** Initialize the return value.
+** The lower end point enters with weight 1/2.
+*/
+rvalue=thefunction(x0,omegan,select)/(double)2.0;
+
+/*
+** Compute the other terms of the integral.
+** NOTE(review): nsteps is decremented once before the loop and
+** once more by every loop test, so the body runs nsteps-2
+** times; a textbook trapezoid rule has nsteps-1 interior
+** points.  Changing this would change the benchmark's numbers
+** and reference output -- confirm before touching it.
+*/
+if(nsteps!=1)
+{       --nsteps;               /* Already done 1 step */
+	while(--nsteps )
+	{
+		x+=dx;
+		rvalue+=thefunction(x,omegan,select);
+	}
+}
+/*
+** Finish computation
+** The upper end point enters with weight 1/2; the sum is then
+** multiplied by the step size.
+*/
+rvalue=(rvalue+thefunction(x1,omegan,select)/(double)2.0)*dx;
+
+return(rvalue);
+}
+
+/****************
+** thefunction **
+*****************
+** This routine selects the function to be used
+** in the Trapezoid integration.
+** x is the independent variable
+** omegan is omega * n
+** select chooses which of the sine/cosine functions
+**  are used.  note the special case for select=0.
+*/
+static double thefunction(double x,             /* Independent variable */
+		double omegan,          /* Omega * term */
+		int select)             /* Choose term */
+{
+/*
+** Integrand for the Fourier test: (x+1)^x, optionally weighted
+** by cos(omegan*x) or sin(omegan*x).
+*/
+
+/*
+** Use select to pick which function we call.
+*/
+switch(select)
+{
+	/* Bare function, used for the A[0] term; omegan is ignored */
+	case 0: return(pow(x+(double)1.0,x));
+
+	/* Cosine-weighted function */
+	case 1: return(pow(x+(double)1.0,x) * cos(omegan * x));
+
+	/* Sine-weighted function */
+	case 2: return(pow(x+(double)1.0,x) * sin(omegan * x));
+}
+
+/*
+** We should never reach this point, but the following
+** keeps compilers from issuing a warning message.
+** Any select value other than 0, 1 or 2 ends up here.
+*/
+return(0.0);
+}
+
+/*************************
+** ASSIGNMENT ALGORITHM **
+*************************/
+
+/*************
+** DoAssign **
+**************
+** Perform an assignment algorithm.
+** The algorithm was adapted from the step by step guide found
+** in "Quantitative Decision Making for Business" (Gordon,
+**  Pressman, and Cohn; Prentice-Hall)
+**
+**
+** NOTES:
+** 1. Even though the algorithm distinguishes between
+**    ASSIGNROWS and ASSIGNCOLS, as though the two might
+**    be different, it does presume a square matrix.
+**    I.E., ASSIGNROWS and ASSIGNCOLS must be the same.
+**    This makes for some algorithmically-correct but
+**    probably non-optimal constructs.
+**
+*/
+void DoAssign(void)
+{
+/*
+** Driver for the assignment test.  On the first call
+** (adjust==0) the number of ASSIGNROWS x ASSIGNCOLS arrays is
+** increased one at a time until a single iteration takes more
+** than global_min_ticks; later calls reuse that number.  The
+** test is then repeated until request_secs have elapsed and the
+** rate is stored in global_assignstruct.iterspersec.
+*/
+AssignStruct *locassignstruct;  /* Local structure ptr */
+farlong *arraybase;             /* Base of the block of arrays */
+char *errorcontext;             /* Error context string */
+int systemerror;                /* For holding error codes */
+ulong accumtime;                /* Accumulated time in ticks */
+double iterations;              /* # of iterations */
+
+/*
+** Link to global structure
+*/
+locassignstruct=&global_assignstruct;
+
+/*
+** Set the error context string.
+*/
+errorcontext="CPU:Assignment";
+
+/*
+** See if we need to do self adjustment code.
+*/
+if(locassignstruct->adjust==0)
+{
+	/*
+	** Self-adjustment code.  The system begins by working on 1
+	** array.  If it does that in no time, then two arrays
+	** are built.  This process continues until
+	** enough arrays are built to handle the tolerance.
+	*/
+	locassignstruct->numarrays=1;
+	while(1)
+	{
+		/*
+		** Allocate space for arrays
+		*/
+		arraybase=(farlong *) AllocateMemory(sizeof(long)*
+			ASSIGNROWS*ASSIGNCOLS*locassignstruct->numarrays,
+			 &systemerror);
+		if(systemerror)
+		{       /*
+			** Nothing was allocated, so there is nothing
+			** to release: report and leave.
+			*/
+			ReportError(errorcontext,systemerror);
+			ErrorExit();
+		}
+
+		/*
+		** Do an iteration of the assignment alg.  If the
+		** elapsed time is less than or equal to the permitted
+		** minimum, then allocate for more arrays and
+		** try again.
+		*/
+		if(DoAssignIteration(arraybase,
+			locassignstruct->numarrays)>global_min_ticks)
+			break;          /* We're ok...exit */
+
+		FreeMemory((farvoid *)arraybase, &systemerror);
+		locassignstruct->numarrays++;
+	}
+}
+else
+{       /*
+	** Allocate space for arrays
+	*/
+	arraybase=(farlong *)AllocateMemory(sizeof(long)*
+		ASSIGNROWS*ASSIGNCOLS*locassignstruct->numarrays,
+		 &systemerror);
+	if(systemerror)
+	{       /* Allocation failed: nothing to release */
+		ReportError(errorcontext,systemerror);
+		ErrorExit();
+	}
+}
+
+/*
+** All's well if we get here.  Do the tests.
+*/
+accumtime=0L;
+iterations=(double)0.0;
+
+do {
+	accumtime+=DoAssignIteration(arraybase,
+		locassignstruct->numarrays);
+	iterations+=(double)1.0;
+} while(TicksToSecs(accumtime)<locassignstruct->request_secs);
+
+/*
+** Clean up, calculate results, and go home.  Be sure to
+** show that we don't have to rerun adjustment code.
+*/
+FreeMemory((farvoid *)arraybase,&systemerror);
+
+/* Each iteration solves numarrays problems */
+locassignstruct->iterspersec=iterations *
+	(double)locassignstruct->numarrays / TicksToFracSecs(accumtime);
+
+if(locassignstruct->adjust==0)
+	locassignstruct->adjust=1;
+
+return;
+
+}
+
+/**********************
+** DoAssignIteration **
+***********************
+** This routine executes one iteration of the assignment test.
+** It returns the number of ticks elapsed in the iteration.
+*/
+static ulong DoAssignIteration(farlong *arraybase,
+	ulong numarrays)
+{
+/*
+** arraybase - start of numarrays consecutive
+**             ASSIGNROWS x ASSIGNCOLS arrays
+** numarrays - how many arrays to process
+** The arrays are reloaded before the stopwatch is started, so
+** only the Assignment() calls are timed.
+*/
+longptr abase;                  /* local pointer */
+ulong elapsed;          /* Elapsed ticks */
+ulong i;
+
+/*
+** Set up local pointer
+*/
+abase.ptrs.p=arraybase;
+
+/*
+** Load up the arrays with a random table.
+*/
+LoadAssignArrayWithRand(arraybase,numarrays);
+
+/*
+** Start the stopwatch
+*/
+elapsed=StartStopwatch();
+
+/*
+** Execute assignment algorithms
+** The pointer is advanced by one whole array after each call.
+*/
+for(i=0;i<numarrays;i++)
+{       /* abase.ptrs.p+=i*ASSIGNROWS*ASSIGNCOLS; */
+	/* Fixed  by Eike Dierks */
+	Assignment(*abase.ptrs.ap);
+	abase.ptrs.p+=ASSIGNROWS*ASSIGNCOLS;
+}
+
+/*
+** Get elapsed time
+*/
+return(StopStopwatch(elapsed));
+}
+
+/****************************
+** LoadAssignArrayWithRand **
+*****************************
+** Load the assignment arrays with random numbers.  All positive.
+** These numbers represent costs.
+*/
+static void LoadAssignArrayWithRand(farlong *arraybase,
+	ulong numarrays)
+{
+/*
+** arraybase - start of numarrays consecutive
+**             ASSIGNROWS x ASSIGNCOLS arrays
+** numarrays - how many arrays the block holds
+** Only the first array is filled from the random number
+** generator; each of the remaining ones is a copy of it.
+*/
+longptr master;         /* Always addresses the first array */
+longptr target;         /* Array currently being filled */
+ulong remaining;        /* Arrays still to be handled, master included */
+
+master.ptrs.p=arraybase;
+target.ptrs.p=arraybase;
+
+/*
+** Generate the master table.
+*/
+LoadAssign(*(master.ptrs.ap));
+
+/*
+** Step to each following array in turn and duplicate the
+** master into it.  Nothing happens when there is only one.
+*/
+for(remaining=numarrays;remaining>1;remaining--)
+{
+	target.ptrs.p+=ASSIGNROWS*ASSIGNCOLS;
+	CopyToAssign(*(master.ptrs.ap),*(target.ptrs.ap));
+}
+}
+
+/***************
+** LoadAssign **
+****************
+** The array given by arraybase is loaded with positive random
+** numbers.  Elements in the array are capped at 5,000,000.
+*/
+static void LoadAssign(farlong arraybase[][ASSIGNCOLS])
+{
+/*
+** Fills one ASSIGNROWS x ASSIGNCOLS array with values drawn
+** from abs_randwc((int32)5000000).  The generator is re-seeded
+** first so that every call produces the same table.
+*/
+ushort i,j;             /* Row and column index */
+
+/*
+** Reset random number generator so things repeat.
+*/
+/* randnum(13L); */
+randnum((int32)13);
+
+/*
+** The column index is bounded by ASSIGNCOLS so the fill stays
+** inside each row even if the two dimensions ever differ.
+*/
+for(i=0;i<ASSIGNROWS;i++)
+  for(j=0;j<ASSIGNCOLS;j++){
+    /* arraybase[i][j]=abs_randwc(5000000L);*/
+    arraybase[i][j]=abs_randwc((int32)5000000);
+  }
+
+return;
+}
+
+/*****************
+** CopyToAssign **
+******************
+** Copy the contents of one array to another.  This is called by
+** the routine that builds the initial array, and is used to copy
+** the contents of the initial array into all following arrays.
+*/
+static void CopyToAssign(farlong arrayfrom[ASSIGNROWS][ASSIGNCOLS],
+		farlong arrayto[ASSIGNROWS][ASSIGNCOLS])
+{
+/*
+** Element-by-element duplicate of arrayfrom into arrayto.
+*/
+ushort row;             /* Row being copied */
+ushort col;             /* Column within that row */
+
+row=0;
+while(row<ASSIGNROWS)
+{
+	for(col=0;col<ASSIGNCOLS;col++)
+	{
+		arrayto[row][col]=arrayfrom[row][col];
+	}
+	row++;
+}
+}
+
+/***************
+** Assignment **
+***************/
+static void Assignment(farlong arraybase[][ASSIGNCOLS])
+{
+/*
+** Solve one assignment problem on the cost table arraybase.
+** The table is modified in place.  The assignment flags live in
+** a local scratch tableau, so nothing is returned; a DEBUG
+** build prints the chosen column(s) of every row.
+*/
+short assignedtableau[ASSIGNROWS][ASSIGNCOLS];
+
+/*
+** First, calculate minimum costs
+*/
+calc_minimum_costs(arraybase);
+
+/*
+** Repeat following until the number of rows selected
+** equals the number of rows in the tableau.
+** first_assignments() clears assignedtableau itself on every
+** call, so the local array needs no initialisation here.
+** NOTE(review): second_assignments() presumably revises the
+** cost table so the next pass can assign more rows -- confirm
+** against its definition.
+*/
+while(first_assignments(arraybase,assignedtableau)!=ASSIGNROWS)
+{         second_assignments(arraybase,assignedtableau);
+}
+
+#ifdef DEBUG
+/* An entry equal to 1 marks the column assigned to that row */
+{
+	int i,j;
+	printf("\nColumn choices for each row\n");
+	for(i=0;i<ASSIGNROWS;i++)
+	{
+	        printf("R%03d: ",i);
+		for(j=0;j<ASSIGNCOLS;j++)
+			if(assignedtableau[i][j]==1)
+				printf("%03d ",j);
+	}
+}
+#endif
+
+return;
+}
+
+/***********************
+** calc_minimum_costs **
+************************
+** Revise the tableau by calculating the minimum costs on a
+** row and column basis.  These minima are subtracted from
+** their rows and columns, creating a new tableau.
+*/
+static void calc_minimum_costs(long tableau[][ASSIGNCOLS])
+{
+/*
+** Reduces the tableau in place: first every row has its own
+** minimum subtracted, then every column has its own minimum
+** subtracted.
+*/
+ushort i,j;              /* Index variables */
+long currentmin;        /* Current minimum */
+/*
+** Determine minimum costs on row basis.  This is done by
+** subtracting -- on a row-per-row basis -- the minimum value
+** for that row.
+*/
+for(i=0;i<ASSIGNROWS;i++)
+{
+	currentmin=MAXPOSLONG;  /* Initialize minimum */
+	for(j=0;j<ASSIGNCOLS;j++)
+		if(tableau[i][j]<currentmin)
+			currentmin=tableau[i][j];
+
+	for(j=0;j<ASSIGNCOLS;j++)
+		tableau[i][j]-=currentmin;
+}
+
+/*
+** Determine minimum cost on a column basis.  This works
+** just as above, only now we step through the array
+** column-wise
+*/
+for(j=0;j<ASSIGNCOLS;j++)
+{
+	currentmin=MAXPOSLONG;  /* Initialize minimum */
+	for(i=0;i<ASSIGNROWS;i++)
+		if(tableau[i][j]<currentmin)
+			currentmin=tableau[i][j];
+
+	/*
+	** Here, we'll take the trouble to see if the current
+	** minimum is zero.  This is likely worth it, since the
+	** preceding loop will have created at least one zero in
+	** each row.  We can save ourselves a few iterations.
+	*/
+	if(currentmin!=0)
+		for(i=0;i<ASSIGNROWS;i++)
+			tableau[i][j]-=currentmin;
+}
+
+return;
+}
+
+/**********************
+** first_assignments **
+***********************
+** Do first assignments.
+** The assignedtableau[] array holds a set of values that
+** indicate the assignment of a value, or its elimination.
+** The values are:
+**      0 = Item is neither assigned nor eliminated.
+**      1 = Item is assigned
+**      2 = Item is eliminated
+** Returns the number of selections made.  If this equals
+** the number of rows, then an optimum has been determined.
+**
+** tableau is only read by this routine (its cells are compared
+** against zero); assignedtableau is cleared on entry and then
+** filled in with the marks listed above.
+**
+** The routine works in two phases:
+**  1. Repeated sweeps that assign any row (and then any column)
+**     holding exactly one zero that is still marked 0.
+**  2. If that did not produce ASSIGNROWS assignments, one
+**     row-by-row pass that takes the first still-unmarked zero
+**     of each row.
+** Marks written earlier in a sweep are seen by the rows and
+** columns examined later in the same sweep, so the order of the
+** statements below matters.
+*/
+static int first_assignments(long tableau[][ASSIGNCOLS],
+		short assignedtableau[][ASSIGNCOLS])
+{
+ushort i,j,k;                   /* Index variables */
+ushort numassigns;              /* # of assignments */
+ushort totnumassigns;           /* Total # of assignments */
+ushort numzeros;                /* # of zeros in row */
+int selected=0;                 /* Flag used to indicate selection */
+
+/*
+** Clear the assignedtableau, setting all members to show that
+** no one is yet assigned, eliminated, or anything.
+*/
+for(i=0;i<ASSIGNROWS;i++)
+	for(j=0;j<ASSIGNCOLS;j++)
+		assignedtableau[i][j]=0;
+
+totnumassigns=0;
+do {
+	/* numassigns counts the assignments made during this sweep only */
+	numassigns=0;
+	/*
+	** Step through rows.  For each one that is not currently
+	** assigned, see if the row has only one zero in it.  If so,
+	** mark that as an assigned row/col.  Eliminate other zeros
+	** in the same column.
+	*/
+	for(i=0;i<ASSIGNROWS;i++)
+	{       numzeros=0;
+		/*
+		** Count the zeros of row i that are still marked 0.
+		** selected remembers the column of the last one seen,
+		** so it is only meaningful when numzeros ends up as 1.
+		*/
+		for(j=0;j<ASSIGNCOLS;j++)
+			if(tableau[i][j]==0L)
+				if(assignedtableau[i][j]==0)
+				{       numzeros++;
+					selected=j;
+				}
+		if(numzeros==1)
+		{       numassigns++;
+			totnumassigns++;
+			assignedtableau[i][selected]=1;
+			/* Every other zero in the chosen column becomes eliminated */
+			for(k=0;k<ASSIGNROWS;k++)
+				if((k!=i) &&
+				   (tableau[k][selected]==0))
+					assignedtableau[k][selected]=2;
+		}
+	}
+	/*
+	** Step through columns, doing same as above.  Now, be careful
+	** of items in the other rows of a selected column.
+	*/
+	for(j=0;j<ASSIGNCOLS;j++)
+	{       numzeros=0;
+		/* Here selected holds a row index rather than a column index */
+		for(i=0;i<ASSIGNROWS;i++)
+			if(tableau[i][j]==0L)
+				if(assignedtableau[i][j]==0)
+				{       numzeros++;
+					selected=i;
+				}
+		if(numzeros==1)
+		{       numassigns++;
+			totnumassigns++;
+			assignedtableau[selected][j]=1;
+			/* Every other zero in the chosen row becomes eliminated */
+			for(k=0;k<ASSIGNCOLS;k++)
+				if((k!=j) &&
+				   (tableau[selected][k]==0))
+					assignedtableau[selected][k]=2;
+		}
+	}
+	/*
+	** Repeat until no more assignments to be made.
+	*/
+} while(numassigns!=0);
+
+/*
+** See if we can leave at this point.
+*/
+if(totnumassigns==ASSIGNROWS) return(totnumassigns);
+
+/*
+** Now step through the array by row.  If you find any unassigned
+** zeros, pick the first in the row.  Eliminate all zeros from
+** that same row & column.  This occurs if there are multiple optima...
+** possibly.
+*/
+for(i=0;i<ASSIGNROWS;i++)
+{       selected=-1;            /* -1 = no unmarked zero found in row i */
+	for(j=0;j<ASSIGNCOLS;j++)
+		if((tableau[i][j]==0L) &&
+		   (assignedtableau[i][j]==0))
+		{       selected=j;
+			break;
+		}
+	if(selected!=-1)
+	{       assignedtableau[i][selected]=1;
+		totnumassigns++;
+		/* Eliminate the remaining zeros of row i ... */
+		for(k=0;k<ASSIGNCOLS;k++)
+			if((k!=selected) &&
+			   (tableau[i][k]==0L))
+				assignedtableau[i][k]=2;
+		/* ... and the remaining zeros of the chosen column */
+		for(k=0;k<ASSIGNROWS;k++)
+			if((k!=i) &&
+			   (tableau[k][selected]==0L))
+				assignedtableau[k][selected]=2;
+	}
+}
+
+return(totnumassigns);
+}
+
+/***********************
+** second_assignments **
+************************
+** Build the revised tableau.  The steps performed are:
+**  1. Flag every row that holds no assigned cell
+**     (no assignedtableau value of 1).
+**  2. Repeatedly: flag each column that has a zero in a flagged
+**     row, then flag each row that holds an assignment in a
+**     flagged column; stop once a round flags no new row.
+**  3. Find the smallest value among the cells lying in a flagged
+**     row and an unflagged column, subtract it from those cells,
+**     and add it to the cells lying in an unflagged row and a
+**     flagged column.
+** The reasoning behind the method is given in the algorithm's
+** source, mentioned in comments toward the beginning of the
+** program.
+*/
+static void second_assignments(long tableau[][ASSIGNCOLS],
+		short assignedtableau[][ASSIGNCOLS])
+{
+int row,col;                            /* Tableau indexes */
+short rowflag[ASSIGNROWS];              /* 1 = row is flagged */
+short colflag[ASSIGNCOLS];              /* 1 = column is flagged */
+long minval;                            /* Smallest qualifying cell */
+ushort hasassign;                       /* Nonzero if row holds an assignment */
+ushort added;                           /* Rows newly flagged this round */
+
+/*
+** Start with nothing flagged.
+*/
+for(row=0;row<ASSIGNROWS;row++)
+	rowflag[row]=0;
+for(col=0;col<ASSIGNCOLS;col++)
+	colflag[col]=0;
+
+/*
+** Step 1: flag the rows without any assignment.
+*/
+for(row=0;row<ASSIGNROWS;row++)
+{
+	hasassign=0;
+	for(col=0;col<ASSIGNCOLS;col++)
+	{
+		if(assignedtableau[row][col]==1)
+		{
+			hasassign++;
+			break;
+		}
+	}
+	if(hasassign==0)
+		rowflag[row]=1;
+}
+
+/*
+** Step 2: spread the flags until a round adds no new row.
+*/
+do {
+	added=0;
+
+	/* Zeros in flagged rows flag their columns */
+	for(row=0;row<ASSIGNROWS;row++)
+	{
+		if(rowflag[row]!=1)
+			continue;
+		for(col=0;col<ASSIGNCOLS;col++)
+			if(tableau[row][col]==0)
+				colflag[col]=1;
+	}
+
+	/* Assignments in flagged columns flag their rows */
+	for(col=0;col<ASSIGNCOLS;col++)
+	{
+		if(colflag[col]!=1)
+			continue;
+		for(row=0;row<ASSIGNROWS;row++)
+		{
+			if((assignedtableau[row][col]==1) &&
+				(rowflag[row]!=1))
+			{
+				rowflag[row]=1;
+				added++;
+			}
+		}
+	}
+} while(added!=0);
+
+/*
+** Step 3a: smallest value over flagged rows / unflagged columns.
+*/
+minval=MAXPOSLONG;
+for(row=0;row<ASSIGNROWS;row++)
+{
+	if(rowflag[row]==0)
+		continue;
+	for(col=0;col<ASSIGNCOLS;col++)
+	{
+		if(colflag[col]==1)
+			continue;
+		if(tableau[row][col]<minval)
+			minval=tableau[row][col];
+	}
+}
+
+/*
+** Step 3b: subtract it from that same set of cells.
+*/
+for(row=0;row<ASSIGNROWS;row++)
+{
+	if(rowflag[row]==0)
+		continue;
+	for(col=0;col<ASSIGNCOLS;col++)
+		if(colflag[col]!=1)
+			tableau[row][col]-=minval;
+}
+
+/*
+** Step 3c: add it to cells in unflagged rows and flagged columns.
+*/
+for(row=0;row<ASSIGNROWS;row++)
+{
+	if(rowflag[row]!=0)
+		continue;
+	for(col=0;col<ASSIGNCOLS;col++)
+		if(colflag[col]==1)
+			tableau[row][col]+=minval;
+}
+}
+
+/********************
+** IDEA Encryption **
+*********************
+** IDEA - International Data Encryption Algorithm.
+** Based on code presented in Applied Cryptography by Bruce Schneier.
+** Which was based on code developed by Xuejia Lai and James L. Massey.
+** Other modifications made by Colin Plumb.
+**
+*/
+
+/***********
+** DoIDEA **
+************
+** Benchmark driver for IDEA.  One timed iteration is an encryption
+** of the whole buffer followed by a decryption of it.  Settings are
+** read from global_ideastruct, and the resulting iterspersec (plus
+** the loops/adjust fields) are stored back into it.
+*/
+void DoIDEA(void)
+{
+IDEAStruct *ideacfg;            /* Shorthand for the global structure */
+int idx;
+IDEAkey enckey,deckey;          /* Encryption / decryption subkeys */
+u16 rawkey[8];                  /* Randomly generated user key */
+ulong ticks;                    /* Accumulated stopwatch ticks */
+double itercount;               /* Iterations executed while timing */
+char *ctx;                      /* Context string handed to ReportError */
+int syserr;
+faruchar *ptext;                /* Original plaintext */
+faruchar *ctext;                /* Encrypted data */
+faruchar *rtext;                /* Decrypted copy of the plaintext */
+
+ideacfg=&global_ideastruct;
+ctx="CPU:IDEA";
+
+/*
+** Re-initialize the random-number generator.
+*/
+randnum((int32)3);
+
+/*
+** Random user key; the encryption subkey table starts out zeroed.
+*/
+idx=0;
+while(idx<8)
+{
+	rawkey[idx]=(u16)(abs_randwc((int32)60000) & 0xFFFF);
+	idx++;
+}
+idx=0;
+while(idx<KEYLEN)
+{
+	enckey[idx]=0;
+	idx++;
+}
+
+/*
+** Expand the user key, then derive the decryption subkeys.
+*/
+en_key_idea(rawkey,enckey);
+de_key_idea(enckey,deckey);
+
+/*
+** Three equally sized buffers:
+**   ptext >>encrypt>> ctext >>decrypt>> rtext
+** so ptext and rtext should end up identical.  Buffers that were
+** already obtained are released before bailing out on a failure.
+*/
+ptext=(faruchar *)AllocateMemory(ideacfg->arraysize,&syserr);
+if(syserr)
+{	ReportError(ctx,syserr);
+	ErrorExit();
+}
+
+ctext=(faruchar *)AllocateMemory(ideacfg->arraysize,&syserr);
+if(syserr)
+{	ReportError(ctx,syserr);
+	FreeMemory((farvoid *)ptext,&syserr);
+	ErrorExit();
+}
+
+rtext=(faruchar *)AllocateMemory(ideacfg->arraysize,&syserr);
+if(syserr)
+{	ReportError(ctx,syserr);
+	FreeMemory((farvoid *)ptext,&syserr);
+	FreeMemory((farvoid *)ctext,&syserr);
+	ErrorExit();
+}
+
+/*
+** The "plaintext" is simply a run of random byte values.
+*/
+for(idx=0;idx<ideacfg->arraysize;idx++)
+	ptext[idx]=(uchar)(abs_randwc(255) & 0xFF);
+
+/*
+** Self-adjustment (only while adjust is still 0): start at 100
+** loops and go up in steps of 10 until one call takes more than
+** global_min_ticks or MAXIDEALOOPS is reached.
+*/
+if(ideacfg->adjust==0)
+{
+	ideacfg->loops=100L;
+	while(ideacfg->loops<MAXIDEALOOPS)
+	{
+		if(DoIDEAIteration(ptext,ctext,rtext,
+		  ideacfg->arraysize,
+		  ideacfg->loops,
+		  enckey,deckey)>global_min_ticks) break;
+		ideacfg->loops+=10L;
+	}
+}
+
+/*
+** The timed run: keep going until the requested number of
+** seconds has been accumulated.
+*/
+ticks=0L;
+itercount=(double)0.0;
+
+do {
+	ticks+=DoIDEAIteration(ptext,ctext,rtext,
+		ideacfg->arraysize,
+		ideacfg->loops,enckey,deckey);
+	itercount+=(double)ideacfg->loops;
+} while(TicksToSecs(ticks)<ideacfg->request_secs);
+
+/*
+** Release the buffers, store the result, and record that the
+** adjustment does not have to be repeated.
+*/
+FreeMemory((farvoid *)ptext,&syserr);
+FreeMemory((farvoid *)ctext,&syserr);
+FreeMemory((farvoid *)rtext,&syserr);
+ideacfg->iterspersec=itercount / TicksToFracSecs(ticks);
+
+if(ideacfg->adjust==0)
+	ideacfg->adjust=1;
+
+}
+
+/********************
+** DoIDEAIteration **
+*********************
+** Time nloops passes over the buffers.  Each pass encrypts plain1
+** into crypt1 with subkeys Z and then decrypts crypt1 into plain2
+** with subkeys DK, stepping through the data in chunks of four
+** u16 words.  Returns the elapsed stopwatch ticks.
+*/
+static ulong DoIDEAIteration(faruchar *plain1,
+			faruchar *crypt1,
+			faruchar *plain2,
+			ulong arraysize,
+			ulong nloops,
+			IDEAkey Z,
+			IDEAkey DK)
+{
+register ulong pass;            /* Pass counter */
+register ulong off;             /* Byte offset into the buffers */
+ulong ticks;
+#ifdef DEBUG
+int mismatch=0;
+#endif
+
+ticks=StartStopwatch();
+
+pass=0;
+while(pass<nloops)
+{
+	/* Encrypt */
+	off=0;
+	while(off<arraysize)
+	{
+		cipher_idea((u16 *)(plain1+off),(u16 *)(crypt1+off),Z);
+		off+=(sizeof(u16)*4);
+	}
+
+	/* Decrypt */
+	off=0;
+	while(off<arraysize)
+	{
+		cipher_idea((u16 *)(crypt1+off),(u16 *)(plain2+off),DK);
+		off+=(sizeof(u16)*4);
+	}
+
+	pass++;
+}
+
+#ifdef DEBUG
+/* Compare the round trip byte by byte; one message per bad byte */
+off=0;
+while(off<arraysize)
+{
+	if(plain1[off]!=plain2[off])
+	{
+		printf("IDEA Error! \n");
+		mismatch=1;
+	}
+	off++;
+}
+if (mismatch==0) printf("IDEA: OK\n");
+#endif
+
+/*
+** Stop the stopwatch and hand back the elapsed ticks.
+*/
+return(StopStopwatch(ticks));
+}
+
+/********
+** mul **
+*********
+** Performs multiplication, modulo (2**16)+1.  This code is structured
+** on the assumption that untaken branches are cheaper than taken
+** branches, and that the compiler doesn't schedule branches.
+**
+** a, b  - the two 16-bit factors.
+** Returns the 16-bit product modulo (2**16)+1.
+**
+** When both factors are nonzero the 32-bit product is split into its
+** low and high 16-bit halves; since 2**16 is congruent to -1 modulo
+** (2**16)+1, the result is low-high, plus one when that subtraction
+** borrows.  When one factor is zero the routine returns 1 minus the
+** other factor, which is what treating that zero as 2**16 yields.
+*/
+static u16 mul(register u16 a, register u16 b)
+{
+register u32 p;
+if(a)
+{       if(b)
+	{       /*
+		** Widen BEFORE multiplying.  The u16 operands would
+		** otherwise be promoted to (signed) int, and a product
+		** such as 0xFFFF*0xFFFF overflows a 32-bit int, which
+		** is undefined behavior.
+		*/
+		p=(u32)a*b;
+		b=low16(p);
+		a=(u16)(p>>16);
+		return(b-a+(b<a));
+	}
+	else
+		return(1-a);
+}
+else
+	return(1-b);
+}
+
+/********
+** inv **
+*********
+** Multiplicative inverse of x modulo (2**16)+1, found with
+** Euclid's GCD algorithm.  The loop body carries two division
+** steps so the two remainder/coefficient pairs never have to be
+** swapped, and some subtracts are changed to adds.
+** 0 and 1 are returned unchanged.
+*/
+static u16 inv(u16 x)
+{
+u16 coefa, coefb;       /* Running coefficients */
+u16 quot, rem;          /* Current quotient and the second remainder */
+
+if(x<=1)
+	return(x);      /* 0 and 1 are self-inverse */
+
+/* First division step, with the modulus as dividend */
+coefb=0x10001 / x;
+rem=0x10001 % x;
+if(rem==1)
+	return(low16(1-coefb));
+
+coefa=1;
+for(;;)
+{
+	/* Step with x as dividend; result accumulates in coefa */
+	quot=x/rem;
+	x%=rem;
+	coefa+=quot*coefb;
+	if(x==1) return(coefa);
+
+	/* Step with rem as dividend; result accumulates in coefb */
+	quot=rem/x;
+	rem%=x;
+	coefb+=quot*coefa;
+	if(rem==1) break;
+}
+return(low16(1-coefb));
+}
+
+/****************
+** en_key_idea **
+*****************
+** Compute IDEA encryption subkeys Z
+**
+** userkey - 8 words of user key material (read only)
+** Z       - receives KEYLEN subkeys
+**
+** The first 8 subkeys are a straight copy of userkey.  Every later
+** subkey is assembled from two neighbouring words of the previous
+** group of 8: one word shifted left by 9 bits OR'ed with the
+** following word shifted right by 7 bits.
+*/
+static void en_key_idea(u16 *userkey, u16 *Z)
+{
+int i,j;
+
+/*
+** shifts
+*/
+/* Subkeys 0..7: copy of the user key.  j is left at 8 afterwards. */
+for(j=0;j<8;j++)
+	Z[j]=*userkey++;
+/*
+** j keeps counting the subkeys produced (8..KEYLEN-1), while i is
+** the position (1..8) inside the group of 8 that Z currently
+** points at.  Each new subkey lands at Z[i+7].
+*/
+for(i=0;j<KEYLEN;j++)
+{       i++;
+	Z[i+7]=(Z[i&7]<<9)| (Z[(i+1) & 7] >> 7);
+	Z+=i&8;         /* i==8: slide Z forward to the group just written */
+	i&=7;           /* ...and wrap i back to 0 */
+}
+return;
+}
+
+/****************
+** de_key_idea **
+*****************
+** Compute IDEA decryption subkeys DK from encryption
+** subkeys Z.
+**
+** Z is read front to back while the temporary table TT is filled
+** from its end towards its start.  Some subkeys pass through inv(),
+** some are negated, and some are copied unchanged.  TT is then
+** copied into DK and wiped.
+** NOTE(review): the pointer arithmetic assumes IDEAkey is an array
+** of KEYLEN u16 values -- confirm against its declaration.
+*/
+static void de_key_idea(IDEAkey Z, IDEAkey DK)
+{
+IDEAkey TT;
+int j;
+u16 t1, t2, t3;
+u16 *p;
+p=(u16 *)(TT+KEYLEN);           /* One past the end of TT */
+
+/*
+** First group of four: stored (in ascending memory order) as
+** t1, t2, t3, inverse.
+*/
+t1=inv(*Z++);
+t2=-*Z++;
+t3=-*Z++;
+*--p=inv(*Z++);
+*--p=t3;
+*--p=t2;
+*--p=t1;
+
+/*
+** Groups of six.  Two subkeys are copied as they are, the other
+** four are treated like the group above -- except that here the
+** two negated subkeys are stored in swapped order (t3 before t2
+** in memory).
+*/
+for(j=1;j<ROUNDS;j++)
+{       t1=*Z++;
+	*--p=*Z++;
+	*--p=t1;
+	t1=inv(*Z++);
+	t2=-*Z++;
+	t3=-*Z++;
+	*--p=inv(*Z++);
+	*--p=t2;
+	*--p=t3;
+	*--p=t1;
+}
+/*
+** Final group of six: same pattern, but the negated subkeys are
+** NOT swapped (t2 before t3 in memory, as in the first group).
+*/
+t1=*Z++;
+*--p=*Z++;
+*--p=t1;
+t1=inv(*Z++);
+t2=-*Z++;
+t3=-*Z++;
+*--p=inv(*Z++);
+*--p=t3;
+*--p=t2;
+*--p=t1;
+/*
+** Copy and destroy temp copy
+*/
+for(j=0,p=TT;j<KEYLEN;j++)
+{       *DK++=*p;
+	*p++=0;
+}
+
+return;
+}
+
+/*
+** MUL(x,y)
+** This #define creates a macro that computes x=x*y modulo 0x10001.
+** Requires temps t16 and t32.  Also requires y to be strictly 16
+** bits.  Here, I am using the simplest form.  May not be the
+** fastest. -- RG
+*/
+/* #define MUL(x,y) (x=mul(low16(x),y)) */
+
+/****************
+** cipher_idea **
+*****************
+** IDEA encryption/decryption algorithm.
+**
+** in  - four 16-bit input words
+** out - receives the four 16-bit output words
+** Z   - subkey table, consumed front to back: six subkeys per
+**       round for ROUNDS rounds, then four more for the output
+**       step.  The same routine encrypts or decrypts depending
+**       on which subkey table is passed.
+**
+** MUL() is a macro defined outside this routine.  A commented-out
+** definition earlier in this file reads
+** (x=mul(low16(x),y)) -- NOTE(review): confirm the active
+** definition in the header.
+*/
+static void cipher_idea(u16 in[4],
+		u16 out[4],
+		register IDEAkey Z)
+{
+register u16 x1, x2, x3, x4, t1, t2;
+/* register u16 t16;
+register u16 t32; */
+int r=ROUNDS;
+
+/* Load the four input words */
+x1=*in++;
+x2=*in++;
+x3=*in++;
+x4=*in;
+
+do {
+	/* Combine each word with one subkey: MUL, add, add, MUL */
+	MUL(x1,*Z++);
+	x2+=*Z++;
+	x3+=*Z++;
+	MUL(x4,*Z++);
+
+	/* Mix the words through two more subkeys, giving t1 and t2 */
+	t2=x1^x3;
+	MUL(t2,*Z++);
+	t1=t2+(x2^x4);
+	MUL(t1,*Z++);
+	t2=t1+t2;
+
+	x1^=t1;
+	x4^=t2;
+
+	/* x2 and x3 trade places while being XORed with t1 / t2 */
+	t2^=x2;
+	x2=x3^t1;
+	x3=t2;
+} while(--r);
+/*
+** Output step.  Note that x3 is written before x2, i.e. the two
+** middle words are exchanged on the way out.
+*/
+MUL(x1,*Z++);
+*out++=x1;
+*out++=x3+*Z++;
+*out++=x2+*Z++;
+MUL(x4,*Z);
+*out=x4;
+return;
+}
+
+/************************
+** HUFFMAN COMPRESSION **
+************************/
+
+/**************
+** DoHuffman **
+***************
+** Benchmark driver for Huffman compression.  As with IDEA, one
+** iteration covers a compression AND a decompression, and the
+** compression part includes building the Huffman tree.
+** Settings are read from global_huffstruct; iterspersec (plus the
+** loops/adjust fields) are stored back into it.
+*/
+void DoHuffman(void)
+{
+HuffStruct *huffcfg;            /* Shorthand for the global data */
+char *ctx;                      /* Context string handed to ReportError */
+int syserr;
+ulong ticks;                    /* Accumulated stopwatch ticks */
+double itercount;               /* Iterations executed while timing */
+farchar *packed;                /* Compressed data */
+farchar *unpacked;              /* Decompressed data */
+farchar *text;                  /* Plaintext */
+
+huffcfg=&global_huffstruct;
+ctx="CPU:Huffman";
+
+/*
+** Get three buffers of arraysize bytes each (plaintext, compressed
+** output, decompressed output) plus room for 512 huff_node entries.
+** The compressed buffer is deliberately made as large as the
+** plaintext.  Whatever was already obtained is released before
+** bailing out on a failure.
+*/
+text=(farchar *)AllocateMemory(huffcfg->arraysize,&syserr);
+if(syserr)
+{
+	ReportError(ctx,syserr);
+	ErrorExit();
+}
+
+packed=(farchar *)AllocateMemory(huffcfg->arraysize,&syserr);
+if(syserr)
+{
+	ReportError(ctx,syserr);
+	FreeMemory(text,&syserr);
+	ErrorExit();
+}
+
+unpacked=(farchar *)AllocateMemory(huffcfg->arraysize,&syserr);
+if(syserr)
+{
+	ReportError(ctx,syserr);
+	FreeMemory(text,&syserr);
+	FreeMemory(packed,&syserr);
+	ErrorExit();
+}
+
+hufftree=(huff_node *)AllocateMemory(sizeof(huff_node) * 512,
+	&syserr);
+if(syserr)
+{
+	ReportError(ctx,syserr);
+	FreeMemory(text,&syserr);
+	FreeMemory(packed,&syserr);
+	FreeMemory(unpacked,&syserr);
+	ErrorExit();
+}
+
+/*
+** Fill the plaintext with words from the word catalog so that it
+** can actually be compressed.  The random number generator is
+** re-initialized first so things repeat.  The last byte of the
+** buffer is a terminating '\0'.
+*/
+randnum((int32)13);
+create_text_block(text,huffcfg->arraysize-1,(ushort)500);
+text[huffcfg->arraysize-1L]='\0';
+plaintextlen=huffcfg->arraysize;
+
+/*
+** Self-adjustment (only while adjust is still 0): start at 100
+** loops and go up in steps of 10 until one call takes more than
+** global_min_ticks or MAXHUFFLOOPS is reached.
+*/
+if(huffcfg->adjust==0)
+{
+	huffcfg->loops=100L;
+	while(huffcfg->loops<MAXHUFFLOOPS)
+	{
+		if(DoHuffIteration(text,
+			packed,
+			unpacked,
+			huffcfg->arraysize,
+			huffcfg->loops,
+			hufftree)>global_min_ticks) break;
+		huffcfg->loops+=10L;
+	}
+}
+
+/*
+** The timed run: keep going until the requested number of
+** seconds has been accumulated.
+*/
+ticks=0L;
+itercount=(double)0.0;
+
+do {
+	ticks+=DoHuffIteration(text,
+		packed,
+		unpacked,
+		huffcfg->arraysize,
+		huffcfg->loops,
+		hufftree);
+	itercount+=(double)huffcfg->loops;
+} while(TicksToSecs(ticks)<huffcfg->request_secs);
+
+/*
+** Release everything, store the result, and record that the
+** adjustment does not have to be repeated.
+*/
+FreeMemory((farvoid *)text,&syserr);
+FreeMemory((farvoid *)packed,&syserr);
+FreeMemory((farvoid *)unpacked,&syserr);
+FreeMemory((farvoid *)hufftree,&syserr);
+huffcfg->iterspersec=itercount / TicksToFracSecs(ticks);
+
+if(huffcfg->adjust==0)
+	huffcfg->adjust=1;
+
+}
+
+/*********************
+** create_text_line **
+**********************
+** Fill the nchars bytes at *dt with randomly chosen words from the
+** word catalog, each followed by one blank.  The final word is cut
+** short so that exactly nchars bytes are written.
+*/
+static void create_text_line(farchar *dt,
+			long nchars)
+{
+long filled;            /* Bytes written to the line so far */
+long chunk;             /* Bytes to copy for the current word */
+long room;              /* Bytes still free in the line */
+char wordbuf[40];       /* Scratch copy of the current word */
+farchar *catword;       /* Word picked from the catalog */
+
+filled=0;
+
+for(;;)
+{
+	/*
+	** Pick a random catalog word and copy it, terminator
+	** included, into the scratch buffer.
+	*/
+	catword=wordcatarray[abs_randwc((int32)WORDCATSIZE)];
+	MoveMemory((farvoid *)wordbuf,
+		(farvoid *)catword,
+		(unsigned long)strlen(catword)+1);
+
+	/*
+	** Turn the terminator into the trailing blank; chunk is the
+	** word length plus that blank.
+	*/
+	chunk=strlen(wordbuf)+1;
+	wordbuf[chunk-1]=' ';
+
+	/*
+	** Trim the word if it would run past the end of the line.
+	*/
+	room=nchars-filled;
+	if((chunk+filled)>nchars)
+		chunk=room;
+
+	/*
+	** Append to the line and advance.
+	*/
+	MoveMemory((farvoid *)dt,(farvoid *)wordbuf,(unsigned long)chunk);
+	filled+=chunk;
+	dt+=chunk;
+
+	/*
+	** Line full?  Then we are done.
+	*/
+	if(!(filled<nchars))
+		break;
+}
+}
+
+/**********************
+** create_text_block **
+***********************
+** Build a block of text randomly loaded with words.  The words
+** come from the wordcatalog (which must be loaded before you
+** call this).
+** *tb points to the memory where the text is to be built.
+** tblen is the # of bytes to put into the text block.  A tblen
+**  of 0 writes nothing.
+** maxlinlen is the maximum length of any line (line end indicated
+**  by a newline character, '\n').
+** NOTE(review): maxlinlen-6 is handed to abs_randwc(), so callers
+**  presumably need maxlinlen > 6 -- confirm against abs_randwc().
+*/
+static void create_text_block(farchar *tb,
+			ulong tblen,
+			ushort maxlinlen)
+{
+ulong bytessofar;       /* # of bytes so far */
+ulong linelen;          /* Line length, newline included */
+
+bytessofar=0L;
+
+/*
+** Pre-tested loop: with tblen==0 the former do/while still ran its
+** body once, clipped linelen to 0 and then stored the newline one
+** byte in front of the buffer.
+*/
+while(bytessofar<tblen)
+{
+	/*
+	** Pick a random length for a line and fill the line.
+	** Make sure the line can fit (haven't exceeded tblen); the
+	** last byte of every line is reserved for the newline.
+	*/
+	linelen=abs_randwc(maxlinlen-6)+6;
+	if((linelen+bytessofar)>tblen)
+		linelen=tblen-bytessofar;
+
+	/*
+	** A line of length 1 is just the newline, so there is no
+	** text to create for it.
+	*/
+	if(linelen>1)
+	{
+		create_text_line(tb,linelen);
+	}
+
+	/*
+	** Replace the last byte of the line with the newline and
+	** step to the start of the next line.
+	*/
+	tb+=linelen-1;
+	*tb++='\n';
+
+	bytessofar+=linelen;
+}
+
+}
+
+/********************
+** DoHuffIteration **
+*********************
+** Perform the huffman benchmark.  For each of the nloops passes
+** this routine
+**  (a) Builds the huffman tree
+**  (b) Compresses the text
+**  (c) Decompresses the text (and, when compiled with DEBUG,
+**      verifies correct decompression)
+**
+** plaintext   - the text to compress (arraysize bytes)
+** comparray   - receives the compressed bit stream
+** decomparray - receives the decompressed text
+** hufftree    - work area for the tree; entries 0..511 are used
+** Returns the stopwatch ticks consumed by all nloops passes.
+**
+** Text bytes are used as indexes into hufftree[].  They are taken
+** through unsigned char, so a byte value above 127 selects leaf
+** 128..255 even where plain char is signed (it would otherwise
+** produce a negative index).
+*/
+static ulong DoHuffIteration(farchar *plaintext,
+	farchar *comparray,
+	farchar *decomparray,
+	ulong arraysize,
+	ulong nloops,
+	huff_node *hufftree)
+{
+int i;                          /* Index */
+long j;                         /* Bigger index */
+int root;                       /* Pointer to huffman tree root */
+float lowfreq1, lowfreq2;       /* Low frequency counters */
+int lowidx1, lowidx2;           /* Indexes of low freq. elements */
+long bitoffset;                 /* Bit offset into text */
+long textoffset;                /* Char offset into text */
+long maxbitoffset;              /* Holds limit of bit offset */
+long bitstringlen;              /* Length of bitstring */
+int c;                          /* Character from plaintext */
+char bitstring[30];             /* Holds bitstring */
+ulong elapsed;                  /* For stopwatch */
+#ifdef DEBUG
+int status=0;
+#endif
+
+/*
+** Start the stopwatch
+*/
+elapsed=StartStopwatch();
+
+/*
+** Do everything for nloops
+*/
+while(nloops--)
+{
+
+/*
+** Calculate the frequency of each byte value. Store the
+** results in what will become the "leaves" of the
+** Huffman tree.  Interior nodes will be built in those
+** nodes greater than node #255.
+*/
+for(i=0;i<256;i++)
+{
+	hufftree[i].freq=(float)0.0;
+	hufftree[i].c=(unsigned char)i;
+}
+
+/* Index through unsigned char so that the leaf number is 0..255 */
+for(j=0;j<arraysize;j++)
+	hufftree[(unsigned char)plaintext[j]].freq+=(float)1.0;
+
+/* Turn the counts into relative frequencies */
+for(i=0;i<256;i++)
+	if(hufftree[i].freq != (float)0.0)
+		hufftree[i].freq/=(float)arraysize;
+
+/* Reset the second half of the tree. Otherwise the loop below that
+** compares the frequencies up to index 512 makes no sense. Some
+** systems automatically zero out memory upon allocation, others (like
+** for example DEC Unix) do not. Depending on this the loop below gets
+** different data and different run times. On our alpha the data that
+** was arbitrarily assigned led to an underflow error at runtime. We
+** use that zeroed-out bits are in fact 0 as a float.
+** Uwe F. Mayer */
+bzero((char *)&(hufftree[256]),sizeof(huff_node)*256);
+/*
+** Build the huffman tree.  First clear all the parent
+** pointers and left/right pointers.  Also, discard all
+** nodes that have a frequency of true 0.  */
+for(i=0;i<512;i++)
+{       if(hufftree[i].freq==(float)0.0)
+		hufftree[i].parent=EXCLUDED;
+	else
+		hufftree[i].parent=hufftree[i].left=hufftree[i].right=-1;
+}
+
+/*
+** Go through the tree. Finding nodes of really low
+** frequency.  Each round joins the two lowest-frequency
+** nodes that have no parent yet under a new node.
+*/
+root=255;                       /* Starting root node-1 */
+while(1)
+{
+	/* 2.0 is above any relative frequency, so it acts as "none yet" */
+	lowfreq1=(float)2.0; lowfreq2=(float)2.0;
+	lowidx1=-1; lowidx2=-1;
+	/*
+	** Find first lowest frequency.
+	*/
+	for(i=0;i<=root;i++)
+		if(hufftree[i].parent<0)
+			if(hufftree[i].freq<lowfreq1)
+			{       lowfreq1=hufftree[i].freq;
+				lowidx1=i;
+			}
+
+	/*
+	** Did we find a lowest value?  If not, the
+	** tree is done.
+	*/
+	if(lowidx1==-1) break;
+
+	/*
+	** Find next lowest frequency
+	*/
+	for(i=0;i<=root;i++)
+		if((hufftree[i].parent<0) && (i!=lowidx1))
+			if(hufftree[i].freq<lowfreq2)
+			{       lowfreq2=hufftree[i].freq;
+				lowidx2=i;
+			}
+
+	/*
+	** If we could only find one item, then that
+	** item is surely the root, and (as above) the
+	** tree is done.
+	*/
+	if(lowidx2==-1) break;
+
+	/*
+	** Attach the two new nodes to the current root, and
+	** advance the current root.
+	*/
+	root++;                 /* New root */
+	hufftree[lowidx1].parent=root;
+	hufftree[lowidx2].parent=root;
+	hufftree[root].freq=lowfreq1+lowfreq2;
+	hufftree[root].left=lowidx1;
+	hufftree[root].right=lowidx2;
+	hufftree[root].parent=-2;       /* Show root */
+}
+
+/*
+** Huffman tree built...compress the plaintext
+**
+** NOTE(review): the walk below stops at a node whose parent is -2.
+** If the text held only one distinct byte value no such node is
+** ever created; the generated text appears to always contain
+** several, but confirm before reusing this routine elsewhere.
+** NOTE(review): bitstring[] holds 30 entries, so codes are assumed
+** to be at most 30 bits long -- nothing here enforces that.
+*/
+bitoffset=0L;                           /* Initialize bit offset */
+for(i=0;i<arraysize;i++)
+{
+	/* Fetch character; unsigned so that c is a leaf index 0..255 */
+	c=(int)(unsigned char)plaintext[i];
+	/*
+	** Build a bit string for byte c by climbing from the leaf
+	** to the root: '0' for a left child, '1' for a right child.
+	*/
+	bitstringlen=0;
+	while(hufftree[c].parent!=-2)
+	{       if(hufftree[hufftree[c].parent].left==c)
+			bitstring[bitstringlen]='0';
+		else
+			bitstring[bitstringlen]='1';
+		c=hufftree[c].parent;
+		bitstringlen++;
+	}
+
+	/*
+	** Step backwards through the bit string, setting
+	** bits in the compressed array as you go.
+	*/
+	while(bitstringlen--)
+	{       SetCompBit((u8 *)comparray,(u32)bitoffset,bitstring[bitstringlen]);
+		bitoffset++;
+	}
+}
+
+/*
+** Compression done.  Perform de-compression: starting at the
+** root, follow one branch per bit until a leaf (left==-1) is
+** reached, emit its character, and start over.
+*/
+maxbitoffset=bitoffset;
+bitoffset=0;
+textoffset=0;
+do {
+	i=root;
+	while(hufftree[i].left!=-1)
+	{       if(GetCompBit((u8 *)comparray,(u32)bitoffset)==0)
+			i=hufftree[i].left;
+		else
+			i=hufftree[i].right;
+		bitoffset++;
+	}
+	decomparray[textoffset]=hufftree[i].c;
+
+#ifdef DEBUG
+	if(hufftree[i].c != plaintext[textoffset])
+	{
+		/* Show error */
+		printf("Error at textoffset %ld\n",textoffset);
+		status=1;
+	}
+#endif
+	textoffset++;
+} while(bitoffset<maxbitoffset);
+
+}       /* End the big while(nloops--) from above */
+
+/*
+** All done
+*/
+#ifdef DEBUG
+  if (status==0) printf("Huffman: OK\n");
+#endif
+return(StopStopwatch(elapsed));
+}
+
+/***************
+** SetCompBit **
+****************
+** Write one bit of the compression array.  Bit number bitoffset
+** is set when bitchar is '1' and cleared for any other value.
+*/
+static void SetCompBit(u8 *comparray,
+		u32 bitoffset,
+		char bitchar)
+{
+u8 *cell;               /* Byte that holds the bit */
+int mask;               /* Single-bit mask within that byte */
+
+/*
+** Byte = bitoffset/8, bit within the byte = bitoffset%8.
+*/
+cell=comparray+(bitoffset>>3);
+mask=1<<(bitoffset % 8);
+
+if(bitchar=='1')
+	*cell|=mask;
+else
+	*cell&=~mask;
+}
+
+/***************
+** GetCompBit **
+****************
+** Read one bit of the compression array.
+** Returns 0 if bit number bitoffset is clear, nonzero otherwise
+** (the nonzero value is the bit's mask, not necessarily 1).
+*/
+static int GetCompBit(u8 *comparray,
+		u32 bitoffset)
+{
+int mask;               /* Single-bit mask within the byte */
+
+/*
+** Byte = bitoffset/8, bit within the byte = bitoffset%8.
+*/
+mask=1<<(bitoffset % 8);
+
+return(mask & comparray[bitoffset>>3]);
+}
+
+/********************************
+** BACK PROPAGATION NEURAL NET **
+*********************************
+** This code is a modified version of the code
+** that was submitted to BYTE Magazine by
+** Maureen Caudill.  It accompanied an article
+** that I CANNOT NOW RECALL.
+** The author's original heading/comment was
+** as follows:
+**
+**  Backpropagation Network
+**  Written by Maureen Caudill
+**  in Think C 4.0 on a Macintosh
+**
+**  (c) Maureen Caudill 1988-1991
+**  This network will accept 5x7 input patterns
+**  and produce 8 bit output patterns.
+**  The source code may be copied or modified without restriction,
+**  but no fee may be charged for its use.
+**
+** ++++++++++++++
+** I have modified the code so that it will work
+** on systems other than a Macintosh -- RG
+*/
+
+/***********
+** DoNNET **
+************
+** Perform the neural net benchmark.
+** Note that this benchmark is one of the few that
+** requires an input file.  That file is "NNET.DAT" and
+** should be on the local directory (from which the
+** benchmark program is launched).
+**
+** Settings are read from global_nnetstruct; iterspersec (plus
+** the loops/adjust fields) are stored back into it.  If
+** read_data_file() reports a failure, ErrorExit() is called.
+*/
+void DoNNET(void)
+{
+NNetStruct *locnnetstruct;      /* Local ptr to global data */
+ulong accumtime;                /* Accumulated stopwatch ticks */
+double iterations;              /* Loops executed while timing */
+
+/*
+** Link to global data
+*/
+locnnetstruct=&global_nnetstruct;
+
+/*
+** Init random number generator.
+** NOTE: It is important that the random number generator
+**  be re-initialized for every pass through this test.
+**  The NNET algorithm uses the random number generator
+**  to initialize the net.  Results are sensitive to
+**  the initial neural net state.
+*/
+randnum((int32)3);
+
+/*
+** Read in the input and output patterns.  We'll do this
+** only once here at the beginning.  These values don't
+** change once loaded.
+*/
+if(read_data_file()!=0)
+   ErrorExit();
+
+
+/*
+** See if we need to perform self adjustment loop.
+*/
+if(locnnetstruct->adjust==0)
+{
+	/*
+	** Do self-adjustment.  This involves initializing the
+	** # of loops and increasing the loop count until we
+	** get a number of loops that we can use.  The generator
+	** is re-initialized before every trial.
+	*/
+	for(locnnetstruct->loops=1L;
+	  locnnetstruct->loops<MAXNNETLOOPS;
+	  locnnetstruct->loops++)
+	  {
+		randnum((int32)3);
+		if(DoNNetIteration(locnnetstruct->loops)
+			>global_min_ticks) break;
+	  }
+}
+
+/*
+** All's well if we get here.  Do the test.
+*/
+accumtime=0L;
+iterations=(double)0.0;
+
+do {
+	randnum((int32)3);    /* Gotta do this for Neural Net */
+	accumtime+=DoNNetIteration(locnnetstruct->loops);
+	iterations+=(double)locnnetstruct->loops;
+} while(TicksToSecs(accumtime)<locnnetstruct->request_secs);
+
+/*
+** Clean up, calculate results, and go home.  Be sure to
+** show that we don't have to rerun adjustment code.
+*/
+locnnetstruct->iterspersec=iterations / TicksToFracSecs(accumtime);
+
+if(locnnetstruct->adjust==0)
+	locnnetstruct->adjust=1;
+
+
+return;
+}
+
+/********************
+** DoNNetIteration **
+*********************
+** Time nloops complete training runs of the neural net and return
+** the elapsed stopwatch ticks.  One run randomizes the weights,
+** zeroes the changes, and then keeps making passes over all
+** patterns until check_out_error() no longer returns F.
+*/
+static ulong DoNNetIteration(ulong nloops)
+{
+ulong ticks;            /* Stopwatch value */
+int p;                  /* Pattern number */
+
+/*
+** The weight randomization and the zeroing of changes are timed
+** together with the learning itself; that way the clock is
+** started and stopped only once, which should reduce jitter.
+*/
+ticks=StartStopwatch();
+for(;nloops!=0;nloops--)
+{
+	randomize_wts();
+	zero_changes();
+	iteration_count=1;
+	learned = F;
+	numpasses = 0;
+	do
+	{
+		p=0;
+		while (p<numpats)
+		{
+			worst_error = 0.0;      /* reset for every pattern */
+			move_wt_changes();      /* previous changes become the momentum */
+			do_forward_pass(p);
+			do_back_pass(p);
+			iteration_count++;
+			p++;
+		}
+		numpasses ++;
+		learned = check_out_error();
+	} while (learned == F);
+#ifdef DEBUG
+printf("Learned in %d passes\n",numpasses);
+#endif
+}
+return(StopStopwatch(ticks));
+}
+
+/*************************
+** do_mid_forward(patt) **
+**************************
+** Forward pass through the middle layer for input pattern patt.
+** Every middle-layer neurode gets the sigmoid of the weighted sum
+** of the pattern's inputs; the result is stored in mid_out[].
+**/
+static void  do_mid_forward(int patt)
+{
+double  net;            /* Weighted sum, then its sigmoid */
+int     unit;           /* Middle-layer neurode */
+int     src;            /* Input index */
+
+unit=0;
+while (unit<MID_SIZE)
+{
+	/* weighted sum of the input signals */
+	net = 0.0;
+	for (src=0; src<IN_SIZE; src++)
+		net += mid_wts[unit][src]*in_pats[patt][src];
+
+	/* sigmoid: f(x) = 1/(1+exp(-x)) */
+	net = 1.0/(1.0+exp(-net));
+	mid_out[unit] = net;
+
+	unit++;
+}
+}
+
+/*********************
+** do_out_forward() **
+**********************
+** Forward pass through the output layer.  Every output neurode
+** gets the sigmoid of the weighted sum of the middle-layer
+** outputs (mid_out[]); the result is stored in out_out[].
+**/
+static void  do_out_forward()
+{
+double net;             /* Weighted sum, then its sigmoid */
+int unit;               /* Output-layer neurode */
+int src;                /* Middle-layer index */
+
+unit=0;
+while (unit<OUT_SIZE)
+{
+	/* weighted sum of the signals coming from the middle layer */
+	net = 0.0;
+	for (src=0; src<MID_SIZE; src++)
+		net += out_wts[unit][src]*mid_out[src];
+
+	/* sigmoid: f(x) = 1/(1+exp(-x)) */
+	net = 1.0/(1.0+exp(-net));
+	out_out[unit] = net;
+
+	unit++;
+}
+}
+
+/*************************
+** display_output(patt) **
+**************************
+** Display the actual output vs. the desired output of the
+** network.
+** Once the training is complete, and the "learned" flag set
+** to TRUE, then display_output sends its output to both
+** the screen and to a text output file.
+**
+** NOTE: This routine has been disabled in the benchmark
+** version. -- RG
+**/
+/*
+void  display_output(int patt)
+{
+int             i;
+
+	fprintf(outfile,"\n Iteration # %d",iteration_count);
+	fprintf(outfile,"\n Desired Output:  ");
+
+	for (i=0; i<OUT_SIZE; i++)
+	{
+		fprintf(outfile,"%6.3f  ",out_pats[patt][i]);
+	}
+	fprintf(outfile,"\n Actual Output:   ");
+
+	for (i=0; i<OUT_SIZE; i++)
+	{
+		fprintf(outfile,"%6.3f  ",out_out[i]);
+	}
+	fprintf(outfile,"\n");
+	return;
+}
+*/
+
+/**********************
+** do_forward_pass() **
+***********************
+** Run one complete forward pass for pattern number patt:
+** the middle layer first, then the output layer.
+** NOTE: the call to display_output() remains disabled in
+**  the benchmark version.
+**/
+static void  do_forward_pass(int patt)
+{
+/* display_output(patt) is intentionally not called here */
+do_mid_forward(patt);   /* middle layer */
+do_out_forward();       /* output layer */
+}
+
+/***********************
+** do_out_error(patt) **
+************************
+** Compute the error for the output layer neurodes.
+** This is simply Desired - Actual.
+**/
+static void do_out_error(int patt)
+{
+int k;                  /* output neurode index */
+double diff;            /* signed error: desired - actual */
+double mag;             /* magnitude of diff */
+double abs_total;       /* sum of magnitudes over the layer */
+double abs_max;         /* largest magnitude seen for this pattern */
+
+abs_total = 0.0;
+abs_max = 0.0;
+for (k = 0; k < OUT_SIZE; k++)
+{
+	out_error[k] = out_pats[patt][k] - out_out[k];
+	diff = out_error[k];
+
+	/*
+	** Fold the magnitude into the per-pattern statistics that
+	** are later used to decide whether training is finished.
+	*/
+	mag = (diff < 0.0) ? -diff : diff;
+	abs_total += mag;
+	if (mag > abs_max)
+		abs_max = mag;
+}
+avg_out_error[patt] = abs_total/OUT_SIZE;
+tot_out_error[patt] = abs_max;  /* despite the name: worst, not total */
+}
+
+/***********************
+** worst_pass_error() **
+************************
+** Find the worst and average error in the pass and save it
+**/
+static void  worst_pass_error()
+{
+int p;                  /* pattern index */
+double peak = 0.0;      /* largest per-pattern worst error */
+double total = 0.0;     /* running sum of per-pattern average errors */
+
+for (p = 0; p < numpats; p++)
+{
+	total += avg_out_error[p];
+	if (tot_out_error[p] > peak)
+		peak = tot_out_error[p];
+}
+
+/* publish the results for the whole pass */
+worst_error = peak;
+average_error = total/numpats;
+}
+
+/*******************
+** do_mid_error() **
+********************
+** Compute the error for the middle layer neurodes
+** This is based on the output errors computed above.
+** Note that the derivative of the sigmoid f(x) is
+**        f'(x) = f(x)(1 - f(x))
+** Recall that f(x) is merely the output of the middle
+** layer neurode on the forward pass.
+**/
+static void do_mid_error()
+{
+int mid_idx, out_idx;
+double back_sum;        /* output error fed back through the weights */
+
+for (mid_idx = 0; mid_idx < MID_SIZE; mid_idx++)
+{
+	back_sum = 0.0;
+	out_idx = 0;
+	while (out_idx < OUT_SIZE)
+	{
+		back_sum += out_wts[out_idx][mid_idx]*out_error[out_idx];
+		out_idx++;
+	}
+
+	/*
+	** Scale by the sigmoid's derivative, f'(I) = f(I)(1 - f(I)),
+	** where f(I) is the value already sitting in mid_out[].
+	*/
+	mid_error[mid_idx] = mid_out[mid_idx]*(1-mid_out[mid_idx])*back_sum;
+}
+}
+
+/*********************
+** adjust_out_wts() **
+**********************
+** Adjust the weights of the output layer.  The error for
+** the output layer has been previously propagated back to
+** the middle layer.
+** Use the Delta Rule with momentum term to adjust the weights.
+**/
+static void adjust_out_wts()
+{
+int out_idx, mid_idx;
+double rate;            /* learning rate (BETA) */
+double momentum;        /* momentum factor (ALPHA) */
+double step;            /* change applied to one weight */
+
+rate = BETA;
+momentum = ALPHA;
+for (out_idx = 0; out_idx < OUT_SIZE; out_idx++)
+	for (mid_idx = 0; mid_idx < MID_SIZE; mid_idx++)
+	{
+		/* delta rule, then add the momentum term */
+		step = rate * out_error[out_idx] * mid_out[mid_idx];
+		step += momentum * out_wt_change[out_idx][mid_idx];
+
+		out_wts[out_idx][mid_idx] += step;
+
+		/* remembered so the next pass can use it as momentum */
+		out_wt_cum_change[out_idx][mid_idx] += step;
+	}
+}
+
+/*************************
+** adjust_mid_wts(patt) **
+**************************
+** Adjust the middle layer weights using the previously computed
+** errors.
+** We use the Generalized Delta Rule with momentum term
+**/
+static void adjust_mid_wts(int patt)
+{
+int mid_idx, in_idx;
+double rate;            /* learning rate (BETA) */
+double momentum;        /* momentum factor (ALPHA) */
+double step;            /* change applied to one weight */
+
+rate = BETA;
+momentum = ALPHA;
+for (mid_idx = 0; mid_idx < MID_SIZE; mid_idx++)
+	for (in_idx = 0; in_idx < IN_SIZE; in_idx++)
+	{
+		/* generalized delta rule, then add the momentum term */
+		step = rate * mid_error[mid_idx] * in_pats[patt][in_idx];
+		step += momentum * mid_wt_change[mid_idx][in_idx];
+
+		mid_wts[mid_idx][in_idx] += step;
+
+		/* remembered so the next pass can use it as momentum */
+		mid_wt_cum_change[mid_idx][in_idx] += step;
+	}
+}
+
+/*******************
+** do_back_pass() **
+********************
+** Process the backward propagation of error through network.
+**/
+void  do_back_pass(int patt)
+{
+/*
+** Both error vectors are computed before any weight is touched:
+** do_mid_error() reads out_wts[], which adjust_out_wts() modifies.
+*/
+do_out_error(patt);     /* output-layer errors for this pattern */
+do_mid_error();         /* fed back to the middle layer */
+adjust_out_wts();       /* update output-layer weights */
+adjust_mid_wts(patt);   /* update middle-layer weights */
+}
+
+
+/**********************
+** move_wt_changes() **
+***********************
+** Move the weight changes accumulated last pass into the wt-change
+** array for use by the momentum term in this pass. Also zero out
+** the accumulating arrays after the move.
+**/
+static void move_wt_changes()
+{
+int row, col;
+
+/* middle layer: hand over the accumulated changes, then reset them */
+for (row = 0; row < MID_SIZE; row++)
+{
+	for (col = 0; col < IN_SIZE; col++)
+	{
+		mid_wt_change[row][col] = mid_wt_cum_change[row][col];
+		mid_wt_cum_change[row][col] = 0.0;
+	}
+}
+
+/* output layer: same treatment */
+for (row = 0; row < OUT_SIZE; row++)
+{
+	for (col = 0; col < MID_SIZE; col++)
+	{
+		out_wt_change[row][col] = out_wt_cum_change[row][col];
+		out_wt_cum_change[row][col] = 0.0;
+	}
+}
+}
+
+/**********************
+** check_out_error() **
+***********************
+** Check to see if the worst error in the output layer is below
+** STOP for this pass through the patterns.  If so, then
+** assume the network has learned acceptably well.  This
+** is simply an arbitrary measure of how well the network
+** has learned -- many other standards are possible.
+** Returns T if learned, F if not yet, and ERR if any pattern's
+** worst error has reached 16.0.
+**/
+static int check_out_error()
+{
+int p;                  /* pattern index */
+int verdict = T;        /* "learned" until shown otherwise */
+int blown_up = F;       /* set when a pattern's worst error reaches 16.0 */
+
+worst_pass_error();     /* refreshes worst_error and average_error */
+
+/*
+** The per-pattern and per-pass diagnostic printf()/fprintf()
+** calls of the original program remain disabled in the
+** benchmark version, with or without DEBUG.
+*/
+for (p = 0; p < numpats; p++)
+{
+	if (tot_out_error[p] >= 16.0)
+		blown_up = T;
+	if (worst_error >= STOP)
+		verdict = F;
+}
+
+/* an out-of-range error overrides the learned/not-learned verdict */
+return (blown_up == T) ? ERR : verdict;
+}
+
+
+/*******************
+** zero_changes() **
+********************
+** Zero out all the wt change arrays
+**/
+static void zero_changes()
+{
+int row, col;
+
+/* middle layer: last-pass changes and the running accumulator */
+row = 0;
+while (row < MID_SIZE)
+{
+	for (col = 0; col < IN_SIZE; col++)
+		mid_wt_change[row][col] = mid_wt_cum_change[row][col] = 0.0;
+	row++;
+}
+
+/* output layer: likewise */
+row = 0;
+while (row < OUT_SIZE)
+{
+	for (col = 0; col < MID_SIZE; col++)
+		out_wt_change[row][col] = out_wt_cum_change[row][col] = 0.0;
+	row++;
+}
+}
+
+
+/********************
+** randomize_wts() **
+*********************
+** Initialize the weights in the middle and output layers to
+** random values between -0.25..+0.25
+** Function rand() returns a value between 0 and 32767.
+**
+** NOTE: Had to make alterations to how the random numbers were
+** created.  -- RG.
+**/
+static void randomize_wts()
+{
+int neurode,i;
+double value;
+/* Fills mid_wts[][] and then out_wts[][] from abs_randwc(); the order of the calls fixes which random value lands where. */
+/*
+** Following not used in benchmark version -- RG
+**
+**        printf("\n Please enter a random number seed (1..32767):  ");
+**        scanf("%d", &i);
+**        srand(i);
+*/
+
+for (neurode = 0; neurode<MID_SIZE; neurode++)  /* middle-layer weights */
+{
+	for(i=0; i<IN_SIZE; i++)
+	{
+	        /* value=(double)abs_randwc(100000L); */
+		value=(double)abs_randwc((int32)100000);
+		value=value/(double)100000.0 - (double) 0.5;    /* scale by 1/100000, then shift down by 0.5 */
+		mid_wts[neurode][i] = value/2;                  /* halve before storing */
+	}
+}
+for (neurode=0; neurode<OUT_SIZE; neurode++)    /* output-layer weights */
+{
+	for(i=0; i<MID_SIZE; i++)
+	{
+	        /* value=(double)abs_randwc(100000L); */
+		value=(double)abs_randwc((int32)100000);
+		value=value/(double)10000.0 - (double) 0.5;     /* NOTE(review): divisor is 10000.0 here but 100000.0 in the middle-layer loop; if abs_randwc(100000) can return up to 99999 (assumed -- confirm) these weights reach about 4.75, not the documented +0.25 */
+		out_wts[neurode][i] = value/2;                  /* halve before storing */
+	}
+}
+/* NOTE(review): correcting the divisor would change the benchmark's workload and its comparability with published results -- decide deliberately. */
+return;
+}
+
+
+/*********************
+** read_data_file() **
+**********************
+** Read in the input data file and store the patterns in
+** in_pats and out_pats.
+** The format for the data file is as follows:
+**
+** line#   data expected
+** -----   ------------------------------
+** 1               In-X-size,in-y-size,out-size
+** 2               number of patterns in file
+** 3               1st X row of 1st input pattern
+** 4..             following rows of 1st input pattern
+**                 in-x+2  y-out pattern
+**                                 1st X row of 2nd pattern
+**                 etc.
+**
+** Each row of data is separated by commas or spaces.
+** The data is expected to be ascii text corresponding to
+** either a +1 or a 0.
+**
+** Sample input for a 1-pattern file (The comments to the
+** right may NOT be in the file unless more sophisticated
+** parsing of the input is done.):
+**
+** 5,7,8                      input is 5x7 grid, output is 8 bits
+** 1                          one pattern in file
+** 0,1,1,1,0                  beginning of pattern for "O"
+** 1,0,0,0,1
+** 1,0,0,0,1
+** 1,0,0,0,1
+** 1,0,0,0,1
+** 1,0,0,0,0
+** 0,1,1,1,0
+** 0,1,0,0,1,1,1,1            ASCII code for "O" -- 0100 1111
+**
+** Clearly, this simple scheme can be expanded or enhanced
+** any way you like.
+**
+** Returns -1 if any file error occurred, otherwise 0.
+**/
+static int read_data_file()
+{
+/*
+** Reads the pattern file named by inpath into in_pats[][] and
+** out_pats[][] and sets numpats (capped at MAXPATS).
+**
+** The reader is hard-wired to 5 values per input row and 8 output
+** values per pattern, so the geometry announced on line one must
+** match the compiled-in IN_X_SIZE x IN_Y_SIZE grid and OUT_SIZE;
+** anything else is rejected instead of being stored out of bounds.
+**
+** Returns 0 on success, -1 on any open/parse/validation failure.
+** The file is closed on every path once it has been opened.
+*/
+FILE *infile;
+
+int xinsize,yinsize,youtsize;
+int patt, element, i, row;
+int vals_read;
+int val1,val2,val3,val4,val5,val6,val7,val8;
+
+/* printf("\n Opening and retrieving data from file."); */
+
+infile = fopen(inpath, "r");
+if (infile == NULL)
+{
+	printf("\n CPU:NNET--error in opening file!");
+	return -1 ;
+}
+vals_read =fscanf(infile,"%d  %d  %d",&xinsize,&yinsize,&youtsize);
+if (vals_read != 3)
+{
+	printf("\n CPU:NNET -- Should read 3 items in line one; did read %d",vals_read);
+	fclose(infile);
+	return -1;
+}
+/*
+** xinsize and yinsize drive the indexing into in_pats[][] below,
+** so they must not be trusted before this check.
+*/
+if (xinsize != IN_X_SIZE || yinsize != IN_Y_SIZE || youtsize != OUT_SIZE)
+{
+	printf("\n CPU:NNET -- data file is %dx%d/%d; benchmark expects %dx%d/%d",
+		xinsize,yinsize,youtsize,
+		(int)IN_X_SIZE,(int)IN_Y_SIZE,(int)OUT_SIZE);
+	fclose(infile);
+	return -1;
+}
+vals_read=fscanf(infile,"%d",&numpats);
+if (vals_read !=1)
+{
+	printf("\n CPU:NNET -- Should read 1 item in line 2; did read %d",vals_read);
+	fclose(infile);
+	return -1;
+}
+/* a pattern count below 1 would later divide by zero in worst_pass_error() */
+if (numpats < 1)
+{
+	printf("\n CPU:NNET -- pattern count must be at least 1; did read %d",numpats);
+	fclose(infile);
+	return -1;
+}
+if (numpats > MAXPATS)
+	numpats = MAXPATS;
+
+for (patt=0; patt<numpats; patt++)
+{
+	/* input grid, one row of five values at a time */
+	for (row = 0; row<yinsize; row++)
+	{
+		vals_read = fscanf(infile,"%d  %d  %d  %d  %d",
+			&val1, &val2, &val3, &val4, &val5);
+		if (vals_read != 5)
+		{
+			printf ("\n CPU:NNET -- failure in reading input!");
+			fclose(infile);
+			return -1;
+		}
+		element=row*xinsize;
+
+		in_pats[patt][element++] = (double) val1;
+		in_pats[patt][element++] = (double) val2;
+		in_pats[patt][element++] = (double) val3;
+		in_pats[patt][element++] = (double) val4;
+		in_pats[patt][element++] = (double) val5;
+	}
+	/* pull the inputs into the range 0.1 .. 0.9 */
+	for (i=0;i<IN_SIZE; i++)
+	{
+		if (in_pats[patt][i] >= 0.9)
+			in_pats[patt][i] = 0.9;
+		if (in_pats[patt][i] <= 0.1)
+			in_pats[patt][i] = 0.1;
+	}
+	/* desired output: eight values on one line */
+	vals_read = fscanf(infile,"%d  %d  %d  %d  %d  %d  %d  %d",
+		&val1, &val2, &val3, &val4, &val5, &val6, &val7, &val8);
+	if (vals_read != 8)
+	{
+		/* previously unchecked: stale values were stored silently */
+		printf ("\n CPU:NNET -- failure in reading input!");
+		fclose(infile);
+		return -1;
+	}
+	element = 0;
+	out_pats[patt][element++] = (double) val1;
+	out_pats[patt][element++] = (double) val2;
+	out_pats[patt][element++] = (double) val3;
+	out_pats[patt][element++] = (double) val4;
+	out_pats[patt][element++] = (double) val5;
+	out_pats[patt][element++] = (double) val6;
+	out_pats[patt][element++] = (double) val7;
+	out_pats[patt][element++] = (double) val8;
+}
+
+/* printf("\n Closing the input file now. "); */
+
+fclose(infile);
+return(0);
+}
+
+/*********************
+** initialize_net() **
+**********************
+** Do all the initialization stuff before beginning
+*/
+/*
+static int initialize_net()
+{
+int err_code;
+
+randomize_wts();
+zero_changes();
+err_code = read_data_file();
+iteration_count = 1;
+return(err_code);
+}
+*/
+
+/**********************
+** display_mid_wts() **
+***********************
+** Display the weights on the middle layer neurodes
+** NOTE: This routine is not used in the benchmark
+**  test -- RG
+**/
+/* static void display_mid_wts()
+{
+int             neurode, weight, row, col;
+
+fprintf(outfile,"\n Weights of Middle Layer neurodes:");
+
+for (neurode=0; neurode<MID_SIZE; neurode++)
+{
+	fprintf(outfile,"\n  Mid Neurode # %d",neurode);
+	for (row=0; row<IN_Y_SIZE; row++)
+	{
+		fprintf(outfile,"\n ");
+		for (col=0; col<IN_X_SIZE; col++)
+		{
+			weight = IN_X_SIZE * row + col;
+			fprintf(outfile," %8.3f ", mid_wts[neurode][weight]);
+		}
+	}
+}
+return;
+}
+*/
+/**********************
+** display_out_wts() **
+***********************
+** Display the weights on the output layer neurodes
+** NOTE: This code is not used in the benchmark
+**  test -- RG
+*/
+/* void  display_out_wts()
+{
+int             neurode, weight;
+
+	fprintf(outfile,"\n Weights of Output Layer neurodes:");
+
+	for (neurode=0; neurode<OUT_SIZE; neurode++)
+	{
+		fprintf(outfile,"\n  Out Neurode # %d \n",neurode);
+		for (weight=0; weight<MID_SIZE; weight++)
+		{
+			fprintf(outfile," %8.3f ", out_wts[neurode][weight]);
+		}
+	}
+	return;
+}
+*/
+
+/***********************
+**  LU DECOMPOSITION  **
+** (Linear Equations) **
+************************
+** These routines come from "Numerical Recipes in Pascal".
+** Note that, as in the assignment algorithm, though we
+** separately define LUARRAYROWS and LUARRAYCOLS, the two
+** must be the same value (this routine depends on a square
+** matrix).
+*/
+
+/*********
+** DoLU **
+**********
+** Perform the LU decomposition benchmark.
+*/
+void DoLU(void)
+{
+/*
+** Runs the LU decomposition benchmark and stores the measured rate
+** in global_lustruct.iterspersec.
+**
+** Steps: build one seed problem (a, b); if global_lustruct.adjust is
+** zero, grow the number of working arrays until one DoLUIteration()
+** call takes more than global_min_ticks; then repeat iterations
+** until request_secs have been accumulated.
+**
+** Every AllocateMemory() result is checked through systemerror, the
+** same way the original code checked abase/bbase.  ErrorExit() is
+** assumed not to return -- confirm against its definition.
+*/
+LUStruct *loclustruct;  /* Local pointer to global data */
+char *errorcontext;     /* Tag handed to ReportError() */
+int systemerror;        /* Error code from the memory routines */
+fardouble *a;           /* Seed coefficient matrix (flat storage) */
+fardouble *b;           /* Seed right-hand side */
+fardouble *abase;       /* Working copies of a */
+fardouble *bbase;       /* Working copies of b */
+LUdblptr ptra;          /* For converting ptr to 2D array */
+int n;
+int i;
+ulong accumtime;
+double iterations;
+
+/*
+** Link to global data
+*/
+loclustruct=&global_lustruct;
+
+/*
+** Set error context.
+*/
+errorcontext="FPU:LU";
+
+/*
+** Nothing is held yet; LUFreeMem() skips NULL working arrays.
+*/
+abase=(fardouble *)NULL;
+bbase=(fardouble *)NULL;
+
+/*
+** Our first step is to build a "solvable" problem.  This
+** will become the "seed" set that all others will be
+** derived from (i.e., we'll simply copy these arrays
+** into the others).
+*/
+a=(fardouble *)AllocateMemory(sizeof(double) * LUARRAYCOLS * LUARRAYROWS,
+		&systemerror);
+if(systemerror)
+{       ReportError(errorcontext,systemerror);
+	ErrorExit();
+}
+b=(fardouble *)AllocateMemory(sizeof(double) * LUARRAYROWS,
+		&systemerror);
+if(systemerror)
+{       ReportError(errorcontext,systemerror);
+	FreeMemory((farvoid *)a,&systemerror);
+	ErrorExit();
+}
+n=LUARRAYROWS;
+
+/*
+** We need to allocate a temp vector that is used by the LU
+** algorithm.  This removes the allocation routine from the
+** timing.
+*/
+LUtempvv=(fardouble *)AllocateMemory(sizeof(double)*LUARRAYROWS,
+	&systemerror);
+if(systemerror)
+{       ReportError(errorcontext,systemerror);
+	FreeMemory((farvoid *)a,&systemerror);
+	FreeMemory((farvoid *)b,&systemerror);
+	ErrorExit();
+}
+
+/*
+** Build a problem to be solved.
+*/
+ptra.ptrs.p=a;                  /* Gotta coerce linear array to 2D array */
+build_problem(*ptra.ptrs.ap,n,b);
+
+/*
+** Now that we have a problem built, see if we need to do
+** auto-adjust.  If so, repeatedly call the DoLUIteration routine,
+** increasing the number of solutions per iteration as you go.
+*/
+if(loclustruct->adjust==0)
+{
+	loclustruct->numarrays=0;
+	for(i=1;i<=MAXLUARRAYS;i++)
+	{
+		/*
+		** Room for i+1 arrays is requested although only i are
+		** used; kept as in the original so the benchmark's
+		** memory footprint does not change.
+		*/
+		abase=(fardouble *)AllocateMemory(sizeof(double) *
+			LUARRAYCOLS*LUARRAYROWS*(i+1),&systemerror);
+		if(systemerror)
+		{       ReportError(errorcontext,systemerror);
+			LUFreeMem(a,b,(fardouble *)NULL,(fardouble *)NULL);
+			ErrorExit();
+		}
+		bbase=(fardouble *)AllocateMemory(sizeof(double) *
+			LUARRAYROWS*(i+1),&systemerror);
+		if(systemerror)
+		{       ReportError(errorcontext,systemerror);
+			LUFreeMem(a,b,abase,(fardouble *)NULL);
+			ErrorExit();
+		}
+		if(DoLUIteration(a,b,abase,bbase,i)>global_min_ticks)
+		{       loclustruct->numarrays=i;
+			break;
+		}
+		/*
+		** Not enough arrays...free them all and try again.
+		** The pointers are cleared so that nothing below can
+		** release the same block a second time.
+		*/
+		FreeMemory((farvoid *)abase,&systemerror);
+		abase=(fardouble *)NULL;
+		FreeMemory((farvoid *)bbase,&systemerror);
+		bbase=(fardouble *)NULL;
+	}
+	/*
+	** Were we able to do it?
+	*/
+	if(loclustruct->numarrays==0)
+	{       printf("FPU:LU -- Array limit reached\n");
+		/* abase and bbase are NULL here: already released above */
+		LUFreeMem(a,b,abase,bbase);
+		ErrorExit();
+	}
+}
+else
+{       /*
+	** Don't need to adjust -- just allocate the proper
+	** number of arrays and proceed.
+	*/
+	abase=(fardouble *)AllocateMemory(sizeof(double) *
+		LUARRAYCOLS*LUARRAYROWS*loclustruct->numarrays,
+		&systemerror);
+	if(systemerror)
+	{       ReportError(errorcontext,systemerror);
+		LUFreeMem(a,b,(fardouble *)NULL,(fardouble *)NULL);
+		ErrorExit();
+	}
+	bbase=(fardouble *)AllocateMemory(sizeof(double) *
+		LUARRAYROWS*loclustruct->numarrays,&systemerror);
+	if(systemerror)
+	{
+		ReportError(errorcontext,systemerror);
+		LUFreeMem(a,b,abase,(fardouble *)NULL);
+		ErrorExit();
+	}
+}
+/*
+** All's well if we get here.  Do the test.
+*/
+accumtime=0L;
+iterations=(double)0.0;
+
+do {
+	accumtime+=DoLUIteration(a,b,abase,bbase,
+		loclustruct->numarrays);
+	iterations+=(double)loclustruct->numarrays;
+} while(TicksToSecs(accumtime)<loclustruct->request_secs);
+
+/*
+** Clean up, calculate results, and go home.  Be sure to
+** show that we don't have to rerun adjustment code.
+*/
+loclustruct->iterspersec=iterations / TicksToFracSecs(accumtime);
+
+if(loclustruct->adjust==0)
+	loclustruct->adjust=1;
+
+LUFreeMem(a,b,abase,bbase);
+return;
+}
+
+/**************
+** LUFreeMem **
+***************
+** Release memory associated with LU benchmark.
+*/
+static void LUFreeMem(fardouble *a, fardouble *b,
+			fardouble *abase,fardouble *bbase)
+{
+int status;     /* receives FreeMemory()'s error code; not inspected */
+
+/* seed matrix, seed vector and the LU scratch vector: always released */
+FreeMemory((farvoid *)a,&status);
+FreeMemory((farvoid *)b,&status);
+FreeMemory((farvoid *)LUtempvv,&status);
+
+/* working arrays: only when the caller passes a non-NULL pointer */
+if(abase)
+	FreeMemory((farvoid *)abase,&status);
+if(bbase)
+	FreeMemory((farvoid *)bbase,&status);
+}
+
+/******************
+** DoLUIteration **
+*******************
+** Perform an iteration of the LU decomposition benchmark.
+** An iteration refers to the repeated solution of several
+** identical matrices.
+*/
+static ulong DoLUIteration(fardouble *a,fardouble *b,
+		fardouble *abase, fardouble *bbase,
+		ulong numarrays)
+{
+fardouble *mat;         /* current working matrix inside abase */
+fardouble *rhs;         /* current working vector inside bbase */
+LUdblptr view;          /* lets a flat pointer be passed as a 2D array */
+ulong start;            /* stopwatch reading at the start of the timed part */
+ulong idx,k;            /* Indexes */
+
+/*
+** Untimed part: overwrite every working system with a fresh
+** copy of the seed matrix a and seed vector b.
+*/
+for(idx=0;idx<numarrays;idx++)
+{
+	mat=abase+idx*LUARRAYROWS*LUARRAYCOLS;
+	rhs=bbase+idx*LUARRAYROWS;
+	for(k=0;k<LUARRAYROWS*LUARRAYCOLS;k++)
+		mat[k]=a[k];
+	for(k=0;k<LUARRAYROWS;k++)
+		rhs[k]=b[k];
+}
+
+/*
+** Timed part: solve each working system in turn.
+*/
+start=StartStopwatch();
+for(idx=0;idx<numarrays;idx++)
+{
+	mat=abase+idx*LUARRAYROWS*LUARRAYCOLS;
+	rhs=bbase+idx*LUARRAYROWS;
+	view.ptrs.p=mat;
+	lusolve(*view.ptrs.ap,LUARRAYROWS,rhs);
+}
+
+return(StopStopwatch(start));
+}
+
+/******************
+** build_problem **
+*******************
+** Constructs a solvable set of linear equations.  It does this by
+** creating an identity matrix, then loading the solution vector
+** with random numbers.  After that, the identity matrix and
+** solution vector are randomly "scrambled".  Scrambling is
+** done by (a) randomly selecting a row and multiplying that
+** row by a random number and (b) adding one randomly-selected
+** row to another.
+*/
+static void build_problem(double a[][LUARRAYCOLS],
+		int n,
+		double b[LUARRAYROWS])
+{
+long i,j,k,k1;  /* Indexes */
+double rcon;     /* +1.0 or -1.0: sign applied when one row is added to another */
+/* Fills a[][] and b[] in place; every value comes from the generator reseeded just below, so the call order fixes the problem. */
+/*
+** Reset random number generator
+*/
+/* randnum(13L); */
+randnum((int32)13);
+
+/*
+** Build a diagonal matrix: a random value on each diagonal
+** element, zero everywhere else.  In the same sweep, load
+** b[] with random values.
+*/
+for(i=0;i<n;i++)
+{       /* b[i]=(double)(abs_randwc(100L)+1L); */
+	b[i]=(double)(abs_randwc((int32)100)+(int32)1);
+	for(j=0;j<n;j++)
+		if(i==j)
+		        /* a[i][j]=(double)(abs_randwc(1000L)+1L); */
+			a[i][j]=(double)(abs_randwc((int32)1000)+(int32)1);
+		else
+			a[i][j]=(double)0.0;
+}
+/* With DEBUG defined, print b[i]/a[i][i] for each row of the unscrambled system. */
+#ifdef DEBUG
+printf("Problem:\n");
+for(i=0;i<n;i++)
+{
+/*
+	for(j=0;j<n;j++)
+		printf("%6.2f ",a[i][j]);
+*/
+	printf("%.0f/%.0f=%.2f\t",b[i],a[i][i],b[i]/a[i][i]);
+/*
+        printf("\n");
+*/
+}
+#endif
+
+/*
+** Scramble.  Do this 8n times.  See comment above for
+** a description of the scrambling process.
+*/
+
+for(i=0;i<8*n;i++)
+{
+	/*
+	** Pick a row and a random constant.  Multiply
+	** all elements in the row by the constant.
+	** (This step is disabled: the code below is commented out.)
+	*/
+ /*       k=abs_randwc((long)n);
+	rcon=(double)(abs_randwc(20L)+1L);
+	for(j=0;j<n;j++)
+		a[k][j]=a[k][j]*rcon;
+	b[k]=b[k]*rcon;
+*/
+	/*
+	** Pick two random rows and add second to
+	** first.  Note that we multiply by minus 1 when
+	** k>k1 so that we get a subtraction operation.
+	*/
+        /* k=abs_randwc((long)n); */
+        /* k1=abs_randwc((long)n); */
+	k=abs_randwc((int32)n);
+	k1=abs_randwc((int32)n);
+	if(k!=k1)       /* a row is never combined with itself */
+	{
+		if(k<k1) rcon=(double)1.0;
+			else rcon=(double)-1.0;
+		for(j=0;j<n;j++)
+			a[k][j]+=a[k1][j]*rcon;;
+		b[k]+=b[k1]*rcon;       /* same row operation on the right-hand side */
+	}
+}
+
+return;
+}
+
+
+/***********
+** ludcmp **
+************
+** From the procedure of the same name in "Numerical Recipes in Pascal",
+** by Press, Flannery, Teukolsky, and Vetterling.
+** Given an nxn matrix a[], this routine replaces it by the LU
+** decomposition of a rowwise permutation of itself.  a[] and n
+** are input.  a[] is output, modified as follows:
+**   --                       --
+**  |  b(1,1) b(1,2) b(1,3)...  |
+**  |  a(2,1) b(2,2) b(2,3)...  |
+**  |  a(3,1) a(3,2) b(3,3)...  |
+**  |  a(4,1) a(4,2) a(4,3)...  |
+**  |  ...                      |
+**   --                        --
+**
+** Where the b(i,j) elements form the upper triangular matrix of the
+** LU decomposition, and the a(i,j) elements form the lower triangular
+** elements.  The LU decomposition is calculated so that we don't
+** need to store the a(i,i) elements (which would have laid along the
+** diagonal and would have all been 1).
+**
+** indx[] is an output vector that records the row permutation
+** effected by the partial pivoting; d is output as +/-1 depending
+** on whether the number of row interchanges was even or odd,
+** respectively.
+** Returns 0 if matrix singular, else returns 1.
+*/
+static int ludcmp(double a[][LUARRAYCOLS],
+		int n,
+		int indx[],
+		int *d)
+{
+/* Decomposes a[][] in place; uses the file-level LUtempvv[] as per-row scale storage. */
+double big;     /* Holds largest element value */
+double sum;
+double dum;     /* Holds dummy value */
+int i,j,k;      /* Indexes */
+int imax=0;     /* Row index of the best pivot candidate in the current column */
+double tiny;    /* A really small number */
+
+tiny=(double)1.0e-20;
+
+*d=1;           /* No interchanges yet */
+/* Pass 1: for every row, store 1/(largest magnitude in the row) as its scale factor. */
+for(i=0;i<n;i++)
+{       big=(double)0.0;
+	for(j=0;j<n;j++)
+		if((double)fabs(a[i][j]) > big)
+			big=fabs(a[i][j]);
+	/* Bail out on singular matrix */
+	if(big==(double)0.0) return(0);         /* row is all zeros */
+	LUtempvv[i]=1.0/big;
+}
+
+/*
+** Crout's algorithm...loop over columns.
+*/
+for(j=0;j<n;j++)
+{       if(j!=0)
+		for(i=0;i<j;i++)                /* elements above the diagonal */
+		{       sum=a[i][j];
+			if(i!=0)
+				for(k=0;k<i;k++)
+					sum-=(a[i][k]*a[k][j]);
+			a[i][j]=sum;
+		}
+	big=(double)0.0;
+	for(i=j;i<n;i++)                        /* diagonal and below; also search for the pivot */
+	{       sum=a[i][j];
+		if(j!=0)
+			for(k=0;k<j;k++)
+				sum-=a[i][k]*a[k][j];
+		a[i][j]=sum;
+		dum=LUtempvv[i]*fabs(sum);      /* scaled magnitude of this candidate */
+		if(dum>=big)                    /* >= : on a tie the later row wins */
+		{       big=dum;
+			imax=i;
+		}
+	}
+	if(j!=imax)             /* Interchange rows if necessary */
+	{       for(k=0;k<n;k++)
+		{       dum=a[imax][k];
+			a[imax][k]=a[j][k];
+			a[j][k]=dum;
+		}
+		*d=-*d;         /* Change parity of d */
+		dum=LUtempvv[imax];
+		LUtempvv[imax]=LUtempvv[j]; /* Don't forget scale factor */
+		LUtempvv[j]=dum;
+	}
+	indx[j]=imax;           /* record which row was swapped into position j */
+	/*
+	** If the pivot element is zero, the matrix is singular
+	** (at least as far as the precision of the machine
+	** is concerned.)  We'll take the original author's
+	** recommendation and replace 0.0 with "tiny".
+	*/
+	if(a[j][j]==(double)0.0)
+		a[j][j]=tiny;
+
+	if(j!=(n-1))            /* nothing lies below the last pivot */
+	{       dum=1.0/a[j][j];
+		for(i=j+1;i<n;i++)
+			a[i][j]=a[i][j]*dum;    /* divide the sub-column by the pivot */
+	}
+}
+
+return(1);
+}
+
+/***********
+** lubksb **
+************
+** Also from "Numerical Recipes in Pascal".
+** This routine solves the set of n linear equations A X = B.
+** Here, a[][] is input, not as the matrix A, but as its
+** LU decomposition, created by the routine ludcmp().
+** Indx[] is input as the permutation vector returned by ludcmp().
+**  b[] is input as the right-hand side and returns the
+** solution vector X.
+** a[], n, and indx are not modified by this routine and
+** can be left in place for different values of b[].
+** This routine takes into account the possibility that b will
+** begin with many zero elements, so it is efficient for use in
+** matrix inversion.
+*/
+static void lubksb( double a[][LUARRAYCOLS],
+		int n,
+		int indx[LUARRAYROWS],
+		double b[LUARRAYROWS])
+{
+/* Overwrites b[] with the solution; a[][] and indx[] are only read. */
+int i,j;        /* Indexes */
+int ip;         /* Row index read from indx[i]; used to index b[] */
+int ii;         /* -1 until a nonzero sum is seen, then the index where that happened */
+double sum;
+
+/*
+** When ii is set to a non-negative value, it will become
+** the index of the first nonvanishing element of b[].
+** We now do the forward substitution. The only wrinkle
+** is to unscramble the permutation as we go.
+*/
+ii=-1;
+for(i=0;i<n;i++)
+{       ip=indx[i];
+	sum=b[ip];
+	b[ip]=b[i];     /* undo the row interchange recorded for step i */
+	if(ii!=-1)
+		for(j=ii;j<i;j++)
+			sum=sum-a[i][j]*b[j];
+	else
+		/*
+		** If a nonzero element is encountered, we have
+		** to do the sums in the loop above.
+		*/
+		if(sum!=(double)0.0)
+			ii=i;
+	b[i]=sum;
+}
+/*
+** Do backsubstitution
+*/
+for(i=(n-1);i>=0;i--)
+{
+	sum=b[i];
+	if(i!=(n-1))
+		for(j=(i+1);j<n;j++)
+			sum=sum-a[i][j]*b[j];
+	b[i]=sum/a[i][i];       /* divide by the diagonal element of the decomposition */
+}
+return;
+}
+
+/************
+** lusolve **
+*************
+** Solve a linear set of equations: A x = b
+** Original matrix A will be destroyed by this operation.
+** Returns 0 if matrix is singular, 1 otherwise.
+*/
+static int lusolve(double a[][LUARRAYCOLS],
+		int n,
+		double b[LUARRAYROWS])
+{
+int perm[LUARRAYROWS];  /* row permutation filled in by ludcmp() */
+int parity;             /* interchange parity from ludcmp(); not used here */
+#ifdef DEBUG
+int row;
+#endif
+
+/* Decompose a[][] in place; a singular matrix cannot be solved */
+if(!ludcmp(a,n,perm,&parity))
+	return(0);
+
+/* Substitute, leaving the solution in b[] */
+lubksb(a,n,perm,b);
+
+#ifdef DEBUG
+/* Print the solution vector on one line (matrix dump stays disabled) */
+printf("Solution:\n");
+for(row=0;row<n;row++)
+	printf("%6.2f\t",b[row]);
+printf("\n");
+#endif
+
+return(1);
+}
diff --git a/nbench1.h b/nbench1.h
new file mode 100644
index 0000000..13a5907
--- /dev/null
+++ b/nbench1.h
@@ -0,0 +1,428 @@
+/*
+** nbench1.h
+** Header for nbench1.c
+** BYTEmark (tm)
+** BYTE's Native Mode Benchmarks
+** Rick Grehan, BYTE Magazine
+**
+** Creation:
+** Revision: 3/95;10/95
+**
+** DISCLAIMER
+** The source, executable, and documentation files that comprise
+** the BYTEmark benchmarks are made available on an "as is" basis.
+** This means that we at BYTE Magazine have made every reasonable
+** effort to verify that the there are no errors in the source and
+** executable code.  We cannot, however, guarantee that the programs
+** are error-free.  Consequently, McGraw-HIll and BYTE Magazine make
+** no claims in regard to the fitness of the source code, executable
+** code, and documentation of the BYTEmark.
+**  Furthermore, BYTE Magazine, McGraw-Hill, and all employees
+** of McGraw-Hill cannot be held responsible for any damages resulting
+** from the use of this code or the results obtained from using
+** this code.
+*/
+
+/*
+** DEFINES
+*/
+/* #define DEBUG */
+
+/*
+** EXTERNALS
+*/
+extern ulong global_min_ticks;
+
+extern SortStruct global_numsortstruct;
+extern SortStruct global_strsortstruct;
+extern BitOpStruct global_bitopstruct;
+extern EmFloatStruct global_emfloatstruct;
+extern FourierStruct global_fourierstruct;
+extern AssignStruct global_assignstruct;
+extern IDEAStruct global_ideastruct;
+extern HuffStruct global_huffstruct;
+extern NNetStruct global_nnetstruct;
+extern LUStruct global_lustruct;
+
+/* External PROTOTYPES */
+/*extern unsigned long abs_randwc(unsigned long num);*/     /* From MISC */
+/*extern long randnum(long lngval);*/
+extern int32 randwc(int32 num);
+extern u32 abs_randwc(u32 num);
+extern int32 randnum(int32 lngval);
+
+extern farvoid *AllocateMemory(unsigned long nbytes,    /* From SYSSPEC */
+	int *errorcode);
+extern void FreeMemory(farvoid *mempointer,
+	int *errorcode);
+extern void MoveMemory(farvoid *destination,
+		farvoid *source, unsigned long nbytes);
+extern void ReportError(char *context, int errorcode);
+extern void ErrorExit();
+extern unsigned long StartStopwatch();
+extern unsigned long StopStopwatch(unsigned long startticks);
+extern unsigned long TicksToSecs(unsigned long tickamount);
+extern double TicksToFracSecs(unsigned long tickamount);
+
+/*****************
+** NUMERIC SORT **
+*****************/
+
+/*
+** PROTOTYPES
+*/
+void DoNumSort(void);
+static ulong DoNumSortIteration(farlong *arraybase,
+		ulong arraysize,
+		uint numarrays);
+static void LoadNumArrayWithRand(farlong *array,
+		ulong arraysize,
+		uint numarrays);
+static void NumHeapSort(farlong *array,
+		ulong bottom,
+		ulong top);
+static void NumSift(farlong *array,
+		ulong i,
+		ulong j);
+
+
+/****************
+** STRING SORT **
+*****************
+*/
+
+
+/*
+** PROTOTYPES
+*/
+void DoStringSort(void);
+static ulong DoStringSortIteration(faruchar *arraybase,
+		uint numarrays,
+		ulong arraysize);
+static farulong *LoadStringArray(faruchar *strarray,
+		uint numarrays,
+		ulong *strings,
+		ulong arraysize);
+static void stradjust(farulong *optrarray,
+		faruchar *strarray,
+		ulong nstrings,
+		ulong i,
+		uchar l);
+static void StrHeapSort(farulong *optrarray,
+		faruchar *strarray,
+		ulong numstrings,
+		ulong bottom,
+		ulong top);
+static int str_is_less(farulong *optrarray,
+		faruchar *strarray,
+		ulong numstrings,
+		ulong a,
+		ulong b);
+static void strsift(farulong *optrarray,
+		faruchar *strarray,
+		ulong numstrings,
+		ulong i,
+		ulong j);
+
+/************************
+** BITFIELD OPERATIONS **
+*************************
+*/
+
+/*
+** PROTOTYPES
+*/
+void DoBitops(void);
+static ulong DoBitfieldIteration(farulong *bitarraybase,
+		farulong *bitoparraybase,
+		long bitoparraysize,
+		ulong *nbitops);
+static void ToggleBitRun(farulong *bitmap,
+		ulong bit_addr,
+		ulong nbits,
+		uint val);
+static void FlipBitRun(farulong *bitmap,
+		ulong bit_addr,
+		ulong nbits);
+
+/****************************
+** EMULATED FLOATING POINT **
+****************************/
+typedef struct          /* One number in the emulated floating-point format */
+{
+	u8 type;        /* Indicates, NORMAL, SUBNORMAL, etc. */
+	u8 sign;        /* Mantissa sign */
+	short exp;      /* Signed exponent...no bias */
+	u16 mantissa[INTERNAL_FPF_PRECISION];   /* Mantissa, held as INTERNAL_FPF_PRECISION 16-bit words */
+} InternalFPF;
+
+/*
+** PROTOTYPES
+*/
+void DoEmFloat(void);
+
+/*
+** EXTERNALS
+*/
+extern void SetupCPUEmFloatArrays(InternalFPF *abase,
+	InternalFPF *bbase, InternalFPF *cbase,
+	ulong arraysize);
+extern ulong DoEmFloatIteration(InternalFPF *abase,
+	InternalFPF *bbase, InternalFPF *cbase,
+	ulong arraysize, ulong loops);
+
+/*************************
+** FOURIER COEFFICIENTS **
+*************************/
+
+/*
+** PROTOTYPES
+*/
+void DoFourier(void);
+static ulong DoFPUTransIteration(fardouble *abase,
+		fardouble *bbase,
+		ulong arraysize);
+static double TrapezoidIntegrate(double x0,
+		double x1,
+		int nsteps,
+		double omegan,
+		int select);
+static double thefunction(double x,
+		double omegan,
+		int select);
+
+/*************************
+** ASSIGNMENT ALGORITHM **
+*************************/
+
+/*
+** DEFINES
+*/
+
+#define ASSIGNROWS 101L
+#define ASSIGNCOLS 101L
+
+/*
+** TYPEDEFS
+*/
+typedef struct {        /* Two ways of looking at the same pointer to long data */
+	union {
+		long *p;        /* flat view: pointer to the first long */
+		long (*ap)[ASSIGNROWS][ASSIGNCOLS];     /* 2D view: pointer to an ASSIGNROWS x ASSIGNCOLS array */
+	} ptrs;
+} longptr;
+
+/*
+** PROTOTYPES
+*/
+void DoAssign(void);
+static ulong DoAssignIteration(farlong *arraybase,
+		ulong numarrays);
+static void LoadAssignArrayWithRand(farlong *arraybase,
+		ulong numarrays);
+static void LoadAssign(farlong arraybase[][ASSIGNCOLS]);
+static void CopyToAssign(farlong arrayfrom[][ASSIGNCOLS],
+		long arrayto[][ASSIGNCOLS]);
+static void Assignment(farlong arraybase[][ASSIGNCOLS]);
+static void calc_minimum_costs(long tableau[][ASSIGNCOLS]);
+static int first_assignments(long tableau[][ASSIGNCOLS],
+		short assignedtableau[][ASSIGNCOLS]);
+static void second_assignments(long tableau[][ASSIGNCOLS],
+		short assignedtableau[][ASSIGNCOLS]);
+
+/********************
+** IDEA ENCRYPTION **
+********************/
+
+/*
+** DEFINES
+*/
+#define IDEAKEYSIZE 16
+#define IDEABLOCKSIZE 8
+#define ROUNDS 8
+#define KEYLEN (6*ROUNDS+4)
+
+/*
+** MACROS
+*/
+#define low16(x) ((x) & 0x0FFFF)        /* keep only the low 16 bits of x */
+#define MUL(x,y) (x=mul(low16(x),y))    /* in place: x = mul(low16(x), y); x must be an lvalue and appears twice */
+
+
+typedef u16 IDEAkey[KEYLEN];
+
+/*
+** PROTOTYPES
+*/
+void DoIDEA(void);
+static ulong DoIDEAIteration(faruchar *plain1,
+	faruchar *crypt1, faruchar *plain2,
+	ulong arraysize, ulong nloops,
+	IDEAkey Z, IDEAkey DK);
+static u16 mul(register u16 a, register u16 b);
+static u16 inv(u16 x);
+static void en_key_idea(u16 userkey[8], IDEAkey Z);
+static void de_key_idea(IDEAkey Z, IDEAkey DK);
+static void cipher_idea(u16 in[4], u16 out[4], IDEAkey Z);
+
+/************************
+** HUFFMAN COMPRESSION **
+************************/
+
+/*
+** DEFINES
+*/
+#define EXCLUDED 32000L          /* Big positive value */
+
+/*
+** TYPEDEFS
+*/
+typedef struct {
+	uchar c;                /* Byte value */
+	float freq;             /* Frequency */
+	int parent;             /* Parent node */
+	int left;               /* Left pointer = 0 */
+	int right;              /* Right pointer = 1 */
+} huff_node;
+
+/*
+** GLOBALS
+*/
+static huff_node *hufftree;             /* The huffman tree */
+static long plaintextlen;               /* Length of plaintext */
+
+/*
+** PROTOTYPES
+*/
+void DoHuffman(void);	/* Explicit (void): a checked prototype like its siblings */
+static void create_text_line(farchar *dt,long nchars);
+static void create_text_block(farchar *tb, ulong tblen,
+		ushort maxlinlen);
+static ulong DoHuffIteration(farchar *plaintext,
+	farchar *comparray, farchar *decomparray,
+	ulong arraysize, ulong nloops, huff_node *hufftree);
+static void SetCompBit(u8 *comparray, u32 bitoffset, char bitchar);
+static int GetCompBit(u8 *comparray, u32 bitoffset);
+
+/********************************
+** BACK PROPAGATION NEURAL NET **
+********************************/
+
+/*
+** DEFINES
+*/
+#define T 1                     /* TRUE */
+#define F 0                     /* FALSE */
+#define ERR -1
+#define MAXPATS 10              /* max number of patterns in data file */
+#define IN_X_SIZE 5             /* number of neurodes/row of input layer */
+#define IN_Y_SIZE 7             /* number of neurodes/col of input layer */
+#define IN_SIZE 35              /* equals IN_X_SIZE*IN_Y_SIZE */
+#define MID_SIZE 8              /* number of neurodes in middle layer */
+#define OUT_SIZE 8              /* number of neurodes in output layer */
+#define MARGIN 0.1              /* how near to 1,0 do we have to come to stop? */
+#define BETA 0.09               /* beta learning constant */
+#define ALPHA 0.09              /* momentum term constant */
+#define STOP 0.1                /* when worst_error less than STOP, training is done */
+
+/*
+** GLOBALS
+*/
+double  mid_wts[MID_SIZE][IN_SIZE];     /* middle layer weights */
+double  out_wts[OUT_SIZE][MID_SIZE];    /* output layer weights */
+double  mid_out[MID_SIZE];              /* middle layer output */
+double  out_out[OUT_SIZE];              /* output layer output */
+double  mid_error[MID_SIZE];            /* middle layer errors */
+double  out_error[OUT_SIZE];            /* output layer errors */
+double  mid_wt_change[MID_SIZE][IN_SIZE]; /* storage for last wt change */
+double  out_wt_change[OUT_SIZE][MID_SIZE]; /* storage for last wt change */
+double  in_pats[MAXPATS][IN_SIZE];      /* input patterns */
+double  out_pats[MAXPATS][OUT_SIZE];    /* desired output patterns */
+double  tot_out_error[MAXPATS];         /* measure of whether net is done */
+double  out_wt_cum_change[OUT_SIZE][MID_SIZE]; /* accumulated wt changes */
+double  mid_wt_cum_change[MID_SIZE][IN_SIZE];  /* accumulated wt changes */
+
+double  worst_error; /* worst error each pass through the data */
+double  average_error; /* average error each pass through the data */
+double  avg_out_error[MAXPATS]; /* average error each pattern */
+
+int iteration_count;    /* number of passes thru network so far */
+int numpats;            /* number of patterns in data file */
+int numpasses;          /* number of training passes through data file */
+int learned;            /* flag--if TRUE, network has learned all patterns */
+
+/*
+** The Neural Net test requires an input data file.
+** The name is specified here.
+*/
+char *inpath="NNET.DAT";
+
+/*
+** PROTOTYPES
+*/
+void DoNNET(void);
+static ulong DoNNetIteration(ulong nloops);
+static void do_mid_forward(int patt);
+static void do_out_forward(void);
+void display_output(int patt);
+static void do_forward_pass(int patt);
+static void do_out_error(int patt);
+static void worst_pass_error(void);
+static void do_mid_error(void);
+static void adjust_out_wts(void);
+static void adjust_mid_wts(void);
+static void do_back_pass(int patt);
+static void move_wt_changes(void);
+static int check_out_error(void);
+static void zero_changes(void);
+static void randomize_wts(void);
+static int read_data_file(void);
+/* static int initialize_net(void); -- declaration kept disabled */
+
+/***********************
+**  LU DECOMPOSITION  **
+** (Linear Equations) **
+***********************/
+
+/*
+** DEFINES
+*/
+
+#define LUARRAYROWS 101L
+#define LUARRAYCOLS 101L
+
+/*
+** TYPEDEFS
+*/
+typedef struct	/* Two views of one address, overlaid in a union */
+{       union
+	{       fardouble *p;	/* Flat pointer to fardouble */
+		fardouble (*ap)[][LUARRAYCOLS];	/* Pointer to rows of LUARRAYCOLS elements */
+	} ptrs;
+} LUdblptr;
+
+/*
+** GLOBALS
+*/
+fardouble *LUtempvv;
+
+/*
+** PROTOTYPES
+*/
+void DoLU(void);
+static void LUFreeMem(fardouble *a, fardouble *b,
+	fardouble *abase, fardouble *bbase);
+static ulong DoLUIteration(fardouble *a, fardouble *b,
+	fardouble *abase, fardouble *bbase,
+	ulong numarrays);
+static void build_problem( double a[][LUARRAYCOLS],
+	int n, double b[LUARRAYROWS]);
+static int ludcmp(double a[][LUARRAYCOLS],
+	int n, int indx[], int *d);
+static void lubksb(double a[][LUARRAYCOLS],
+	int n, int indx[LUARRAYROWS],
+	double b[LUARRAYROWS]);
+static int lusolve(double a[][LUARRAYCOLS],
+	int n, double b[LUARRAYROWS]);
+
+
diff --git a/nmglobal.h b/nmglobal.h
new file mode 100644
index 0000000..2b57db5
--- /dev/null
+++ b/nmglobal.h
@@ -0,0 +1,519 @@
+/*
+** nmglobal.h
+** Global definitions for native mode benchmarks.
+**
+** BYTEmark (tm)
+** BYTE's Native Mode Benchmarks
+** Rick Grehan, BYTE Magazine
+**
+** Creation:
+** Revision: 3/95;10/95
+**  10/95 - Added memory array & alignment -- RG
+**
+** DISCLAIMER
+** The source, executable, and documentation files that comprise
+** the BYTEmark benchmarks are made available on an "as is" basis.
+** This means that we at BYTE Magazine have made every reasonable
+** effort to verify that the there are no errors in the source and
+** executable code.  We cannot, however, guarantee that the programs
+** are error-free.  Consequently, McGraw-HIll and BYTE Magazine make
+** no claims in regard to the fitness of the source code, executable
+** code, and documentation of the BYTEmark.
+**  Furthermore, BYTE Magazine, McGraw-Hill, and all employees
+** of McGraw-Hill cannot be held responsible for any damages resulting
+** from the use of this code or the results obtained from using
+** this code.
+*/
+
+/* is this a 64 bit architecture? If so, this will define LONG64 */
+#include "pointer.h"
+
+/*
+** SYSTEM DEFINES
+*/
+
+/* +++ MEMORY +++ */
+
+/*
+** You must define ONLY ONE of the following identifiers
+** to specify the mechanism for allocating memory:
+** MALLOCMEM
+** DOS16MEM
+** MACMEM
+*/
+
+/*
+** Define MALLOCMEM to use the standard malloc() call for
+** memory.  This is the default for most systems.
+*/
+#define MALLOCMEM
+
+/*
+** Define DOS16MEM if you're running in the old 16-bit segmented
+** model.  This enables some fruity memory management routines
+** required for that model.  NOT defining this assumes that
+** you're running in an environment that allows malloc() to
+** get > 64K chunks of memory.
+*/
+/* #define DOS16MEM */
+
+/* Define MACMEM to use the Mac's GetPtr call to allocate
+** memory (instead of malloc()).
+*/
+/* #define MACMEM */
+
+/* +++ TIMING +++ */
+/*
+** You must define ONLY ONE of the following identifiers to pick
+** the timing routine used.
+**  CLOCKWCPS
+**  CLOCKWCT
+**  MACTIMEMGR
+**  WIN31TIMER
+*/
+
+/*
+** Define CLOCKWCPS if you are using the clock() routine and the
+** constant used as the divisor to determine seconds is
+** CLOCKS_PER_SEC.  This is the default in most cases.
+*/
+#define CLOCKWCPS
+
+/*
+** Define CLOCKWCT if you are using the clock() routine and the
+** constant used as the divisor to determine seconds is CLK_TCK
+*/
+/* #define CLOCKWCT */
+
+/*
+** Define MACTIMEMGR to use the Mac Time manager routines.
+** You'll need to be running at least system 6.0.3 or
+** better...extended time manager is recommended (system 7 or
+** better).
+*/
+/* #define MACTIMEMGR */
+
+/*
+** Define WIN31TIMER to use the timing routines in TOOLHELP.DLL.
+** Gets accuracy down to the millisecond.
+*/
+/* #define WIN31TIMER */
+
+/* +++ MISCELLANEOUS +++ */
+
+/*
+** Define DOS16 if you'll be compiling under DOS in 16-bit
+** (non DOS-extended) mode.  This will enable proper definitions
+** for the far*** typedefs
+*/
+/* #define DOS16 */
+
+/*
+** Define MAC if you're compiling on a Macintosh.  This
+** does a number of things:
+**  includes unix.h
+**  Incorporates code to mimic the command line via either
+**      the console library (Symantec/Think) or the SIOUX
+**      library (Code Warrior).
+*/
+/* #define MAC */
+
+/*
+** Define LONG64 if your compiler emits 64-bit longs.
+** This is typically true of Alpha compilers on Unix
+** systems...though, who knows, this may change in the
+** future. I MOVED THIS DEFINITION INTO THE FILE pointer.h. DO NOT
+** DEFINE IT HERE. IT WILL AUTOMATICALLY BE DEFINED IF NECESSARY.
+** Uwe F. Mayer, Dec 15, 1996, Nov 15, 1997
+*/
+/* #define LONG64 */
+
+/*
+** Define MACCWPROF if you are profiling on the Mac using
+** Code Warrior.  This enables code that turns off the
+** profiler in the event of an error exit.
+*/
+/* #define MACCWPROF */
+
+#ifdef MAC
+#include <unix.h>
+#endif
+
+/*
+** ERROR CODES
+*/
+#define ERROR_MEMORY    1
+#define ERROR_MEMARRAY_FULL 2
+#define ERROR_MEMARRAY_NFOUND 3
+#define ERROR_FILECREATE 10
+#define ERROR_FILEREAD 11
+#define ERROR_FILEWRITE 12
+#define ERROR_FILEOPEN 13
+#define ERROR_FILESEEK 14
+
+/*
+** MINIMUM_TICKS
+**
+** This sets the default number of minimum ticks.
+** It can, of course, be overridden by the input
+** command file.
+** This ultimately gets loaded into the variable
+** global_min_ticks, which specifies the minimum
+** number of ticks that must take place between
+** a StartStopwatch() and StopStopwatch() call.
+** The idea is to reduce error buildup.
+*/
+#define MINIMUM_TICKS 60
+
+/*
+** MINIMUM_SECONDS
+**
+** Minimum number of seconds to run each test.
+*/
+#define MINIMUM_SECONDS 5
+
+/*
+** MAXPOSLONG
+**
+** This is the maximum positive long.
+*/
+#ifdef LONG64
+#define MAXPOSLONG 0x7FFFFFFFFFFFFFFFL
+#else
+#define MAXPOSLONG 0x7FFFFFFFL
+#endif
+
+/*
+** OTHER DEFINES
+*/
+#ifndef MAC
+#define TRUE    1
+#define FALSE   0
+#endif
+
+/*
+** Memory array size.  Used in SYSSPEC for keeping track
+** of re-aligned memory.
+*/
+#define MEM_ARRAY_SIZE 20
+
+/*
+** TYPEDEFS
+*/
+#define ulong unsigned long
+#define uchar unsigned char
+#define uint unsigned int
+#define ushort unsigned short
+/*
+typedef unsigned char uchar;
+typedef unsigned int uint;
+typedef unsigned short ushort;
+typedef unsigned long ulong;
+*/
+/*
+** The 'farxxx' typedefs were added in deference to DOS, which
+** requires far pointers to handle some of the bigger
+** memory structures.  Other systems will simply
+** map 'farxxx' to 'xxx'
+*/
+#ifdef DOS16
+typedef void huge farvoid;
+typedef double huge fardouble;
+typedef long huge farlong;
+typedef unsigned long huge farulong;
+typedef char huge farchar;
+typedef unsigned char huge faruchar;
+
+#else
+
+typedef void farvoid;
+typedef double fardouble;
+typedef long farlong;
+typedef unsigned long farulong;
+typedef char farchar;
+typedef unsigned char faruchar;
+
+#endif
+
+/*
+** The following typedefs are used when element size
+** is critical.  You'll have to alter these for
+** your specific platform/compiler.
+*/
+typedef unsigned char u8;       /* Unsigned 8-bits */
+typedef unsigned short u16;     /* Unsigned 16 bits */
+#ifdef LONG64
+typedef unsigned int u32;       /* Unsigned 32 bits */
+typedef int int32;              /* Signed 32 bit integer */
+#else
+typedef unsigned long u32;      /* Unsigned 32 bits */
+typedef long int32;              /* Signed 32 bit integer */
+#endif
+
+/*****************
+** NUMERIC SORT **
+*****************/
+/*
+** DEFINES
+*/
+
+/*
+** The following constant, NUMNUMARRAYS (no, it is not a
+** Peter Sellers joke) is the maximum number of arrays
+** that can be built by the numeric sorting benchmark
+** before it gives up.  This maximum is dependent on the
+** amount of memory in the system.
+*/
+/*#define NUMNUMARRAYS    1000*/
+#define NUMNUMARRAYS    10000
+
+/*
+** The following constant NUMARRAYSIZE determines the
+** default # of elements in each numeric array.  Ordinarily
+** this is something you shouldn't fool with, though as
+** with most of the constants here, it is adjustable.
+*/
+#define NUMARRAYSIZE    8111L
+
+
+/*
+** TYPEDEFS
+*/
+typedef struct {
+        int adjust;             /* Set adjust code */
+        ulong request_secs;     /* # of seconds requested */
+        double sortspersec;     /* # of sort iterations per sec */
+        ushort numarrays;       /* # of arrays */
+        ulong arraysize;        /* # of elements in array */
+} SortStruct;
+
+/****************
+** STRING SORT **
+*****************
+** Note: The string sort benchmark uses the same structure to
+** communicate parameters as does the numeric sort benchmark.
+** (i.e., SortStruct...see above.)
+*/
+
+/*
+** DEFINES
+*/
+/*
+** The following constant STRINGARRAYSIZE determines
+** the default # of bytes allocated to each string array.
+** Though the actual size can be pre-set from the command
+** file, this constant should be left unchanged.
+*/
+#define STRINGARRAYSIZE 8111L
+
+/************************
+** BITFIELD OPERATIONS **
+*************************
+*/
+
+/*
+** DEFINES
+*/
+
+/*
+** Following field sets the size of the bitfield array (in longs).
+*/
+#ifdef LONG64
+#define BITFARRAYSIZE 16384L
+#else
+#define BITFARRAYSIZE 32768L
+#endif
+
+/*
+** TYPEDEFS
+*/
+typedef struct {
+        int adjust;             /* Set adjust code */
+        ulong request_secs;     /* # of seconds requested */
+        double bitopspersec;    /* # of bitfield ops per sec */
+        ulong bitoparraysize;           /* Total # of bitfield ops */
+        ulong bitfieldarraysize;        /* Bit field array size */
+} BitOpStruct;
+
+/****************************
+** EMULATED FLOATING POINT **
+****************************/
+/*
+** DEFINES
+*/
+#define INTERNAL_FPF_PRECISION 4
+
+/*
+** The following constant is the maximum number of loops
+** of the emulated floating point test that the system
+** will allow before flagging an error.  This is not a
+** critical constant, and can be altered if your system is
+** a real barn-burner.
+*/
+/*#define CPUEMFLOATLOOPMAX 50000L*/
+#define CPUEMFLOATLOOPMAX 500000L
+
+/*
+** Set size of array
+*/
+#define EMFARRAYSIZE 3000L
+
+/*
+** TYPEDEFS
+*/
+typedef struct {
+        int adjust;             /* Set adjust code */
+        ulong request_secs;     /* # of seconds requested */
+        ulong arraysize;        /* Size of array */
+        ulong loops;            /* Loops per iterations */
+        double emflops;         /* Results */
+} EmFloatStruct;
+
+/*************************
+** FOURIER COEFFICIENTS **
+*************************/
+
+/*
+** TYPEDEFS
+*/
+typedef struct {
+        int adjust;             /* Set adjust code */
+        ulong request_secs;     /* # of requested seconds */
+        ulong arraysize;        /* Size of coeff. arrays */
+        double fflops;          /* Results */
+} FourierStruct;
+
+/*************************
+** ASSIGNMENT ALGORITHM **
+*************************/
+
+/*
+** TYPEDEFS
+*/
+typedef struct {
+        int adjust;             /* Set adjust code */
+        ulong request_secs;     /* Requested # of seconds */
+        ulong numarrays;        /* # of arrays */
+        double iterspersec;     /* Results */
+} AssignStruct;
+
+/********************
+** IDEA ENCRYPTION **
+********************/
+
+/*
+** DEFINES
+*/
+/* Following constant defines the max number of loops the
+** system will attempt. Keeps things from going off into the
+** weeds. */
+/*#define MAXIDEALOOPS 50000L*/
+#define MAXIDEALOOPS 500000L
+
+/*
+** Following constant sets the size of the arrays.
+** NOTE: For the IDEA algorithm to work properly, this
+**  number MUST be some multiple of 8.
+*/
+#define IDEAARRAYSIZE 4000L
+
+/*
+** TYPEDEFS
+*/
+typedef struct {
+        int adjust;             /* Set adjust code */
+        ulong request_secs;     /* Requested # of seconds */
+        ulong arraysize;        /* Size of array */
+        ulong loops;            /* # of times to convert */
+        double iterspersec;     /* Results */
+} IDEAStruct;
+
+
+/************************
+** HUFFMAN COMPRESSION **
+************************/
+
+/*
+** DEFINES
+*/
+/*
+** MAXHUFFLOOPS
+**
+** This constant specifies the maximum number of Huffman
+** compression loops the system will try for.  This keeps
+** the test from going off into the weeds.  This is not
+** a critical constant, and can be increased if your
+** system is a real barn-burner.
+*/
+/*#define MAXHUFFLOOPS 50000L*/
+#define MAXHUFFLOOPS 500000L
+
+/*
+** Following constant sets the size of the arrays to
+** be compressed/uncompressed.
+*/
+#define HUFFARRAYSIZE 5000L
+
+/*
+** TYPEDEFS
+*/
+
+typedef struct {
+        int adjust;             /* Set adjust code */
+        ulong request_secs;     /* Requested # of seconds */
+        ulong arraysize;        /* Size of array */
+        ulong loops;            /* # of times to compress/decompress */
+        double iterspersec;     /* Results */
+} HuffStruct;
+
+/********************************
+** BACK PROPAGATION NEURAL NET **
+********************************/
+
+/*
+**  MAXNNETLOOPS
+**
+** This constant sets the max number of loops through the neural
+** net that the system will attempt before giving up.  This
+** is not a critical constant.  You can alter it if your system
+** has sufficient horsepower.
+*/
+/*#define MAXNNETLOOPS  50000L*/
+#define MAXNNETLOOPS  500000L
+
+/*
+** TYPEDEFS
+*/
+typedef struct {
+        int adjust;             /* Set adjust code */
+        ulong request_secs;     /* Requested # of seconds */
+        ulong loops;            /* # of times to learn */
+        double iterspersec;     /* Results */
+} NNetStruct;
+
+/***********************
+**  LU DECOMPOSITION  **
+** (Linear Equations) **
+***********************/
+
+/*
+** MAXLUARRAYS
+**
+** This sets the upper limit on the number of arrays
+** that the benchmark will attempt to build before
+** flagging an error.  It is not a critical constant, and
+** may be increased if your system has the horsepower.
+*/
+/*#define MAXLUARRAYS 1000*/
+#define MAXLUARRAYS 10000
+
+/*
+** TYPEDEFS
+*/
+typedef struct {
+        int adjust;             /* Set adjust code */
+        ulong request_secs;     /* Requested # of seconds */
+        ulong numarrays;        /* # of arrays */
+        double iterspersec;     /* Results */
+} LUStruct;
+
diff --git a/pointer.c b/pointer.c
new file mode 100644
index 0000000..f4de577
--- /dev/null
+++ b/pointer.c
@@ -0,0 +1,6 @@
+#include <stdio.h>
+int main(void){
+ /* Emit the size of "long" in bytes; no newline is written. */
+ printf("%d",(int)sizeof(long));
+ return 0;
+}
diff --git a/sysinfo.c.example b/sysinfo.c.example
new file mode 100644
index 0000000..db650f0
--- /dev/null
+++ b/sysinfo.c.example
@@ -0,0 +1,10 @@
+sprintf(buffer,"**System used for compilation:\n");
+output_string(buffer);
+sprintf(buffer,"**Linux mimi 2.0.31 #5 Thu Oct 23 10:02:08 CDT 1997 i486\n");
+output_string(buffer);
+sprintf(buffer,"**C compiler: gcc version 2.7.2.3\n");
+output_string(buffer);
+sprintf(buffer,"**libc: libc.so.5.4.38\n");
+output_string(buffer);
+sprintf(buffer,"**Date of compilation: Thu Nov 20 10:04:43 CST 1997\n");
+output_string(buffer);
diff --git a/sysinfo.c.template b/sysinfo.c.template
new file mode 100644
index 0000000..c1a986c
--- /dev/null
+++ b/sysinfo.c.template
@@ -0,0 +1,10 @@
+sprintf(buffer,"**System used for compilation:\n");
+output_string(buffer);
+sprintf(buffer,"**%SYSTEM%\n");
+output_string(buffer);
+sprintf(buffer,"**C compiler: %CCVERSION%\n");
+output_string(buffer);
+sprintf(buffer,"**libc: %LIBCVERSION%\n");
+output_string(buffer);
+sprintf(buffer,"**Date of compilation: %DATE%\n");
+output_string(buffer);
diff --git a/sysinfo.sh b/sysinfo.sh
new file mode 100755
index 0000000..57754fe
--- /dev/null
+++ b/sysinfo.sh
@@ -0,0 +1,78 @@
+#!/bin/sh
+
+# the arguments of this script are the compiler name and flags
+
+# try to solve a chicken-and-egg problem on SunOS
+# ucb's test program does not handle -L like the other test programs
+# let's try to find another implementation
+if test -x /bin/test; then
+    TEST=/bin/test;
+else
+    if test -x /usr/bin/test; then
+        TEST=/usr/bin/test;
+    else
+        # cross your fingers that it's not like ucb test
+        TEST=test;
+    fi
+fi
+
+compiler=`echo $* | sed -e 's/-static//g' -e 's/-Bstatic//g'`
+if $TEST `basename $1` = "gcc" && ($compiler -v) >/dev/null 2>&1 ; then
+# Cygwin writes more than one line with "version" in it
+    gccversion=`$compiler -v 2>&1 | sed -e "/version/!d" | tail -n 1`
+else
+    gccversion="$1"
+fi
+
+libcversion=""
+if ($* hello.c -o hello) >/dev/null 2>&1; then
+  ldd_output=`(ldd hello) 2>&1`
+  libcversion=`echo $ldd_output | sed -e 's/.*static.*/static/' \
+				      -e 's/.*not a dynamic.*/static/'`
+  if $TEST "$libcversion" = "static" ; then
+    if ($compiler hello.c -o hello) >/dev/null 2>&1; then
+      if (ldd hello) >/dev/null 2>/dev/null; then
+        libcversion=`(ldd hello) 2>&1`
+        libcversion=`echo $libcversion | sed -e '/libc/!d'\
+			-e 's/^[ 	]*//' \
+			-e 's/.*=>[ 	][ 	]*\([^ 	]*\).*/\1/'`
+	# remember the current directory
+      	current=`pwd`
+      	while $TEST -L "$libcversion" && ! $TEST "$libcversion" = "" ; do
+      	  libcitself=`basename $libcversion`
+      	  libpath=`echo $libcversion | sed -e "s/$libcitself$//"`
+      	  if $TEST -d "$libpath" ; then
+      	    cd $libpath
+      	  fi
+      	  if ls $libcitself >/dev/null 2>/dev/null ; then
+      	    libcversion=`ls -l $libcitself | \
+			   sed -e 's/.*->[ 	][ 	]*\(.*\)$/\1/'`
+      	  else
+      	    # something must have gone wrong, let's bail out
+      	    libcversion=""
+      	  fi
+      	done
+      	# return to the current directory
+      	cd $current
+      fi
+    fi
+  else
+    libcversion=""
+  fi
+fi
+
+rm -f sysinfo.crm sysinfoc.c hello
+
+# this bombs out on Ultrix which expect "cut -d"
+
+compsystem=`uname -a | cut -b 1-78`
+compdate=`date|cut -b1-55`
+
+# let's hope that ctrl-c is not part of any string here
+# this also will barf later if " is in any of the strings
+
+for i in sysinfo.c sysinfoc.c ; do
+ sed -e "s%CCVERSION%$gccversion" -e "s%LIBCVERSION%$libcversion"\
+     -e "s%SYSTEM%$compsystem" -e "s%DATE%$compdate"\
+   ${i}.template > $i
+done
diff --git a/sysinfoc.c.example b/sysinfoc.c.example
new file mode 100644
index 0000000..7da71ac
--- /dev/null
+++ b/sysinfoc.c.example
@@ -0,0 +1,4 @@
+sprintf(buffer,"C compiler          : gcc version 2.7.2.3\n");
+output_string(buffer);
+sprintf(buffer,"libc                : libc.so.5.4.38\n");
+output_string(buffer);
diff --git a/sysinfoc.c.template b/sysinfoc.c.template
new file mode 100644
index 0000000..922a5de
--- /dev/null
+++ b/sysinfoc.c.template
@@ -0,0 +1,4 @@
+sprintf(buffer,"C compiler          : %CCVERSION%\n");
+output_string(buffer);
+sprintf(buffer,"libc                : %LIBCVERSION%\n");
+output_string(buffer);
diff --git a/sysspec.c b/sysspec.c
new file mode 100644
index 0000000..a97010d
--- /dev/null
+++ b/sysspec.c
@@ -0,0 +1,884 @@
+
+/*
+** sysspec.c
+** System-specific routines.
+**
+** BYTEmark (tm)
+** BYTE's Native Mode Benchmarks
+** Rick Grehan, BYTE Magazine
+**
+** Creation:
+** Revision: 3/95;10/95
+**
+** DISCLAIMER
+** The source, executable, and documentation files that comprise
+** the BYTEmark benchmarks are made available on an "as is" basis.
+** This means that we at BYTE Magazine have made every reasonable
+** effort to verify that the there are no errors in the source and
+** executable code.  We cannot, however, guarantee that the programs
+** are error-free.  Consequently, McGraw-HIll and BYTE Magazine make
+** no claims in regard to the fitness of the source code, executable
+** code, and documentation of the BYTEmark.
+**  Furthermore, BYTE Magazine, McGraw-Hill, and all employees
+** of McGraw-Hill cannot be held responsible for any damages resulting
+** from the use of this code or the results obtained from using
+** this code.
+*/
+
+/***********************************
+**    SYSTEM-SPECIFIC ROUTINES    **
+************************************
+**
+** These are the routines that provide functions that are
+** system-specific.  If the benchmarks are to be ported
+** to new hardware/new O.S., this is the first place to
+** start.
+*/
+#include "sysspec.h"
+
+#ifdef DOS16
+#include <io.h>
+#include <fcntl.h>
+#include <sys\stat.h>
+#endif
+/*********************************
+**  MEMORY MANAGEMENT ROUTINES  **
+*********************************/
+
+
+/****************************
+** AllocateMemory
+** This routine returns a void pointer to a memory
+** block.  The size of the memory block is given in bytes
+** as the first argument.  This routine also returns an
+** error code in the second argument.
+** 10/95 Update:
+**  Added an associative array for memory alignment reasons.
+**  mem_array[2][MEM_ARRAY_SIZE]
+**   mem_array[0][n] = Actual address (from malloc)
+**   mem_array[1][n] = Aligned address
+** Currently, mem_array[][] is only used if you use malloc;
+**  it is not used for the 16-bit DOS and MAC versions.
+*/
+farvoid *AllocateMemory(unsigned long nbytes,   /* # of bytes to alloc */
+		int *errorcode)                 /* Returned error code */
+{
+#ifdef DOS16MEM
+union REGS registers;
+unsigned short nparas;            /* # of paragraphs */
+
+/*
+** Set # of paragraphs to nbytes/16 +1.  The +1 is a
+** slop factor.
+*/
+nparas=(unsigned short)(nbytes/16L) + 1;
+
+/*
+** Set incoming registers.
+*/
+registers.h.ah=0x48;            /* Allocate memory */
+registers.x.bx=nparas;          /* # of paragraphs */
+
+
+intdos(&registers,&registers);  /* Call DOS */
+
+/*
+** See if things succeeded.
+*/
+if(registers.x.cflag)
+{       printf("error: %d Lgst: %d\n",registers.x.ax,registers.x.bx);
+	    *errorcode=ERROR_MEMORY;
+	return((farvoid *)NULL);
+}
+
+/*
+** Create a farvoid pointer to return.
+*/
+*errorcode=0;
+return((farvoid *)MK_FP(registers.x.ax,0));
+
+#endif
+
+#ifdef MACMEM
+/*
+** For MAC CodeWarrior, we'll use the MacOS NewPtr call
+*/
+farvoid *returnval;
+returnval=(farvoid *)NewPtr((Size)nbytes);
+if(returnval==(farvoid *)NULL)
+	*errorcode=ERROR_MEMORY;
+else
+	*errorcode=0;
+return(returnval);
+#endif
+
+#ifdef MALLOCMEM
+/*
+** Everyone else, it's pretty straightforward, given
+** that you use a 32-bit compiler which treats size_t as
+** a 4-byte entity.
+*/
+farvoid *returnval;             /* Return value */
+ulong true_addr;		/* True address */
+ulong adj_addr;			/* Adjusted address */
+
+/*
+** Over-allocate by 2*global_align bytes so the pointer can be
+** moved forward below and still leave nbytes usable.
+*/
+returnval=(farvoid *)malloc((size_t)(nbytes+2L*(long)global_align));
+if(returnval==(farvoid *)NULL)
+{	/* Stop here: a failed request is neither adjusted nor recorded */
+	*errorcode=ERROR_MEMORY;
+	return((farvoid *)NULL);
+}
+*errorcode=0;
+
+/*
+** Check for alignment
+*/
+adj_addr=true_addr=(ulong)returnval;
+if(global_align==1)
+{	/* Alignment of 1: hand back an odd address */
+        if(true_addr%2==0) adj_addr++;
+}
+else if(global_align!=0)
+{	/* Multiple of global_align that is not a multiple of twice it */
+	while(adj_addr%global_align!=0) ++adj_addr;
+	if(adj_addr%(global_align*2)==0) adj_addr+=global_align;
+}
+returnval=(void *)adj_addr;	/* Unchanged when global_align is 0 */
+/* Record the true/adjusted pair; FreeMemory() looks it up again */
+if(AddMemArray(true_addr,adj_addr))
+	*errorcode=ERROR_MEMARRAY_FULL;
+return(returnval);
+#endif
+
+}
+
+
+/****************************
+** FreeMemory
+** This is the reverse of AllocateMemory.  The memory
+** block passed in is freed.  Should an error occur,
+** that error is returned in errorcode.
+*/
+void FreeMemory(farvoid *mempointer,    /* Pointer to memory block */
+		int *errorcode)		/* Returned error code (0 = ok) */
+{
+#ifdef DOS16MEM
+/*
+** 16-bit DOS VERSION!!
+*/
+unsigned int segment;
+unsigned int offset;
+union REGS registers;
+struct SREGS sregisters;
+
+/*
+** First get the segment/offset of the farvoid pointer.
+*/
+segment=FP_SEG(mempointer);
+offset=FP_OFF(mempointer);
+
+/*
+** Align the segment properly.  For as long as offset >= 16,
+** subtract 16 from offset and add 1 to segment.
+*/
+while(offset>=16)
+{       offset-=16;
+	segment++;
+}
+
+/*
+** Build the call to DOS
+*/
+registers.h.ah=0x49;            /* Free memory */
+sregisters.es=segment;		/* Block is named by its segment */
+
+intdosx(&registers,&registers,&sregisters);
+
+/*
+** Check for error
+*/
+if(registers.x.cflag)
+{       *errorcode=ERROR_MEMORY;
+	return;
+}
+
+*errorcode=0;
+return;
+#endif
+
+#ifdef MACMEM
+DisposPtr((Ptr)mempointer);	/* Always reports success below */
+*errorcode=0;
+return;
+#endif
+
+#ifdef MALLOCMEM
+ulong adj_addr, true_addr;
+
+/* Look up (and drop) the entry for this adjusted address */
+adj_addr=(ulong)mempointer;
+if(RemoveMemArray(adj_addr, &true_addr))
+{	*errorcode=ERROR_MEMARRAY_NFOUND;	/* Not a tracked block */
+	return;
+}
+mempointer=(void *)true_addr;	/* free() gets the recorded true address */
+free(mempointer);
+*errorcode=0;
+return;
+#endif
+}
+
+/****************************
+** MoveMemory
+** Moves n bytes from a to b.  Handles overlap.
+** In most cases, this is just a memmove operation.
+** But, not in DOS....noooo....
+*/
+void MoveMemory( farvoid *destination,  /* Where the bytes go */
+		farvoid *source,        /* Where the bytes come from */
+		unsigned long nbytes)
+{
+#ifndef DOS16MEM
+
+/* Everywhere but 16-bit DOS, memmove does the whole job */
+memmove(destination, source, nbytes);
+
+#else
+
+/* +++16-bit DOS VERSION+++ */
+	FarDOSmemmove( destination, source, nbytes);
+
+#endif
+}
+
+#ifdef DOS16MEM
+
+/****************************
+** FarDOSmemmove
+** Performs the same function as memmove for DOS when
+** the arrays are defined with far pointers.
+*/
+void FarDOSmemmove(farvoid *destination,        /* Destination pointer */
+		farvoid *source,        /* Source pointer */
+		unsigned long nbytes)   /* # of bytes to move */
+{
+unsigned char huge *uchsource;  /* Temp source */
+unsigned char huge *uchdest;    /* Temp destination */
+unsigned long saddr;            /* Source "true" address */
+unsigned long daddr;            /* Destination "true" address */
+
+
+/*
+** Get unsigned char pointer equivalents
+*/
+uchsource=(unsigned char huge *)source;
+uchdest=(unsigned char huge *)destination;
+
+/*
+** Calculate true (linear) address of source and destination and
+** compare.  Widen to long BEFORE scaling so segment*16 cannot wrap.
+*/
+saddr=(unsigned long)FP_SEG(source)*16UL + (unsigned long)FP_OFF(source);
+daddr=(unsigned long)FP_SEG(destination)*16UL + (unsigned long)FP_OFF(destination);
+
+if(saddr > daddr)
+{
+	/*
+	** Source is greater than destination.
+	** Use a series of standard move operations.
+	** We'll move 65535 bytes at a time.
+	*/
+	while(nbytes>=65535L)
+	{       _fmemmove((farvoid *)uchdest,
+			(farvoid *)uchsource,
+			(size_t) 65535);
+		uchsource+=65535;       /* Advance pointers */
+		uchdest+=65535;
+		nbytes-=65535;
+	}
+
+	/*
+	** Move remaining bytes
+	*/
+	if(nbytes!=0L)
+		_fmemmove((farvoid *)uchdest,
+			(farvoid *)uchsource,
+			(size_t)(nbytes & 0xFFFF));
+
+}
+else
+{
+	/*
+	** Destination is greater than source.
+	** Advance pointers to the end of their
+	** respective blocks.
+	*/
+	uchsource+=nbytes;
+	uchdest+=nbytes;
+
+	/*
+	** Again, move 65535 bytes at a time.  However,
+	** "back" the pointers up before doing the
+	** move.
+	*/
+	while(nbytes>=65535L)
+	{
+		uchsource-=65535;
+		uchdest-=65535;
+		_fmemmove((farvoid *)uchdest,
+			(farvoid *)uchsource,
+			(size_t) 65535);
+		nbytes-=65535;
+	}
+
+	/*
+	** Move remaining bytes.
+	*/
+	if(nbytes!=0L)
+	{       uchsource-=nbytes;
+		uchdest-=nbytes;
+		_fmemmove((farvoid *)uchdest,
+			(farvoid *)uchsource,
+			(size_t)(nbytes & 0xFFFF));
+	}
+}
+return;
+}
+#endif
+
+/***********************************
+** MEMORY ARRAY HANDLING ROUTINES **
+***********************************/
+/****************************
+** InitMemArray
+** Initialize the memory array.  This simply amounts to
+** setting mem_array_ents to zero, indicating that there
+** isn't anything in the memory array.
+*/
+void InitMemArray(void)
+{
+/* An entry count of zero marks the tracking table as empty. */
+mem_array_ents = 0;
+}
+
+/***************************
+** AddMemArray
+** Add a pair of items to the memory array.
+**  true_addr is the true address (mem_array[0][n])
+**  adj_addr is the adjusted address (mem_array[1][n])
+** Returns 0 if ok
+** -1 if not enough room
+*/
+int AddMemArray(ulong true_addr, ulong adj_addr)
+{
+int slot = mem_array_ents;      /* Next free column of the table */
+if (slot >= MEM_ARRAY_SIZE)
+	return -1;              /* Table is full */
+/* Row 0 keeps the true address, row 1 the adjusted one. */
+mem_array[0][slot] = true_addr;
+mem_array[1][slot] = adj_addr;
+mem_array_ents = slot + 1;
+return 0;
+}
+
+/*************************
+** RemoveMemArray
+** Given an adjusted address value (mem_array[1][n]), locate
+** the entry and remove it from the mem_array.
+** Also returns the associated true address.
+** Returns 0 if ok
+** -1 if not found.
+*/
+int RemoveMemArray(ulong adj_addr,ulong *true_addr)
+{
+int hit;        /* Column holding the requested adjusted address */
+int k;          /* Column being shifted down */
+
+/* Linear scan of row 1 for the first matching adjusted address. */
+hit=0;
+while(hit<mem_array_ents && mem_array[1][hit]!=adj_addr)
+	hit++;
+
+/* Ran off the end: the address is not in the table. */
+if(hit>=mem_array_ents)
+	return(-1);
+/* Hand back the true address, then close the gap it leaves. */
+*true_addr=mem_array[0][hit];
+for(k=hit+1;k<mem_array_ents;k++)
+{       mem_array[0][k-1]=mem_array[0][k];
+	mem_array[1][k-1]=mem_array[1][k];
+}
+mem_array_ents--;
+return(0);
+}
+
+/**********************************
+**    FILE HANDLING ROUTINES     **
+**********************************/
+
+/****************************
+** CreateFile
+** This routine accepts a filename for an argument and
+** creates that file in the current directory (unless the
+** name contains a path that overrides the current directory).
+** Note that the routine does not OPEN the file.
+** If the file exists, it is truncated to length 0.
+*/
+void CreateFile(char *filename,
+		int *errorcode)
+{
+
+#ifdef DOS16
+/*
+** DOS VERSION!!
+*/
+int fhandle;            /* File handle used internally */
+
+fhandle=open(filename,O_CREAT | O_TRUNC, S_IREAD | S_IWRITE);
+
+/*
+** Nothing was opened on failure, so there is no handle to
+** close: report the error and leave.
+*/
+if(fhandle==-1)
+{       *errorcode=ERROR_FILECREATE;
+	return;
+}
+
+/* The file exists now; creating it was the whole job. */
+*errorcode=0;
+close(fhandle);
+return;
+#endif
+
+#ifdef LINUX
+FILE *fhandle;            /* Stream used internally */
+
+fhandle=fopen(filename,"w");    /* "w" creates or truncates */
+
+/*
+** fclose(NULL) is undefined behavior, so bail out before
+** the close when the create failed.
+*/
+if(fhandle==NULL)
+{       *errorcode=ERROR_FILECREATE;
+	return;
+}
+*errorcode=0;
+fclose(fhandle);
+return;
+#endif
+}
+
+/****************************
+** bmOpenFile
+** Opens the file given by fname, returning its handle.
+** If an error occurs, returns its code in errorcode.
+** The file is opened in read-write exclusive mode.
+*/
+#ifdef DOS16
+/*
+** DOS VERSION!!
+*/
+
+int bmOpenFile(char *fname,       /* Name of the file to open */
+	int *errorcode)         /* Receives 0 or ERROR_FILEOPEN */
+{
+int fd;                 /* Descriptor handed back to the caller */
+
+/*
+** Binary read/write open.  O_CREAT is not among the flags,
+** so nothing is created here.
+*/
+fd=open(fname,O_BINARY | O_RDWR, S_IREAD | S_IWRITE);
+
+/* open() signals failure with -1, which is returned as-is. */
+*errorcode=(fd==-1) ? ERROR_FILEOPEN : 0;
+return(fd);
+}
+#endif
+
+
+#ifdef LINUX
+
+FILE *bmOpenFile(char *fname,       /* File name */
+	    int *errorcode)         /* Error code returned */
+{
+FILE *fhandle;            /* Returned stream, NULL on failure */
+
+/*
+** Open for update WITHOUT truncating.  "w+" would wipe whatever
+** was already written, unlike the DOS O_RDWR open; use
+** CreateFile() first when the file may not exist yet.
+*/
+fhandle=fopen(fname,"r+b");
+
+*errorcode=(fhandle==NULL) ? ERROR_FILEOPEN : 0;
+return(fhandle);
+}
+#endif
+
+
+/****************************
+** CloseFile
+** Closes the file identified by fhandle.
+** A more innocuous routine there never was.
+*/
+#ifdef DOS16
+/*
+** DOS VERSION!!!
+*/
+void CloseFile(int fhandle,             /* Descriptor to release */
+		int *errorcode)         /* Always receives 0 */
+{
+/* The close() result is not inspected; 0 is always reported. */
+(void)close(fhandle);
+*errorcode=0;
+return;
+}
+#endif
+#ifdef LINUX
+void CloseFile(FILE *fhandle,             /* Stream to release */
+		int *errorcode)         /* Always receives 0 */
+{
+/* The fclose() result is not inspected; 0 is always reported. */
+(void)fclose(fhandle);
+*errorcode=0;
+}
+#endif
+
+/****************************
+** readfile
+** Read bytes from an opened file.  This routine
+** is a combination seek-and-read.
+** Note that this routine expects the offset to be from
+** the beginning of the file.
+*/
+#ifdef DOS16
+/*
+** DOS VERSION!!
+*/
+
+void readfile(int fhandle,              /* File handle */
+	unsigned long offset,           /* Offset into file */
+	unsigned long nbytes,           /* # of bytes to read */
+	void *buffer,                   /* Buffer to read into */
+	int *errorcode)                 /* Returned error code */
+{
+unsigned count;                         /* Request size handed to read */
+
+/*
+** Position the file first; offset is absolute (SEEK_SET).
+** A failed seek is reported and nothing is read.
+*/
+if(lseek(fhandle,(long)offset,SEEK_SET)==-1L)
+{       *errorcode=ERROR_FILESEEK;
+	return;
+}
+
+/*
+** Only the low 16 bits of nbytes are used as the request size.
+*/
+count=(unsigned)(nbytes & 0xFFFF);
+
+/*
+** Only a -1 from read counts as an error; a short read
+** is still reported as success.
+*/
+if(read(fhandle,buffer,count)==-1)
+	*errorcode=ERROR_FILEREAD;
+else
+	*errorcode=0;
+
+return;
+}
+#endif
+#ifdef LINUX
+void readfile(FILE *fhandle,            /* File handle */
+	unsigned long offset,           /* Offset into file */
+	unsigned long nbytes,           /* # of bytes to read */
+	void *buffer,                   /* Buffer to read into */
+	int *errorcode)                 /* Returned error code */
+{
+size_t wanted;                          /* Bytes requested from fread */
+size_t got;                             /* Bytes fread delivered */
+
+/*
+** Position the stream first; offset is absolute (SEEK_SET).
+** A failed seek is reported and nothing is read.
+*/
+if(fseek(fhandle,(long)offset,SEEK_SET)==-1)
+{       *errorcode=ERROR_FILESEEK;
+	return;
+}
+
+/*
+** Only the low 16 bits of nbytes are used as the request size,
+** the same masking the DOS16 variant applies.
+*/
+wanted=(size_t)(nbytes & 0xFFFF);
+got=fread(buffer,(size_t)1,wanted,fhandle);
+
+/*
+** Anything short of the full request counts as a read error.
+*/
+if(got==wanted)
+	*errorcode=0;
+else
+	*errorcode=ERROR_FILEREAD;
+
+return;
+}
+#endif
+
+/****************************
+** writefile
+** writes bytes to an opened file.  This routine is
+** a combination seek-and-write.
+** Note that this routine expects the offset to be from
+** the beginning of the file.
+*/
+#ifdef DOS16
+/*
+** DOS VERSION!!
+*/
+
+void writefile(int fhandle,             /* File handle */
+	unsigned long offset,           /* Offset into file */
+	unsigned long nbytes,           /* # of bytes to write */
+	void *buffer,                   /* Data to be written */
+	int *errorcode)                 /* Returned error code */
+{
+unsigned count;                         /* Request size handed to write */
+
+/*
+** Position the file first; offset is absolute (SEEK_SET).
+** A failed seek is reported and nothing is written.
+*/
+if(lseek(fhandle,(long)offset,SEEK_SET)==-1L)
+{       *errorcode=ERROR_FILESEEK;
+	return;
+}
+
+/*
+** Only the low 16 bits of nbytes are used as the request size.
+*/
+count=(unsigned)(nbytes & 0xFFFF);
+
+/*
+** Only a -1 from write counts as an error; a short write
+** is still reported as success.
+*/
+if(write(fhandle,buffer,count)==-1)
+	*errorcode=ERROR_FILEWRITE;
+else
+	*errorcode=0;
+
+return;
+}
+#endif
+
+#ifdef LINUX
+
+void writefile(FILE *fhandle,           /* File handle */
+	unsigned long offset,           /* Offset into file */
+	unsigned long nbytes,           /* # of bytes to write */
+	void *buffer,                   /* Data to be written */
+	int *errorcode)                 /* Returned error code */
+{
+size_t nelems;                          /* Bytes we ask fwrite to write */
+size_t writecode;                       /* Bytes fwrite actually wrote */
+
+/*
+** Presume success.
+*/
+*errorcode=0;
+
+/*
+** Seek to the proper offset (absolute, from start of file).
+** fseek returns nonzero on failure.
+*/
+if(fseek(fhandle,(long)offset,SEEK_SET)!=0)
+{       *errorcode=ERROR_FILESEEK;
+	return;
+}
+
+/*
+** Do the write.  As in the DOS16 variant, only the low 16 bits
+** of nbytes are honored.  A short count means the write failed;
+** a full count is success (this test used to be inverted).
+*/
+nelems=(size_t)(nbytes & 0xFFFF);
+writecode=fwrite(buffer,(size_t)1,nelems,fhandle);
+if(writecode!=nelems)
+	*errorcode=ERROR_FILEWRITE;
+
+return;
+}
+#endif
+
+
+/********************************
+**   ERROR HANDLING ROUTINES   **
+********************************/
+
+/****************************
+** ReportError
+** Report error message condition.
+*/
+void ReportError(char *errorcontext,    /* Error context string */
+		int errorcode)          /* Error code number */
+{
+
+/*
+** Display error context
+*/
+printf("ERROR CONDITION\nContext: %s\n",errorcontext);
+
+/*
+** Display code
+*/
+printf("Code: %d",errorcode);
+
+return;
+}
+
+/****************************
+** ErrorExit
+** Performs an exit from an error condition.
+*/
+void ErrorExit()
+{
+/*
+** MetroWerks profiling builds on the Mac (11/17/94 RG):
+** the profiler has to be turned off before the process
+** goes away.  Builds without MACCWPROF, or with
+** __profile__ zero, skip this step.
+*/
+#if defined(MACCWPROF) && __profile__
+ProfilerTerm();
+#endif
+
+/*
+** No further cleanup is attempted: the process simply
+** terminates with exit status 1.
+*/
+exit(1);
+}
+
+/*****************************
+**    STOPWATCH ROUTINES    **
+*****************************/
+
+/****************************
+** StartStopwatch
+** Starts a software stopwatch.  Returns the first value of
+** the stopwatch in ticks.
+*/
+unsigned long StartStopwatch()
+{
+#if defined(MACTIMEMGR)
+/*
+** Mac (Code Warrior): arm the Time Manager task.  The value
+** handed back is only a placeholder.
+*/
+InsTime((QElemPtr)&myTMTask);
+PrimeTime((QElemPtr)&myTMTask,-MacHSTdelay);
+return(1UL);
+#elif defined(WIN31TIMER)
+/*
+** Win 3.x: refresh win31tinfo through the 16-bit timer call
+** and report its DWORD reading as an unsigned long.
+*/
+_Call16(lpfn,"p",&win31tinfo);
+return((unsigned long)win31tinfo.dwmsSinceStart);
+#else
+/* Everyone else: the C library clock() reading. */
+return((unsigned long)clock());
+#endif
+}
+
+/****************************
+** StopStopwatch
+** Stops the software stopwatch.  Expects as an input argument
+** the stopwatch start time.
+*/
+unsigned long StopStopwatch(unsigned long startticks)
+{
+#if defined(MACTIMEMGR)
+/*
+** Mac (Code Warrior): startticks is ignored.  The elapsed
+** time comes from the Time Manager task, in microseconds.
+*/
+RmvTime((QElemPtr)&myTMTask);
+return((unsigned long)(MacHSTdelay+myTMTask.tmCount-MacHSTohead));
+#elif defined(WIN31TIMER)
+/* Win 3.x: fresh timer reading minus the start value. */
+_Call16(lpfn,"p",&win31tinfo);
+return((unsigned long)win31tinfo.dwmsSinceStart-startticks);
+#else
+/* Everyone else: current clock() reading minus the start value. */
+return((unsigned long)clock()-startticks);
+#endif
+}
+
+/****************************
+** TicksToSecs
+** Converts ticks to seconds.  Converts ticks to integer
+** seconds, discarding any fractional amount.
+*/
+unsigned long TicksToSecs(unsigned long tickamount)
+{
+/* Exactly one branch below is compiled, so a value is always
+** returned.  Branch order matches the old stacked #ifdefs. */
+#if defined(CLOCKWCT)
+return((unsigned long)(tickamount/CLK_TCK));
+#elif defined(MACTIMEMGR)
+/* +++ MAC time manager version (using timer in microseconds) +++ */
+return((unsigned long)(tickamount/1000000));
+#elif defined(CLOCKWCPS)
+return((unsigned long)(tickamount/CLOCKS_PER_SEC));
+#elif defined(WIN31TIMER)
+/* NOTE(review): divisor implies millisecond ticks (dwmsSinceStart) */
+return((unsigned long)(tickamount/1000L));
+#else
+/*
+** No timer macro configured: StartStopwatch() falls back to
+** clock() in that case, so convert with CLOCKS_PER_SEC.
+*/
+return((unsigned long)(tickamount/CLOCKS_PER_SEC));
+#endif
+}
+
+/****************************
+** TicksToFracSecs
+** Converts ticks to fractional seconds.  In other words,
+** this returns the exact conversion from ticks to
+** seconds.
+*/
+double TicksToFracSecs(unsigned long tickamount)
+{
+/* Exactly one branch below is compiled, so a value is always
+** returned.  Branch order matches the old stacked #ifdefs. */
+#if defined(CLOCKWCT)
+return((double)tickamount/(double)CLK_TCK);
+#elif defined(MACTIMEMGR)
+/* +++ MAC time manager version (microsecond timer) +++ */
+return((double)tickamount/(double)1000000);
+#elif defined(CLOCKWCPS)
+return((double)tickamount/(double)CLOCKS_PER_SEC);
+#elif defined(WIN31TIMER)
+/* 1000 ticks per second here, as in TicksToSecs */
+return((double)tickamount/(double)1000);
+#else
+/*
+** No timer macro: StartStopwatch() uses clock(), so use CLOCKS_PER_SEC.
+*/
+return((double)tickamount/(double)CLOCKS_PER_SEC);
+#endif
+}
+
diff --git a/sysspec.h b/sysspec.h
new file mode 100644
index 0000000..ba57a96
--- /dev/null
+++ b/sysspec.h
@@ -0,0 +1,168 @@
+/*
+** sysspec.h
+** Header file for sysspec.c
+** BYTEmark (tm)
+** BYTE's Native Mode Benchmarks
+** Rick Grehan, BYTE Magazine
+**
+** Creation:
+** Revision: 3/95
+**
+** DISCLAIMER
+** The source, executable, and documentation files that comprise
+** the BYTEmark benchmarks are made available on an "as is" basis.
+** This means that we at BYTE Magazine have made every reasonable
+** effort to verify that the there are no errors in the source and
+** executable code.  We cannot, however, guarantee that the programs
+** are error-free.  Consequently, McGraw-HIll and BYTE Magazine make
+** no claims in regard to the fitness of the source code, executable
+** code, and documentation of the BYTEmark.
+**  Furthermore, BYTE Magazine, McGraw-Hill, and all employees
+** of McGraw-Hill cannot be held responsible for any damages resulting
+** from the use of this code or the results obtained from using
+** this code.
+*/
+
+/*
+** Standard includes
+*/
+#include <stdlib.h>
+#include <stdio.h>
+#include <time.h>
+#include <string.h>
+
+#include "nmglobal.h"
+
+#if !defined(MAC) && !defined(OSX)
+#include <malloc.h>
+#endif
+
+
+/*
+** System-specific includes
+*/
+
+#ifdef DOS16MEM
+#include "dos.h"
+#endif
+
+/* #include "time.h"
+#include "io.h"
+#include "fcntl.h"
+#include "sys\stat.h" */
+/* Removed for MSVC++
+#include "alloc.h"
+*/
+
+/*
+** MAC Time Manager routines (from Code Warrior)
+*/
+#ifdef MACTIMEMGR
+#include <memory.h>
+#include <lowmem.h>
+#include <Types.h>
+#include <Timer.h>
+extern struct TMTask myTMTask;
+extern long MacHSTdelay,MacHSTohead;
+#endif
+
+/*
+** Windows 3.1 timer defines
+*/
+#ifdef WIN31TIMER
+#include <windows.h>
+#include <toolhelp.h>
+TIMERINFO win31tinfo;   /* Passed to _Call16() by the stopwatch routines. NOTE(review): defined, not extern, in a header */
+HANDLE hThlp;           /* NOTE(review): not referenced in the reviewed part of sysspec.c; purpose unconfirmed */
+FARPROC lpfn;           /* Entry point the stopwatch routines invoke through _Call16() */
+#endif
+
+/**************
+** EXTERNALS **
+**************/
+extern ulong mem_array[2][MEM_ARRAY_SIZE]; /* [0][n] = true address, [1][n] = adjusted address */
+extern int mem_array_ents;                 /* Number of entries currently held in mem_array */
+extern int global_align;                   /* NOTE(review): not used in the reviewed code; meaning unconfirmed */
+
+/****************************
+**   FUNCTION PROTOTYPES   **
+****************************/
+
+farvoid *AllocateMemory(unsigned long nbytes,
+                int *errorcode);
+/* NOTE(review): AllocateMemory, FreeMemory and MoveMemory are defined outside the reviewed code; semantics unconfirmed. */
+void FreeMemory(farvoid *mempointer,
+                int *errorcode);
+
+void MoveMemory( farvoid *destination,
+                farvoid *source,
+                unsigned long nbytes);
+/* memmove replacement for DOS far pointers; only declared when DOS16MEM is defined. */
+#ifdef DOS16MEM
+void FarDOSmemmove(farvoid *destination,
+                farvoid *source,
+                unsigned long nbytes);
+#endif
+/* Marks the memory array empty (mem_array_ents = 0). */
+void InitMemArray(void);
+/* Appends a true/adjusted address pair; returns 0, or -1 when the array is full. */
+int AddMemArray(ulong true_addr, ulong adj_addr);
+/* Removes the entry matching adj_addr and yields its true address; 0 if found, else -1. */
+int RemoveMemArray(ulong adj_addr,ulong *true_addr);
+/* Prints the error context string and the numeric error code. */
+void ReportError(char *context, int errorcode);
+/* Ends the program with exit(1). */
+void ErrorExit();
+/* Creates the named file, or truncates an existing one; the file is not left open. */
+void CreateFile(char *filename,
+                int *errorcode);
+/* DOS16 file routines: the file handle is an int descriptor. */
+#ifdef DOS16
+int bmOpenFile(char *fname,
+                int *errorcode);
+
+void CloseFile(int fhandle,
+                int *errorcode);
+/* Seeks to offset (from the start of the file), then reads nbytes & 0xFFFF bytes. */
+void readfile(int fhandle,
+                unsigned long offset,
+                unsigned long nbytes,
+                void *buffer,
+                int *errorcode);
+/* Seeks to offset (from the start of the file), then writes nbytes & 0xFFFF bytes. */
+void writefile(int fhandle,
+                unsigned long offset,
+                unsigned long nbytes,
+                void *buffer,
+                int *errorcode);
+#endif
+/* LINUX file routines: same names, but the file handle is a stdio FILE pointer. */
+#ifdef LINUX
+FILE *bmOpenFile(char *fname,
+                int *errorcode);
+
+void CloseFile(FILE *fhandle,
+                int *errorcode);
+
+void readfile(FILE *fhandle,
+                unsigned long offset,
+                unsigned long nbytes,
+                void *buffer,
+                int *errorcode);
+
+void writefile(FILE *fhandle,
+                unsigned long offset,
+                unsigned long nbytes,
+                void *buffer,
+                int *errorcode);
+
+#endif
+/* Starts a timed run and returns the initial tick reading. */
+unsigned long StartStopwatch();
+/* Returns the elapsed tick count for a run begun with StartStopwatch. */
+unsigned long StopStopwatch(unsigned long startticks);
+/* Whole seconds contained in tickamount; the fraction is discarded. */
+unsigned long TicksToSecs(unsigned long tickamount);
+/* Seconds contained in tickamount, fraction included. */
+double TicksToFracSecs(unsigned long tickamount);
+
diff --git a/wordcat.h b/wordcat.h
new file mode 100644
index 0000000..9f18b42
--- /dev/null
+++ b/wordcat.h
@@ -0,0 +1,81 @@
+/*
+** wordcat.h
+** Word catalog
+** BYTEmark (tm)
+** BYTE's Native Mode Benchmarks
+** Rick Grehan, BYTE Magazine
+**
+** Creation:
+** Revision: 3/95
+**
+** DISCLAIMER
+** The source, executable, and documentation files that comprise
+** the BYTEmark benchmarks are made available on an "as is" basis.
+** This means that we at BYTE Magazine have made every reasonable
+** effort to verify that the there are no errors in the source and
+** executable code.  We cannot, however, guarantee that the programs
+** are error-free.  Consequently, McGraw-HIll and BYTE Magazine make
+** no claims in regard to the fitness of the source code, executable
+** code, and documentation of the BYTEmark.
+**  Furthermore, BYTE Magazine, McGraw-Hill, and all employees
+** of McGraw-Hill cannot be held responsible for any damages resulting
+** from the use of this code or the results obtained from using
+** this code.
+*/
+
+/*
+** Word catalog
+*/
+#define WORDCATSIZE 50          /* Number of entries in wordcatarray */
+
+char *wordcatarray[WORDCATSIZE] =       /* NOTE(review): defined (not merely declared) in this header */
+{	"Hello",
+	"He",
+	"Him",
+	"the",
+	"this",
+	"that",
+	"though",
+	"rough",
+	"cough",
+	"obviously",
+	"But",
+	"but",
+	"bye",
+	"begin",
+	"beginning",
+	"beginnings",
+	"of",
+	"our",
+	"ourselves",
+	"yourselves",
+	"to",
+	"together",
+	"togetherness",
+	"from",
+	"either",
+	"I",
+	"A",
+	"return",
+	"However",
+	"that",		/* same word as index 5; the table holds 49 distinct words */
+	"example",
+	"yet",
+	"quickly",
+	"all",
+	"if",
+	"were",
+	"includes",
+	"always",
+	"never",
+	"not",
+	"small",
+	"returns",
+	"set",
+	"basic",
+	"Entered",
+	"with",
+	"used",
+	"shown",
+	"you",
+	"know" };