fpemulation.c


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186

#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <stdbool.h>
#include <string.h>
#include <math.h>
#include <limits.h>
#include <time.h>

#include "cleanbench.h"
#include "randnum.h"
#include "emfloat.h"


/*****************************
** FLOATING-POINT EMULATION **
*****************************/

/*
** The following constant is the maximum number of loops
** of the emulated floating point test that the system
** will allow before flagging an error.  This is not a
** critical constant, and can be altered if your system is
** a real barn-burner.
*/
#define LOOP_MAX 500000L

/* Number of InternalFPF elements allocated for each benchmark array. */
#define ARRAY_SIZE 3000

/* Forward declarations of the file-local helpers defined further down. */
static clock_t DoEmFloatIteration(InternalFPF *abase, InternalFPF *bbase, InternalFPF *cbase,
        unsigned long loops);
static void SetupCPUEmFloatArrays(InternalFPF *abase, InternalFPF *bbase);

/**************
** DoEmFloat **
***************
** Perform the floating-point emulation routines portion of the
** CPU benchmark.
**
** Three ARRAY_SIZE-element InternalFPF arrays are allocated and the
** two operand arrays are filled by SetupCPUEmFloatArrays().  On the
** first call only, the per-iteration loop count is doubled until one
** timed iteration takes longer than MINIMUM_TICKS; the resulting
** count is kept in a static and reused by later calls.  Timed
** iterations are then repeated until at least MINIMUM_SECONDS worth
** of clock() ticks have accumulated.
**
** Returns the number of loops (complete passes over the arrays)
** executed per second of measured time.
**
** Terminates the process with exit(1) if an array cannot be
** allocated, or if the loop count reaches LOOP_MAX without a single
** iteration exceeding MINIMUM_TICKS.
*/
double
DoEmFloat(void)
{
        InternalFPF     *abase = NULL;
        InternalFPF     *bbase = NULL;
        InternalFPF     *cbase = NULL;
        clock_t         total_time = 0;
        int             iterations = 0;
        static bool     is_adjusted = false;
        static long     loops = 1;

        abase = malloc(ARRAY_SIZE * sizeof(InternalFPF));
        bbase = malloc(ARRAY_SIZE * sizeof(InternalFPF));
        cbase = malloc(ARRAY_SIZE * sizeof(InternalFPF));

        /* Refuse to run the benchmark on arrays that were never allocated. */
        if (abase == NULL || bbase == NULL || cbase == NULL) {
                fputs("CPU:EMFPU -- unable to allocate benchmark arrays\n", stderr);
                free(abase);
                free(bbase);
                free(cbase);
                exit(1);
        }

        SetupCPUEmFloatArrays(abase, bbase); /* FIXME: ugly */

        /* See if we need to do self-adjusting code. */
        if (!is_adjusted) {
                clock_t elapsed;

                is_adjusted = true;

                /*
                ** Do an iteration of the tests.  If the elapsed time is
                ** not above the minimum, double the loop count and try
                ** again, giving up once LOOP_MAX has been reached.
                */
                do {
                        loops += loops;
                        elapsed = DoEmFloatIteration(abase, bbase, cbase, loops);
                } while ((elapsed <= MINIMUM_TICKS) && (loops < LOOP_MAX));

                /*
                ** Verify that the self-adjustment code worked.  If even
                ** the last (largest) iteration did not exceed the
                ** minimum, the timing loop below could not be trusted
                ** to make progress, so flag the error instead.
                */
                if (elapsed <= MINIMUM_TICKS) {
                        fputs("CPU:EMFPU -- CMPUEMFLOATLOOPMAX limit hit", stderr);
                        free(abase);
                        free(bbase);
                        free(cbase);
                        exit(1);
                }
        }

        /*
        ** All's well if we get here.  Repeatedly perform floating
        ** tests until the accumulated time reaches the # of seconds
        ** requested.  Each iteration performs `loops` passes over
        ** the arrays.
        */
        do {
                total_time += DoEmFloatIteration(abase, bbase, cbase, loops);
                ++iterations;
        } while (total_time < MINIMUM_SECONDS * CLOCKS_PER_SEC);

        free(abase);
        free(bbase);
        free(cbase);

        /*
        ** Multiply in floating point: the integer product
        ** iterations * loops * CLOCKS_PER_SEC can overflow on
        ** platforms where long is 32 bits wide.
        */
        return ((double)iterations * (double)loops * (double)CLOCKS_PER_SEC)
                / (double)total_time;
}

/***********************
** DoEmFloatIteration **
************************
** Run one timed iteration of the emulated floating-point
** benchmark.  "An iteration" consists of `loops` complete passes
** over the ARRAY_SIZE-element arrays; in every pass each element
** of abase is combined with the matching element of bbase and the
** result is stored in the matching element of cbase.
**
** Returns the number of clock() ticks that elapsed during the
** iteration.
*/
static clock_t
DoEmFloatIteration(InternalFPF *abase,
                InternalFPF *bbase,
                InternalFPF *cbase,
                unsigned long loops)
{
        /*
        ** Operation selector for an array slot, indexed by slot % 16:
        **   0 = add, 1 = subtract, 2 = multiply, 3 = divide.
        ** Every run of 16 slots therefore performs operations in
        ** the following ratios:
        **   4 adds, 4 subtracts, 5 multiplies, 3 divides
        ** (adds and subtracts being nearly the same operation).
        */
        static const unsigned char op_for_slot[16] = {
                0, 0, 0, 0,
                1, 1, 1, 1,
                2, 2, 2, 2, 2,
                3, 3, 3
        };
        clock_t began;
        clock_t ended;
        unsigned long pass;
        unsigned long slot;

        began = clock();

        for (pass = 0; pass < loops; pass++) {
                for (slot = 0; slot < ARRAY_SIZE; slot++) {
                        const unsigned char op = op_for_slot[slot % 16];

                        if (op == 0) {
                                /* Add */
                                AddSubInternalFPF(0, abase + slot, bbase + slot, cbase + slot);
                        } else if (op == 1) {
                                /* Subtract */
                                AddSubInternalFPF(1, abase + slot, bbase + slot, cbase + slot);
                        } else if (op == 2) {
                                /* Multiply */
                                MultiplyInternalFPF(abase + slot, bbase + slot, cbase + slot);
                        } else if (op == 3) {
                                /* Divide */
                                DivideInternalFPF(abase + slot, bbase + slot, cbase + slot);
                        }
                }
        }

        ended = clock();

        return ended - began;
}

/**************************
** SetupCPUEmFloatArrays **
***************************
** Fill the operand arrays used by the emulated floating-point
** tests.  For every one of the ARRAY_SIZE slots a random integer
** numerator is drawn and converted with Int32ToInternalFPF(); it
** is then divided by one freshly drawn random denominator to give
** the abase element and by a second one to give the bbase element.
*/
static void
SetupCPUEmFloatArrays(InternalFPF *abase, InternalFPF *bbase)
{
	InternalFPF numerator;
	InternalFPF denominator;
	unsigned long slot = 0;

	/*
	** Reseed the random number generator with a fixed value so that
	** every run works on the same data.
	*/
	randnum(13);

	while (slot < ARRAY_SIZE) {
		/* One numerator is shared by both quotients of this slot. */
		Int32ToInternalFPF(randwc(50000), &numerator);

		/* The +1 presumably keeps the divisor away from zero. */
		Int32ToInternalFPF(randwc(50000) + 1, &denominator);
		DivideInternalFPF(&numerator, &denominator, abase + slot);

		Int32ToInternalFPF(randwc(50000) + 1, &denominator);
		DivideInternalFPF(&numerator, &denominator, bbase + slot);

		slot++;
	}
}