1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
|
#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#include "../alpha_mmintrin.h"
#include "minmax.h"
/* Number of timed passes per benchmark loop; accumulated timings are divided
 * by this value to report a per-pass mean. */
enum { ITERATIONS = 10000 };
/*
 * Benchmark a scalar 16-bit MIN loop against the packed __minuw4 operation.
 *
 * Two 4096-byte input buffers are filled with pseudo-random bytes.  Each
 * variant is then run ITERATIONS times over the whole buffer, timed with
 * __rpcc(), and the mean per-pass counter delta is printed.  Before the
 * timings are reported, the outputs of the two variants are compared
 * byte-for-byte.
 *
 * NOTE(review): __minuw4 is presumably a per-lane unsigned 16-bit minimum
 * over the four lanes of a 64-bit word -- confirm against alpha_mmintrin.h.
 * NOTE(review): __rpcc() looks like a cycle-counter read, in which case the
 * printed values are cycles rather than the "nanoseconds" the label says --
 * confirm before trusting the unit.
 *
 * Returns 0 when both variants produced identical output, EXIT_FAILURE
 * otherwise.
 */
int main(int argc, char **argv) {
    enum {
        QWORDS = 512,          /* 64-bit words per buffer */
        WORDS  = QWORDS * 4,   /* 16-bit lanes per buffer */
        BYTES  = QWORDS * 8    /* bytes per buffer        */
    };

    /* One buffer viewed at three granularities.  Going through a union keeps
     * the 16-bit and 64-bit accesses to the same storage well-defined, which
     * casting a uint64_t array to uint16_t * does not. */
    typedef union {
        uint64_t q[QWORDS];
        uint16_t w[WORDS];
        uint8_t  b[BYTES];
    } lane_buf;

    /* Static storage: zero-initialised and kept off the stack. */
    static lane_buf a, b;   /* inputs                  */
    static lane_buf c, d;   /* scalar / packed outputs */

    uint64_t start, end, overhead;
    uint64_t simd_total = 0, no_simd_total = 0;
    int i, pass;

    (void)argc;

    /* Cost of a back-to-back timer read, subtracted from every sample. */
    start = __rpcc();
    end = __rpcc();
    overhead = end - start;

    /* Fill the inputs with bytes covering the full 0..255 range. */
    srand((unsigned)time(NULL));
    for (i = 0; i < BYTES; i++) {
        a.b[i] = (uint8_t)(rand() % 256);
        b.b[i] = (uint8_t)(rand() % 256);
    }

    /* Scalar reference: one 16-bit MIN per lane. */
    for (pass = 0; pass < ITERATIONS; pass++) {
        /* Clear the output *before* the timed region rather than after it,
         * so the results of the last pass survive for the final comparison. */
        memset(&c, 0, sizeof c);

        start = __rpcc();
        for (i = 0; i < WORDS; i++) {
            const uint16_t x = a.w[i];
            const uint16_t y = b.w[i];
            c.w[i] = MIN(x, y);
        }
        end = __rpcc() - overhead;
        no_simd_total += end - start;
    }
    no_simd_total /= ITERATIONS;

    /* Packed variant: one __minuw4 per 64-bit word. */
    for (pass = 0; pass < ITERATIONS; pass++) {
        memset(&d, 0, sizeof d);

        start = __rpcc();
        for (i = 0; i < QWORDS; i++) {
            const uint64_t x = a.q[i];
            const uint64_t y = b.q[i];
            d.q[i] = __minuw4(x, y);
        }
        end = __rpcc() - overhead;
        simd_total += end - start;
    }
    simd_total /= ITERATIONS;

    /* Only report timings if both variants computed the same result. */
    if (memcmp(&c, &d, sizeof c) != 0) {
        puts("Final arrays are not equal. Something happened.");
        return EXIT_FAILURE;
    }

    printf("%s:\n", argv[0]);
    printf(" SIMD time: %9" PRIu64 " nanoseconds\nnon-SIMD time: %9" PRIu64 " nanoseconds\n", simd_total, no_simd_total);
    return 0;
}
|