summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- .gitignore 7
-rw-r--r-- Makefile 24
-rw-r--r-- clear_page.c 88
-rw-r--r-- ev5-clear_page.S 39
-rw-r--r-- ev6-clear_page-prefetch_m.S 54
-rw-r--r-- ev6-clear_page.S 54
-rw-r--r-- test.c 170
7 files changed, 436 insertions, 0 deletions
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..918429d
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,7 @@
+clear_page
+ev5
+ev6
+ev6-prefetch_m
+ev6-no-prefetch
+ev7
+test.o
diff --git a/Makefile b/Makefile
new file mode 100644
index 0000000..73a9d66
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,24 @@
+CFLAGS ?= -g -O2 -mcpu=ev67 -pipe -std=gnu99
+.PHONY: all clean
+all: ev5 ev6 ev6-no-prefetch ev7 clear_page ev6-prefetch_m
+
+ev5: test.o ev5-clear_page.S
+	$(CC) $^ -o $@
+# ev6, ev6-no-prefetch and ev7 all build ev6-clear_page.S; -DWH64 sets the text substituted for WH64 in that file.
+ev6: test.o ev6-clear_page.S
+	$(CC) -DWH64="wh64" $^ -o $@
+
+ev6-no-prefetch: test.o ev6-clear_page.S
+	$(CC) -DWH64="nop #" $^ -o $@
+
+ev7: test.o ev6-clear_page.S
+	$(CC) -DWH64="wh64en" $^ -o $@
+
+clear_page: test.o clear_page.c
+	$(CC) $(CFLAGS) $^ -o $@
+
+ev6-prefetch_m: test.o ev6-clear_page-prefetch_m.S
+	$(CC) $(CFLAGS) $^ -o $@
+
+clean:
+	rm -f *.o ev5 ev6 ev6-no-prefetch ev7 clear_page ev6-prefetch_m
diff --git a/clear_page.c b/clear_page.c
new file mode 100644
index 0000000..6940328
--- /dev/null
+++ b/clear_page.c
@@ -0,0 +1,88 @@
+/* vim: set expandtab tabstop=4 softtabstop=4 shiftwidth=4: */
+
+#define PAGE_SIZE 8192
+
+#define wh64(p) asm ("wh64 (%0)" : : "r"(p) : "memory")
+#define ecb(p) asm("ecb (%0)" : : "r"(p) : "memory")
+#define prefetch(p) asm("ldl $31, 0(%0)" : : "r"(p))
+#define prefetch_en(p) asm("ldq $31, 0(%0)" : : "r"(p))
+#define prefetch_m(p) asm("lds $f31, 0(%0)" : : "r"(p))
+#define prefetch_men(p) asm("ldt $f31, 0(%0)" : : "r"(p))
+#define nop asm("nop")
+
+#define PREFETCH_LINES 2
+
+//#define UNROLL_EVERYTHING
+
+#define ZERO(p) do { /* prefetch PREFETCH_LINES * 8 longs ahead of p, zero p[0..7], then advance p by 8 longs */ \
+ prefetch_m(p + PREFETCH_LINES * 8); \
+ (p)[0] = 0; nop; (p)[1] = 0; nop; (p)[2] = 0; nop; (p)[3] = 0; nop; \
+ (p)[4] = 0; nop; (p)[5] = 0; nop; (p)[6] = 0; nop; (p)[7] = 0; nop; \
+ p += 8; \
+} while (0)
+
+#define ZERO_PAGE(p) do { /* fully unrolled: 16 rows x 8 = 128 ZERO() steps, 1024 longs in total; p ends 1024 longs further on */ \
+ ZERO(p); ZERO(p); ZERO(p); ZERO(p); ZERO(p); ZERO(p); ZERO(p); ZERO(p); \
+ ZERO(p); ZERO(p); ZERO(p); ZERO(p); ZERO(p); ZERO(p); ZERO(p); ZERO(p); \
+ ZERO(p); ZERO(p); ZERO(p); ZERO(p); ZERO(p); ZERO(p); ZERO(p); ZERO(p); \
+ ZERO(p); ZERO(p); ZERO(p); ZERO(p); ZERO(p); ZERO(p); ZERO(p); ZERO(p); \
+ ZERO(p); ZERO(p); ZERO(p); ZERO(p); ZERO(p); ZERO(p); ZERO(p); ZERO(p); \
+ ZERO(p); ZERO(p); ZERO(p); ZERO(p); ZERO(p); ZERO(p); ZERO(p); ZERO(p); \
+ ZERO(p); ZERO(p); ZERO(p); ZERO(p); ZERO(p); ZERO(p); ZERO(p); ZERO(p); \
+ ZERO(p); ZERO(p); ZERO(p); ZERO(p); ZERO(p); ZERO(p); ZERO(p); ZERO(p); \
+ ZERO(p); ZERO(p); ZERO(p); ZERO(p); ZERO(p); ZERO(p); ZERO(p); ZERO(p); \
+ ZERO(p); ZERO(p); ZERO(p); ZERO(p); ZERO(p); ZERO(p); ZERO(p); ZERO(p); \
+ ZERO(p); ZERO(p); ZERO(p); ZERO(p); ZERO(p); ZERO(p); ZERO(p); ZERO(p); \
+ ZERO(p); ZERO(p); ZERO(p); ZERO(p); ZERO(p); ZERO(p); ZERO(p); ZERO(p); \
+ ZERO(p); ZERO(p); ZERO(p); ZERO(p); ZERO(p); ZERO(p); ZERO(p); ZERO(p); \
+ ZERO(p); ZERO(p); ZERO(p); ZERO(p); ZERO(p); ZERO(p); ZERO(p); ZERO(p); \
+ ZERO(p); ZERO(p); ZERO(p); ZERO(p); ZERO(p); ZERO(p); ZERO(p); ZERO(p); \
+ ZERO(p); ZERO(p); ZERO(p); ZERO(p); ZERO(p); ZERO(p); ZERO(p); ZERO(p); \
+} while (0)
+
+/* Zero one page (PAGE_SIZE bytes) in chunks of 8 longs, prefetching PREFETCH_LINES chunks ahead of the stores. */
+void
+clear_page(void *page)
+{
+    int chunks = PAGE_SIZE / 64;                    /* a chunk is 8 longs, i.e. 64 bytes when long is 8 bytes */
+    int chunks_prefetch = chunks - PREFETCH_LINES;  /* chunks whose look-ahead target is still inside the page */
+    int chunks_no_prefetch = PREFETCH_LINES;        /* tail chunks, cleared without prefetching */
+
+    long *p = page;
+#ifdef UNROLL_EVERYTHING
+    ZERO_PAGE(p);
+#else
+    while (chunks_prefetch > 0) {
+        prefetch_m(p + PREFETCH_LINES * 8);
+        p[0] = 0; nop;
+        p[1] = 0; nop;
+        p[2] = 0; nop;
+        p[3] = 0; nop;
+        p[4] = 0; nop;
+        p[5] = 0; nop;
+        p[6] = 0; nop;
+        p[7] = 0; nop;
+
+        nop; nop; nop; nop;     /* NOTE(review): presumably padding for instruction scheduling - confirm */
+
+        chunks_prefetch--;
+        p += 8;
+    }
+
+    while (chunks_no_prefetch > 0) {    /* same stores, but no prefetch: its target would lie past the page */
+        p[0] = 0; nop;
+        p[1] = 0; nop;
+        p[2] = 0; nop;
+        p[3] = 0; nop;
+        p[4] = 0; nop;
+        p[5] = 0; nop;
+        p[6] = 0; nop;
+        p[7] = 0; nop;
+
+        nop; nop; nop; nop;
+
+        chunks_no_prefetch--;
+        p += 8;
+    }
+#endif
+}
diff --git a/ev5-clear_page.S b/ev5-clear_page.S
new file mode 100644
index 0000000..a221ae2
--- /dev/null
+++ b/ev5-clear_page.S
@@ -0,0 +1,39 @@
+/*
+ * arch/alpha/lib/clear_page.S
+ *
+ * Zero an entire page.
+ */
+
+ .text
+ .align 4
+ .global clear_page
+ .ent clear_page
+clear_page:
+ .prologue 0
+
+ lda $0,128 /* $0 = chunks left: 128 passes x 64 bytes = 8192 bytes */
+ nop /* NOTE(review): the nop/unop fillers look like padding to keep four-instruction groups - confirm */
+ unop
+ nop
+
+1: stq $31,0($16) /* each pass stores $31 (the zero register) to 8 quadwords at $16, assumed to be the page pointer argument */
+ stq $31,8($16)
+ stq $31,16($16)
+ stq $31,24($16)
+
+ stq $31,32($16)
+ stq $31,40($16)
+ stq $31,48($16)
+ subq $0,1,$0 /* one chunk fewer to go */
+
+ stq $31,56($16)
+ addq $16,64,$16 /* step to the next 64-byte chunk */
+ unop
+ bne $0,1b /* repeat until the chunk counter reaches zero */
+
+ ret
+ nop
+ unop
+ nop
+
+ .end clear_page
diff --git a/ev6-clear_page-prefetch_m.S b/ev6-clear_page-prefetch_m.S
new file mode 100644
index 0000000..7b39954
--- /dev/null
+++ b/ev6-clear_page-prefetch_m.S
@@ -0,0 +1,54 @@
+/*
+ * arch/alpha/lib/ev6-clear_page.S
+ *
+ * Zero an entire page.
+ */
+
+ .text
+ .align 4
+ .global clear_page
+ .ent clear_page
+clear_page:
+ .prologue 0
+
+ lda $0,128 /* $0 = chunks left to clear: 128 x 64 bytes = 8192 bytes */
+ lda $1,125 /* $1 = countdown that limits how far the look-ahead pointer advances (see cmovgt) */
+ addq $16,192,$17 /* $17 = look-ahead pointer, 192 bytes (3 chunks) past $16 */
+ nop
+
+ lds $f31,0($16) /* loads into $f31 act as prefetches here (cf. the file name, prefetch_m): touch the first three chunks up front */
+ lds $f31,64($16)
+ lds $f31,128($16)
+ nop
+
+1: lds $f31,0($17) /* prefetch the chunk at the look-ahead pointer */
+ stq $31,0($16) /* stores of $31 (the zero register) clear the current 64-byte chunk at $16 */
+ subq $0,1,$0 /* one chunk fewer to go */
+ subq $1,1,$1
+
+ stq $31,8($16)
+ stq $31,16($16)
+ addq $17,64,$2 /* $2 = candidate next look-ahead address */
+ nop
+
+ stq $31,24($16)
+ stq $31,32($16)
+ cmovgt $1,$2,$17 /* advance $17 only while $1 > 0: it moves 124 times, then stays on the last chunk of the page (offset 8128) */
+ nop
+
+ stq $31,40($16)
+ stq $31,48($16)
+ nop
+ nop
+
+ stq $31,56($16)
+ addq $16,64,$16 /* step to the next 64-byte chunk */
+ nop
+ bne $0,1b /* repeat until the chunk counter reaches zero */
+
+ ret
+ nop
+ nop
+ nop
+
+ .end clear_page
diff --git a/ev6-clear_page.S b/ev6-clear_page.S
new file mode 100644
index 0000000..c647f49
--- /dev/null
+++ b/ev6-clear_page.S
@@ -0,0 +1,54 @@
+/*
+ * arch/alpha/lib/ev6-clear_page.S
+ *
+ * Zero an entire page.
+ */
+
+ .text
+ .align 4
+ .global clear_page
+ .ent clear_page
+clear_page:
+ .prologue 0
+
+ lda $0,128 /* $0 = chunks left to clear: 128 x 64 bytes = 8192 bytes */
+ lda $1,125 /* $1 = countdown that limits how far the look-ahead pointer advances (see cmovgt) */
+ addq $16,64,$2 /* $2, $3 = addresses of the second and third chunk */
+ addq $16,128,$3
+
+ addq $16,192,$17 /* $17 = look-ahead pointer, 192 bytes (3 chunks) past $16 */
+ WH64 ($16) /* WH64 is defined on the compiler command line (Makefile: wh64, wh64en or "nop #"); hint the first three chunks */
+ WH64 ($2)
+ WH64 ($3)
+
+1: WH64 ($17) /* issue the hint for the chunk at the look-ahead pointer */
+ stq $31,0($16) /* stores of $31 (the zero register) clear the current 64-byte chunk at $16 */
+ subq $0,1,$0 /* one chunk fewer to go */
+ subq $1,1,$1
+
+ stq $31,8($16)
+ stq $31,16($16)
+ addq $17,64,$2 /* $2 = candidate next look-ahead address */
+ nop
+
+ stq $31,24($16)
+ stq $31,32($16)
+ cmovgt $1,$2,$17 /* advance $17 only while $1 > 0: it moves 124 times, then stays on the last chunk of the page (offset 8128) */
+ nop
+
+ stq $31,40($16)
+ stq $31,48($16)
+ nop
+ nop
+
+ stq $31,56($16)
+ addq $16,64,$16 /* step to the next 64-byte chunk */
+ nop
+ bne $0,1b /* repeat until the chunk counter reaches zero */
+
+ ret
+ nop
+ nop
+ nop
+
+ .end clear_page
diff --git a/test.c b/test.c
new file mode 100644
index 0000000..e322754
--- /dev/null
+++ b/test.c
@@ -0,0 +1,170 @@
+/* vim: set expandtab tabstop=4 softtabstop=4 shiftwidth=4: */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include <sys/unistd.h>
+#include <sys/syscall.h>
+#include <sys/prctl.h>
+#include <sys/ioctl.h>
+#include <sys/mman.h>
+#include <fcntl.h>
+#include <linux/perf_event.h>
+
+#ifndef __NR_perf_event_open
+#error __NR_perf_event_open not defined.
+#endif
+
+#define PAGE_SIZE 8192
+
+#define NUM_TIMES 100000
+
+static const char *event_name[] = { /* output labels; entry i is printed for the counter opened from attr[i] in main() */
+ "cycles",
+ "instructions",
+ "cache-misses",
+ "mbox-replays"
+};
+
+/* Record the structure size in attr->size, then issue the raw perf_event_open system call and return its result. */
+static inline int
+sys_perf_event_open(struct perf_event_attr *attr, pid_t pid, int cpu, int group_fd, unsigned long flags)
+{
+    attr->size = sizeof(struct perf_event_attr);
+    return syscall(__NR_perf_event_open, attr, pid, cpu, group_fd, flags);
+}
+
+void clear_page(void *page);
+
+char *src __attribute__((used));
+
+/* Sanity-check clear_page() on a guarded page, then count perf events over NUM_TIMES clears of <num_pages> pages. */
+int
+main(int argc, char *argv[])
+{
+    struct perf_event_attr *attr;
+    int fd[4];
+    char *ptr;
+    int num_pages;
+
+    if (argc != 2) {
+        printf("Usage: %s <num_pages>\n", argv[0]);
+        return 1;
+    }
+    num_pages = atoi(argv[1]);
+
+    /* Sanity test to ensure a whole page is cleared, and nothing more */
+    if (posix_memalign((void **)&src, PAGE_SIZE, 3 * PAGE_SIZE))
+        return 1;
+    memset(src, 0xff, 3 * PAGE_SIZE);
+
+    mprotect(src + 0 * PAGE_SIZE, PAGE_SIZE, PROT_NONE); /* guard pages around the page under test */
+    mprotect(src + 2 * PAGE_SIZE, PAGE_SIZE, PROT_NONE);
+
+    clear_page(src + PAGE_SIZE);
+    /* Restore read AND write access: this memory goes back to the allocator, which may write to it or hand it out again */
+    mprotect(src + 0 * PAGE_SIZE, PAGE_SIZE, PROT_READ | PROT_WRITE);
+    mprotect(src + 2 * PAGE_SIZE, PAGE_SIZE, PROT_READ | PROT_WRITE);
+
+    if ((ptr = memchr(src + 0 * PAGE_SIZE, 0x00, PAGE_SIZE)) != NULL) {
+        fprintf(stderr, "Sanity test failed: page 0 cleared at byte %lu\n",
+                (unsigned long)(ptr - (src + 0 * PAGE_SIZE)));
+        return 1;
+    }
+    if ((ptr = memchr(src + 1 * PAGE_SIZE, 0xff, PAGE_SIZE)) != NULL) {
+        fprintf(stderr, "Sanity test failed: page 1 not cleared at byte %lu\n",
+                (unsigned long)(ptr - (src + 1 * PAGE_SIZE)));
+        return 1;
+    }
+    if ((ptr = memchr(src + 2 * PAGE_SIZE, 0x00, PAGE_SIZE)) != NULL) {
+        fprintf(stderr, "Sanity test failed: page 2 cleared at byte %lu\n",
+                (unsigned long)(ptr - (src + 2 * PAGE_SIZE)));
+        return 1;
+    }
+    free(src);
+
+    /* Setup: the one extra 0xff byte past the last page is a sentinel that must survive the benchmark */
+    if (posix_memalign((void **)&src, PAGE_SIZE, num_pages * PAGE_SIZE + 1))
+        return 1;
+    memset(src, 0xff, num_pages * PAGE_SIZE + 1);
+
+    attr = calloc(4, sizeof(*attr));
+    if (attr == NULL)
+        return 1;
+
+    attr[0].type = PERF_TYPE_HARDWARE;
+    attr[0].config = PERF_COUNT_HW_CPU_CYCLES;
+    attr[0].read_format = PERF_FORMAT_TOTAL_TIME_ENABLED |
+                          PERF_FORMAT_TOTAL_TIME_RUNNING;
+    attr[0].disabled = 1;
+
+    attr[1].type = PERF_TYPE_HARDWARE;
+    attr[1].config = PERF_COUNT_HW_INSTRUCTIONS;
+    attr[1].read_format = PERF_FORMAT_TOTAL_TIME_ENABLED |
+                          PERF_FORMAT_TOTAL_TIME_RUNNING;
+    attr[1].disabled = 1;
+
+    attr[2].type = PERF_TYPE_HARDWARE;
+    attr[2].read_format = PERF_FORMAT_TOTAL_TIME_ENABLED |
+                          PERF_FORMAT_TOTAL_TIME_RUNNING;
+    attr[2].config = PERF_COUNT_HW_CACHE_MISSES;
+    attr[2].disabled = 1;
+
+    attr[3].type = PERF_TYPE_RAW;
+    attr[3].config = 4; /* raw event code; reported under the label "mbox-replays" */
+    attr[3].read_format = PERF_FORMAT_TOTAL_TIME_ENABLED |
+                          PERF_FORMAT_TOTAL_TIME_RUNNING;
+    attr[3].disabled = 1;
+
+    for (int i = 0; i < 4; i++) {
+        fd[i] = sys_perf_event_open(attr + i, 0, -1, -1, 0); /* pid 0, cpu -1, no group leader */
+        if (fd[i] < 0) {
+            perror("perf_open event");
+            return 1;
+        }
+    }
+
+    /* Benchmark: counters were opened disabled, so only the loop below is measured */
+    prctl(PR_TASK_PERF_EVENTS_ENABLE);
+
+    for (int i = 0; i < NUM_TIMES; i++) {
+        for (int j = 0; j < num_pages; j++) {
+            clear_page(src + j * PAGE_SIZE);
+        }
+    }
+
+    prctl(PR_TASK_PERF_EVENTS_DISABLE);
+
+    /* Verify: the first 0xff left must be the sentinel; a NULL result means the sentinel itself was overwritten */
+    if ((ptr = memchr(src, 0xff, num_pages * PAGE_SIZE + 1)) !=
+            (src + num_pages * PAGE_SIZE)) {
+        fprintf(stderr, "Failed to clear at byte %lu\n", (unsigned long)(ptr ? ptr - src : num_pages * PAGE_SIZE));
+        return 1;
+    }
+
+    for (int i = 0; i < 4; i++) {
+        __u64 count[3] = { 0 }; /* value, time enabled, time running; zeroed so a failed read prints "not counted" */
+
+        if (read(fd[i], count, 3 * sizeof(__u64)) != 3 * sizeof(__u64))
+            perror("read of event failed");
+        printf("%20s: ", event_name[i]);
+
+        if (count[2] == 0) {
+            printf("not counted\n");
+        } else if (count[2] < count[1]) { /* counter ran for only part of the enabled time: scale the value up */
+            printf("%10lu (scaled from %3.1f%%)\n",
+                   (unsigned long)((double)count[0] * count[1] / count[2] / NUM_TIMES + 0.5),
+                   100.0 * (double)count[2] / count[1]);
+        } else {
+            printf("%lu\n", (unsigned long)(count[0] / NUM_TIMES));
+        }
+
+        close(fd[i]);
+    }
+
+    free(src);
+    free(attr);
+
+    return 0;
+}