From d106235f5eebe280f4f820fc29bb5d8708e2f54d Mon Sep 17 00:00:00 2001
From: Matt Turner
Date: Mon, 23 May 2016 18:10:11 -0700
Subject: Initial import

---
 .gitignore                  |   7 ++
 Makefile                    |  26 +++++
 clear_page.c                | 100 ++++++++++++++++++++
 ev5-clear_page.S            |  39 ++++++++
 ev6-clear_page-prefetch_m.S |  54 +++++++++++
 ev6-clear_page.S            |  54 +++++++++++
 test.c                      | 217 ++++++++++++++++++++++++++++++++++++++++++++
 7 files changed, 497 insertions(+)
 create mode 100644 .gitignore
 create mode 100644 Makefile
 create mode 100644 clear_page.c
 create mode 100644 ev5-clear_page.S
 create mode 100644 ev6-clear_page-prefetch_m.S
 create mode 100644 ev6-clear_page.S
 create mode 100644 test.c

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..918429d
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,7 @@
+clear_page
+ev5
+ev6
+ev6-prefetch_m
+ev6-no-prefetch
+ev7
+test.o
diff --git a/Makefile b/Makefile
new file mode 100644
index 0000000..73a9d66
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,26 @@
+CFLAGS ?= -g -O2 -mcpu=ev67 -pipe -std=gnu99
+
+.PHONY: all clean
+
+all: ev5 ev6 ev6-no-prefetch ev7 clear_page ev6-prefetch_m
+
+ev5: test.o ev5-clear_page.S
+	$(CC) $^ -o $@
+
+ev6: test.o ev6-clear_page.S
+	$(CC) -DWH64="wh64" $^ -o $@
+
+ev6-no-prefetch: test.o ev6-clear_page.S
+	$(CC) -DWH64="nop #" $^ -o $@
+
+ev7: test.o ev6-clear_page.S
+	$(CC) -DWH64="wh64en" $^ -o $@
+
+clear_page: test.o clear_page.c
+	$(CC) $(CFLAGS) $^ -o $@
+
+ev6-prefetch_m: test.o ev6-clear_page-prefetch_m.S
+	$(CC) $(CFLAGS) $^ -o $@
+
+clean:
+	rm -f *.o ev5 ev6 ev6-no-prefetch ev7 clear_page ev6-prefetch_m
diff --git a/clear_page.c b/clear_page.c
new file mode 100644
index 0000000..6940328
--- /dev/null
+++ b/clear_page.c
@@ -0,0 +1,100 @@
+/* vim: set expandtab tabstop=4 softtabstop=4 shiftwidth=4: */
+
+#define PAGE_SIZE 8192
+
+/*
+ * Alpha cache hints.  Loads whose destination is $31/$f31 are prefetches;
+ * the _m variants ask for the line with intent to modify and the _en
+ * variants mark it evict-next.  (Per the Alpha architecture documentation;
+ * only prefetch_m and nop are used below.)
+ */
+#define wh64(p) asm ("wh64 (%0)" : : "r"(p) : "memory")
+#define ecb(p) asm("ecb (%0)" : : "r"(p) : "memory")
+#define prefetch(p) asm("ldl $31, 0(%0)" : : "r"(p))
+#define prefetch_en(p) asm("ldq $31, 0(%0)" : : "r"(p))
+#define prefetch_m(p) asm("lds $f31, 0(%0)" : : "r"(p))
+#define prefetch_men(p) asm("ldt $f31, 0(%0)" : : "r"(p))
+#define nop asm("nop")
+
+/* How many 64-byte chunks ahead of the stores to prefetch. */
+#define PREFETCH_LINES 2
+
+//#define UNROLL_EVERYTHING
+
+/* Zero one 64-byte chunk (eight longs) at p and advance p past it. */
+#define ZERO(p) do { \
+    prefetch_m((p) + PREFETCH_LINES * 8); \
+    (p)[0] = 0; nop; (p)[1] = 0; nop; (p)[2] = 0; nop; (p)[3] = 0; nop; \
+    (p)[4] = 0; nop; (p)[5] = 0; nop; (p)[6] = 0; nop; (p)[7] = 0; nop; \
+    (p) += 8; \
+} while (0)
+
+/* 128 x ZERO: one fully unrolled PAGE_SIZE-byte page. */
+#define ZERO_PAGE(p) do { \
+    ZERO(p); ZERO(p); ZERO(p); ZERO(p); ZERO(p); ZERO(p); ZERO(p); ZERO(p); \
+    ZERO(p); ZERO(p); ZERO(p); ZERO(p); ZERO(p); ZERO(p); ZERO(p); ZERO(p); \
+    ZERO(p); ZERO(p); ZERO(p); ZERO(p); ZERO(p); ZERO(p); ZERO(p); ZERO(p); \
+    ZERO(p); ZERO(p); ZERO(p); ZERO(p); ZERO(p); ZERO(p); ZERO(p); ZERO(p); \
+    ZERO(p); ZERO(p); ZERO(p); ZERO(p); ZERO(p); ZERO(p); ZERO(p); ZERO(p); \
+    ZERO(p); ZERO(p); ZERO(p); ZERO(p); ZERO(p); ZERO(p); ZERO(p); ZERO(p); \
+    ZERO(p); ZERO(p); ZERO(p); ZERO(p); ZERO(p); ZERO(p); ZERO(p); ZERO(p); \
+    ZERO(p); ZERO(p); ZERO(p); ZERO(p); ZERO(p); ZERO(p); ZERO(p); ZERO(p); \
+    ZERO(p); ZERO(p); ZERO(p); ZERO(p); ZERO(p); ZERO(p); ZERO(p); ZERO(p); \
+    ZERO(p); ZERO(p); ZERO(p); ZERO(p); ZERO(p); ZERO(p); ZERO(p); ZERO(p); \
+    ZERO(p); ZERO(p); ZERO(p); ZERO(p); ZERO(p); ZERO(p); ZERO(p); ZERO(p); \
+    ZERO(p); ZERO(p); ZERO(p); ZERO(p); ZERO(p); ZERO(p); ZERO(p); ZERO(p); \
+    ZERO(p); ZERO(p); ZERO(p); ZERO(p); ZERO(p); ZERO(p); ZERO(p); ZERO(p); \
+    ZERO(p); ZERO(p); ZERO(p); ZERO(p); ZERO(p); ZERO(p); ZERO(p); ZERO(p); \
+    ZERO(p); ZERO(p); ZERO(p); ZERO(p); ZERO(p); ZERO(p); ZERO(p); ZERO(p); \
+    ZERO(p); ZERO(p); ZERO(p); ZERO(p); ZERO(p); ZERO(p); ZERO(p); ZERO(p); \
+} while (0)
+
+/* Zero the PAGE_SIZE bytes starting at page. */
+void
+clear_page(void *page)
+{
+    long *p = page;
+
+#ifdef UNROLL_EVERYTHING
+    ZERO_PAGE(p);
+#else
+    int chunks = PAGE_SIZE / 64;
+    int chunks_prefetch = chunks - PREFETCH_LINES;
+    int chunks_no_prefetch = PREFETCH_LINES;
+
+    /* Main loop: prefetch PREFETCH_LINES chunks ahead while storing. */
+    while (chunks_prefetch > 0) {
+        prefetch_m(p + PREFETCH_LINES * 8);
+        p[0] = 0; nop;
+        p[1] = 0; nop;
+        p[2] = 0; nop;
+        p[3] = 0; nop;
+        p[4] = 0; nop;
+        p[5] = 0; nop;
+        p[6] = 0; nop;
+        p[7] = 0; nop;
+
+        nop; nop; nop; nop;
+
+        chunks_prefetch--;
+        p += 8;
+    }
+
+    /* Tail: the last chunks have nothing left in the page to prefetch. */
+    while (chunks_no_prefetch > 0) {
+        p[0] = 0; nop;
+        p[1] = 0; nop;
+        p[2] = 0; nop;
+        p[3] = 0; nop;
+        p[4] = 0; nop;
+        p[5] = 0; nop;
+        p[6] = 0; nop;
+        p[7] = 0; nop;
+
+        nop; nop; nop; nop;
+
+        chunks_no_prefetch--;
+        p += 8;
+    }
+#endif
+}
diff --git a/ev5-clear_page.S b/ev5-clear_page.S
new file mode 100644
index 0000000..a221ae2
--- /dev/null
+++ b/ev5-clear_page.S
@@ -0,0 +1,39 @@
+/*
+ * arch/alpha/lib/clear_page.S
+ *
+ * Zero an entire page.
+ */
+
+	.text
+	.align 4
+	.global clear_page
+	.ent clear_page
+clear_page:
+	.prologue 0
+
+	lda	$0,128
+	nop
+	unop
+	nop
+
+1:	stq	$31,0($16)
+	stq	$31,8($16)
+	stq	$31,16($16)
+	stq	$31,24($16)
+
+	stq	$31,32($16)
+	stq	$31,40($16)
+	stq	$31,48($16)
+	subq	$0,1,$0
+
+	stq	$31,56($16)
+	addq	$16,64,$16
+	unop
+	bne	$0,1b
+
+	ret
+	nop
+	unop
+	nop
+
+	.end clear_page
diff --git a/ev6-clear_page-prefetch_m.S b/ev6-clear_page-prefetch_m.S
new file mode 100644
index 0000000..7b39954
--- /dev/null
+++ b/ev6-clear_page-prefetch_m.S
@@ -0,0 +1,54 @@
+/*
+ * arch/alpha/lib/ev6-clear_page.S
+ *
+ * Zero an entire page.
+ */
+
+	.text
+	.align 4
+	.global clear_page
+	.ent clear_page
+clear_page:
+	.prologue 0
+
+	lda	$0,128
+	lda	$1,125
+	addq	$16,192,$17
+	nop
+
+	lds	$f31,0($16)
+	lds	$f31,64($16)
+	lds	$f31,128($16)
+	nop
+
+1:	lds	$f31,0($17)
+	stq	$31,0($16)
+	subq	$0,1,$0
+	subq	$1,1,$1
+
+	stq	$31,8($16)
+	stq	$31,16($16)
+	addq	$17,64,$2
+	nop
+
+	stq	$31,24($16)
+	stq	$31,32($16)
+	cmovgt	$1,$2,$17
+	nop
+
+	stq	$31,40($16)
+	stq	$31,48($16)
+	nop
+	nop
+
+	stq	$31,56($16)
+	addq	$16,64,$16
+	nop
+	bne	$0,1b
+
+	ret
+	nop
+	nop
+	nop
+
+	.end clear_page
diff --git a/ev6-clear_page.S b/ev6-clear_page.S
new file mode 100644
index 0000000..c647f49
--- /dev/null
+++ b/ev6-clear_page.S
@@ -0,0 +1,54 @@
+/*
+ * arch/alpha/lib/ev6-clear_page.S
+ *
+ * Zero an entire page.
+ */
+
+	.text
+	.align 4
+	.global clear_page
+	.ent clear_page
+clear_page:
+	.prologue 0
+
+	lda	$0,128
+	lda	$1,125
+	addq	$16,64,$2
+	addq	$16,128,$3
+
+	addq	$16,192,$17
+	WH64	($16)
+	WH64	($2)
+	WH64	($3)
+
+1:	WH64	($17)
+	stq	$31,0($16)
+	subq	$0,1,$0
+	subq	$1,1,$1
+
+	stq	$31,8($16)
+	stq	$31,16($16)
+	addq	$17,64,$2
+	nop
+
+	stq	$31,24($16)
+	stq	$31,32($16)
+	cmovgt	$1,$2,$17
+	nop
+
+	stq	$31,40($16)
+	stq	$31,48($16)
+	nop
+	nop
+
+	stq	$31,56($16)
+	addq	$16,64,$16
+	nop
+	bne	$0,1b
+
+	ret
+	nop
+	nop
+	nop
+
+	.end clear_page
diff --git a/test.c b/test.c
new file mode 100644
index 0000000..e322754
--- /dev/null
+++ b/test.c
@@ -0,0 +1,217 @@
+/* vim: set expandtab tabstop=4 softtabstop=4 shiftwidth=4: */
+
+/*
+ * Benchmark harness for the clear_page() variants: checks that exactly one
+ * page is cleared, then clears a buffer NUM_TIMES times under perf counters.
+ */
+
+#include <errno.h>
+#include <limits.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include <linux/perf_event.h>
+#include <linux/types.h>
+#include <sys/mman.h>
+#include <sys/prctl.h>
+#include <sys/syscall.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#ifndef __NR_perf_event_open
+#error __NR_perf_event_open not defined.
+#endif
+
+#define PAGE_SIZE 8192
+
+#define NUM_TIMES 100000
+
+/* Labels for the four counters, in the order they are opened in main(). */
+static const char *event_name[] = {
+    "cycles",
+    "instructions",
+    "cache-misses",
+    "mbox-replays"
+};
+
+/* Thin wrapper for the perf_event_open syscall; fills in attr->size. */
+static inline int
+sys_perf_event_open(struct perf_event_attr *attr,
+                    pid_t pid, int cpu, int group_fd, unsigned long flags)
+{
+    attr->size = sizeof(*attr);
+    return syscall(__NR_perf_event_open, attr, pid, cpu, group_fd, flags);
+}
+
+/* Provided by whichever clear_page implementation is linked in. */
+void clear_page(void *page);
+
+char *src __attribute__((used));
+
+int
+main(int argc, char *argv[])
+{
+    struct perf_event_attr *attr;
+    int fd[4];
+    char *ptr;
+    char *endp;
+    long pages_arg;
+    int num_pages;
+
+    if (argc != 2) {
+        printf("Usage: %s <num_pages>\n", argv[0]);
+        return 1;
+    }
+
+    /*
+     * Reject garbage and negative counts, and cap the count so that
+     * num_pages * PAGE_SIZE + 1 below cannot overflow an int.
+     */
+    errno = 0;
+    pages_arg = strtol(argv[1], &endp, 10);
+    if (errno != 0 || endp == argv[1] || *endp != '\0' ||
+        pages_arg < 0 || pages_arg > INT_MAX / PAGE_SIZE - 1) {
+        fprintf(stderr, "Invalid page count: %s\n", argv[1]);
+        return 1;
+    }
+    num_pages = (int)pages_arg;
+
+    /* Sanity test to ensure a whole page is cleared, and nothing more */
+    if (posix_memalign((void **)&src, PAGE_SIZE, 3 * PAGE_SIZE))
+        return 1;
+
+    memset(src, 0xff, 3 * PAGE_SIZE);
+
+    /* Make the neighbouring pages inaccessible while clear_page() runs. */
+    if (mprotect(src + 0 * PAGE_SIZE, PAGE_SIZE, PROT_NONE) ||
+        mprotect(src + 2 * PAGE_SIZE, PAGE_SIZE, PROT_NONE)) {
+        perror("mprotect");
+        return 1;
+    }
+
+    clear_page(src + PAGE_SIZE);
+
+    /*
+     * Restore read *and* write access: the pages are inspected below and
+     * then returned to the allocator, which may store into freed memory.
+     */
+    if (mprotect(src + 0 * PAGE_SIZE, PAGE_SIZE, PROT_READ | PROT_WRITE) ||
+        mprotect(src + 2 * PAGE_SIZE, PAGE_SIZE, PROT_READ | PROT_WRITE)) {
+        perror("mprotect");
+        return 1;
+    }
+
+    if ((ptr = memchr(src + 0 * PAGE_SIZE, 0x00, PAGE_SIZE)) != NULL) {
+        fprintf(stderr, "Sanity test failed: page 0 cleared at byte %td\n",
+                ptr - (src + 0 * PAGE_SIZE));
+        return 1;
+    }
+    if ((ptr = memchr(src + 1 * PAGE_SIZE, 0xff, PAGE_SIZE)) != NULL) {
+        fprintf(stderr, "Sanity test failed: page 1 not cleared at byte %td\n",
+                ptr - (src + 1 * PAGE_SIZE));
+        return 1;
+    }
+    if ((ptr = memchr(src + 2 * PAGE_SIZE, 0x00, PAGE_SIZE)) != NULL) {
+        fprintf(stderr, "Sanity test failed: page 2 cleared at byte %td\n",
+                ptr - (src + 2 * PAGE_SIZE));
+        return 1;
+    }
+    free(src);
+
+    /* Setup */
+    if (posix_memalign((void **)&src, PAGE_SIZE, num_pages * PAGE_SIZE + 1))
+        return 1;
+    memset(src, 0xff, num_pages * PAGE_SIZE + 1);
+
+    attr = calloc(4, sizeof(*attr));
+    if (src == NULL || attr == NULL)
+        return 1;
+
+    attr[0].type = PERF_TYPE_HARDWARE;
+    attr[0].config = PERF_COUNT_HW_CPU_CYCLES;
+    attr[0].read_format = PERF_FORMAT_TOTAL_TIME_ENABLED |
+                          PERF_FORMAT_TOTAL_TIME_RUNNING;
+    attr[0].disabled = 1;
+
+    attr[1].type = PERF_TYPE_HARDWARE;
+    attr[1].config = PERF_COUNT_HW_INSTRUCTIONS;
+    attr[1].read_format = PERF_FORMAT_TOTAL_TIME_ENABLED |
+                          PERF_FORMAT_TOTAL_TIME_RUNNING;
+    attr[1].disabled = 1;
+
+    attr[2].type = PERF_TYPE_HARDWARE;
+    attr[2].read_format = PERF_FORMAT_TOTAL_TIME_ENABLED |
+                          PERF_FORMAT_TOTAL_TIME_RUNNING;
+    attr[2].config = PERF_COUNT_HW_CACHE_MISSES;
+    attr[2].disabled = 1;
+
+    attr[3].type = PERF_TYPE_RAW;
+    attr[3].config = 4;
+    attr[3].read_format = PERF_FORMAT_TOTAL_TIME_ENABLED |
+                          PERF_FORMAT_TOTAL_TIME_RUNNING;
+    attr[3].disabled = 1;
+
+    for (int i = 0; i < 4; i++) {
+        fd[i] = sys_perf_event_open(attr + i, 0, -1, -1, 0);
+        if (fd[i] < 0) {
+            perror("perf_open event");
+            return 1;
+        }
+    }
+
+    /* Benchmark */
+    prctl(PR_TASK_PERF_EVENTS_ENABLE);
+
+    for (int i = 0; i < NUM_TIMES; i++) {
+        for (int j = 0; j < num_pages; j++) {
+            clear_page(src + j * PAGE_SIZE);
+        }
+    }
+
+    prctl(PR_TASK_PERF_EVENTS_DISABLE);
+
+    /*
+     * Verify: the only 0xff byte left must be the sentinel just past the
+     * last page.  memchr() returns NULL if even the sentinel was cleared.
+     */
+    ptr = memchr(src, 0xff, num_pages * PAGE_SIZE + 1);
+    if (ptr == NULL) {
+        fprintf(stderr, "Cleared past the end of the last page\n");
+        return 1;
+    }
+    if (ptr != src + num_pages * PAGE_SIZE) {
+        fprintf(stderr, "Failed to clear at byte %td\n", ptr - src);
+        return 1;
+    }
+
+    for (int i = 0; i < 4; i++) {
+        /* value, time_enabled, time_running (see attr[].read_format) */
+        __u64 count[3];
+
+        if (read(fd[i], count, sizeof(count)) != (ssize_t)sizeof(count)) {
+            /* count[] is not valid; do not print numbers derived from it */
+            perror("read of event failed");
+            close(fd[i]);
+            continue;
+        }
+        printf("%20s: ", event_name[i]);
+
+        if (count[2] == 0) {
+            printf("not counted\n");
+        } else if (count[2] < count[1]) {
+            printf("%10llu (scaled from %3.1f%%)\n",
+                   (unsigned long long)((double)count[0] * count[1] / count[2] / NUM_TIMES + 0.5),
+                   100.0 * (double)count[2] / count[1]);
+        } else {
+            printf("%llu\n", (unsigned long long)(count[0] / NUM_TIMES));
+        }
+
+        close(fd[i]);
+    }
+
+    free(src);
+    free(attr);
+
+    return 0;
+}
-- 
cgit v1.2.3