#include #include #include #include #include #include #include #include static int init_pmc(void) { static struct perf_event_attr perf_attr = { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_CPU_CYCLES, .pinned = 1, }; struct perf_event_mmap_page *pc; int perf_fd; perf_fd = syscall(__NR_perf_event_open, &perf_attr, 0, -1, -1, 0); if (perf_fd < 0) { fprintf(stderr, "perf_event_open failed: errno %d\n", errno); exit(1); } pc = mmap(NULL, 4096, PROT_READ, MAP_SHARED, perf_fd, 0); if (pc == MAP_FAILED) { fprintf(stderr, "perf_event mmap() failed: errno %d\n", errno); exit(1); } return pc->index - 1; } static inline unsigned int rdpmc(unsigned int counter) { unsigned int low, high; // asm volatile("rdtsc" : "=a" (low), "=d" (high)); asm volatile("lfence"); asm volatile("rdpmc" : "=a" (low), "=d" (high) : "c" (counter)); // return low bits, counter might to 32 or 40 bits wide. return low; } #ifndef MODE #define MODE 0 #endif __attribute__((noinline)) void memcpy_perf(unsigned char *d_buff, const unsigned char *s_buff, unsigned long len) { #if MODE == -1 // 'No copy' loop for baseline overhead asm volatile(" nop\n" : "+&D" (d_buff), "+&S" (s_buff), "+&c" (len) : : "memory"); #endif #if MODE == 0 // Simple 'rep movs' loop asm volatile(" rep movsb\n" : "+&D" (d_buff), "+&S" (s_buff), "+&c" (len) : : "memory"); #endif #if MODE == 1 // Simple 'rep movq' loop len /= 8; asm volatile(" rep movsq\n" : "+&D" (d_buff), "+&S" (s_buff), "+&c" (len) : : "memory"); #endif } unsigned char s_buff[8192] __attribute__((aligned(4096))); unsigned char d_buff[8192 + 1] __attribute__((aligned(4096))); #ifndef PASSES #define PASSES 5 #endif #ifndef OFFSET #define OFFSET 0 #endif int main(int argc, char **argv) { unsigned int tick; unsigned int ticks[PASSES]; unsigned int len, s_off = 0, d_off = 0; unsigned int i; unsigned int id = init_pmc(); unsigned int offset = OFFSET; len = sizeof s_buff; for (;;) { switch (getopt(argc, argv, "l:s:d:o:")) { case -1: break; default: exit(1); case 'l': len = atoi(optarg); continue; case 's': s_off = atoi(optarg); continue; case 'd': d_off = atoi(optarg); continue; case 'o': offset = atoi(optarg); continue; } break; } if (s_off + len > sizeof s_buff || d_off + len > sizeof d_buff - 1) { fprintf(stderr, "too long\n"); exit(1); } for (i = 0; i < len; i++) s_buff[i] = rand(); for (i = 0; i < PASSES; i++) { tick = rdpmc(id); memcpy_perf(d_buff + d_off, s_buff + s_off, len); ticks[i] = rdpmc(id) - tick; } printf(" ticks rate mode %d\n", MODE); for (i = 0; i < PASSES; i++) printf(" %7u %7u\n", ticks[i], 100 * len / (ticks[i] - offset)); if (memcmp(d_buff + d_off, s_buff + s_off, len) || d_buff[d_off + len]) { fprintf(stderr, "copy mismatch\n"); exit(1); } return 0; }