#include #include #include #include #include #include #include #define _GNU_SOURCE #define _OPEN_THREADS #include #include #include #include #include #include #include #define LOG(format, ...) {printf("%lx:%s: " format, getpid(), __func__ __VA_OPT__(,)__VA_ARGS__);} #define MAX_THREAD_COUNT 64 #define PAGE_SIZE 0x1000 #define TEST_TIME (3.0 * 1000.0) * 10 static bool finish; static int nthreads; static volatile long long raw_writes_count, writes_time; static char *mem; static bool random_access, read_reset; #if defined(__i386__) static __inline__ unsigned long long rdtsc(void) { unsigned long long int x; __asm__ volatile (".byte 0x0f, 0x31" : "=A" (x)); return x; } #elif defined(__x86_64__) static __inline__ unsigned long long rdtsc(void) { unsigned hi, lo; __asm__ __volatile__ ("rdtsc" : "=a"(lo), "=d"(hi)); return ( (unsigned long long)lo)|( ((unsigned long long)hi)<<32 ); } #endif struct thread_info { volatile void *mem; size_t npages; }; void *thread_proc(void *data) { struct thread_info *info = data; volatile unsigned char *m = info->mem; size_t page; unsigned long long t1, t2; page = 0; while (!finish) { t1 = rdtsc(); if (random_access) page = rand() % info->npages; else page = (page + 1) % info->npages; ++*(volatile unsigned int *)(m + page * PAGE_SIZE); __atomic_add_fetch (&raw_writes_count, 1, __ATOMIC_RELAXED); t2 = rdtsc(); __atomic_add_fetch (&writes_time, t2 - t1, __ATOMIC_RELAXED); } return 0; } double curr_time_ms(void) { struct timespec current_time; clock_gettime(CLOCK_MONOTONIC, ¤t_time); return current_time.tv_sec * 1000 + current_time.tv_nsec / 1000000; } static double rdtsc_c; //static double inline mcs_from_rdtsc_avg(void *tsc, unsigned long long count) //{ // return (unsigned int)tsc * rdtsc_c / count; //} //////////////////////////////////////////////////////////////////////////////////////////////// #define UFFD_FEATURE_WP_UNPOPULATED (1<<13) #define UFFD_FEATURE_WP_ASYNC (1<<14) #ifndef PAGEMAP_SCAN /* Bits are set in the bitmap of the page_region and masks in pagemap_scan_args */ #define PAGE_IS_WRITTEN (1 << 0) #define PAGE_IS_FILE (1 << 1) #define PAGE_IS_PRESENT (1 << 2) #define PAGE_IS_SWAPPED (1 << 3) /* * struct page_region - Page region with bitmap flags * @start: Start of the region * @len: Length of the region * bitmap: Bits sets for the region */ struct page_region { unsigned long long start; unsigned long long len; unsigned long long bitmap; }; /* * struct pm_scan_arg - Pagemap ioctl argument * @size: Size of the structure * @flags: Flags for the IOCTL * @start: Starting address of the region * @len: Length of the region (All the pages in this length are included) * @vec: Address of page_region struct array for output * @vec_len: Length of the page_region struct array * @max_pages: Optional max return pages * @required_mask: Required mask - All of these bits have to be set in the PTE * @anyof_mask: Any mask - Any of these bits are set in the PTE * @excluded_mask: Exclude mask - None of these bits are set in the PTE * @return_mask: Bits that are to be reported in page_region */ struct pm_scan_arg { unsigned long long size; unsigned long long flags; unsigned long long start; unsigned long long len; unsigned long long vec; unsigned long long vec_len; unsigned long long max_pages; unsigned long long required_mask; unsigned long long anyof_mask; unsigned long long excluded_mask; unsigned long long return_mask; }; #define PM_SCAN_OP_GET (1 << 0) #define PM_SCAN_OP_WP (1 << 1) /* Pagemap ioctl */ #define PAGEMAP_SCAN _IOWR('f', 16, struct pm_scan_arg) #endif #define __NR_userfaultfd 323 #define PAGEMAP "/proc/self/pagemap" int pagemap_fd; int uffd; static long pagemap_ioctl(void *start, int len, void *vec, int vec_len, int flag, int max_pages, long required_mask, long anyof_mask, long excluded_mask, long return_mask) { struct pm_scan_arg arg; arg.start = (uintptr_t)start; arg.len = len; arg.vec = (uintptr_t)vec; arg.vec_len = vec_len; arg.flags = flag; arg.size = sizeof(struct pm_scan_arg); arg.max_pages = max_pages; arg.required_mask = required_mask; arg.anyof_mask = anyof_mask; arg.excluded_mask = excluded_mask; arg.return_mask = return_mask; return ioctl(pagemap_fd, PAGEMAP_SCAN, &arg); } int init_uffd(void) { struct uffdio_api uffdio_api; uffd = syscall(__NR_userfaultfd, O_CLOEXEC | O_NONBLOCK); if (uffd == -1) exit(1); uffdio_api.api = UFFD_API; uffdio_api.features = UFFD_FEATURE_WP_UNPOPULATED | UFFD_FEATURE_WP_ASYNC | UFFD_FEATURE_WP_HUGETLBFS_SHMEM; if (ioctl(uffd, UFFDIO_API, &uffdio_api)) exit(1); if (!(uffdio_api.api & UFFDIO_REGISTER_MODE_WP) || !(uffdio_api.features & UFFD_FEATURE_WP_UNPOPULATED) || !(uffdio_api.features & UFFD_FEATURE_WP_ASYNC) || !(uffdio_api.features & UFFD_FEATURE_WP_HUGETLBFS_SHMEM)) exit(1); return 0; } int wp_init(void *lpBaseAddress, int dwRegionSize) { struct uffdio_register uffdio_register; struct uffdio_writeprotect wp; uffdio_register.range.start = (unsigned long)lpBaseAddress; uffdio_register.range.len = dwRegionSize; uffdio_register.mode = UFFDIO_REGISTER_MODE_WP; if (ioctl(uffd, UFFDIO_REGISTER, &uffdio_register)) exit(1); if (!(uffdio_register.ioctls & UFFDIO_WRITEPROTECT)) exit(1); wp.range.start = (unsigned long)lpBaseAddress; wp.range.len = dwRegionSize; wp.mode = UFFDIO_WRITEPROTECT_MODE_WP; if (ioctl(uffd, UFFDIO_WRITEPROTECT, &wp)) exit(1); return 0; } int wp_free(void *lpBaseAddress, int dwRegionSize) { struct uffdio_register uffdio_register; uffdio_register.range.start = (unsigned long)lpBaseAddress; uffdio_register.range.len = dwRegionSize; uffdio_register.mode = UFFDIO_REGISTER_MODE_WP; if (ioctl(uffd, UFFDIO_UNREGISTER, &uffdio_register.range)) exit(1); return 0; } int wp_addr_range(void *lpBaseAddress, int dwRegionSize) { struct uffdio_writeprotect wp; wp.range.start = (unsigned long)lpBaseAddress; wp.range.len = dwRegionSize; wp.mode = UFFDIO_WRITEPROTECT_MODE_WP; if (ioctl(uffd, UFFDIO_WRITEPROTECT, &wp)) exit(1); return 0; } //int wp_addr_range_(void *lpBaseAddress, int dwRegionSize) //{ // struct page_region *vec; // int ret; // // vec = malloc(sizeof(struct page_region) * dwRegionSize); // // ret = pagemap_ioctl(lpBaseAddress, dwRegionSize, vec, dwRegionSize, PM_SCAN_OP_GET | PM_SCAN_OP_WP, // 0, PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN); // // if (ret < 0) // exit(1); // // free(vec); // return 0; //} int wp_addr_range__(void *lpBaseAddress, int dwRegionSize) { int ret; ret = pagemap_ioctl(lpBaseAddress, dwRegionSize, NULL, 0, PM_SCAN_OP_WP, 0, PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN); if (ret < 0) exit(1); return 0; } #define WRITE_WATCH_FLAG_RESET 1 struct page_region *buf; int GetWriteWatch(int flag, char *start, long len, long **vec, unsigned int *ww_count, unsigned int *granularity) { struct pm_scan_arg arg; int i, j, ind = 0, ret; arg.start = (uintptr_t)start; arg.len = len; arg.vec = (uintptr_t)buf; //(uintptr_t)vec; arg.vec_len = *ww_count; arg.flags = PM_SCAN_OP_GET; if (flag == WRITE_WATCH_FLAG_RESET) arg.flags |= PM_SCAN_OP_WP; arg.size = sizeof(struct pm_scan_arg); arg.max_pages = *ww_count; arg.required_mask = PAGE_IS_WRITTEN; arg.anyof_mask = 0; arg.excluded_mask = 0; arg.return_mask = PAGE_IS_WRITTEN; if (granularity) *granularity = 4096; ret = ioctl(pagemap_fd, PAGEMAP_SCAN, &arg); if (ret < 0) goto free_and_return; *ww_count = 0; for (i = 0; i < ret; i++) { *ww_count += buf[i].len; for (j = 0; j < buf[i].len; j++) vec[ind++] = (long int *)(buf[i].start + j * 4096); } ret = 0; free_and_return: return ret; } //#define USE_PAGEMAP_GET_WP //#define USE_PAGEMAP_WP unsigned long long reset_total; int ResetWriteWatch(void *lpBaseAddress, int dwRegionSize) { unsigned long long t = rdtsc(); int ret; #ifdef USE_PAGEMAP_GET_WP ret = wp_addr_range_(lpBaseAddress, dwRegionSize); #elif defined USE_PAGEMAP_WP ret = wp_addr_range__(lpBaseAddress, dwRegionSize); #else ret = wp_addr_range(lpBaseAddress, dwRegionSize); #endif reset_total += rdtsc() - t; return ret; } int main(int argc, char *argv[]) { unsigned int ww_count, ww_total, cycle_count, fault_count; struct thread_info info[MAX_THREAD_COUNT]; unsigned long long t1, t2, rdtsc_start, rdtsc_end; double start, curr, cycle_start; static long **ww_addr; long long writes_count; unsigned long long wwread_time; double rw_delay_ms; unsigned int granularity; unsigned int old_prot; unsigned int count; unsigned int i; int get_count; size_t npages; pagemap_fd = open(PAGEMAP, O_RDWR); if (pagemap_fd < 0) { perror("pagemapfd"); return -EINVAL; } if (init_uffd()) return -1; if (argc < 6) { puts("Usage: win.exe \n"); return -1; } nthreads = atoi(argv[1]); if (nthreads > MAX_THREAD_COUNT) { LOG("Maximum of %u threads supported.\n", MAX_THREAD_COUNT); return -1; } npages = atoi(argv[2]); if (npages < nthreads || npages % nthreads) { LOG("npages should be > nthreads and evenly divisible by nthreads.\n"); return -1; } rw_delay_ms = atof(argv[3]); random_access = atoi(argv[4]); read_reset = atoi(argv[5]); ww_addr = malloc(sizeof(*ww_addr) * npages); mem = mmap(NULL, npages * PAGE_SIZE, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON, -1, 0); if (!mem) { LOG("Failed to allocate virtual memory.\n"); return -1; } wp_init(mem, npages * PAGE_SIZE); wp_addr_range(mem, npages * PAGE_SIZE); mem[0] = 0x28; mem[0x1000] = 0x29; buf = malloc(100000 * sizeof(struct page_region)); ww_count = 100; if (GetWriteWatch(0, mem, 0x1000 * npages, ww_addr, &ww_count, &granularity)) { LOG("GetWriteWatch() failed, GetLastError() %lu.\n", errno); return -1; } // LOG("count %llu, %p, %p.\n", ww_count, mem, ww_addr[0]); mem = mmap(NULL, npages * 0x1000, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON, -1, 0); if (!mem) { perror("Error commit"); return -1; } wp_init(mem, npages * 0x1000); wp_addr_range(mem, npages * 0x1000); ww_count = 100; if (GetWriteWatch(0, mem, 0x1000 * npages, ww_addr, &ww_count, &granularity)) { LOG("GetWriteWatch() failed, GetLastError() %lu.\n", errno); return -1; } // LOG("count %llu, %p, %p.\n", ww_count, mem, ww_addr[0]); memset(mem, 0, npages * PAGE_SIZE); ResetWriteWatch(mem, npages * PAGE_SIZE); ww_count = 100; if (GetWriteWatch(0, mem, 0x1000 * npages, ww_addr, &ww_count, &granularity)) { LOG("GetWriteWatch() failed, GetLastError() %lu.\n", errno); return -1; } // LOG("read bytes %lu, count after read %llu, %p, %p.\n", count, ww_count, mem, ww_addr[0]); memset(mem, 0, npages * PAGE_SIZE); ResetWriteWatch(mem, npages * PAGE_SIZE); pthread_t th; for (i = 0; i < nthreads; ++i) { info[i].mem = mem + 0x1000 * i * npages / nthreads; info[i].npages = npages / nthreads; pthread_create(&th, NULL, thread_proc, &info[i]); } get_count = npages; wwread_time = 0; curr = start = curr_time_ms(); ww_total = 0; cycle_count = 0; rdtsc_start = rdtsc(); while (curr - start < TEST_TIME) { char *addr, *end; cycle_start = curr; t1 = rdtsc(); addr = mem; end = mem + npages * PAGE_SIZE; // LOG("cycle %I64u.\n", cycle_count); while (addr < end) { ww_count = get_count; if (GetWriteWatch(read_reset ? WRITE_WATCH_FLAG_RESET : 0, addr, end - addr, ww_addr, &ww_count, &granularity)) { LOG("GetWriteWatch() failed, GetLastError() %lu.\n", errno); return -1; } ww_total += ww_count; if (ww_count < get_count) break; addr = (char *)ww_addr[ww_count - 1] + 0x1000; LOG("addr %p, end %p, ww_count %I64u.\n", addr, end, ww_count); } if (!read_reset) ResetWriteWatch(mem, end - mem); t2 = rdtsc(); wwread_time += t2 - t1; curr = curr_time_ms(); while (curr - start < TEST_TIME && curr - cycle_start < rw_delay_ms) { sched_yield(); curr = curr_time_ms(); } ++cycle_count; } // rdtsc_end = rdtsc(); writes_count = raw_writes_count; finish = true; // rdtsc_c = 1000.0 * (curr - start) / (rdtsc_end - rdtsc_start); // LOG("rdtsc_c %lf.\n", rdtsc_c); sleep(1); LOG("Elapsed %.1lf, cycle_count %llu, writes_count %lld, writes watched %llu.\n", curr - start, cycle_count, writes_count, ww_total); LOG("writes per thread * msec %.3lf, avg. write time %.1lf, GetWriteWatch() avg %.1lf.\n", writes_count / (TEST_TIME * nthreads), (float)writes_time/writes_count, wwread_time / cycle_count); printf("ResetWriteWatch() time rdtsc --> %llu M\n", reset_total/1000000); free(buf); return 0; }