// SPDX-License-Identifier: GPL-2.0 #define _GNU_SOURCE #include #include #include #include #include #include #include "vm_util.h" #include "../kselftest.h" #include #include #include #include #include #include #include #include #include #include #include #include #define PAGEMAP_BITS_ALL (PAGE_IS_WRITTEN | PAGE_IS_FILE | \ PAGE_IS_PRESENT | PAGE_IS_SWAPPED) #define PAGEMAP_NON_WRITTEN_BITS (PAGE_IS_FILE | PAGE_IS_PRESENT | \ PAGE_IS_SWAPPED) #define TEST_ITERATIONS 10 #define PAGEMAP "/proc/self/pagemap" int pagemap_fd; int uffd; int page_size; int hpage_size; static long pagemap_ioctl(void *start, int len, void *vec, int vec_len, int flag, int max_pages, long required_mask, long anyof_mask, long excluded_mask, long return_mask) { struct pm_scan_arg arg; arg.start = (uintptr_t)start; arg.len = len; arg.vec = (uintptr_t)vec; arg.vec_len = vec_len; arg.flags = flag; arg.size = sizeof(struct pm_scan_arg); arg.max_pages = max_pages; arg.required_mask = required_mask; arg.anyof_mask = anyof_mask; arg.excluded_mask = excluded_mask; arg.return_mask = return_mask; return ioctl(pagemap_fd, PAGEMAP_SCAN, &arg); } int init_uffd(void) { struct uffdio_api uffdio_api; uffd = syscall(__NR_userfaultfd, O_CLOEXEC | O_NONBLOCK); if (uffd == -1) ksft_exit_fail_msg("uffd syscall failed\n"); uffdio_api.api = UFFD_API; uffdio_api.features = UFFD_FEATURE_WP_UNPOPULATED | UFFD_FEATURE_WP_ASYNC | UFFD_FEATURE_WP_HUGETLBFS_SHMEM; if (ioctl(uffd, UFFDIO_API, &uffdio_api)) ksft_exit_fail_msg("UFFDIO_API\n"); if (!(uffdio_api.api & UFFDIO_REGISTER_MODE_WP) || !(uffdio_api.features & UFFD_FEATURE_WP_UNPOPULATED) || !(uffdio_api.features & UFFD_FEATURE_WP_ASYNC) || !(uffdio_api.features & UFFD_FEATURE_WP_HUGETLBFS_SHMEM)) ksft_exit_fail_msg("UFFDIO_API error %llu\n", uffdio_api.api); return 0; } int wp_init(void *lpBaseAddress, int dwRegionSize) { struct uffdio_register uffdio_register; struct uffdio_writeprotect wp; uffdio_register.range.start = (unsigned long)lpBaseAddress; uffdio_register.range.len = dwRegionSize; uffdio_register.mode = UFFDIO_REGISTER_MODE_WP;// UFFDIO_REGISTER_MODE_MISSING | if (ioctl(uffd, UFFDIO_REGISTER, &uffdio_register)) ksft_exit_fail_msg("ioctl(UFFDIO_REGISTER) %d %s\n", errno, strerror(errno)); if (!(uffdio_register.ioctls & UFFDIO_WRITEPROTECT)) ksft_exit_fail_msg("ioctl set is incorrect\n"); wp.range.start = (unsigned long)lpBaseAddress; wp.range.len = dwRegionSize; wp.mode = UFFDIO_WRITEPROTECT_MODE_WP; if (ioctl(uffd, UFFDIO_WRITEPROTECT, &wp)) ksft_exit_fail_msg("ioctl(UFFDIO_WRITEPROTECT)\n"); return 0; } int wp_free(void *lpBaseAddress, int dwRegionSize) { struct uffdio_register uffdio_register; uffdio_register.range.start = (unsigned long)lpBaseAddress; uffdio_register.range.len = dwRegionSize; uffdio_register.mode = UFFDIO_REGISTER_MODE_WP; // UFFDIO_REGISTER_MODE_MISSING | if (ioctl(uffd, UFFDIO_UNREGISTER, &uffdio_register.range)) ksft_exit_fail_msg("ioctl unregister failure\n"); return 0; } int wp_addr_range(void *lpBaseAddress, int dwRegionSize) { struct uffdio_writeprotect wp; wp.range.start = (unsigned long)lpBaseAddress; wp.range.len = dwRegionSize; wp.mode = UFFDIO_WRITEPROTECT_MODE_WP; if (ioctl(uffd, UFFDIO_WRITEPROTECT, &wp)) ksft_exit_fail_msg("ioctl(UFFDIO_WRITEPROTECT)\n"); return 0; } void *gethugetlb_mem(int size, int *shmid) { char *mem; if (shmid) { *shmid = shmget(2, size, SHM_HUGETLB | IPC_CREAT | SHM_R | SHM_W); if (*shmid < 0) ksft_exit_fail_msg("shmget error\n"); mem = shmat(*shmid, 0, 0); if (mem == (char *)-1) { shmctl(*shmid, IPC_RMID, NULL); ksft_exit_fail_msg("Shared memory attach failure\n"); } } else { mem = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_ANONYMOUS | MAP_HUGETLB | MAP_PRIVATE, -1, 0); if (mem == MAP_FAILED) ksft_exit_fail_msg("mmap of hugetlbfs file failed \n"); } return mem; } int userfaultfd_tests(void) { int mem_size, vec_size, written, num_pages = 16; char *mem, *vec; mem_size = num_pages * page_size; mem = mmap(NULL, mem_size, PROT_NONE, MAP_PRIVATE | MAP_ANON, -1, 0); if (mem == MAP_FAILED) ksft_exit_fail_msg("error nomem\n"); wp_init(mem, mem_size); /* Change protection of pages differently */ mprotect(mem, mem_size/8, PROT_READ|PROT_WRITE); mprotect(mem + 1 * mem_size/8, mem_size/8, PROT_READ); mprotect(mem + 2 * mem_size/8, mem_size/8, PROT_READ|PROT_WRITE); mprotect(mem + 3 * mem_size/8, mem_size/8, PROT_READ); mprotect(mem + 4 * mem_size/8, mem_size/8, PROT_READ|PROT_WRITE); mprotect(mem + 5 * mem_size/8, mem_size/8, PROT_NONE); mprotect(mem + 6 * mem_size/8, mem_size/8, PROT_READ|PROT_WRITE); mprotect(mem + 7 * mem_size/8, mem_size/8, PROT_READ); wp_addr_range(mem + (mem_size/16), mem_size - 2 * (mem_size/8)); wp_addr_range(mem, mem_size); vec_size = mem_size/page_size; vec = malloc(sizeof(struct page_region) * vec_size); written = pagemap_ioctl(mem, mem_size, vec, 1, PM_SCAN_OP_GET | PM_SCAN_OP_WP, vec_size - 2, PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN); if (written < 0) ksft_exit_fail_msg("error %d %d %s\n", written, errno, strerror(errno)); ksft_test_result(written == 0, "%s all new pages must not be written (dirty)\n", __func__); wp_free(mem, mem_size); munmap(mem, mem_size); free(vec); return 0; } int sanity_tests_sd(void) { char *mem, *m[2]; int mem_size, vec_size, ret, ret2, ret3, i, num_pages = 10; struct page_region *vec, *vec2; vec_size = 100; mem_size = num_pages * page_size; vec = malloc(sizeof(struct page_region) * vec_size); if (!vec) ksft_exit_fail_msg("error nomem\n"); vec2 = malloc(sizeof(struct page_region) * vec_size); if (!vec2) ksft_exit_fail_msg("error nomem\n"); mem = mmap(NULL, mem_size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON, -1, 0); if (mem == MAP_FAILED) ksft_exit_fail_msg("error nomem\n"); wp_init(mem, mem_size); wp_addr_range(mem, mem_size); /* 1. wrong operation */ ksft_test_result(pagemap_ioctl(mem, mem_size, vec, vec_size, -1, 0, PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN) < 0, "%s wrong flag specified\n", __func__); ksft_test_result(pagemap_ioctl(mem, mem_size, vec, vec_size, 8, 0, 0x1111, 0, 0, PAGE_IS_WRITTEN) < 0, "%s wrong mask specified\n", __func__); ksft_test_result(pagemap_ioctl(mem, mem_size, vec, vec_size, 0, 0, PAGE_IS_WRITTEN, 0, 0, 0x1000) < 0, "%s wrong return mask specified\n", __func__); ksft_test_result(pagemap_ioctl(mem, mem_size, vec, vec_size, PM_SCAN_OP_WP | PM_SCAN_OP_GET | 0x32, 0, PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN) < 0, "%s mixture of correct and wrong flag\n", __func__); ksft_test_result(pagemap_ioctl(mem, mem_size, NULL, 0, PM_SCAN_OP_WP, 0, PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN) < 0, "%s PM_SCAN_OP_WP cannot be used without get\n", __func__); /* 2. Clear area with larger vec size */ ret = pagemap_ioctl(mem, mem_size, vec, vec_size, PM_SCAN_OP_GET | PM_SCAN_OP_WP, 0, PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN); ksft_test_result(ret >= 0, "%s Clear area with larger vec size\n", __func__); /* 3. Repeated pattern of written and non-written pages */ for (i = 0; i < mem_size; i += 2 * page_size) mem[i]++; ret = pagemap_ioctl(mem, mem_size, vec, vec_size, PM_SCAN_OP_GET, 0, PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN); if (ret < 0) ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno)); ksft_test_result(ret == mem_size/(page_size * 2), "%s Repeated pattern of written and non-written pages %d\n", __func__, ret); /* 4. Repeated pattern of written and non-written pages in parts */ ret = pagemap_ioctl(mem, mem_size, vec, vec_size, PM_SCAN_OP_GET | PM_SCAN_OP_WP, num_pages/2 - 2, PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN); if (ret < 0) ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno)); ret2 = pagemap_ioctl(mem, mem_size, vec, 2, PM_SCAN_OP_GET, 0, PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN); if (ret2 < 0) ksft_exit_fail_msg("error %d %d %s\n", ret2, errno, strerror(errno)); ret3 = pagemap_ioctl(mem, mem_size, vec, vec_size, PM_SCAN_OP_GET | PM_SCAN_OP_WP, 0, PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN); if (ret3 < 0) ksft_exit_fail_msg("error %d %d %s\n", ret3, errno, strerror(errno)); ksft_test_result((ret + ret3) == num_pages/2 && ret2 == 2, "%s Repeated pattern of written and non-written pages in parts %d %d %d\n", __func__, ret, ret2, ret3); /* 5. only get 2 dirty pages and clear them as well */ vec_size = mem_size/page_size; memset(mem, -1, mem_size); /* get and clear second and third pages */ ret = pagemap_ioctl(mem + page_size, 2 * page_size, vec, 1, PM_SCAN_OP_GET | PM_SCAN_OP_WP, 2, PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN); if (ret < 0) ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno)); ret2 = pagemap_ioctl(mem, mem_size, vec2, vec_size, PM_SCAN_OP_GET, 0, PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN); if (ret2 < 0) ksft_exit_fail_msg("error %d %d %s\n", ret2, errno, strerror(errno)); ksft_test_result(ret == 1 && vec[0].len == 2 && vec[0].start == (uintptr_t)(mem + page_size) && ret2 == 2 && vec2[0].len == 1 && vec2[0].start == (uintptr_t)mem && vec2[1].len == vec_size - 3 && vec2[1].start == (uintptr_t)(mem + 3 * page_size), "%s only get 2 written pages and clear them as well %d %d %d %d %d\n", __func__, ret, vec[0].len, ret2, vec2[0].len, vec2[1].len); wp_free(mem, mem_size); munmap(mem, mem_size); /* 6. Two regions */ m[0] = mmap(NULL, mem_size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON, -1, 0); if (m[0] == MAP_FAILED) ksft_exit_fail_msg("error nomem\n"); m[1] = mmap(NULL, mem_size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON, -1, 0); if (m[1] == MAP_FAILED) ksft_exit_fail_msg("error nomem\n"); wp_init(m[0], mem_size); wp_init(m[1], mem_size); wp_addr_range(m[0], mem_size); wp_addr_range(m[1], mem_size); memset(m[0], 'a', mem_size); memset(m[1], 'b', mem_size); wp_addr_range(m[0], mem_size); ret = pagemap_ioctl(m[1], mem_size, vec, 1, PM_SCAN_OP_GET, 0, PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN); if (ret < 0) ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno)); ksft_test_result(ret == 1 && vec[0].len == mem_size/page_size, "%s Two regions\n", __func__); wp_free(m[0], mem_size); wp_free(m[1], mem_size); munmap(m[0], mem_size); munmap(m[1], mem_size); free(vec); free(vec2); return 0; } int base_tests(char *prefix, char *mem, int mem_size, int skip) { int vec_size, written; struct page_region *vec, *vec2; if (skip) { ksft_test_result_skip("%s all new pages must not be written (dirty)\n", prefix); ksft_test_result_skip("%s all pages must be written (dirty)\n", prefix); ksft_test_result_skip("%s all pages dirty other than first and the last one\n", prefix); ksft_test_result_skip("%s only middle page dirty\n", prefix); ksft_test_result_skip("%s only two middle pages dirty\n", prefix); return 0; } vec_size = mem_size/page_size; vec = malloc(sizeof(struct page_region) * vec_size); vec2 = malloc(sizeof(struct page_region) * vec_size); // /* 1. all new pages must be not be written (dirty) */ // written = pagemap_ioctl(mem, mem_size, vec, 1, PM_SCAN_OP_GET | PM_SCAN_OP_WP, vec_size - 2, // PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN); // if (written < 0) // ksft_exit_fail_msg("error %d %d %s\n", written, errno, strerror(errno)); // // ksft_test_result(written == 0, "%s all new pages must not be written (dirty) %d\n", prefix, // written); /* 2. all pages must be written */ memset(mem, -1, mem_size); //#define tmpfix #ifdef tmpfix /* fix for hugetlb mem */ wp_addr_range(mem, mem_size); memset(mem, 1, mem_size); #endif written = pagemap_ioctl(mem, mem_size, vec, 1, PM_SCAN_OP_GET, 0, PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN); if (written < 0) ksft_exit_fail_msg("error %d %d %s\n", written, errno, strerror(errno)); ksft_test_result(written == 1 && vec[0].len == mem_size/page_size, "%s all pages must be written (dirty) %d %d %d %d\n", prefix, written, vec[0].len, vec[1].len, mem_size/page_size); // /* 3. all pages dirty other than first and the last one */ // wp_addr_range(mem, mem_size); // memset(mem + page_size, 0, mem_size - (2 * page_size)); // // written = pagemap_ioctl(mem, mem_size, vec, 1, PM_SCAN_OP_GET, 0, PAGE_IS_WRITTEN, 0, 0, // PAGE_IS_WRITTEN); // if (written < 0) // ksft_exit_fail_msg("error %d %d %s\n", written, errno, strerror(errno)); // // ksft_test_result(written == 1 && vec[0].len >= vec_size - 2 && vec[0].len <= vec_size, // "%s all pages dirty other than first and the last one %d %d\n", prefix, // written, vec[0].len); // // /* 4. only middle page dirty */ // wp_addr_range(mem, mem_size); // mem[vec_size/2 * page_size]++; // // written = pagemap_ioctl(mem, mem_size, vec, vec_size, PM_SCAN_OP_GET, 0, PAGE_IS_WRITTEN, // 0, 0, PAGE_IS_WRITTEN); // if (written < 0) // ksft_exit_fail_msg("error %d %d %s\n", written, errno, strerror(errno)); // // ksft_test_result(written == 1 && vec[0].len >= 1, // "%s only middle page dirty\n", prefix); // // /* 5. only two middle pages dirty and walk over only middle pages */ // wp_addr_range(mem, mem_size); // mem[vec_size/2 * page_size]++; // mem[(vec_size/2 + 1) * page_size]++; // // written = pagemap_ioctl(&mem[vec_size/2 * page_size], 2 * page_size, vec, 1, PM_SCAN_OP_GET, // 0, PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN); // if (written < 0) // ksft_exit_fail_msg("error %d %d %s\n", written, errno, strerror(errno)); // // ksft_test_result(written == 1 && vec[0].start == (uintptr_t)(&mem[vec_size/2 * page_size]) // && vec[0].len == 2, // "%s only two middle pages dirty\n", prefix); free(vec); free(vec2); return 0; } void *gethugepage(int map_size) { int ret; char *map; map = memalign(hpage_size, map_size); if (!map) ksft_exit_fail_msg("memalign failed %d %s\n", errno, strerror(errno)); ret = madvise(map, map_size, MADV_HUGEPAGE); if (ret) ksft_exit_fail_msg("madvise failed %d %d %s\n", ret, errno, strerror(errno)); return map; } int hpage_unit_tests(void) { char *map; int ret, ret2; size_t num_pages = 10; int map_size = hpage_size * num_pages; int vec_size = map_size/page_size; struct page_region *vec, *vec2; vec = malloc(sizeof(struct page_region) * vec_size); vec2 = malloc(sizeof(struct page_region) * vec_size); if (!vec || !vec2) ksft_exit_fail_msg("malloc failed\n"); map = gethugepage(map_size); if (map) { wp_init(map, map_size); wp_addr_range(map, map_size); /* 1. all new huge page must not be written (dirty) */ ret = pagemap_ioctl(map, map_size, vec, vec_size, PM_SCAN_OP_GET | PM_SCAN_OP_WP, 0, PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN); if (ret < 0) ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno)); ksft_test_result(ret == 0, "%s all new huge page must not be written (dirty)\n", __func__); /* 2. all the huge page must not be written */ ret = pagemap_ioctl(map, map_size, vec, vec_size, PM_SCAN_OP_GET, 0, PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN); if (ret < 0) ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno)); ksft_test_result(ret == 0, "%s all the huge page must not be written\n", __func__); /* 3. all the huge page must be written and clear dirty as well */ memset(map, -1, map_size); ret = pagemap_ioctl(map, map_size, vec, vec_size, PM_SCAN_OP_GET | PM_SCAN_OP_WP, 0, PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN); if (ret < 0) ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno)); ksft_test_result(ret == 1 && vec[0].start == (uintptr_t)map && vec[0].len == vec_size && vec[0].bitmap == PAGE_IS_WRITTEN, "%s all the huge page must be written and clear %d %d\n", __func__, ret, vec[0].len); /* 4. only middle page written */ wp_free(map, map_size); free(map); map = gethugepage(map_size); wp_init(map, map_size); wp_addr_range(map, map_size); map[vec_size/2 * page_size]++; ret = pagemap_ioctl(map, map_size, vec, vec_size, PM_SCAN_OP_GET, 0, PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN); if (ret < 0) ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno)); ksft_test_result(ret == 1 && vec[0].len > 0, "%s only middle page written\n", __func__); wp_free(map, map_size); free(map); } else { ksft_test_result_skip("all new huge page must be written\n"); ksft_test_result_skip("all the huge page must not be written\n"); ksft_test_result_skip("all the huge page must be written and clear\n"); ksft_test_result_skip("only middle page written\n"); } /* 5. clear first half of huge page */ map = gethugepage(map_size); if (map) { wp_init(map, map_size); wp_addr_range(map, map_size); memset(map, 0, map_size); wp_addr_range(map, map_size/2); ret = pagemap_ioctl(map, map_size, vec, vec_size, PM_SCAN_OP_GET, 0, PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN); if (ret < 0) ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno)); ksft_test_result(ret == 1 && vec[0].len == vec_size/2 && vec[0].start == (uintptr_t)(map + map_size/2), "%s clear first half of huge page %d %d\n", __func__, ret, vec[0].len); wp_free(map, map_size); free(map); } else { ksft_test_result_skip("clear first half of huge page\n"); } /* 6. clear first half of huge page with limited buffer */ map = gethugepage(map_size); if (map) { wp_init(map, map_size); wp_addr_range(map, map_size); memset(map, 0, map_size); ret = pagemap_ioctl(map, map_size, vec, vec_size, PM_SCAN_OP_GET | PM_SCAN_OP_WP, vec_size/2, PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN); if (ret < 0) ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno)); ret = pagemap_ioctl(map, map_size, vec, vec_size, PM_SCAN_OP_GET, 0, PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN); if (ret < 0) ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno)); ksft_test_result(ret == 1 && vec[0].len == vec_size/2 && vec[0].start == (uintptr_t)(map + map_size/2), "%s clear first half of huge page with limited buffer\n", __func__); wp_free(map, map_size); free(map); } else { ksft_test_result_skip("clear first half of huge page with limited buffer\n"); } /* 7. clear second half of huge page */ map = gethugepage(map_size); if (map) { wp_init(map, map_size); wp_addr_range(map, map_size); memset(map, -1, map_size); wp_addr_range(map + map_size/2, map_size/2); ret = pagemap_ioctl(map, map_size, vec, vec_size, PM_SCAN_OP_GET, 0, PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN); if (ret < 0) ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno)); ksft_test_result(ret == 1 && vec[0].len == vec_size/2, "%s clear second half huge page %d, %d\n", __func__, ret, vec[0].len); wp_free(map, map_size); free(map); } else { ksft_test_result_skip("clear second half huge page\n"); } /* 8. get half huge page */ map = gethugepage(map_size); if (map) { wp_init(map, map_size); wp_addr_range(map, map_size); memset(map, -1, map_size); usleep(100); ret = pagemap_ioctl(map, map_size, vec, 1, PM_SCAN_OP_GET | PM_SCAN_OP_WP, hpage_size/(2*page_size), PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN); if (ret < 0) ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno)); ksft_test_result(ret == 1 && vec[0].len == hpage_size/(2*page_size), "%s get half huge page\n", __func__); ret2 = pagemap_ioctl(map, map_size, vec, vec_size, PM_SCAN_OP_GET, 0, PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN); if (ret2 < 0) ksft_exit_fail_msg("error %d %d %s\n", ret2, errno, strerror(errno)); ksft_test_result(ret2 == 1 && vec[0].len == (map_size - hpage_size/2)/page_size, "%s get half huge page %d %d %d\n", __func__, ret2, vec[0].len, vec[1].len); wp_free(map, map_size); free(map); } else { ksft_test_result_skip("get half huge page\n"); } free(vec); free(vec2); return 0; } int unmapped_region_tests(void) { void *start = (void *)0x10000000; int written, len = 0x00040000; int vec_size = len / page_size; struct page_region *vec = malloc(sizeof(struct page_region) * vec_size); /* 1. Get written pages */ written = pagemap_ioctl(start, len, vec, vec_size, PM_SCAN_OP_GET, 0, PAGEMAP_NON_WRITTEN_BITS, 0, 0, PAGEMAP_NON_WRITTEN_BITS); if (written < 0) ksft_exit_fail_msg("error %d %d %s\n", written, errno, strerror(errno)); ksft_test_result(written >= 0, "%s Get status of pages\n", __func__); free(vec); return 0; } static void test_simple(void) { int i; char *map; struct page_region vec; map = aligned_alloc(page_size, page_size); if (!map) ksft_exit_fail_msg("aligned_alloc failed\n"); wp_init(map, page_size); wp_addr_range(map, page_size); for (i = 0 ; i < TEST_ITERATIONS; i++) { if (pagemap_ioctl(map, page_size, &vec, 1, PM_SCAN_OP_GET, 0, PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN) == 1) { ksft_print_msg("written bit was 1, but should be 0 (i=%d)\n", i); break; } wp_addr_range(map, page_size); /* Write something to the page to get the written bit enabled on the page */ map[0]++; if (pagemap_ioctl(map, page_size, &vec, 1, PM_SCAN_OP_GET, 0, PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN) == 0) { ksft_print_msg("written bit was 0, but should be 1 (i=%d)\n", i); break; } wp_addr_range(map, page_size); } wp_free(map, page_size); free(map); ksft_test_result(i == TEST_ITERATIONS, "Test %s\n", __func__); } int sanity_tests(void) { char *mem, *fmem; int mem_size, vec_size, ret; struct page_region *vec; /* 1. wrong operation */ mem_size = 10 * page_size; vec_size = mem_size / page_size; vec = malloc(sizeof(struct page_region) * vec_size); mem = mmap(NULL, mem_size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON, -1, 0); if (mem == MAP_FAILED || vec == MAP_FAILED) ksft_exit_fail_msg("error nomem\n"); wp_init(mem, mem_size); wp_addr_range(mem, mem_size); ksft_test_result(pagemap_ioctl(mem, mem_size, vec, vec_size, PM_SCAN_OP_GET | PM_SCAN_OP_WP, 0, PAGEMAP_BITS_ALL, 0, 0, PAGEMAP_BITS_ALL) < 0, "%s clear op can only be specified with PAGE_IS_WRITTEN\n", __func__); ksft_test_result(pagemap_ioctl(mem, mem_size, vec, vec_size, PM_SCAN_OP_GET, 0, PAGEMAP_BITS_ALL, 0, 0, PAGEMAP_BITS_ALL) >= 0, "%s required_mask specified\n", __func__); ksft_test_result(pagemap_ioctl(mem, mem_size, vec, vec_size, PM_SCAN_OP_GET, 0, 0, PAGEMAP_BITS_ALL, 0, PAGEMAP_BITS_ALL) >= 0, "%s anyof_mask specified\n", __func__); ksft_test_result(pagemap_ioctl(mem, mem_size, vec, vec_size, PM_SCAN_OP_GET, 0, 0, 0, PAGEMAP_BITS_ALL, PAGEMAP_BITS_ALL) >= 0, "%s excluded_mask specified\n", __func__); ksft_test_result(pagemap_ioctl(mem, mem_size, vec, vec_size, PM_SCAN_OP_GET, 0, PAGEMAP_BITS_ALL, PAGEMAP_BITS_ALL, 0, PAGEMAP_BITS_ALL) >= 0, "%s required_mask and anyof_mask specified\n", __func__); wp_free(mem, mem_size); munmap(mem, mem_size); /* 2. Get sd and present pages with anyof_mask */ mem = mmap(NULL, mem_size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON, -1, 0); if (mem == MAP_FAILED) ksft_exit_fail_msg("error nomem\n"); wp_init(mem, mem_size); wp_addr_range(mem, mem_size); memset(mem, 0, mem_size); ret = pagemap_ioctl(mem, mem_size, vec, vec_size, PM_SCAN_OP_GET, 0, 0, PAGEMAP_BITS_ALL, 0, PAGEMAP_BITS_ALL); ksft_test_result(ret >= 0 && vec[0].start == (uintptr_t)mem && vec[0].len == vec_size && vec[0].bitmap == (PAGE_IS_WRITTEN | PAGE_IS_PRESENT), "%s Get sd and present pages with anyof_mask\n", __func__); /* 3. Get sd and present pages with required_mask */ ret = pagemap_ioctl(mem, mem_size, vec, vec_size, PM_SCAN_OP_GET, 0, PAGEMAP_BITS_ALL, 0, 0, PAGEMAP_BITS_ALL); ksft_test_result(ret >= 0 && vec[0].start == (uintptr_t)mem && vec[0].len == vec_size && vec[0].bitmap == (PAGE_IS_WRITTEN | PAGE_IS_PRESENT), "%s Get all the pages with required_mask\n", __func__); /* 4. Get sd and present pages with required_mask and anyof_mask */ ret = pagemap_ioctl(mem, mem_size, vec, vec_size, PM_SCAN_OP_GET, 0, PAGE_IS_WRITTEN, PAGE_IS_PRESENT, 0, PAGEMAP_BITS_ALL); ksft_test_result(ret >= 0 && vec[0].start == (uintptr_t)mem && vec[0].len == vec_size && vec[0].bitmap == (PAGE_IS_WRITTEN | PAGE_IS_PRESENT), "%s Get sd and present pages with required_mask and anyof_mask\n", __func__); /* 5. Don't get sd pages */ ret = pagemap_ioctl(mem, mem_size, vec, vec_size, PM_SCAN_OP_GET, 0, 0, 0, PAGE_IS_WRITTEN, PAGEMAP_BITS_ALL); ksft_test_result(ret == 0, "%s Don't get sd pages\n", __func__); /* 6. Don't get present pages */ ret = pagemap_ioctl(mem, mem_size, vec, vec_size, PM_SCAN_OP_GET, 0, 0, 0, PAGE_IS_PRESENT, PAGEMAP_BITS_ALL); ksft_test_result(ret == 0, "%s Don't get present pages\n", __func__); wp_free(mem, mem_size); munmap(mem, mem_size); /* 8. Find written present pages with return mask */ mem = mmap(NULL, mem_size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON, -1, 0); if (mem == MAP_FAILED) ksft_exit_fail_msg("error nomem\n"); wp_init(mem, mem_size); wp_addr_range(mem, mem_size); memset(mem, 0, mem_size); ret = pagemap_ioctl(mem, mem_size, vec, vec_size, PM_SCAN_OP_GET, 0, 0, PAGEMAP_BITS_ALL, 0, PAGE_IS_WRITTEN); ksft_test_result(ret >= 0 && vec[0].start == (uintptr_t)mem && vec[0].len == vec_size && vec[0].bitmap == PAGE_IS_WRITTEN, "%s Find written present pages with return mask\n", __func__); wp_free(mem, mem_size); munmap(mem, mem_size); /* 9. Memory mapped file */ int fd; struct stat sbuf; fd = open(__FILE__, O_RDONLY); if (fd < 0) { ksft_test_result_skip("%s Memory mapped file\n"); goto free_vec_and_return; } ret = stat(__FILE__, &sbuf); if (ret < 0) ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno)); fmem = mmap(NULL, sbuf.st_size, PROT_READ, MAP_PRIVATE, fd, 0); if (fmem == MAP_FAILED) ksft_exit_fail_msg("error nomem %ld %s\n", errno, strerror(errno)); ret = pagemap_ioctl(fmem, sbuf.st_size, vec, vec_size, PM_SCAN_OP_GET, 0, 0, PAGEMAP_NON_WRITTEN_BITS, 0, PAGEMAP_NON_WRITTEN_BITS); ksft_test_result(ret >= 0 && vec[0].start == (uintptr_t)fmem && vec[0].len == ceilf((float)sbuf.st_size/page_size) && vec[0].bitmap == PAGE_IS_FILE, "%s Memory mapped file\n", __func__); munmap(fmem, sbuf.st_size); close(fd); free_vec_and_return: free(vec); return 0; } int mprotect_tests(void) { int ret; char *mem, *mem2; struct page_region vec; int pagemap_fd = open("/proc/self/pagemap", O_RDONLY); if (pagemap_fd < 0) { fprintf(stderr, "open() failed\n"); exit(1); } /* 1. Map two pages */ mem = mmap(0, 2 * page_size, PROT_READ|PROT_WRITE, MAP_PRIVATE | MAP_ANON, -1, 0); if (mem == MAP_FAILED) ksft_exit_fail_msg("error nomem\n"); wp_init(mem, 2 * page_size); wp_addr_range(mem, 2 * page_size); /* Populate both pages. */ memset(mem, 1, 2 * page_size); ret = pagemap_ioctl(mem, 2 * page_size, &vec, 1, PM_SCAN_OP_GET, 0, PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN); if (ret < 0) ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno)); ksft_test_result(ret == 1 && vec.len == 2, "%s Both pages written\n", __func__); /* 2. Start tracking */ wp_addr_range(mem, 2 * page_size); ksft_test_result(pagemap_ioctl(mem, 2 * page_size, &vec, 1, PM_SCAN_OP_GET, 0, PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN) == 0, "%s Both pages are not written (dirty)\n", __func__); /* 3. Remap the second page */ mem2 = mmap(mem + page_size, page_size, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANON|MAP_FIXED, -1, 0); if (mem2 == MAP_FAILED) ksft_exit_fail_msg("error nomem\n"); wp_init(mem2, page_size); wp_addr_range(mem2, page_size); /* Protect + unprotect. */ mprotect(mem, page_size, PROT_NONE); mprotect(mem, 2 * page_size, PROT_READ); mprotect(mem, 2 * page_size, PROT_READ|PROT_WRITE); /* Modify both pages. */ memset(mem, 2, 2 * page_size); /* Protect + unprotect. */ mprotect(mem, page_size, PROT_NONE); mprotect(mem, page_size, PROT_READ); mprotect(mem, page_size, PROT_READ|PROT_WRITE); ret = pagemap_ioctl(mem, 2 * page_size, &vec, 1, PM_SCAN_OP_GET, 0, PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN); if (ret < 0) ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno)); ksft_test_result(ret == 1 && vec.len == 2, "%s Both pages written after remap and mprotect\n", __func__); /* 4. Clear and make the pages written */ wp_addr_range(mem, 2 * page_size); memset(mem, 'A', 2 * page_size); ret = pagemap_ioctl(mem, 2 * page_size, &vec, 1, PM_SCAN_OP_GET, 0, PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN); if (ret < 0) ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno)); ksft_test_result(ret == 1 && vec.len == 2, "%s Clear and make the pages written\n", __func__); wp_free(mem, 2 * page_size); munmap(mem, 2 * page_size); return 0; } /* transact test */ static const unsigned int nthreads = 6, pages_per_thread = 32, access_per_thread = 8; static pthread_barrier_t start_barrier, end_barrier; static unsigned int extra_thread_faults; static unsigned int iter_count = 1000; static volatile int finish; static ssize_t get_dirty_pages_reset(char *mem, unsigned int count, int reset,int page_size) { struct pm_scan_arg arg = { 0 }; struct page_region rgns[256]; int i, j, cnt, ret; arg.size = sizeof(struct pm_scan_arg); arg.start = (uintptr_t)mem; arg.max_pages = count; arg.len = count * page_size; arg.vec = (uintptr_t)rgns; arg.vec_len = sizeof(rgns) / sizeof(*rgns); arg.flags = PM_SCAN_OP_GET; if (reset) arg.flags |= PM_SCAN_OP_WP; arg.required_mask = PAGE_IS_WRITTEN; arg.return_mask = PAGE_IS_WRITTEN; ret = ioctl(pagemap_fd, PAGEMAP_SCAN, &arg); if (ret < 0) ksft_exit_fail_msg("ioctl failed\n"); cnt = 0; for (i = 0; i < ret; ++i) { if (rgns[i].bitmap != PAGE_IS_WRITTEN) ksft_exit_fail_msg("wrong bitmap\n"); for (j = 0; j < rgns[i].len; ++j) cnt++; } return cnt; } void *thread_proc(void *mem) { volatile int *m = mem; long curr_faults, faults; struct rusage r; unsigned int i; int ret; if (getrusage(RUSAGE_THREAD, &r)) ksft_exit_fail_msg("getrusage\n"); curr_faults = r.ru_minflt; while (!finish) { ret = pthread_barrier_wait(&start_barrier); if (ret && ret != PTHREAD_BARRIER_SERIAL_THREAD) ksft_exit_fail_msg("pthread_barrier_wait\n"); for (i = 0; i < access_per_thread; ++i) __atomic_add_fetch(m + i * (0x1000 / sizeof(*m)), 1, __ATOMIC_SEQ_CST); ret = pthread_barrier_wait(&end_barrier); if (ret && ret != PTHREAD_BARRIER_SERIAL_THREAD) ksft_exit_fail_msg("pthread_barrier_wait\n"); if (getrusage(RUSAGE_THREAD, &r)) ksft_exit_fail_msg("getrusage\n"); faults = r.ru_minflt - curr_faults; if (faults < access_per_thread) ksft_exit_fail_msg("faults < access_per_thread"); __atomic_add_fetch(&extra_thread_faults, faults - access_per_thread, __ATOMIC_SEQ_CST); curr_faults = r.ru_minflt; } return NULL; } static void transact_test(int page_size) { unsigned int i, count, extra_pages; pthread_t th; char *mem; int ret, c; if (pthread_barrier_init(&start_barrier, NULL, nthreads + 1)) ksft_exit_fail_msg("pthread_barrier_init\n"); if (pthread_barrier_init(&end_barrier, NULL, nthreads + 1)) ksft_exit_fail_msg("pthread_barrier_init\n"); mem = mmap(NULL, 0x1000 * nthreads * pages_per_thread, PROT_READ | PROT_WRITE, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0); if (mem == MAP_FAILED) ksft_exit_fail_msg("Error mmap %s.\n", strerror(errno)); wp_init(mem, 0x1000 * nthreads * pages_per_thread); wp_addr_range(mem, 0x1000 * nthreads * pages_per_thread); memset(mem, 0, 0x1000 * nthreads * pages_per_thread); count = get_dirty_pages_reset(mem, nthreads * pages_per_thread, 1, page_size); ksft_test_result(count > 0, "%s count %d\n", __func__, count); count = get_dirty_pages_reset(mem, nthreads * pages_per_thread, 1, page_size); ksft_test_result(count == 0, "%s count %d\n", __func__, count); finish = 0; for (i = 0; i < nthreads; ++i) pthread_create(&th, NULL, thread_proc, mem + 0x1000 * i * pages_per_thread); extra_pages = 0; for (i = 0; i < iter_count; ++i) { count = 0; ret = pthread_barrier_wait(&start_barrier); if (ret && ret != PTHREAD_BARRIER_SERIAL_THREAD) ksft_exit_fail_msg("pthread_barrier_wait\n"); count = get_dirty_pages_reset(mem, nthreads * pages_per_thread, 1, page_size); ret = pthread_barrier_wait(&end_barrier); if (ret && ret != PTHREAD_BARRIER_SERIAL_THREAD) ksft_exit_fail_msg("pthread_barrier_wait\n"); if (count > nthreads * access_per_thread) ksft_exit_fail_msg("Too big count %d, nthreads * access_per_thread %d, iter %d.\n", count, nthreads * access_per_thread, i); c = get_dirty_pages_reset(mem, nthreads * pages_per_thread, 1, page_size); count += c; if (c > nthreads * access_per_thread) { ksft_test_result_fail(" %s count > nthreads\n", __func__); return; } if (count != nthreads * access_per_thread) { /* * The purpose of the test is to make sure that no page updates are lost * when the page updates and read-resetting soft dirty flags are performed * in parallel. However, it is possible that the application will get the * soft dirty flags twice on the two consecutive read-resets. This seems * unavoidable as soft dirty flag is handled in software through page faults * in kernel. While the updating the flags is supposed to be synchronized * between page fault handling and read-reset, it is possible that * read-reset happens after page fault PTE update but before the application * re-executes write instruction. So read-reset gets the flag, clears write * access and application gets page fault again for the same write. */ if (count < nthreads * access_per_thread) { ksft_test_result_fail("Lost update, iter %d, %d vs %d.\n", i, count, nthreads * access_per_thread); return; } extra_pages += count - nthreads * access_per_thread; } } pthread_barrier_wait(&start_barrier); finish = 1; pthread_barrier_wait(&end_barrier); ksft_test_result_pass("%s Extra pages %u (%.1lf%%), extra thread faults %d.\n", __func__, extra_pages, 100.0 * extra_pages / (iter_count * nthreads * access_per_thread), extra_thread_faults); } int main(void) { int mem_size, shmid; char *mem, *map; ksft_print_header(); ksft_set_plan(66); page_size = getpagesize(); hpage_size = read_pmd_pagesize(); pagemap_fd = open(PAGEMAP, O_RDWR); if (pagemap_fd < 0) return -EINVAL; if (init_uffd()) ksft_exit_fail_msg("uffd init failed\n"); /* * Written (dirty) PTE bit tests */ // /* 1. Sanity testing */ // sanity_tests_sd(); // // /* 2. Normal page testing */ // mem_size = 10 * page_size; // mem = mmap(NULL, mem_size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON, -1, 0); // if (mem == MAP_FAILED) // ksft_exit_fail_msg("error nomem\n"); // wp_init(mem, mem_size); // wp_addr_range(mem, mem_size); // // base_tests("Page testing:", mem, mem_size, 0); // // wp_free(mem, mem_size); // munmap(mem, mem_size); // // /* 3. Large page testing */ // mem_size = 512 * 10 * page_size; // mem = mmap(NULL, mem_size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON, -1, 0); // if (mem == MAP_FAILED) // ksft_exit_fail_msg("error nomem\n"); // wp_init(mem, mem_size); // wp_addr_range(mem, mem_size); // // base_tests("Large Page testing:", mem, mem_size, 0); // // wp_free(mem, mem_size); // munmap(mem, mem_size); // // /* 4. Huge page testing */ // map = gethugepage(hpage_size); // if (map) { // wp_init(map, hpage_size); // wp_addr_range(map, hpage_size); // base_tests("Huge page testing:", map, hpage_size, 0); // wp_free(map, hpage_size); // free(map); // } else { // base_tests("Huge page testing:", NULL, 0, 1); // } /* 5. Hugetlb page testing */ mem_size = 2*1024*1024; mem = gethugetlb_mem(mem_size, &shmid); if (mem) { wp_init(mem, mem_size); wp_addr_range(mem, mem_size); base_tests("Hugetlb shmem testing:", mem, mem_size, 0); wp_free(mem, mem_size); shmctl(shmid, IPC_RMID, NULL); } else { base_tests("Hugetlb shmem testing:", NULL, 0, 1); } /* 6. Hugetlb page testing */ mem = gethugetlb_mem(mem_size, NULL); if (mem) { wp_init(mem, mem_size); wp_addr_range(mem, mem_size); base_tests("Hugetlb mem testing:", mem, mem_size, 0); wp_free(mem, mem_size); } else { base_tests("Hugetlb mem testing:", NULL, 0, 1); } // /* 6. Huge page tests */ // hpage_unit_tests(); // // /* 7. Iterative test */ // test_simple(); // // /* 8. Mprotect test */ // mprotect_tests(); // // /* 9. Transact test */ // transact_test(page_size); // // /* // * Other PTE bit tests // */ // // /* 1. Sanity testing */ // sanity_tests(); // // /* 2. Unmapped address test */ // unmapped_region_tests(); // // /* 3. Userfaultfd tests */ // userfaultfd_tests(); close(pagemap_fd); return ksft_exit_pass(); }