#include #include #include #include #include #include #include #include #include #include #include #include #define HP_SIZE (2 * 1024 * 1024ULL) #define NR_HUGEPAGES (3000) #ifndef NR_userfaultfd #define NR_userfaultfd 282 #endif struct thread_data { pthread_t thread; pthread_barrier_t barrier; int uffd; }; static void *fault_handler(void *data) { struct thread_data *td = data; struct uffd_msg msg; struct pollfd pfd; int ret, nready; pthread_barrier_wait(&td->barrier); do { pfd.fd = td->uffd; pfd.events = POLLIN; nready = poll(&pfd, 1, -1); if (nready < 0) { perror("poll"); exit(1); } ret = read(td->uffd, &msg, sizeof(msg)); if (ret < 0) { if (errno == EAGAIN) continue; perror("read"); exit(1); } if (msg.event != UFFD_EVENT_PAGEFAULT) { printf("unspected event: %x\n", msg.event); exit(1); } printf("Page fault\n"); printf("flags = %lx; ", (long) msg.arg.pagefault.flags); printf("address = %lx\n", (long)msg.arg.pagefault.address); } while (1); return NULL; } static void do_io(struct io_uring *ring, void *buf, size_t len) { struct io_uring_sqe *sqe; struct io_uring_cqe *cqe; int fd, ret, i; fd = open("/dev/nvme0n1", O_RDWR); if (fd < 0) { perror("open create"); return; } /* issue faulting write */ sqe = io_uring_get_sqe(ring); io_uring_prep_write(sqe, fd, buf, len, 0); sqe->user_data = 1; io_uring_submit(ring); printf("blocking issued\n"); sleep(1); /* cancel above write */ sqe = io_uring_get_sqe(ring); io_uring_prep_cancel64(sqe, 1, IORING_ASYNC_CANCEL_USERDATA); sqe->user_data = 2; io_uring_submit(ring); printf("cancel issued\n"); sleep(1); for (i = 0; i < 2; i++) { again: ret = io_uring_wait_cqe(ring, &cqe); if (ret < 0) { printf("wait: %d\n", ret); if (ret == -EINTR) goto again; break; } printf("got res %d, %ld\n", cqe->res, (long) cqe->user_data); io_uring_cqe_seen(ring, cqe); } } static void sig_usr1(int sig) { printf("got USR1\n"); } static int test(void) { struct uffdio_api api = { }; struct uffdio_register reg = { }; struct io_uring ring; struct sigaction act = { }; struct thread_data td = { }; void *buf; act.sa_handler = sig_usr1; sigaction(SIGUSR1, &act, NULL); io_uring_queue_init(4, &ring, 0); buf = mmap(NULL, HP_SIZE, PROT_READ|PROT_WRITE, MAP_PRIVATE | MAP_HUGETLB | MAP_HUGE_2MB | MAP_ANONYMOUS, -1, 0); if (buf == MAP_FAILED) { perror("mmap"); return 1; } printf("got buf %p\n", buf); td.uffd = syscall(NR_userfaultfd, O_CLOEXEC | O_NONBLOCK); if (td.uffd < 0) { perror("userfaultfd"); return 1; } api.api = UFFD_API; if (ioctl(td.uffd, UFFDIO_API, &api) < 0) { perror("ioctl UFFDIO_API"); return 1; } reg.range.start = (unsigned long) buf; reg.range.len = HP_SIZE; reg.mode = UFFDIO_REGISTER_MODE_MISSING; if (ioctl(td.uffd, UFFDIO_REGISTER, ®) < 0) { perror("ioctl UFFDIO_REGISTER"); return 1; } pthread_barrier_init(&td.barrier, NULL, 2); pthread_create(&td.thread, NULL, fault_handler, &td); pthread_barrier_wait(&td.barrier); do_io(&ring, buf, HP_SIZE); return 0; } int main(int argc, char *argv[]) { return test(); }