#include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #define HP_SIZE (2 * 1024 * 1024ULL) #ifndef NR_userfaultfd #define NR_userfaultfd 282 #endif struct thread_data { pthread_t thread; pthread_barrier_t barrier; int uffd; }; static void *fault_handler(void *data) { struct thread_data *td = data; struct uffd_msg msg; struct pollfd pfd; int ret, nready; pthread_barrier_wait(&td->barrier); do { pfd.fd = td->uffd; pfd.events = POLLIN; nready = poll(&pfd, 1, -1); if (nready < 0) { perror("poll"); exit(1); } ret = read(td->uffd, &msg, sizeof(msg)); if (ret < 0) { if (errno == EAGAIN) continue; perror("read"); exit(1); } if (msg.event != UFFD_EVENT_PAGEFAULT) { printf("unspected event: %x\n", msg.event); exit(1); } printf("Page fault\n"); printf("flags = %lx; ", (long) msg.arg.pagefault.flags); printf("address = %lx\n", (long)msg.arg.pagefault.address); } while (1); return NULL; } static void *arm_fault_handler(struct thread_data *td, size_t len) { struct uffdio_api api = { }; struct uffdio_register reg = { }; void *buf; buf = mmap(NULL, HP_SIZE, PROT_READ|PROT_WRITE, MAP_PRIVATE | MAP_HUGETLB | MAP_HUGE_2MB | MAP_ANONYMOUS, -1, 0); if (buf == MAP_FAILED) { perror("mmap"); return NULL; } printf("got buf %p\n", buf); td->uffd = syscall(NR_userfaultfd, O_CLOEXEC | O_NONBLOCK); if (td->uffd < 0) { perror("userfaultfd"); return NULL; } api.api = UFFD_API; if (ioctl(td->uffd, UFFDIO_API, &api) < 0) { perror("ioctl UFFDIO_API"); return NULL; } reg.range.start = (unsigned long) buf; reg.range.len = HP_SIZE; reg.mode = UFFDIO_REGISTER_MODE_MISSING; if (ioctl(td->uffd, UFFDIO_REGISTER, ®) < 0) { perror("ioctl UFFDIO_REGISTER"); return NULL; } return buf; } static void sig_usr1(int sig) { } static void __do_io(int fd, void *buf, size_t len) { struct sigaction act = { }; int ret; act.sa_handler = sig_usr1; sigaction(SIGUSR1, &act, NULL); printf("child will write\n"); ret = write(fd, buf, len); printf("ret=%d\n", ret); } static void do_io(struct thread_data *td, size_t len) { void *buf; pid_t pid; int fd; fd = open("/dev/nvme0n1", O_RDWR); if (fd < 0) { perror("open create"); return; } pid = fork(); if (pid) { int wstat; sleep(1); kill(pid, SIGUSR1); printf("wait on child\n"); waitpid(pid, &wstat, 0); } else { buf = arm_fault_handler(td, len); pthread_barrier_wait(&td->barrier); __do_io(fd, buf, len); exit(0); } } int main(int argc, char *argv[]) { struct thread_data td = { }; pthread_barrier_init(&td.barrier, NULL, 2); pthread_create(&td.thread, NULL, fault_handler, &td); do_io(&td, HP_SIZE); return 0; }