/* SPDX-License-Identifier: MIT */ /* based on linux-kernel/tools/testing/selftests/net/msg_zerocopy.c */ /* gcc -luring -O2 -o send-zc ./send-zc.c */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include enum { __IORING_OP_SENDZC = 40, __IORING_SENDZC_FLUSH = (1U << 0), __IORING_SENDZC_FIXED_BUF = (1U << 1), __IORING_REGISTER_TX_CTX = 20, __IORING_UNREGISTER_TX_CTX = 21, }; struct __io_uring_tx_ctx_register { __u64 tag; }; #ifndef SO_ZEROCOPY #define SO_ZEROCOPY 60 #endif #define ZC_TAG 0xfffffffULL static bool fixed_files; static bool zc; static bool flush; static int nr_reqs; static bool fixed_buf; static int cfg_family = PF_UNSPEC; static int cfg_payload_len; static int cfg_port = 8000; static int cfg_runtime_ms = 4200; static socklen_t cfg_alen; static struct sockaddr_storage cfg_dst_addr; static char payload[IP_MAXPACKET] __attribute__((aligned(4096))); static inline int ____sys_io_uring_register(int fd, unsigned opcode, const void *arg, unsigned nr_args) { int ret; ret = syscall(__NR_io_uring_register, fd, opcode, arg, nr_args); return (ret < 0) ? -errno : ret; } static unsigned long gettimeofday_ms(void) { struct timeval tv; gettimeofday(&tv, NULL); return (tv.tv_sec * 1000) + (tv.tv_usec / 1000); } static void do_setsockopt(int fd, int level, int optname, int val) { if (setsockopt(fd, level, optname, &val, sizeof(val))) error(1, errno, "setsockopt %d.%d: %d", level, optname, val); } static void setup_sockaddr(int domain, const char *str_addr, struct sockaddr_storage *sockaddr) { struct sockaddr_in6 *addr6 = (void *) sockaddr; struct sockaddr_in *addr4 = (void *) sockaddr; switch (domain) { case PF_INET: memset(addr4, 0, sizeof(*addr4)); addr4->sin_family = AF_INET; addr4->sin_port = htons(cfg_port); if (str_addr && inet_pton(AF_INET, str_addr, &(addr4->sin_addr)) != 1) error(1, 0, "ipv4 parse error: %s", str_addr); break; case PF_INET6: memset(addr6, 0, sizeof(*addr6)); addr6->sin6_family = AF_INET6; addr6->sin6_port = htons(cfg_port); if (str_addr && inet_pton(AF_INET6, str_addr, &(addr6->sin6_addr)) != 1) error(1, 0, "ipv6 parse error: %s", str_addr); break; default: error(1, 0, "illegal domain"); } } static int do_setup_tx(int domain, int type, int protocol) { int fd; fd = socket(domain, type, protocol); if (fd == -1) error(1, errno, "socket t"); do_setsockopt(fd, SOL_SOCKET, SO_SNDBUF, 1 << 21); do_setsockopt(fd, SOL_SOCKET, SO_ZEROCOPY, 1); if (connect(fd, (void *) &cfg_dst_addr, cfg_alen)) error(1, errno, "connect"); return fd; } static inline struct io_uring_cqe *wait_cqe_fast(struct io_uring *ring) { struct io_uring_cqe *cqe; unsigned head; int ret; io_uring_for_each_cqe(ring, head, cqe) return cqe; ret = io_uring_wait_cqe(ring, &cqe); if (ret) error(1, ret, "wait cqe"); return cqe; } static void do_tx(int domain, int type, int protocol) { unsigned long packets = 0, bytes = 0; struct io_uring ring; struct iovec iov; uint64_t tstop; int i, fd, ret; int compl_cqes = 0; fd = do_setup_tx(domain, type, protocol); ret = io_uring_queue_init(512, &ring, 0); if (ret) error(1, ret, "io_uring: queue init"); struct __io_uring_tx_ctx_register r = { .tag = ZC_TAG, }; ret = ____sys_io_uring_register(ring.ring_fd, __IORING_REGISTER_TX_CTX, (void *)&r, 1); if (ret) error(1, ret, "io_uring: tx ctx registration"); ret = io_uring_register_files(&ring, &fd, 1); if (ret < 0) error(1, ret, "io_uring: files registration"); iov.iov_base = payload; iov.iov_len = cfg_payload_len; ret = io_uring_register_buffers(&ring, &iov, 1); if (ret < 0) error(1, ret, "io_uring: buffer registration"); tstop = gettimeofday_ms() + cfg_runtime_ms; do { struct io_uring_sqe *sqe; struct io_uring_cqe *cqe; compl_cqes += flush ? nr_reqs : 0; for (i = 0; i < nr_reqs; i++) { sqe = io_uring_get_sqe(&ring); io_uring_prep_send(sqe, fd, payload, cfg_payload_len, 0); sqe->user_data = 1; if (fixed_files) { sqe->fd = 0; sqe->flags = IOSQE_FIXED_FILE; } if (zc) { sqe->opcode = __IORING_OP_SENDZC; sqe->file_index = 0; // sqe->tx_ctx_idx = 0; sqe->ioprio = 0; sqe->off = 0; sqe->__pad2[0] = 0; if (flush) sqe->ioprio |= __IORING_SENDZC_FLUSH; if (fixed_buf) { sqe->ioprio |= __IORING_SENDZC_FIXED_BUF; sqe->buf_index = 0; } } } ret = io_uring_submit(&ring); if (ret != nr_reqs) error(1, ret, "submit"); for (i = 0; i < nr_reqs; i++) { cqe = wait_cqe_fast(&ring); if (cqe->user_data == ZC_TAG) { compl_cqes--; i--; } else if (cqe->user_data == 1) { if (cqe->res <= 0) error(1, cqe->res, "send failed"); packets++; bytes += cqe->res; } else { error(1, cqe->user_data, "invalid user_data"); } io_uring_cqe_seen(&ring, cqe); } } while (gettimeofday_ms() < tstop); if (close(fd)) error(1, errno, "close"); fprintf(stderr, "tx=%lu (MB=%lu), tx/s=%lu (MB/s=%lu)\n", packets, bytes >> 20, packets / (cfg_runtime_ms / 1000), (bytes >> 20) / (cfg_runtime_ms / 1000)); while (compl_cqes) { struct io_uring_cqe *cqe = wait_cqe_fast(&ring); io_uring_cqe_seen(&ring, cqe); compl_cqes--; } ret = ____sys_io_uring_register(ring.ring_fd, __IORING_UNREGISTER_TX_CTX, NULL, 0); if (ret) error(1, ret, "io_uring: tx ctx unregistration"); io_uring_queue_exit(&ring); } static void do_test(int domain, int type, int protocol) { int i; for (i = 0; i < IP_MAXPACKET; i++) payload[i] = 'a' + (i % 26); do_tx(domain, type, protocol); } static void usage(const char *filepath) { error(1, 0, "Usage: %s [-f] [-n] [-z0] [-s] " "(-4|-6) [-t