[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <20250614134858.790460-8-sashal@kernel.org>
Date: Sat, 14 Jun 2025 09:48:46 -0400
From: Sasha Levin <sashal@...nel.org>
To: linux-kernel@...r.kernel.org
Cc: linux-api@...r.kernel.org,
workflows@...r.kernel.org,
tools@...nel.org,
Sasha Levin <sashal@...nel.org>
Subject: [RFC 07/19] eventpoll: add API specification for epoll_pwait2
Add kernel API specification for the epoll_pwait2() system call.
Signed-off-by: Sasha Levin <sashal@...nel.org>
---
fs/eventpoll.c | 244 +++++++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 244 insertions(+)
diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index 8bd25f9230fc8..0e90d66467010 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -3389,6 +3389,250 @@ SYSCALL_DEFINE6(epoll_pwait, int, epfd, struct epoll_event __user *, events,
sigmask, sigsetsize);
}
+
+DEFINE_KERNEL_API_SPEC(sys_epoll_pwait2)
+ KAPI_DESCRIPTION("Wait for events on an epoll instance with nanosecond precision timeout")
+ KAPI_LONG_DESC("Similar to epoll_pwait(), but takes a timespec structure that allows "
+ "nanosecond precision for the timeout value. This provides more accurate "
+ "timeout control compared to the millisecond precision of epoll_pwait(). "
+ "Like epoll_pwait(), it atomically sets a signal mask during the wait.")
+ KAPI_CONTEXT(KAPI_CTX_PROCESS | KAPI_CTX_SLEEPABLE)
+
+ KAPI_PARAM(0, "epfd", "int", "File descriptor referring to the epoll instance")
+ KAPI_PARAM_FLAGS(KAPI_PARAM_IN)
+ .type = KAPI_TYPE_FD,
+ .constraint_type = KAPI_CONSTRAINT_NONE,
+ KAPI_PARAM_END
+
+ KAPI_PARAM(1, "events", "struct epoll_event __user *", "Buffer where ready events will be stored")
+ KAPI_PARAM_FLAGS(KAPI_PARAM_OUT | KAPI_PARAM_USER)
+ .type = KAPI_TYPE_USER_PTR,
+ KAPI_PARAM_SIZE(sizeof(struct epoll_event))
+ .size_param_idx = 2, /* Size determined by maxevents parameter */
+ .size_multiplier = sizeof(struct epoll_event),
+ .constraint_type = KAPI_CONSTRAINT_NONE,
+ .constraints = "Must point to an array of at least maxevents epoll_event structures",
+ KAPI_PARAM_END
+
+ KAPI_PARAM(2, "maxevents", "int", "Maximum number of events to return")
+ KAPI_PARAM_FLAGS(KAPI_PARAM_IN)
+ .type = KAPI_TYPE_INT,
+ KAPI_PARAM_RANGE(1, INT_MAX / sizeof(struct epoll_event)) /* EP_MAX_EVENTS */
+ .constraint_type = KAPI_CONSTRAINT_RANGE,
+ .constraints = "Must be greater than zero and not exceed system limits",
+ KAPI_PARAM_END
+
+ KAPI_PARAM(3, "timeout", "const struct __kernel_timespec __user *", "Timeout with nanosecond precision")
+ KAPI_PARAM_FLAGS(KAPI_PARAM_IN | KAPI_PARAM_USER | KAPI_PARAM_OPTIONAL)
+ .type = KAPI_TYPE_USER_PTR,
+ KAPI_PARAM_SIZE(sizeof(struct __kernel_timespec))
+ .constraint_type = KAPI_CONSTRAINT_NONE,
+ .constraints = "NULL means block indefinitely, {0, 0} returns immediately, "
+ "negative values are invalid",
+ KAPI_PARAM_END
+
+ KAPI_PARAM(4, "sigmask", "const sigset_t __user *", "Signal mask to atomically set during wait")
+ KAPI_PARAM_FLAGS(KAPI_PARAM_IN | KAPI_PARAM_USER | KAPI_PARAM_OPTIONAL)
+ .type = KAPI_TYPE_USER_PTR,
+ KAPI_PARAM_SIZE(sizeof(sigset_t))
+ .constraint_type = KAPI_CONSTRAINT_NONE,
+ .constraints = "Can be NULL if no signal mask change is desired",
+ KAPI_PARAM_END
+
+ KAPI_PARAM(5, "sigsetsize", "size_t", "Size of the signal set in bytes")
+ KAPI_PARAM_FLAGS(KAPI_PARAM_IN)
+ .type = KAPI_TYPE_UINT,
+ KAPI_PARAM_RANGE(sizeof(sigset_t), sizeof(sigset_t))
+ .constraint_type = KAPI_CONSTRAINT_RANGE,
+ .constraints = "Must be sizeof(sigset_t)",
+ KAPI_PARAM_END
+
+ KAPI_RETURN("long", "Number of ready file descriptors on success, negative error code on failure")
+ .type = KAPI_TYPE_INT,
+ .check_type = KAPI_RETURN_RANGE,
+ .success_min = 0,
+ .success_max = INT_MAX,
+ KAPI_RETURN_END
+
+ KAPI_ERROR(0, -EBADF, "EBADF", "epfd is not a valid file descriptor",
+ "The epoll file descriptor is invalid or has been closed.")
+ KAPI_ERROR(1, -EFAULT, "EFAULT", "Memory area not accessible",
+ "The memory area pointed to by events, timeout, or sigmask is not accessible.")
+ KAPI_ERROR(2, -EINTR, "EINTR", "Call interrupted by signal handler",
+ "The call was interrupted by a signal handler before any events "
+ "became ready or the timeout expired.")
+ KAPI_ERROR(3, -EINVAL, "EINVAL", "Invalid parameters",
+ "epfd is not an epoll file descriptor, maxevents is less than or equal to zero, "
+ "sigsetsize is not equal to sizeof(sigset_t), or timeout values are invalid.")
+
+ .error_count = 4,
+ .param_count = 6,
+ .since_version = "5.11",
+
+ /* Side effects */
+ KAPI_SIDE_EFFECT(0, KAPI_EFFECT_MODIFY_STATE,
+ "signal mask",
+ "Atomically sets the signal mask for the calling thread")
+ KAPI_EFFECT_CONDITION("When sigmask is not NULL")
+ KAPI_EFFECT_REVERSIBLE
+ KAPI_SIDE_EFFECT_END
+
+ KAPI_SIDE_EFFECT(1, KAPI_EFFECT_MODIFY_STATE,
+ "ready list",
+ "Removes events from the epoll ready list as they are reported")
+ KAPI_EFFECT_CONDITION("When events are available and level-triggered")
+ KAPI_EFFECT_REVERSIBLE
+ KAPI_SIDE_EFFECT_END
+
+ KAPI_SIDE_EFFECT(2, KAPI_EFFECT_SCHEDULE,
+ "process state",
+ "Blocks the calling thread until events, timeout, or signal")
+ KAPI_EFFECT_CONDITION("When timeout != NULL or timeout->tv_sec/tv_nsec != 0")
+ KAPI_SIDE_EFFECT_END
+
+ KAPI_SIDE_EFFECT(3, KAPI_EFFECT_MODIFY_STATE,
+ "user memory",
+ "Writes event data to user-provided buffer")
+ KAPI_EFFECT_CONDITION("When events are available")
+ KAPI_SIDE_EFFECT_END
+
+ KAPI_SIDE_EFFECT(4, KAPI_EFFECT_PROCESS_STATE,
+ "saved signal mask",
+ "Saves and restores the original signal mask")
+ KAPI_EFFECT_CONDITION("When sigmask is not NULL")
+ KAPI_EFFECT_REVERSIBLE
+ KAPI_SIDE_EFFECT_END
+
+ KAPI_SIDE_EFFECT(5, KAPI_EFFECT_MODIFY_STATE,
+ "timer precision",
+ "Timeout may be rounded up to system timer granularity")
+ KAPI_EFFECT_CONDITION("When timeout is specified")
+ KAPI_SIDE_EFFECT_END
+
+ KAPI_SIDE_EFFECT_COUNT(6)
+
+ /* State transitions */
+ KAPI_STATE_TRANS(0, "signal mask", "original mask", "user-specified mask",
+ "Thread's signal mask is atomically changed to the provided mask")
+ KAPI_STATE_TRANS_COND("When sigmask is not NULL")
+ KAPI_STATE_TRANS_END
+
+ KAPI_STATE_TRANS(1, "process", "running", "blocked",
+ "Process blocks waiting for events with specified signal mask")
+ KAPI_STATE_TRANS_COND("When no events available and not immediate return")
+ KAPI_STATE_TRANS_END
+
+ KAPI_STATE_TRANS(2, "process", "blocked", "running",
+ "Process wakes up due to events, timeout expiry, or unblocked signal")
+ KAPI_STATE_TRANS_COND("When wait condition is satisfied")
+ KAPI_STATE_TRANS_END
+
+ KAPI_STATE_TRANS(3, "signal mask", "user-specified mask", "original mask",
+ "Thread's signal mask is restored to its original value")
+ KAPI_STATE_TRANS_COND("When returning from epoll_pwait2")
+ KAPI_STATE_TRANS_END
+
+ KAPI_STATE_TRANS(4, "pending signals", "blocked", "deliverable",
+ "Signals that were blocked by the temporary mask become deliverable")
+ KAPI_STATE_TRANS_COND("When signal mask is restored and signals were pending")
+ KAPI_STATE_TRANS_END
+
+ KAPI_STATE_TRANS(5, "timeout timer", "not started", "armed with nanosecond precision",
+ "High resolution timer is armed with the specified timeout")
+ KAPI_STATE_TRANS_COND("When timeout is specified and > 0")
+ KAPI_STATE_TRANS_END
+
+ KAPI_STATE_TRANS_COUNT(6)
+
+ /* Signal specifications */
+ KAPI_SIGNAL(0, 0, "ANY_UNBLOCKED", KAPI_SIGNAL_RECEIVE, KAPI_SIGNAL_ACTION_RETURN)
+ KAPI_SIGNAL_CONDITION("Signal not blocked by provided sigmask")
+ KAPI_SIGNAL_DESC("Any signal not blocked by the sigmask parameter will interrupt "
+ "epoll_pwait2() and cause it to return -EINTR. Signal handling is "
+ "identical to epoll_pwait().")
+ KAPI_SIGNAL_RESTARTABLE
+ KAPI_SIGNAL_END
+
+ KAPI_SIGNAL(1, SIGKILL, "SIGKILL", KAPI_SIGNAL_RECEIVE, KAPI_SIGNAL_ACTION_TERMINATE)
+ KAPI_SIGNAL_CONDITION("Cannot be blocked by sigmask")
+ KAPI_SIGNAL_DESC("SIGKILL cannot be blocked and will terminate the process immediately.")
+ KAPI_SIGNAL_END
+
+ KAPI_SIGNAL(2, SIGSTOP, "SIGSTOP", KAPI_SIGNAL_RECEIVE, KAPI_SIGNAL_ACTION_STOP)
+ KAPI_SIGNAL_CONDITION("Cannot be blocked by sigmask")
+ KAPI_SIGNAL_DESC("SIGSTOP cannot be blocked and will stop the process.")
+ KAPI_SIGNAL_END
+
+ KAPI_SIGNAL(3, 0, "BLOCKED_SIGNALS", KAPI_SIGNAL_BLOCK, KAPI_SIGNAL_ACTION_DEFAULT)
+ KAPI_SIGNAL_CONDITION("Signals in provided sigmask")
+ KAPI_SIGNAL_DESC("Signals specified in the sigmask parameter are blocked during "
+ "the epoll_pwait2 call.")
+ KAPI_SIGNAL_END
+
+ KAPI_SIGNAL(4, SIGCONT, "SIGCONT", KAPI_SIGNAL_RECEIVE, KAPI_SIGNAL_ACTION_CONTINUE)
+ KAPI_SIGNAL_CONDITION("When process is stopped")
+ KAPI_SIGNAL_DESC("SIGCONT resumes a stopped process. If epoll_pwait2 was interrupted "
+ "by SIGSTOP, it may return -EINTR when continued.")
+ KAPI_SIGNAL_END
+
+ KAPI_SIGNAL(5, SIGALRM, "SIGALRM", KAPI_SIGNAL_RECEIVE, KAPI_SIGNAL_ACTION_RETURN)
+ KAPI_SIGNAL_CONDITION("Timer expiration")
+ KAPI_SIGNAL_DESC("SIGALRM or other timer signals will interrupt epoll_pwait2 with -EINTR "
+ "if not blocked by sigmask")
+ KAPI_SIGNAL_RESTARTABLE
+ KAPI_SIGNAL_END
+
+ .signal_count = 6,
+
+ /* Signal mask specifications */
+ KAPI_SIGNAL_MASK(0, "user_sigmask", "User-provided signal mask atomically applied")
+ .description = "The signal mask is atomically set and restored exactly as in "
+ "epoll_pwait(), providing the same race-condition prevention."
+ KAPI_SIGNAL_MASK_END
+
+ .signal_mask_count = 1,
+
+ /* Locking specifications */
+ KAPI_LOCK(0, "ep->lock", KAPI_LOCK_SPINLOCK)
+ KAPI_LOCK_ACQUIRED
+ KAPI_LOCK_RELEASED
+ KAPI_LOCK_DESC("Protects the ready list while checking for and consuming events")
+ KAPI_LOCK_END
+
+ KAPI_LOCK(1, "ep->mtx", KAPI_LOCK_MUTEX)
+ KAPI_LOCK_ACQUIRED
+ KAPI_LOCK_RELEASED
+ KAPI_LOCK_DESC("Protects against concurrent epoll_ctl operations during wait")
+ KAPI_LOCK_END
+
+ .lock_count = 2,
+
+ .examples = "sigset_t sigmask;\n"
+ "struct epoll_event events[10];\n"
+ "struct __kernel_timespec ts;\n\n"
+ "/* Block SIGINT during epoll_pwait2 */\n"
+ "sigemptyset(&sigmask);\n"
+ "sigaddset(&sigmask, SIGINT);\n\n"
+ "/* Wait for 1.5 seconds */\n"
+ "ts.tv_sec = 1;\n"
+ "ts.tv_nsec = 500000000; /* 500 milliseconds */\n\n"
+ "int nfds = epoll_pwait2(epfd, events, 10, &ts, &sigmask, sizeof(sigmask));\n"
+ "if (nfds == -1) {\n"
+ " if (errno == EINTR) {\n"
+ " /* Handle signal */\n"
+ " }\n"
+ " perror(\"epoll_pwait2\");\n"
+ " exit(EXIT_FAILURE);\n"
+ "}\n\n"
+ "/* Example with infinite timeout */\n"
+ "nfds = epoll_pwait2(epfd, events, 10, NULL, &sigmask, sizeof(sigmask));",
+ .notes = "epoll_pwait2() provides nanosecond precision timeouts, addressing the limitation "
+ "of epoll_pwait() which only supports millisecond precision. The timeout parameter "
+ "uses struct __kernel_timespec which is compatible with 64-bit time values, making "
+ "it Y2038-safe. Like epoll_pwait(), the signal mask operation is atomic. "
+ "The timeout is still subject to system timer granularity and may be rounded up.",
+KAPI_END_SPEC;
+
SYSCALL_DEFINE6(epoll_pwait2, int, epfd, struct epoll_event __user *, events,
int, maxevents, const struct __kernel_timespec __user *, timeout,
const sigset_t __user *, sigmask, size_t, sigsetsize)
--
2.39.5
Powered by blists - more mailing lists