lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <20250624180742.5795-3-sashal@kernel.org>
Date: Tue, 24 Jun 2025 14:07:22 -0400
From: Sasha Levin <sashal@...nel.org>
To: linux-kernel@...r.kernel.org
Cc: linux-doc@...r.kernel.org,
	linux-api@...r.kernel.org,
	workflows@...r.kernel.org,
	tools@...nel.org,
	Sasha Levin <sashal@...nel.org>
Subject: [RFC v2 02/22] eventpoll: add API specification for epoll_create1

Add kernel API specification for the epoll_create1() system call.

Signed-off-by: Sasha Levin <sashal@...nel.org>
---
 fs/eventpoll.c | 124 +++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 124 insertions(+)

diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index d4dbffdedd08e..620de3ccc7708 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -21,6 +21,7 @@
 #include <linux/hash.h>
 #include <linux/spinlock.h>
 #include <linux/syscalls.h>
+#include <linux/syscall_api_spec.h>
 #include <linux/rbtree.h>
 #include <linux/wait.h>
 #include <linux/eventpoll.h>
@@ -2265,6 +2266,129 @@ static int do_epoll_create(int flags)
 	return error;
 }
 
+
+/* Allowed epoll_create1() flags values; passed to KAPI_PARAM_ENUM_VALUES in the spec below */
+static const s64 epoll_create1_valid_values[] = { 0, EPOLL_CLOEXEC };
+
+DEFINE_KERNEL_API_SPEC(sys_epoll_create1)
+	KAPI_DESCRIPTION("Create an epoll instance")
+	KAPI_LONG_DESC("Creates a new epoll instance and returns a file descriptor "
+		       "referring to that instance. The file descriptor is used for all "
+		       "subsequent calls to the epoll interface.")
+	KAPI_CONTEXT(KAPI_CTX_PROCESS | KAPI_CTX_SLEEPABLE)
+
+	KAPI_PARAM(0, "flags", "int", "Creation flags for the epoll instance")
+		KAPI_PARAM_FLAGS(KAPI_PARAM_IN)
+		KAPI_PARAM_CONSTRAINT_TYPE(KAPI_CONSTRAINT_ENUM)
+		KAPI_PARAM_ENUM_VALUES(epoll_create1_valid_values)
+		KAPI_PARAM_CONSTRAINT("Must be 0 or EPOLL_CLOEXEC")
+	KAPI_PARAM_END
+
+	KAPI_RETURN("long", "File descriptor on success, negative error code on failure")
+		KAPI_RETURN_TYPE(KAPI_TYPE_INT)
+		KAPI_RETURN_CHECK_TYPE(KAPI_RETURN_FD)
+	KAPI_RETURN_END
+
+	KAPI_ERROR(0, -EINVAL, "EINVAL", "Invalid flags specified",
+		   "The flags parameter contains invalid values. Only EPOLL_CLOEXEC is allowed.")
+	KAPI_ERROR(1, -EMFILE, "EMFILE", "Per-process file descriptor limit reached",
+		   "The per-process limit on the number of open file descriptors has been reached.")
+	KAPI_ERROR(2, -ENFILE, "ENFILE", "System file table overflow",
+		   "The system-wide limit on the total number of open files has been reached.")
+	KAPI_ERROR(3, -ENOMEM, "ENOMEM", "Insufficient kernel memory",
+		   "There was insufficient kernel memory to create the epoll instance.")
+	KAPI_ERROR(4, -EINTR, "EINTR", "Interrupted by signal",
+		   "The system call was interrupted by a signal before the epoll instance could be created.")
+
+	.error_count = 5,
+	.param_count = 1,
+	.since_version = "2.6.27",
+	KAPI_EXAMPLES("int epfd = epoll_create1(EPOLL_CLOEXEC);")
+	KAPI_NOTES("EPOLL_CLOEXEC sets the close-on-exec (FD_CLOEXEC) flag on the new file descriptor. "
+		   "When all file descriptors referring to an epoll instance are closed, the kernel "
+		   "destroys the instance and releases associated resources. "
+		   "Memory consumption: Each registered fd uses approximately 90 bytes on 32-bit kernels "
+		   "and 160 bytes on 64-bit kernels. The total number of file descriptors registered "
+		   "across all epoll instances is limited by /proc/sys/fs/epoll/max_user_watches. "
+		   "When using dup() or fork(), multiple file descriptors may refer to the same epoll "
+		   "instance and all will receive events.")
+
+	/* Side effects: two entries, each marked reversible; count is declared after them */
+	KAPI_SIDE_EFFECT(0, KAPI_EFFECT_RESOURCE_CREATE | KAPI_EFFECT_ALLOC_MEMORY,
+			 "epoll instance",
+			 "Creates a new epoll instance and allocates kernel memory for it")
+		KAPI_EFFECT_REVERSIBLE
+	KAPI_SIDE_EFFECT_END
+
+	KAPI_SIDE_EFFECT(1, KAPI_EFFECT_RESOURCE_CREATE,
+			 "file descriptor",
+			 "Allocates a new file descriptor in the process's file descriptor table")
+		KAPI_EFFECT_REVERSIBLE
+	KAPI_SIDE_EFFECT_END
+
+	KAPI_SIDE_EFFECT_COUNT(2)
+
+	/* State transitions: the single "non-existent" -> "created and empty" step */
+	KAPI_STATE_TRANS(0, "epoll instance", "non-existent", "created and empty",
+			 "A new epoll instance is created with no monitored file descriptors")
+	KAPI_STATE_TRANS_END
+
+	KAPI_STATE_TRANS_COUNT(1)
+
+	/* Signal specifications. NOTE(review): confirm this syscall can actually return -EINTR */
+	KAPI_SIGNAL(0, SIGINT, "SIGINT", KAPI_SIGNAL_RECEIVE, KAPI_SIGNAL_ACTION_RETURN)
+		KAPI_SIGNAL_CONDITION("During creation if process receives SIGINT")
+		KAPI_SIGNAL_DESC("If interrupted during kernel memory allocation, returns -EINTR")
+		KAPI_SIGNAL_TIMING(KAPI_SIGNAL_TIME_DURING)
+		KAPI_SIGNAL_PRIORITY(1)
+		KAPI_SIGNAL_INTERRUPTIBLE
+		KAPI_SIGNAL_ERROR(-EINTR)
+		KAPI_SIGNAL_STATE_REQ(KAPI_SIGNAL_STATE_RUNNING | KAPI_SIGNAL_STATE_SLEEPING)
+		KAPI_SIGNAL_RESTARTABLE
+	KAPI_SIGNAL_END
+
+	KAPI_SIGNAL(1, SIGTERM, "SIGTERM", KAPI_SIGNAL_RECEIVE, KAPI_SIGNAL_ACTION_RETURN)
+		KAPI_SIGNAL_CONDITION("During creation if process receives SIGTERM")
+		KAPI_SIGNAL_DESC("If interrupted during kernel memory allocation, returns -EINTR")
+		KAPI_SIGNAL_TIMING(KAPI_SIGNAL_TIME_DURING)
+		KAPI_SIGNAL_PRIORITY(1)
+		KAPI_SIGNAL_INTERRUPTIBLE
+		KAPI_SIGNAL_ERROR(-EINTR)
+		KAPI_SIGNAL_STATE_REQ(KAPI_SIGNAL_STATE_RUNNING | KAPI_SIGNAL_STATE_SLEEPING)
+		KAPI_SIGNAL_RESTARTABLE
+	KAPI_SIGNAL_END
+
+	KAPI_SIGNAL(2, SIGKILL, "SIGKILL", KAPI_SIGNAL_RECEIVE, KAPI_SIGNAL_ACTION_TERMINATE)
+		KAPI_SIGNAL_CONDITION("At any point during the syscall")
+		KAPI_SIGNAL_DESC("Process is terminated immediately, epoll instance creation may be incomplete")
+		KAPI_SIGNAL_TIMING(KAPI_SIGNAL_TIME_ANYTIME)
+		KAPI_SIGNAL_PRIORITY(0)
+		KAPI_SIGNAL_QUEUE("uncatchable")
+	KAPI_SIGNAL_END
+
+	.signal_count = 3,
+
+	/* Additional constraints. NOTE(review): numeric figures in the text below are unverified */
+	KAPI_CONSTRAINT(0, "User Watch Limit",
+			"Although epoll_create1() itself doesn't register any watches, the "
+			"user is subject to a global limit on total watches across all epoll "
+			"instances. This limit is configured via /proc/sys/fs/epoll/max_user_watches "
+			"(default: 1/25 of lowmem or 1/32 of total memory). Each registered "
+			"file descriptor counts against this limit.")
+		KAPI_CONSTRAINT_EXPR("current_user_watches < max_user_watches")
+	KAPI_CONSTRAINT_END
+
+	KAPI_CONSTRAINT(1, "Memory Accounting",
+			"Each epoll instance consumes kernel memory that is not swappable. "
+			"The instance itself uses approximately 1KB, plus additional memory "
+			"for each registered file descriptor (90 bytes on 32-bit, 160 bytes "
+			"on 64-bit systems). This memory is charged to the user's locked "
+			"memory limit if memory cgroups are enabled.")
+	KAPI_CONSTRAINT_END
+
+	KAPI_CONSTRAINT_COUNT(2)
+
+KAPI_END_SPEC;
 SYSCALL_DEFINE1(epoll_create1, int, flags)
 {
 	return do_epoll_create(flags);
-- 
2.39.5


Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ