lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <20211129194707.5863-7-michael.christie@oracle.com>
Date:   Mon, 29 Nov 2021 13:47:03 -0600
From:   Mike Christie <michael.christie@...cle.com>
To:     geert@...ux-m68k.org, vverma@...italocean.com, hdanton@...a.com,
        hch@...radead.org, stefanha@...hat.com, jasowang@...hat.com,
        mst@...hat.com, sgarzare@...hat.com,
        virtualization@...ts.linux-foundation.org,
        christian.brauner@...ntu.com, axboe@...nel.dk,
        linux-kernel@...r.kernel.org
Cc:     Mike Christie <michael.christie@...cle.com>,
        Christoph Hellwig <hch@....de>
Subject: [PATCH V6 06/10] fork: add helpers to clone a process for kernel use

The vhost layer is creating kthreads to execute IO and management
operations. These threads need to share a mm with a userspace thread,
inherit cgroups, and we would like to have the thread accounted for
under the userspace thread's rlimit nproc value so a user can't overwhelm
the system with threads when creating VMs.

We have helpers for cgroups and mm but not for the rlimit nproc and in
the future we will probably want helpers for things like namespaces. For
those two items and to allow future sharing/inheritance, this patch adds
two helpers, user_worker_create and user_worker_start that allow callers
to create threads that copy or inherit the caller's attributes like mm,
cgroups, namespaces, etc, and are accounted for under the caller's rlimit
nproc value, as if the caller had done a clone() in userspace. However,
instead of returning to userspace the thread is usable in the kernel for
modules like vhost or layers like io_uring.

[added flag validation code from Christian Brauner's SIG_IGN patch]
Signed-off-by: Mike Christie <michael.christie@...cle.com>
Acked-by: Christian Brauner <christian.brauner@...ntu.com>
Reviewed-by: Christoph Hellwig <hch@....de>
---
 include/linux/sched/task.h |  5 +++
 kernel/fork.c              | 72 ++++++++++++++++++++++++++++++++++++++
 2 files changed, 77 insertions(+)

diff --git a/include/linux/sched/task.h b/include/linux/sched/task.h
index f8a658700075..ecb21c0d95ce 100644
--- a/include/linux/sched/task.h
+++ b/include/linux/sched/task.h
@@ -95,6 +95,11 @@ struct mm_struct *copy_init_mm(void);
 extern pid_t kernel_thread(int (*fn)(void *), void *arg, unsigned long flags);
 extern long kernel_wait4(pid_t, int __user *, int, struct rusage *);
 int kernel_wait(pid_t pid, int *stat);
+struct task_struct *user_worker_create(int (*fn)(void *), void *arg, int node,
+				       unsigned long clone_flags,
+				       u32 worker_flags);
+__printf(2, 3)
+void user_worker_start(struct task_struct *tsk, const char namefmt[], ...);
 
 extern void free_task(struct task_struct *tsk);
 
diff --git a/kernel/fork.c b/kernel/fork.c
index c9152596a285..e72239ae1e08 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -2543,6 +2543,78 @@ struct task_struct *create_io_thread(int (*fn)(void *), void *arg, int node)
 	return copy_process(NULL, 0, node, &args);
 }
 
+static bool user_worker_flags_valid(struct kernel_clone_args *kargs)
+{
+	/* Reject any worker_flags bit outside the known USER_WORKER set. */
+	if (kargs->worker_flags & ~(USER_WORKER_IO | USER_WORKER |
+				    USER_WORKER_NO_FILES | USER_WORKER_SIG_IGN))
+		return false;
+
+	/*
+	 * USER_WORKER_SIG_IGN excludes CLONE_SIGHAND (an ignore-all worker must
+	 * not share struct sighand) and CLONE_CLEAR_SIGHAND (clearing is moot).
+	 */
+	if ((kargs->flags & (CLONE_SIGHAND | CLONE_CLEAR_SIGHAND)) &&
+	    (kargs->worker_flags & USER_WORKER_SIG_IGN))
+		return false;
+
+	return true;
+}
+
+/**
+ * user_worker_create - create a copy of a process to be used by the kernel
+ * @fn: function for the new task; handed to copy_process() in args.stack
+ * @arg: data to be passed to fn
+ * @node: numa node to allocate task from
+ * @clone_flags: CLONE flags
+ * @worker_flags: USER_WORKER flags
+ *
+ * This returns a created task, or an error pointer. The returned task is
+ * inactive, and the caller must fire it up through user_worker_start(). If
+ * this is a PF_IO_WORKER all signals but KILL and STOP are blocked.
+ */
+struct task_struct *user_worker_create(int (*fn)(void *), void *arg, int node,
+				       unsigned long clone_flags,
+				       u32 worker_flags)
+{
+	struct kernel_clone_args args = {
+		.flags		= ((lower_32_bits(clone_flags) | CLONE_VM |
+				   CLONE_UNTRACED) & ~CSIGNAL),
+		.exit_signal	= (lower_32_bits(clone_flags) & CSIGNAL),
+		.stack		= (unsigned long)fn,
+		.stack_size	= (unsigned long)arg,
+		.worker_flags	= USER_WORKER | worker_flags,
+	};
+
+	if (!user_worker_flags_valid(&args))
+		return ERR_PTR(-EINVAL);
+
+	return copy_process(NULL, 0, node, &args);
+}
+EXPORT_SYMBOL_GPL(user_worker_create);
+
+/**
+ * user_worker_start - Start a task created with user_worker_create
+ * @tsk: task to wake up
+ * @namefmt: printf-style format string for the thread name
+ * @...: arguments for @namefmt
+ */
+void user_worker_start(struct task_struct *tsk, const char namefmt[], ...)
+{
+	char name[TASK_COMM_LEN];
+	va_list args;
+
+	WARN_ON(!(tsk->flags & PF_USER_WORKER));
+
+	va_start(args, namefmt);
+	vsnprintf(name, sizeof(name), namefmt, args);
+	set_task_comm(tsk, name);
+	va_end(args);
+
+	wake_up_new_task(tsk);
+}
+EXPORT_SYMBOL_GPL(user_worker_start);
+
 /*
  *  Ok, this is the main fork-routine.
  *
-- 
2.25.1

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ