lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <20210916212051.6918-8-michael.christie@oracle.com>
Date:   Thu, 16 Sep 2021 16:20:50 -0500
From:   Mike Christie <michael.christie@...cle.com>
To:     stefanha@...hat.com, jasowang@...hat.com, mst@...hat.com,
        sgarzare@...hat.com, virtualization@...ts.linux-foundation.org,
        christian.brauner@...ntu.com, axboe@...nel.dk,
        linux-kernel@...r.kernel.org
Cc:     Mike Christie <michael.christie@...cle.com>
Subject: [PATCH 7/8] vhost: use kernel_copy_process to check RLIMITs and inherit cgroups

For vhost workers we use the kthread API, which inherits its values from
and checks against the kthreadd thread. This results in cgroups v2 not
working and the wrong RLIMITs being checked. This patch has us use the
kernel_copy_process function, which will inherit its values/checks from the
thread that owns the device.

Note that this patch only converts us to the new API. The next patch will
remove the code that is no longer needed.

Signed-off-by: Mike Christie <michael.christie@...cle.com>
---
 drivers/vhost/vhost.c | 49 +++++++++++++++++++++++++++----------------
 drivers/vhost/vhost.h |  7 ++++++-
 2 files changed, 37 insertions(+), 19 deletions(-)

diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c
index c9a1f706989c..6e58417b13fc 100644
--- a/drivers/vhost/vhost.c
+++ b/drivers/vhost/vhost.c
@@ -344,17 +344,14 @@ static void vhost_vq_reset(struct vhost_dev *dev,
 static int vhost_worker(void *data)
 {
 	struct vhost_worker *worker = data;
-	struct vhost_dev *dev = worker->dev;
 	struct vhost_work *work, *work_next;
 	struct llist_node *node;
 
-	kthread_use_mm(dev->mm);
-
 	for (;;) {
 		/* mb paired w/ kthread_stop */
 		set_current_state(TASK_INTERRUPTIBLE);
 
-		if (kthread_should_stop()) {
+		if (test_bit(VHOST_WORKER_FLAG_STOP, &worker->flags)) {
 			__set_current_state(TASK_RUNNING);
 			break;
 		}
@@ -376,8 +373,9 @@ static int vhost_worker(void *data)
 				schedule();
 		}
 	}
-	kthread_unuse_mm(dev->mm);
-	return 0;
+
+	complete(worker->exit_done);
+	do_exit(0);
 }
 
 static void vhost_vq_free_iovecs(struct vhost_virtqueue *vq)
@@ -579,6 +577,16 @@ static void vhost_detach_mm(struct vhost_dev *dev)
 	dev->mm = NULL;
 }
 
+static void vhost_worker_stop(struct vhost_worker *worker)
+{
+	DECLARE_COMPLETION_ONSTACK(exit_done);
+
+	worker->exit_done = &exit_done;
+	set_bit(VHOST_WORKER_FLAG_STOP, &worker->flags);
+	wake_up_process(worker->task);
+	wait_for_completion(worker->exit_done);
+}
+
 static void vhost_worker_free(struct vhost_dev *dev)
 {
 	struct vhost_worker *worker = dev->worker;
@@ -588,14 +596,16 @@ static void vhost_worker_free(struct vhost_dev *dev)
 
 	dev->worker = NULL;
 	WARN_ON(!llist_empty(&worker->work_list));
-	kthread_stop(worker->task);
+	vhost_worker_stop(worker);
 	kfree(worker);
 }
 
 static int vhost_worker_create(struct vhost_dev *dev)
 {
+	DECLARE_COMPLETION_ONSTACK(start_done);
 	struct vhost_worker *worker;
 	struct task_struct *task;
+	char buf[TASK_COMM_LEN];
 	int ret;
 
 	worker = kzalloc(sizeof(*worker), GFP_KERNEL_ACCOUNT);
@@ -603,27 +613,30 @@ static int vhost_worker_create(struct vhost_dev *dev)
 		return -ENOMEM;
 
 	dev->worker = worker;
-	worker->dev = dev;
 	worker->kcov_handle = kcov_common_handle();
 	init_llist_head(&worker->work_list);
 
-	task = kthread_create(vhost_worker, worker, "vhost-%d", current->pid);
-	if (IS_ERR(task)) {
-		ret = PTR_ERR(task);
+	/*
+	 * vhost used to use the kthread API which ignores all signals by
+	 * default and the drivers expect this behavior. So we do not want to
+	 * inherit the parent's signal handlers and set our worker to ignore
+	 * everything below.
+	 */
+	task = kernel_copy_process(vhost_worker, worker, NUMA_NO_NODE,
+				   CLONE_FS|CLONE_CLEAR_SIGHAND, 0, 1);
+	if (IS_ERR(task))
 		goto free_worker;
-	}
 
 	worker->task = task;
-	wake_up_process(task); /* avoid contributing to loadavg */
 
-	ret = vhost_attach_cgroups(dev);
-	if (ret)
-		goto stop_worker;
+	snprintf(buf, sizeof(buf), "vhost-%d", current->pid);
+	set_task_comm(task, buf);
+
+	ignore_signals(task);
 
+	wake_up_new_task(task);
 	return 0;
 
-stop_worker:
-	kthread_stop(worker->task);
 free_worker:
 	kfree(worker);
 	dev->worker = NULL;
diff --git a/drivers/vhost/vhost.h b/drivers/vhost/vhost.h
index 102ce25e4e13..09748694cb66 100644
--- a/drivers/vhost/vhost.h
+++ b/drivers/vhost/vhost.h
@@ -25,11 +25,16 @@ struct vhost_work {
 	unsigned long		flags;
 };
 
+enum {
+	VHOST_WORKER_FLAG_STOP,
+};
+
 struct vhost_worker {
 	struct task_struct	*task;
+	struct completion	*exit_done;
 	struct llist_head	work_list;
-	struct vhost_dev	*dev;
 	u64			kcov_handle;
+	unsigned long		flags;
 };
 
 /* Poll a file (eventfd or socket) */
-- 
2.25.1

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ