linux-kernel - Re: [PATCH V2 9/9] vhost: use kernel_worker to check RLIMITs and inherit v2 cgroups

lists.openwall.net		lists / announce owl-users owl-dev john-users john-dev passwdqc-users yescrypt popa3d-users / oss-security kernel-hardening musl sabotage tlsify passwords / crypt-dev xvendor / Bugtraq Full-Disclosure linux-kernel linux-netdev linux-ext4 linux-hardening linux-cve-announce PHC
Open Source and information security mailing list archives
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [day] [month] [year] [list]
Message-ID: <20211004091216-mutt-send-email-mst@kernel.org>
Date:   Mon, 4 Oct 2021 09:12:49 -0400
From:   "Michael S. Tsirkin" <mst@...hat.com>
To:     Mike Christie <michael.christie@...cle.com>
Cc:     hdanton@...a.com, hch@...radead.org, stefanha@...hat.com,
        jasowang@...hat.com, sgarzare@...hat.com,
        virtualization@...ts.linux-foundation.org,
        christian.brauner@...ntu.com, axboe@...nel.dk,
        linux-kernel@...r.kernel.org
Subject: Re: [PATCH V2 9/9] vhost: use kernel_worker to check RLIMITs and
 inherit v2 cgroups

On Tue, Sep 21, 2021 at 04:52:18PM -0500, Mike Christie wrote:
> For vhost workers we use the kthread API which inherit's its values from
> and checks against the kthreadd thread. This results in cgroups v2 not
> working and the wrong RLIMITs being checked. This patch has us use the
> kernel_copy_process function which will inherit its values/checks from the
> thread that owns the device.
> 
> Signed-off-by: Mike Christie <michael.christie@...cle.com>

Acked-by: Michael S. Tsirkin <mst@...hat.com>

Feel free to merge with other bits.

> ---
>  drivers/vhost/vhost.c | 68 ++++++++++++++++---------------------------
>  drivers/vhost/vhost.h |  7 ++++-
>  2 files changed, 31 insertions(+), 44 deletions(-)
> 
> diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c
> index c9a1f706989c..7a5142dcde1b 100644
> --- a/drivers/vhost/vhost.c
> +++ b/drivers/vhost/vhost.c
> @@ -22,7 +22,6 @@
>  #include <linux/slab.h>
>  #include <linux/vmalloc.h>
>  #include <linux/kthread.h>
> -#include <linux/cgroup.h>
>  #include <linux/module.h>
>  #include <linux/sort.h>
>  #include <linux/sched/mm.h>
> @@ -344,17 +343,14 @@ static void vhost_vq_reset(struct vhost_dev *dev,
>  static int vhost_worker(void *data)
>  {
>  	struct vhost_worker *worker = data;
> -	struct vhost_dev *dev = worker->dev;
>  	struct vhost_work *work, *work_next;
>  	struct llist_node *node;
>  
> -	kthread_use_mm(dev->mm);
> -
>  	for (;;) {
>  		/* mb paired w/ kthread_stop */
>  		set_current_state(TASK_INTERRUPTIBLE);
>  
> -		if (kthread_should_stop()) {
> +		if (test_bit(VHOST_WORKER_FLAG_STOP, &worker->flags)) {
>  			__set_current_state(TASK_RUNNING);
>  			break;
>  		}
> @@ -376,8 +372,9 @@ static int vhost_worker(void *data)
>  				schedule();
>  		}
>  	}
> -	kthread_unuse_mm(dev->mm);
> -	return 0;
> +
> +	complete(worker->exit_done);
> +	do_exit(0);
>  }
>  
>  static void vhost_vq_free_iovecs(struct vhost_virtqueue *vq)
> @@ -517,31 +514,6 @@ long vhost_dev_check_owner(struct vhost_dev *dev)
>  }
>  EXPORT_SYMBOL_GPL(vhost_dev_check_owner);
>  
> -struct vhost_attach_cgroups_struct {
> -	struct vhost_work work;
> -	struct task_struct *owner;
> -	int ret;
> -};
> -
> -static void vhost_attach_cgroups_work(struct vhost_work *work)
> -{
> -	struct vhost_attach_cgroups_struct *s;
> -
> -	s = container_of(work, struct vhost_attach_cgroups_struct, work);
> -	s->ret = cgroup_attach_task_all(s->owner, current);
> -}
> -
> -static int vhost_attach_cgroups(struct vhost_dev *dev)
> -{
> -	struct vhost_attach_cgroups_struct attach;
> -
> -	attach.owner = current;
> -	vhost_work_init(&attach.work, vhost_attach_cgroups_work);
> -	vhost_work_queue(dev, &attach.work);
> -	vhost_work_dev_flush(dev);
> -	return attach.ret;
> -}
> -
>  /* Caller should have device mutex */
>  bool vhost_dev_has_owner(struct vhost_dev *dev)
>  {
> @@ -579,6 +551,16 @@ static void vhost_detach_mm(struct vhost_dev *dev)
>  	dev->mm = NULL;
>  }
>  
> +static void vhost_worker_stop(struct vhost_worker *worker)
> +{
> +	DECLARE_COMPLETION_ONSTACK(exit_done);
> +
> +	worker->exit_done = &exit_done;
> +	set_bit(VHOST_WORKER_FLAG_STOP, &worker->flags);
> +	wake_up_process(worker->task);
> +	wait_for_completion(worker->exit_done);
> +}
> +
>  static void vhost_worker_free(struct vhost_dev *dev)
>  {
>  	struct vhost_worker *worker = dev->worker;
> @@ -588,7 +570,7 @@ static void vhost_worker_free(struct vhost_dev *dev)
>  
>  	dev->worker = NULL;
>  	WARN_ON(!llist_empty(&worker->work_list));
> -	kthread_stop(worker->task);
> +	vhost_worker_stop(worker);
>  	kfree(worker);
>  }
>  
> @@ -603,27 +585,27 @@ static int vhost_worker_create(struct vhost_dev *dev)
>  		return -ENOMEM;
>  
>  	dev->worker = worker;
> -	worker->dev = dev;
>  	worker->kcov_handle = kcov_common_handle();
>  	init_llist_head(&worker->work_list);
>  
> -	task = kthread_create(vhost_worker, worker, "vhost-%d", current->pid);
> +	/*
> +	 * vhost used to use the kthread API which ignores all signals by
> +	 * default and the drivers expect this behavior. So we do not want to
> +	 * ineherit the parent's signal handlers and set our worker to ignore
> +	 * everything below.
> +	 */
> +	task = kernel_worker(vhost_worker, worker, NUMA_NO_NODE,
> +			     CLONE_FS | CLONE_CLEAR_SIGHAND,
> +			     KERN_WORKER_NO_FILES | KERN_WORKER_NO_SIGS);
>  	if (IS_ERR(task)) {
>  		ret = PTR_ERR(task);
>  		goto free_worker;
>  	}
>  
>  	worker->task = task;
> -	wake_up_process(task); /* avoid contributing to loadavg */
> -
> -	ret = vhost_attach_cgroups(dev);
> -	if (ret)
> -		goto stop_worker;
> -
> +	kernel_worker_start(task, "vhost-%d", current->pid);
>  	return 0;
>  
> -stop_worker:
> -	kthread_stop(worker->task);
>  free_worker:
>  	kfree(worker);
>  	dev->worker = NULL;
> diff --git a/drivers/vhost/vhost.h b/drivers/vhost/vhost.h
> index 102ce25e4e13..09748694cb66 100644
> --- a/drivers/vhost/vhost.h
> +++ b/drivers/vhost/vhost.h
> @@ -25,11 +25,16 @@ struct vhost_work {
>  	unsigned long		flags;
>  };
>  
> +enum {
> +	VHOST_WORKER_FLAG_STOP,
> +};
> +
>  struct vhost_worker {
>  	struct task_struct	*task;
> +	struct completion	*exit_done;
>  	struct llist_head	work_list;
> -	struct vhost_dev	*dev;
>  	u64			kcov_handle;
> +	unsigned long		flags;
>  };
>  
>  /* Poll a file (eventfd or socket) */
> -- 
> 2.25.1