linux-kernel - Re: [PATCH] nbd: Add locking for tasks

lists.openwall.net		lists / announce owl-users owl-dev john-users john-dev passwdqc-users yescrypt popa3d-users / oss-security kernel-hardening musl sabotage tlsify passwords / crypt-dev xvendor / Bugtraq Full-Disclosure linux-kernel linux-netdev linux-ext4 linux-hardening linux-cve-announce PHC
Open Source and information security mailing list archives
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <1444335269.2956.268.camel@decadent.org.uk>
Date:	Thu, 08 Oct 2015 21:14:29 +0100
From:	Ben Hutchings <ben@...adent.org.uk>
To:	Markus Pargmann <mpa@...gutronix.de>
Cc:	Jens Axboe <axboe@...nel.dk>, nbd-general@...ts.sourceforge.net,
	Michal Belczyk <belczyk@....krakow.pl>,
	Hermann Lauer <Hermann.Lauer@....uni-heidelberg.de>,
	linux-kernel@...r.kernel.org
Subject: Re: [PATCH] nbd: Add locking for tasks

On Tue, 2015-10-06 at 20:03 +0200, Markus Pargmann wrote:
> The timeout handling introduced in
> 	> 7e2893a16d3e (nbd: Fix timeout detection)
> introduces a race condition which may lead to killing of tasks that are
> not in nbd context anymore. This was not observed or reproducable yet.
> 
> This patch adds locking to critical use of task_recv and task_send to
> avoid killing tasks that already left the NBD thread functions. This
> lock is only acquired if a timeout occures or the nbd device
> starts/stops.
> 
> Reported-by: Ben Hutchings <ben@...adent.org.uk>
> Signed-off-by: Markus Pargmann <mpa@...gutronix.de>

Reviewed-by: Ben Hutchings <ben@...adent.org.uk>

You could add 'Fixes: 7e2893a16d3e ("nbd: Fix timeout detection")' to
the commit message as well.

nbd_dbg_tasks_show() can still race with thread exit and two tasks can 
race to become the receive thread, but those aren't new bugs.

Ben.

> ---
>  drivers/block/nbd.c | 36 ++++++++++++++++++++++++++++++------
>  1 file changed, 30 insertions(+), 6 deletions(-)
> 
> diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c
> index 039c7c4f0539..1a70852ac808 100644
> --- a/drivers/block/nbd.c
> +++ b/drivers/block/nbd.c
> @@ -60,6 +60,7 @@ struct nbd_device {
>  > 	> bool disconnect; /* a disconnect has been requested by user */
>  
>  > 	> struct timer_list timeout_timer;
> +> 	> spinlock_t tasks_lock;
>  > 	> struct task_struct *task_recv;
>  > 	> struct task_struct *task_send;
>  
> @@ -140,21 +141,23 @@ static void sock_shutdown(struct nbd_device *nbd)
>  static void nbd_xmit_timeout(unsigned long arg)
>  {
>  > 	> struct nbd_device *nbd = (struct nbd_device *)arg;
> -> 	> struct task_struct *task;
> +> 	> unsigned long flags;
>  
>  > 	> if (list_empty(&nbd->queue_head))
>  > 	> 	> return;
>  
>  > 	> nbd->disconnect = true;
>  
> -> 	> task = READ_ONCE(nbd->task_recv);
> -> 	> if (task)
> -> 	> 	> force_sig(SIGKILL, task);
> +> 	> spin_lock_irqsave(&nbd->tasks_lock, flags);
> +
> +> 	> if (nbd->task_recv)
> +> 	> 	> force_sig(SIGKILL, nbd->task_recv);
>  
> -> 	> task = READ_ONCE(nbd->task_send);
> -> 	> if (task)
> +> 	> if (nbd->task_send)
>  > 	> 	> force_sig(SIGKILL, nbd->task_send);
>  
> +> 	> spin_unlock_irqrestore(&nbd->tasks_lock, flags);
> +
>  > 	> dev_err(nbd_to_dev(nbd), "Connection timed out, killed receiver and sender, shutting down connection\n");
>  }
>  
> @@ -403,17 +406,24 @@ static int nbd_thread_recv(struct nbd_device *nbd)
>  {
>  > 	> struct request *req;
>  > 	> int ret;
> +> 	> unsigned long flags;
>  
>  > 	> BUG_ON(nbd->magic != NBD_MAGIC);
>  
>  > 	> sk_set_memalloc(nbd->sock->sk);
>  
> +> 	> spin_lock_irqsave(&nbd->tasks_lock, flags);
>  > 	> nbd->task_recv = current;
> +> 	> spin_unlock_irqrestore(&nbd->tasks_lock, flags);
>  
>  > 	> ret = device_create_file(disk_to_dev(nbd->disk), &pid_attr);
>  > 	> if (ret) {
>  > 	> 	> dev_err(disk_to_dev(nbd->disk), "device_create_file failed!\n");
> +
> +> 	> 	> spin_lock_irqsave(&nbd->tasks_lock, flags);
>  > 	> 	> nbd->task_recv = NULL;
> +> 	> 	> spin_unlock_irqrestore(&nbd->tasks_lock, flags);
> +
>  > 	> 	> return ret;
>  > 	> }
>  
> @@ -429,7 +439,9 @@ static int nbd_thread_recv(struct nbd_device *nbd)
>  
>  > 	> device_remove_file(disk_to_dev(nbd->disk), &pid_attr);
>  
> +> 	> spin_lock_irqsave(&nbd->tasks_lock, flags);
>  > 	> nbd->task_recv = NULL;
> +> 	> spin_unlock_irqrestore(&nbd->tasks_lock, flags);
>  
>  > 	> if (signal_pending(current)) {
>  > 	> 	> siginfo_t info;
> @@ -534,8 +546,11 @@ static int nbd_thread_send(void *data)
>  {
>  > 	> struct nbd_device *nbd = data;
>  > 	> struct request *req;
> +> 	> unsigned long flags;
>  
> +> 	> spin_lock_irqsave(&nbd->tasks_lock, flags);
>  > 	> nbd->task_send = current;
> +> 	> spin_unlock_irqrestore(&nbd->tasks_lock, flags);
>  
>  > 	> set_user_nice(current, MIN_NICE);
>  > 	> while (!kthread_should_stop() || !list_empty(&nbd->waiting_queue)) {
> @@ -572,7 +587,15 @@ static int nbd_thread_send(void *data)
>  > 	> 	> nbd_handle_req(nbd, req);
>  > 	> }
>  
> +> 	> spin_lock_irqsave(&nbd->tasks_lock, flags);
>  > 	> nbd->task_send = NULL;
> +> 	> spin_unlock_irqrestore(&nbd->tasks_lock, flags);
> +
> +> 	> /* Clear maybe pending signals */
> +> 	> if (signal_pending(current)) {
> +> 	> 	> siginfo_t info;
> +> 	> 	> dequeue_signal_lock(current, ¤t->blocked, &info);
> +> 	> }
>  
>  > 	> return 0;
>  }
> @@ -1027,6 +1050,7 @@ static int __init nbd_init(void)
>  > 	> 	> nbd_dev[i].magic = NBD_MAGIC;
>  > 	> 	> INIT_LIST_HEAD(&nbd_dev[i].waiting_queue);
>  > 	> 	> spin_lock_init(&nbd_dev[i].queue_lock);
> +> 	> 	> spin_lock_init(&nbd_dev[i].tasks_lock);
>  > 	> 	> INIT_LIST_HEAD(&nbd_dev[i].queue_head);
>  > 	> 	> mutex_init(&nbd_dev[i].tx_lock);
>  > 	> 	> init_timer(&nbd_dev[i].timeout_timer);
-- 
Ben Hutchings
Once a job is fouled up, anything done to improve it makes it worse.
Download attachment "signature.asc" of type "application/pgp-signature" (812 bytes)