lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Date:	Thu, 14 Jul 2016 11:29:08 +0530
From:	Pranay Srivastava <pranjas@...il.com>
To:	Markus Pargmann <mpa@...gutronix.de>
Cc:	"nbd-general@...ts.sourceforge.net" 
	<nbd-general@...ts.sourceforge.net>,
	"linux-kernel@...r.kernel.org" <linux-kernel@...r.kernel.org>
Subject: Re: [PATCH v4 2/5]nbd: fix might_sleep warning on socket shutdown

On Wed, Jul 13, 2016 at 12:43 PM, Markus Pargmann <mpa@...gutronix.de> wrote:
> On Sunday 10 July 2016 21:03:05 Pranay Srivastava wrote:
>> On Sunday, July 10, 2016, Markus Pargmann <mpa@...gutronix.de> wrote:
>> > Hi,
>> >
>> > On 2016 M06 30, Thu 14:02:02 CEST Pranay Kr. Srivastava wrote:
>> >> spinlocked ranges should be small and not contain calls into huge
>> >> subfunctions. Fix my mistake and just get the pointer to the socket
>> >> instead of doing everything with spinlock held.
>> >>
>> >> Reported-by: Mikulas Patocka <mikulas@...bright.com>
>> >> Signed-off-by: Markus Pargmann <mpa@...gutronix.de>
>> >>
>> >> Changelog:
>> >> Pranay Kr. Srivastava<pranjas@...il.com>:
>> >>
>> >> 1) Use spin_lock instead of irq version for sock_shutdown.
>> >>
>> >> 2) Use system work queue to actually trigger the shutdown of
>> >> socket. This solves the issue when kernel_sendmsg is currently
>> >> blocked while a timeout occurs.
>> >>
>> >> Signed-off-by: Pranay Kr. Srivastava <pranjas@...il.com>
>> >> ---
>> >>  drivers/block/nbd.c | 57
>> >> +++++++++++++++++++++++++++++++++-------------------- 1 file changed, 36
>> >> insertions(+), 21 deletions(-)
>> >>
>> >> diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c
>> >> index 766c401..e362d44 100644
>> >> --- a/drivers/block/nbd.c
>> >> +++ b/drivers/block/nbd.c
>> >> @@ -39,6 +39,7 @@
>> >>  #include <asm/types.h>
>> >>
>> >>  #include <linux/nbd.h>
>> >> +#include <linux/workqueue.h>
>> >>
>> >>  struct nbd_device {
>> >>       u32 flags;
>> >> @@ -69,6 +70,8 @@ struct nbd_device {
>> >>  #if IS_ENABLED(CONFIG_DEBUG_FS)
>> >>       struct dentry *dbg_dir;
>> >>  #endif
>> >> +     /* This is specifically for calling sock_shutdown, for now. */
>> >> +     struct work_struct ws_shutdown;
>> >>  };
>> >>
>> >>  #if IS_ENABLED(CONFIG_DEBUG_FS)
>> >> @@ -95,6 +98,8 @@ static int max_part;
>> >>   */
>> >>  static DEFINE_SPINLOCK(nbd_lock);
>> >>
>> >> +static void nbd_ws_func_shutdown(struct work_struct *);
>> >> +
>> >
>> > are you reading all the comments I had?...
>> >
>> > At least respond to my comments if you disagree. I still can't see the
>> benefit
>> > of a function signature here if we can avoid it.
>> >
>>
>> That would require some code to be moved. So to avoid those
>> unnecessary changes it was better to have a prototype.
>>
>> It would've pissed you off more if I had tried
>> to get rid of protoype.
>
> Ah I see, thanks.
>
>>
>> >>  static inline struct device *nbd_to_dev(struct nbd_device *nbd)
>> >>  {
>> >>       return disk_to_dev(nbd->disk);
>> >> @@ -172,39 +177,36 @@ static void nbd_end_request(struct nbd_device *nbd,
>> >> struct request *req) */
>> >>  static void sock_shutdown(struct nbd_device *nbd)
>> >>  {
>> >> -     spin_lock_irq(&nbd->sock_lock);
>> >> -
>> >> -     if (!nbd->sock) {
>> >> -             spin_unlock_irq(&nbd->sock_lock);
>> >> -             return;
>> >> -     }
>> >> +     struct socket *sock;
>> >>
>> >> -     dev_warn(disk_to_dev(nbd->disk), "shutting down socket\n");
>> >> -     kernel_sock_shutdown(nbd->sock, SHUT_RDWR);
>> >> -     sockfd_put(nbd->sock);
>> >> +     spin_lock(&nbd->sock_lock);
>> >> +     sock = nbd->sock;
>> >>       nbd->sock = NULL;
>> >> -     spin_unlock_irq(&nbd->sock_lock);
>> >> +     spin_unlock(&nbd->sock_lock);
>> >> +
>> >> +     if (!sock)
>> >> +             return;
>> >>
>> >>       del_timer(&nbd->timeout_timer);
>> >> +     dev_warn(disk_to_dev(nbd->disk), "shutting down socket\n");
>> >> +     kernel_sock_shutdown(sock, SHUT_RDWR);
>> >> +     sockfd_put(sock);
>> >>  }
>> >>
>> >>  static void nbd_xmit_timeout(unsigned long arg)
>> >>  {
>> >>       struct nbd_device *nbd = (struct nbd_device *)arg;
>> >> -     unsigned long flags;
>> >>
>> >>       if (list_empty(&nbd->queue_head))
>> >>               return;
>> >>
>> >> -     spin_lock_irqsave(&nbd->sock_lock, flags);
>> >> -
>> >>       nbd->timedout = true;
>> >> -
>> >> -     if (nbd->sock)
>> >> -             kernel_sock_shutdown(nbd->sock, SHUT_RDWR);
>> >> -
>> >> -     spin_unlock_irqrestore(&nbd->sock_lock, flags);
>> >> -
>> >> +     /*
>> >> +      * Make sure sender thread sees nbd->timedout.
>> >> +      */
>> >> +     smp_wmb();
>> >> +     schedule_work(&nbd->ws_shutdown);
>> >> +     wake_up(&nbd->waiting_wq);
>> >>       dev_err(nbd_to_dev(nbd), "Connection timed out, shutting down
>> >> connection\n"); }
>> >>
>> >> @@ -588,7 +590,11 @@ static int nbd_thread_send(void *data)
>> >>               spin_unlock_irq(&nbd->queue_lock);
>> >>
>> >>               /* handle request */
>> >> -             nbd_handle_req(nbd, req);
>> >> +             if (nbd->timedout) {
>> >> +                     req->errors++;
>> >> +                     nbd_end_request(nbd, req);
>> >> +             } else
>> >> +                     nbd_handle_req(nbd, req);
>> >
>> > I already commented on this in the last patch. This is unrelated to the
>> patch.
>> > If you disagree then please tell me why instead of sending the same thing
>> > again.
>> >
>>
>> After trigerring worker thread its
>> not necessary that socket shutdown
>> actually was called before we handled
>> a request.
>>
>> So the error would come in
>> actually later probably.
>>
>> So i just wanted to avoid a longer
>> path for error to be thrown up.
>> Do correct me if this cant happen.
>
> Yes socket shutdown may not have been called when we reach this error
> handling code. But the timeout timer is usually in the range of seconds.
> I would assume that the time between triggering the worker and socket
> shutdown is within a few milliseconds. We would need to hit exactly this
> condition which would require a new request to be present.
>
> I think this is very unlikely and it would be fine if we have a longer
> error path there. Am I missing something?

Okay but let's do the socket check under the sock_lock as the sock teardown
is done without tx_lock? Probably be better to have a new function to
check this what do you say?

>
>
> Also I just noticed that wake_up(&nbd->waiting_wq) in nbd_xmit_timeout()
> may not be necessary. In nbd_thread_send():
>
>         wait_event_interruptible(nbd->waiting_wq,
>                                  kthread_should_stop() ||
>                                  !list_empty(&nbd->waiting_queue));
>
>         if (list_empty(&nbd->waiting_queue))
>                 continue;
>
> So wouldn't this wake_up() call simply result in nothing?
> As soon as sock_shutdown() was called, the receiver thread would exit
> and close down nbd_thread_send() as well because of kthread_should_stop().
>

To be honest I don't remember now why I put it there. But yeah you are right
about this. I'll have to check it, shouldn't be a problem though.

>>
>> > Also brackets on the else part would be preferred.
>>
>> It might trigger checkpatch warning
>> but I am not 100% sure.
>
> Documentation/CodingStyle documents this. See line 168.

Okay.

>
> Best Regards,
>
> Markus
>
>> >
>> > Regards,
>> >
>> > Markus
>> >
>> >>       }
>> >>
>> >>       nbd->task_send = NULL;
>> >> @@ -663,6 +669,7 @@ static void nbd_reset(struct nbd_device *nbd)
>> >>       set_capacity(nbd->disk, 0);
>> >>       nbd->flags = 0;
>> >>       nbd->xmit_timeout = 0;
>> >> +     INIT_WORK(&nbd->ws_shutdown, nbd_ws_func_shutdown);
>> >>       queue_flag_clear_unlocked(QUEUE_FLAG_DISCARD, nbd->disk->queue);
>> >>       del_timer_sync(&nbd->timeout_timer);
>> >>  }
>> >> @@ -797,11 +804,11 @@ static int __nbd_ioctl(struct block_device *bdev,
>> >> struct nbd_device *nbd, error = nbd_thread_recv(nbd, bdev);
>> >>               nbd_dev_dbg_close(nbd);
>> >>               kthread_stop(thread);
>> >> +             sock_shutdown(nbd);
>> >>
>> >>               mutex_lock(&nbd->tx_lock);
>> >>               nbd->task_recv = NULL;
>> >>
>> >> -             sock_shutdown(nbd);
>> >>               nbd_clear_que(nbd);
>> >>               kill_bdev(bdev);
>> >>               nbd_bdev_reset(bdev);
>> >> @@ -857,6 +864,14 @@ static const struct block_device_operations
>> nbd_fops =
>> >> { .compat_ioctl =     nbd_ioctl,
>> >>  };
>> >>
>> >> +static void nbd_ws_func_shutdown(struct work_struct *ws_nbd)
>> >> +{
>> >> +     struct nbd_device *nbd_dev = container_of(ws_nbd, struct
>> nbd_device,
>> >> +                     ws_shutdown);
>> >> +
>> >> +     sock_shutdown(nbd_dev);
>> >> +}
>> >> +
>> >>  #if IS_ENABLED(CONFIG_DEBUG_FS)
>> >>
>> >>  static int nbd_dbg_tasks_show(struct seq_file *s, void *unused)
>> >
>> >
>> >
>>
>>
>
> --
> Pengutronix e.K.                           |                             |
> Industrial Linux Solutions                 | http://www.pengutronix.de/  |
> Peiner Str. 6-8, 31137 Hildesheim, Germany | Phone: +49-5121-206917-0    |
> Amtsgericht Hildesheim, HRA 2686           | Fax:   +49-5121-206917-5555 |



-- 
        ---P.K.S

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ