[<prev] [next>] [<thread-prev] [day] [month] [year] [list]
Message-ID:
<IA1PR10MB72400391A961BEACCB257F1298FD2@IA1PR10MB7240.namprd10.prod.outlook.com>
Date: Sat, 1 Jun 2024 05:47:36 +0000
From: Gulam Mohamed <gulam.mohamed@...cle.com>
To: Christoph Hellwig <hch@....de>
CC: "linux-block@...r.kernel.org" <linux-block@...r.kernel.org>,
"linux-kernel@...r.kernel.org" <linux-kernel@...r.kernel.org>,
"yukuai1@...weicloud.com" <yukuai1@...weicloud.com>,
"axboe@...nel.dk"
<axboe@...nel.dk>
Subject: RE: [PATCH V3 for-6.10/block] loop: Fix a race between loop detach
and loop open
Hi ,
> -----Original Message-----
> From: Christoph Hellwig <hch@....de>
> Sent: Friday, May 31, 2024 5:27 PM
> To: Gulam Mohamed <gulam.mohamed@...cle.com>
> Cc: linux-block@...r.kernel.org; linux-kernel@...r.kernel.org;
> yukuai1@...weicloud.com; hch@....de; axboe@...nel.dk
> Subject: Re: [PATCH V3 for-6.10/block] loop: Fix a race between loop detach
> and loop open
>
> On Wed, May 29, 2024 at 08:02:40PM +0000, Gulam Mohamed wrote:
> > Test case involves the following two scripts:
> >
> > script1.sh:
> >
> > while [ 1 ];
> > do
> > losetup -P -f /home/opt/looptest/test10.img
> > blkid /dev/loop0p1
> > done
> >
> > script2.sh:
> >
> > while [ 1 ];
> > do
> > losetup -d /dev/loop0
> > done
>
> When just running these in the background, I just get spam of losetup errors.
> Maybe you can turn this into a proper blktests test case with a sensible
> timeout?
>
Yes, the blktests test case for this is already in its final stage of the review
> A for the fix: I suspect it is bette to simply always defer the actual work of
> disconnecting the backing device, as that avoid the race entirely, and
> simplifies the code in nbd by removing special cases. Something like this:
I agree. This looks to be a good solution. Will implement this and send V4 for review
Thanks & Regards,
Gulam Mohamed.
>
>
> diff --git a/drivers/block/loop.c b/drivers/block/loop.c index
> 93780f41646b75..c2238c1e2ad68d 100644
> --- a/drivers/block/loop.c
> +++ b/drivers/block/loop.c
> @@ -1131,7 +1131,7 @@ static int loop_configure(struct loop_device *lo,
> blk_mode_t mode,
> return error;
> }
>
> -static void __loop_clr_fd(struct loop_device *lo, bool release)
> +static void __loop_clr_fd(struct loop_device *lo)
> {
> struct file *filp;
> gfp_t gfp = lo->old_gfp_mask;
> @@ -1139,14 +1139,6 @@ static void __loop_clr_fd(struct loop_device *lo,
> bool release)
> if (test_bit(QUEUE_FLAG_WC, &lo->lo_queue->queue_flags))
> blk_queue_write_cache(lo->lo_queue, false, false);
>
> - /*
> - * Freeze the request queue when unbinding on a live file descriptor
> and
> - * thus an open device. When called from ->release we are
> guaranteed
> - * that there is no I/O in progress already.
> - */
> - if (!release)
> - blk_mq_freeze_queue(lo->lo_queue);
> -
> spin_lock_irq(&lo->lo_lock);
> filp = lo->lo_backing_file;
> lo->lo_backing_file = NULL;
> @@ -1164,8 +1156,6 @@ static void __loop_clr_fd(struct loop_device *lo,
> bool release)
> mapping_set_gfp_mask(filp->f_mapping, gfp);
> /* This is safe: open() is still holding a reference. */
> module_put(THIS_MODULE);
> - if (!release)
> - blk_mq_unfreeze_queue(lo->lo_queue);
>
> disk_force_media_change(lo->lo_disk);
>
> @@ -1180,11 +1170,7 @@ static void __loop_clr_fd(struct loop_device *lo,
> bool release)
> * must be at least one and it can only become zero when the
> * current holder is released.
> */
> - if (!release)
> - mutex_lock(&lo->lo_disk->open_mutex);
> err = bdev_disk_changed(lo->lo_disk, false);
> - if (!release)
> - mutex_unlock(&lo->lo_disk->open_mutex);
> if (err)
> pr_warn("%s: partition scan of loop%d failed
> (rc=%d)\n",
> __func__, lo->lo_number, err);
> @@ -1232,25 +1218,16 @@ static int loop_clr_fd(struct loop_device *lo)
> loop_global_unlock(lo, true);
> return -ENXIO;
> }
> +
> /*
> - * If we've explicitly asked to tear down the loop device,
> - * and it has an elevated reference count, set it for auto-teardown
> when
> - * the last reference goes away. This stops $!~#$@ udev from
> - * preventing teardown because it decided that it needs to run blkid
> on
> - * the loopback device whenever they appear. xfstests is notorious for
> - * failing tests because blkid via udev races with a losetup
> - * <dev>/do something like mkfs/losetup -d <dev> causing the losetup
> -d
> - * command to fail with EBUSY.
> + * Mark the device for removing the backing device on last close.
> + * If we are the only opener, also switch the state to roundown here
> to
> + * prevent new openers from coming in.
> */
> - if (disk_openers(lo->lo_disk) > 1) {
> - lo->lo_flags |= LO_FLAGS_AUTOCLEAR;
> - loop_global_unlock(lo, true);
> - return 0;
> - }
> - lo->lo_state = Lo_rundown;
> + lo->lo_flags |= LO_FLAGS_AUTOCLEAR;
> + if (disk_openers(lo->lo_disk) == 1)
> + lo->lo_state = Lo_rundown;
> loop_global_unlock(lo, true);
> -
> - __loop_clr_fd(lo, false);
> return 0;
> }
>
> @@ -1720,22 +1697,24 @@ static int lo_compat_ioctl(struct block_device
> *bdev, blk_mode_t mode, static void lo_release(struct gendisk *disk) {
> struct loop_device *lo = disk->private_data;
> + bool need_clear;
>
> if (disk_openers(disk) > 0)
> return;
>
> + /*
> + * Clear the backing device information if this is the last close of
> + * a device that's been marked for auto clear, or on which
> LOOP_CLR_FD
> + * has been called.
> + */
> mutex_lock(&lo->lo_mutex);
> - if (lo->lo_state == Lo_bound && (lo->lo_flags &
> LO_FLAGS_AUTOCLEAR)) {
> + if (lo->lo_state == Lo_bound && (lo->lo_flags &
> LO_FLAGS_AUTOCLEAR))
> lo->lo_state = Lo_rundown;
> - mutex_unlock(&lo->lo_mutex);
> - /*
> - * In autoclear mode, stop the loop thread
> - * and remove configuration after last close.
> - */
> - __loop_clr_fd(lo, true);
> - return;
> - }
> + need_clear = (lo->lo_state == Lo_rundown);
> mutex_unlock(&lo->lo_mutex);
> +
> + if (need_clear)
> + __loop_clr_fd(lo);
> }
>
> static void lo_free_disk(struct gendisk *disk)
Powered by blists - more mailing lists