[<prev] [next>] [<thread-prev] [day] [month] [year] [list]
Message-ID: <ee718d4e-7681-d09a-5d2a-4f9ac2172038@i-love.sakura.ne.jp>
Date: Mon, 20 Dec 2021 23:57:40 +0900
From: Tetsuo Handa <penguin-kernel@...ove.sakura.ne.jp>
To: Jan Kara <jack@...e.cz>
Cc: kernel test robot <oliver.sang@...el.com>,
Jens Axboe <axboe@...nel.dk>,
syzbot <syzbot+643e4ce4b6ad1347d372@...kaller.appspotmail.com>,
Christoph Hellwig <hch@...radead.org>,
Christoph Hellwig <hch@....de>,
LKML <linux-kernel@...r.kernel.org>, lkp@...ts.01.org,
lkp@...el.com, linux-block <linux-block@...r.kernel.org>
Subject: Re: [loop] 322c4293ec: xfstests.xfs.049.fail
On 2021/12/20 20:58, Jan Kara wrote:
> On Mon 20-12-21 00:45:46, Tetsuo Handa wrote:
>> On 2021/12/20 0:09, kernel test robot wrote:
>>> @@ -13,3 +13,5 @@
>>> --- clean
>>> --- umount ext2 on xfs
>>> --- umount xfs
>>> +!!! umount xfs failed
>>> +(see /lkp/benchmarks/xfstests/results//xfs/049.full for details)
>>> ...
>>> (Run 'diff -u /lkp/benchmarks/xfstests/tests/xfs/049.out /lkp/benchmarks/xfstests/results//xfs/049.out.bad' to see the entire diff)
>>
>> Yes, we know this race condition can happen.
>>
>> https://lkml.kernel.org/r/16c7d304-60ef-103f-1b2c-8592b48f47c6@i-love.sakura.ne.jp
>> https://lkml.kernel.org/r/YaYfu0H2k0PSQL6W@infradead.org
>>
>> Should we try to wait for autoclear operation to complete?
>
> So I think we should try to fix this because as Dave writes in the
> changelog for a1ecac3b0656 ("loop: Make explicit loop device destruction
> lazy") which started all this, having random EBUSY failures (either from
> losetup or umount) is annoying and you need to work it around it lots of
> unexpected places.
OK. Let's wait for autoclear operation to complete.
>
> We cannot easily wait for work completion in the loop device code without
> reintroducing the deadlock - whole lo_release() is called under
> disk->open_mutex which you also need to grab in __loop_clr_fd(). So to
> avoid holding backing file busy longer than expected, we could use
> task_work instead of ordinary work as I suggested - but you were right that
> we need to be somewhat careful and in case we are running in a kthread, we
> would still need to offload to a normal work (but in that case we don't
> care about delaying file release anyway).
Like fput_many() shows, it is not easy to implement fallback correctly.
I worry that exporting task_work_add() to loadable kernel modules results in
random abuse of task_work which does not implement fallback.
Instead of exporting task_work_add(), I think we can apply below diff.
I chose not to rely on WQ context, for there is fput(filp) in __loop_clr_fd()
which we should make sure that __fput(filp) is processed by a thread which
triggered autoclear operation. If this __fput(filp) is scheduled by other thread
because fput(filp) is called by a thread which did not trigger autoclear operation,
I think it is possible that a thread which triggered autoclear operation fails to
wait for completion of __fput(filp), and results in the same problem.
---
block/bdev.c | 2 ++
drivers/block/loop.c | 42 ++++++++++++------------------------------
drivers/block/loop.h | 2 +-
include/linux/blkdev.h | 5 +++++
4 files changed, 20 insertions(+), 31 deletions(-)
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index c80cfaefc0a8..b252b1d87471 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -1227,6 +1227,11 @@ struct block_device_operations {
* driver.
*/
int (*alternative_gpt_sector)(struct gendisk *disk, sector_t *sector);
+ /*
+ * Special callback for doing post-release callback without
+ * disk->open_mutex held. Used by loop driver.
+ */
+ void (*post_release)(struct gendisk *disk);
};
#ifdef CONFIG_COMPAT
diff --git a/block/bdev.c b/block/bdev.c
index 8bf93a19041b..0cb638d81a27 100644
--- a/block/bdev.c
+++ b/block/bdev.c
@@ -948,6 +948,8 @@ void blkdev_put(struct block_device *bdev, fmode_t mode)
else
blkdev_put_whole(bdev, mode);
mutex_unlock(&disk->open_mutex);
+ if (bdev->bd_disk->fops->post_release)
+ bdev->bd_disk->fops->post_release(bdev->bd_disk);
module_put(disk->fops->owner);
blkdev_put_no_open(bdev);
diff --git a/drivers/block/loop.h b/drivers/block/loop.h
index 918a7a2dc025..f2e9f38694dc 100644
--- a/drivers/block/loop.h
+++ b/drivers/block/loop.h
@@ -56,7 +56,7 @@ struct loop_device {
struct gendisk *lo_disk;
struct mutex lo_mutex;
bool idr_visible;
- struct work_struct rundown_work;
+ struct task_struct *rundown_owner; /* current or NULL */
};
struct loop_cmd {
diff --git a/drivers/block/loop.c b/drivers/block/loop.c
index b1b05c45c07c..faa3a3097b22 100644
--- a/drivers/block/loop.c
+++ b/drivers/block/loop.c
@@ -1165,40 +1165,12 @@ static void __loop_clr_fd(struct loop_device *lo)
lo->lo_disk->flags |= GENHD_FL_NO_PART;
fput(filp);
-}
-
-static void loop_rundown_completed(struct loop_device *lo)
-{
mutex_lock(&lo->lo_mutex);
lo->lo_state = Lo_unbound;
mutex_unlock(&lo->lo_mutex);
module_put(THIS_MODULE);
}
-static void loop_rundown_workfn(struct work_struct *work)
-{
- struct loop_device *lo = container_of(work, struct loop_device,
- rundown_work);
- struct block_device *bdev = lo->lo_device;
- struct gendisk *disk = lo->lo_disk;
-
- __loop_clr_fd(lo);
- kobject_put(&bdev->bd_device.kobj);
- module_put(disk->fops->owner);
- loop_rundown_completed(lo);
-}
-
-static void loop_schedule_rundown(struct loop_device *lo)
-{
- struct block_device *bdev = lo->lo_device;
- struct gendisk *disk = lo->lo_disk;
-
- __module_get(disk->fops->owner);
- kobject_get(&bdev->bd_device.kobj);
- INIT_WORK(&lo->rundown_work, loop_rundown_workfn);
- queue_work(system_long_wq, &lo->rundown_work);
-}
-
static int loop_clr_fd(struct loop_device *lo)
{
int err;
@@ -1229,7 +1201,6 @@ static int loop_clr_fd(struct loop_device *lo)
mutex_unlock(&lo->lo_mutex);
__loop_clr_fd(lo);
- loop_rundown_completed(lo);
return 0;
}
@@ -1754,7 +1725,7 @@ static void lo_release(struct gendisk *disk, fmode_t mode)
* In autoclear mode, stop the loop thread
* and remove configuration after last close.
*/
- loop_schedule_rundown(lo);
+ lo->rundown_owner = current;
return;
} else if (lo->lo_state == Lo_bound) {
/*
@@ -1769,10 +1740,21 @@ static void lo_release(struct gendisk *disk, fmode_t mode)
mutex_unlock(&lo->lo_mutex);
}
+static void lo_post_release(struct gendisk *disk)
+{
+ struct loop_device *lo = disk->private_data;
+
+ if (lo->rundown_owner != current)
+ return;
+ lo->rundown_owner = NULL;
+ __loop_clr_fd(lo);
+}
+
static const struct block_device_operations lo_fops = {
.owner = THIS_MODULE,
.open = lo_open,
.release = lo_release,
+ .post_release = lo_post_release,
.ioctl = lo_ioctl,
#ifdef CONFIG_COMPAT
.compat_ioctl = lo_compat_ioctl,
--
2.32.0
Powered by blists - more mailing lists