From 9aca30919e20ce7e24a97c3d1639a654eace5ba2 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Tue, 31 Mar 2015 16:47:15 -0400 Subject: [PATCH] block/loop: fix race between open/release and reread part When another task(such as udev) is trying to open/close loop block, blkdev_reread_part() may return -EBUSY because bd_mutex is required for both open and release. The worse thing is that lo_open() need to hold lo_ctl_mutex, so the task trying to open loop can't open the loop disk until lo_ctl_mutex is released when rereading part is completed with failure. This patch trys to fixing the issue by the following approach: 1) introduce lo_open_mutex to protect lo_refcnt and avoid acquiring lo_ctl_mutex in lo_open(): - for open vs. add/del loop, no any problem because of loop_index_mutex - lo_open_mutex is used for syncing open() and loop_clr_fd() - both open() and release() are protected by bd_mutex 2) don't hold lo_ctl_mutex for decreasing/checking lo_refcnt in lo_release() 3) when reread part is run inside release path, rescan partition directly since bd_mutex is held already 4) retry several times if blkdev_reread_part() still returns -EBUSY. CC: Jens Axboe CC: Ming Lei CC: Mike Galbraith CC: Kent Overstreet CC: Mikulas Patocka Reported-by: Jarod Wilson Signed-off-by: Ming Lei --- block/partition-generic.c | 1 + drivers/block/loop.c | 88 +++++++++++++++++++++++++++++++++++++++------ drivers/block/loop.h | 1 + 3 files changed, 80 insertions(+), 10 deletions(-) diff --git a/block/partition-generic.c b/block/partition-generic.c index 0d9e5f9..9efa667 100644 --- a/block/partition-generic.c +++ b/block/partition-generic.c @@ -528,6 +528,7 @@ rescan: free_partitions(state); return 0; } +EXPORT_SYMBOL(rescan_partitions); int invalidate_partitions(struct gendisk *disk, struct block_device *bdev) { diff --git a/drivers/block/loop.c b/drivers/block/loop.c index d1f168b..9a8d52a 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -75,6 +75,8 @@ #include #include #include +#include +#include #include "loop.h" #include @@ -528,6 +530,52 @@ static int loop_flush(struct loop_device *lo) return loop_switch(lo, NULL); } +static int loop_reread_part_no_lock(struct block_device *bdev) +{ + struct gendisk *disk = bdev->bd_disk; + int res; + + if (!disk_part_scan_enabled(disk) || bdev != bdev->bd_contains) + return -EINVAL; + res = rescan_partitions(disk, bdev); + return res; +} + +/* + * Re-reading partitions can fail with an -EBUSY return from + * block/ioctl.c's blkdev_reread_part(), which calls mutex_trylock + * on the bd_mutex, which can be held in both open/release handler, + * so retry several times for the sake of safety. + */ +static void loop_reread_partitions(struct loop_device *lo, + struct block_device *bdev) +{ + int rc, in_release; + int retry = 5; + + + mutex_lock(&lo->lo_open_mutex); + in_release = lo->lo_refcnt == 0; + mutex_unlock(&lo->lo_open_mutex); + + /* bd_mutex has been held already in release path */ + if (in_release) { + rc = loop_reread_part_no_lock(bdev); + goto out; + } + + while (retry-- > 0) { + rc = ioctl_by_bdev(bdev, BLKRRPART, 0); + if (rc != -EBUSY) + break; + msleep(50); + } + out: + if (rc) + pr_warn("%s: partition scan of loop%d (%s) failed (rc=%d)\n", + __func__, lo->lo_number, lo->lo_file_name, rc); +} + /* * loop_change_fd switched the backing store of a loopback device to * a new file. This is useful for operating system installers to free up @@ -576,7 +624,7 @@ static int loop_change_fd(struct loop_device *lo, struct block_device *bdev, fput(old_file); if (lo->lo_flags & LO_FLAGS_PARTSCAN) - ioctl_by_bdev(bdev, BLKRRPART, 0); + loop_reread_partitions(lo, bdev); return 0; out_putf: @@ -807,7 +855,7 @@ static int loop_set_fd(struct loop_device *lo, fmode_t mode, if (part_shift) lo->lo_flags |= LO_FLAGS_PARTSCAN; if (lo->lo_flags & LO_FLAGS_PARTSCAN) - ioctl_by_bdev(bdev, BLKRRPART, 0); + loop_reread_partitions(lo, bdev); /* Grab the block_device to prevent its destruction after we * put /dev/loopXX inode. Later in loop_clr_fd() we bdput(bdev). @@ -879,14 +927,18 @@ static int loop_clr_fd(struct loop_device *lo) * /do something like mkfs/losetup -d causing the losetup -d * command to fail with EBUSY. */ + mutex_lock(&lo->lo_open_mutex); if (lo->lo_refcnt > 1) { + mutex_unlock(&lo->lo_open_mutex); lo->lo_flags |= LO_FLAGS_AUTOCLEAR; mutex_unlock(&lo->lo_ctl_mutex); return 0; } - if (filp == NULL) + if (filp == NULL) { + mutex_unlock(&lo->lo_open_mutex); return -EINVAL; + } spin_lock_irq(&lo->lo_lock); lo->lo_state = Lo_rundown; @@ -919,8 +971,17 @@ static int loop_clr_fd(struct loop_device *lo) lo->lo_state = Lo_unbound; /* This is safe: open() is still holding a reference. */ module_put(THIS_MODULE); + + /* + * Unlock open_mutex for avoiding -EBUSY of rereading part: + * - try to acquire bd_mutex from reread part + * - another task is opening the loop with holding bd_mutex + * and trys to acquire open_mutex + */ + mutex_unlock(&lo->lo_open_mutex); + if (lo->lo_flags & LO_FLAGS_PARTSCAN && bdev) - ioctl_by_bdev(bdev, BLKRRPART, 0); + loop_reread_partitions(lo, bdev); lo->lo_flags = 0; if (!part_shift) lo->lo_disk->flags |= GENHD_FL_NO_PART_SCAN; @@ -995,7 +1056,7 @@ loop_set_status(struct loop_device *lo, const struct loop_info64 *info) !(lo->lo_flags & LO_FLAGS_PARTSCAN)) { lo->lo_flags |= LO_FLAGS_PARTSCAN; lo->lo_disk->flags &= ~GENHD_FL_NO_PART_SCAN; - ioctl_by_bdev(lo->lo_device, BLKRRPART, 0); + loop_reread_partitions(lo, lo->lo_device); } lo->lo_encrypt_key_size = info->lo_encrypt_key_size; @@ -1376,9 +1437,9 @@ static int lo_open(struct block_device *bdev, fmode_t mode) goto out; } - mutex_lock(&lo->lo_ctl_mutex); + mutex_lock(&lo->lo_open_mutex); lo->lo_refcnt++; - mutex_unlock(&lo->lo_ctl_mutex); + mutex_unlock(&lo->lo_open_mutex); out: mutex_unlock(&loop_index_mutex); return err; @@ -1387,13 +1448,16 @@ out: static void lo_release(struct gendisk *disk, fmode_t mode) { struct loop_device *lo = disk->private_data; - int err; + int err, ref; - mutex_lock(&lo->lo_ctl_mutex); + mutex_lock(&lo->lo_open_mutex); + ref = --lo->lo_refcnt; + mutex_unlock(&lo->lo_open_mutex); - if (--lo->lo_refcnt) + if (ref) goto out; + mutex_lock(&lo->lo_ctl_mutex); if (lo->lo_flags & LO_FLAGS_AUTOCLEAR) { /* * In autoclear mode, stop the loop thread @@ -1646,6 +1710,7 @@ static int loop_add(struct loop_device **l, int i) disk->flags |= GENHD_FL_NO_PART_SCAN; disk->flags |= GENHD_FL_EXT_DEVT; mutex_init(&lo->lo_ctl_mutex); + mutex_init(&lo->lo_open_mutex); lo->lo_number = i; spin_lock_init(&lo->lo_lock); disk->major = LOOP_MAJOR; @@ -1763,11 +1828,14 @@ static long loop_control_ioctl(struct file *file, unsigned int cmd, mutex_unlock(&lo->lo_ctl_mutex); break; } + mutex_lock(&lo->lo_open_mutex); if (lo->lo_refcnt > 0) { ret = -EBUSY; + mutex_unlock(&lo->lo_open_mutex); mutex_unlock(&lo->lo_ctl_mutex); break; } + mutex_unlock(&lo->lo_open_mutex); lo->lo_disk->private_data = NULL; mutex_unlock(&lo->lo_ctl_mutex); idr_remove(&loop_index_idr, lo->lo_number); diff --git a/drivers/block/loop.h b/drivers/block/loop.h index 301c27f..1b4acf2 100644 --- a/drivers/block/loop.h +++ b/drivers/block/loop.h @@ -59,6 +59,7 @@ struct loop_device { bool write_started; int lo_state; struct mutex lo_ctl_mutex; + struct mutex lo_open_mutex; struct request_queue *lo_queue; struct blk_mq_tag_set tag_set; -- 1.7.9.5