linux-kernel - [PATCH 3.2 15/60] md/raid0: update queue parameter in a safer location.

lists.openwall.net		lists / announce owl-users owl-dev john-users john-dev passwdqc-users yescrypt popa3d-users / oss-security kernel-hardening musl sabotage tlsify passwords / crypt-dev xvendor / Bugtraq Full-Disclosure linux-kernel linux-netdev linux-ext4 linux-hardening linux-cve-announce PHC
Open Source and information security mailing list archives
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <lsq.1447551945.657320195@decadent.org.uk>
Date:	Sun, 15 Nov 2015 01:45:45 +0000
From:	Ben Hutchings <ben@...adent.org.uk>
To:	linux-kernel@...r.kernel.org, stable@...r.kernel.org
CC:	akpm@...ux-foundation.org, "NeilBrown" <neilb@...e.com>,
	"Yi Zhang" <yizhan@...hat.com>
Subject: [PATCH 3.2 15/60] md/raid0: update queue parameter in a safer
 location.

3.2.73-rc1 review patch.  If anyone has any objections, please let me know.

------------------

From: NeilBrown <neilb@...e.com>

commit 199dc6ed5179251fa6158a461499c24bdd99c836 upstream.

When a (e.g.) RAID5 array is reshaped to RAID0, the updating
of queue parameters (e.g. max number of sectors per bio) is
done in the wrong place.
It should be part of ->run, but it is actually part of ->takeover.
This means it happens before level_store() calls:

	blk_set_stacking_limits(&mddev->queue->limits);

and so it ineffective.  This can lead to errors from underlying
devices.

So move all the relevant settings out of create_stripe_zones()
and into raid0_run().

As this can lead to a bug-on it is suitable for any -stable
kernel which supports reshape to RAID0.  So 2.6.35 or later.
As the bug has been present for five years there is no urgency,
so no need to rush into -stable.

Fixes: 9af204cf720c ("md: Add support for Raid5->Raid0 and Raid10->Raid0 takeover")
Reported-by: Yi Zhang <yizhan@...hat.com>
Signed-off-by: NeilBrown <neilb@...e.com>
[bwh: Backported to 3.2:
 - md has no discard or write-same support
 - md is not used by dm-raid so mddev->queue is never null
 - Open-code rdev_for_each()
 - Adjust context]
Signed-off-by: Ben Hutchings <ben@...adent.org.uk>
---
--- a/drivers/md/raid0.c
+++ b/drivers/md/raid0.c
@@ -88,6 +88,7 @@ static int create_strip_zones(struct mdd
 	char b[BDEVNAME_SIZE];
 	char b2[BDEVNAME_SIZE];
 	struct r0conf *conf = kzalloc(sizeof(*conf), GFP_KERNEL);
+	unsigned short blksize = 512;
 
 	if (!conf)
 		return -ENOMEM;
@@ -102,6 +103,9 @@ static int create_strip_zones(struct mdd
 		sector_div(sectors, mddev->chunk_sectors);
 		rdev1->sectors = sectors * mddev->chunk_sectors;
 
+		blksize = max(blksize, queue_logical_block_size(
+				      rdev1->bdev->bd_disk->queue));
+
 		list_for_each_entry(rdev2, &mddev->disks, same_set) {
 			pr_debug("md/raid0:%s:   comparing %s(%llu)"
 				 " with %s(%llu)\n",
@@ -138,6 +142,18 @@ static int create_strip_zones(struct mdd
 	}
 	pr_debug("md/raid0:%s: FINAL %d zones\n",
 		 mdname(mddev), conf->nr_strip_zones);
+	/*
+	 * now since we have the hard sector sizes, we can make sure
+	 * chunk size is a multiple of that sector size
+	 */
+	if ((mddev->chunk_sectors << 9) % blksize) {
+		printk(KERN_ERR "md/raid0:%s: chunk_size of %d not multiple of block size %d\n",
+		       mdname(mddev),
+		       mddev->chunk_sectors << 9, blksize);
+		err = -EINVAL;
+		goto abort;
+	}
+
 	err = -ENOMEM;
 	conf->strip_zone = kzalloc(sizeof(struct strip_zone)*
 				conf->nr_strip_zones, GFP_KERNEL);
@@ -186,8 +202,6 @@ static int create_strip_zones(struct mdd
 		}
 		dev[j] = rdev1;
 
-		disk_stack_limits(mddev->gendisk, rdev1->bdev,
-				  rdev1->data_offset << 9);
 		/* as we don't honour merge_bvec_fn, we must never risk
 		 * violating it, so limit ->max_segments to 1, lying within
 		 * a single page.
@@ -263,21 +277,6 @@ static int create_strip_zones(struct mdd
 	mddev->queue->backing_dev_info.congested_fn = raid0_congested;
 	mddev->queue->backing_dev_info.congested_data = mddev;
 
-	/*
-	 * now since we have the hard sector sizes, we can make sure
-	 * chunk size is a multiple of that sector size
-	 */
-	if ((mddev->chunk_sectors << 9) % queue_logical_block_size(mddev->queue)) {
-		printk(KERN_ERR "md/raid0:%s: chunk_size of %d not valid\n",
-		       mdname(mddev),
-		       mddev->chunk_sectors << 9);
-		goto abort;
-	}
-
-	blk_queue_io_min(mddev->queue, mddev->chunk_sectors << 9);
-	blk_queue_io_opt(mddev->queue,
-			 (mddev->chunk_sectors << 9) * mddev->raid_disks);
-
 	pr_debug("md/raid0:%s: done.\n", mdname(mddev));
 	*private_conf = conf;
 
@@ -340,6 +339,7 @@ static int raid0_run(struct mddev *mddev
 {
 	struct r0conf *conf;
 	int ret;
+	struct md_rdev *rdev;
 
 	if (mddev->chunk_sectors == 0) {
 		printk(KERN_ERR "md/raid0:%s: chunk size must be set.\n",
@@ -348,7 +348,6 @@ static int raid0_run(struct mddev *mddev
 	}
 	if (md_check_no_bitmap(mddev))
 		return -EINVAL;
-	blk_queue_max_hw_sectors(mddev->queue, mddev->chunk_sectors);
 
 	/* if private is not null, we are here after takeover */
 	if (mddev->private == NULL) {
@@ -359,6 +358,16 @@ static int raid0_run(struct mddev *mddev
 	}
 	conf = mddev->private;
 
+	list_for_each_entry(rdev, &mddev->disks, same_set) {
+		disk_stack_limits(mddev->gendisk, rdev->bdev,
+				  rdev->data_offset << 9);
+	}
+	blk_queue_max_hw_sectors(mddev->queue, mddev->chunk_sectors);
+
+	blk_queue_io_min(mddev->queue, mddev->chunk_sectors << 9);
+	blk_queue_io_opt(mddev->queue,
+			 (mddev->chunk_sectors << 9) * mddev->raid_disks);
+
 	/* calculate array device size */
 	md_set_array_sectors(mddev, raid0_size(mddev, 0, 0));
 

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/