linux-kernel - [PATCH] md: Fixed issue in raid1 that may lead to data corruption

lists.openwall.net		lists / announce owl-users owl-dev john-users john-dev passwdqc-users yescrypt popa3d-users / oss-security kernel-hardening musl sabotage tlsify passwords / crypt-dev xvendor / Bugtraq Full-Disclosure linux-kernel linux-netdev linux-ext4 linux-hardening linux-cve-announce PHC
Open Source and information security mailing list archives
Hash Suite for Android: free password hash cracker in your pocket
[<prev] [next>] [day] [month] [year] [list]
Message-Id: <1395062544-11321-1-git-send-email-linux-kernel@rmueck.de>
Date:	Mon, 17 Mar 2014 14:22:24 +0100
From:	Ralph Mueck <linux-kernel@...eck.de>
To:	linux-kernel@...eck.de
Cc:	i4passt@...ts.cs.fau.de, neilb@...e.de, linux-raid@...r.kernel.org,
	linux-kernel@...r.kernel.org,
	Matthias Oefelein <ma.oefelein@...or.de>
Subject: [PATCH] md: Fixed issue in raid1 that may lead to data corruption

If data gets damaged on one disk of a RAID-1 array, the damaged data
may be mirrored to the other disks of the array.
In a two-disk array this behavior cannot be avoided, because there is
no way to tell which disk holds the intact data.
However, in a configuration with three or more disks it is possible to
compare the data of all array members and determine which version is
most likely the correct one to copy to all other members.
This patch adds that functionality with a mechanism that chooses the
disk with the fewest mismatches as the source of the copy.

Signed-off-by: Ralph Mueck <linux-kernel@...eck.de>
Signed-off-by: Matthias Oefelein <ma.oefelein@...or.de>

---
 drivers/md/raid1.c | 109 +++++++++++++++++++++++++++++++++++------------------
 1 file changed, 73 insertions(+), 36 deletions(-)

diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index 4a6ca1c..645a6e1 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -1947,6 +1947,12 @@ static int process_checks(struct r1bio *r1_bio)
 	int primary;
 	int i;
 	int vcnt;
+	/* Each time a block read from a device differs from its
+	 * counterpart on another device, its mismatch counter is incremented.
+	 */
+	int mismatch_counter[conf->raid_disks * 2];
+	/* "pointer" to the disk with fewest mismatches */
+	int min_mismatch_disk = -1;
 
 	/* Fix variable parts of all bios */
 	vcnt = (r1_bio->sectors + PAGE_SIZE / 512 - 1) >> (PAGE_SHIFT - 9);
@@ -1982,49 +1988,80 @@ static int process_checks(struct r1bio *r1_bio)
 			size -= PAGE_SIZE;
 		}
 	}
-	for (primary = 0; primary < conf->raid_disks * 2; primary++)
-		if (r1_bio->bios[primary]->bi_end_io == end_sync_read &&
-		    test_bit(BIO_UPTODATE, &r1_bio->bios[primary]->bi_flags)) {
-			r1_bio->bios[primary]->bi_end_io = NULL;
-			rdev_dec_pending(conf->mirrors[primary].rdev, mddev);
-			break;
-		}
-	r1_bio->read_disk = primary;
-	for (i = 0; i < conf->raid_disks * 2; i++) {
-		int j;
-		struct bio *pbio = r1_bio->bios[primary];
-		struct bio *sbio = r1_bio->bios[i];
-		int uptodate = test_bit(BIO_UPTODATE, &sbio->bi_flags);
 
-		if (sbio->bi_end_io != end_sync_read)
+	for (primary = 0; primary < conf->raid_disks * 2; primary++) {
+		if (r1_bio->bios[primary]->bi_end_io != end_sync_read ||
+		    !test_bit(BIO_UPTODATE, &r1_bio->bios[primary]->bi_flags)) {
+			/* ignore this disk for comparison */
+			mismatch_counter[primary] = -1;
 			continue;
-		/* Now we can 'fixup' the BIO_UPTODATE flag */
-		set_bit(BIO_UPTODATE, &sbio->bi_flags);
-
-		if (uptodate) {
-			for (j = vcnt; j-- ; ) {
-				struct page *p, *s;
-				p = pbio->bi_io_vec[j].bv_page;
-				s = sbio->bi_io_vec[j].bv_page;
-				if (memcmp(page_address(p),
-					   page_address(s),
-					   sbio->bi_io_vec[j].bv_len))
-					break;
+		}
+		mismatch_counter[primary] = 0;
+		for (i = 0; i < conf->raid_disks * 2; i++) {
+			int j;
+			struct bio *pbio = r1_bio->bios[primary];
+			struct bio *sbio = r1_bio->bios[i];
+			int uptodate = test_bit(BIO_UPTODATE, &sbio->bi_flags);
+
+			if (sbio->bi_end_io != end_sync_read)
+				continue;
+			/* Now we can 'fixup' the BIO_UPTODATE flag */
+			set_bit(BIO_UPTODATE, &sbio->bi_flags);
+
+			if (uptodate) {
+				for (j = vcnt; j-- ; ) {
+					struct page *p, *s;
+					p = pbio->bi_io_vec[j].bv_page;
+					s = sbio->bi_io_vec[j].bv_page;
+					if (memcmp(page_address(p),
+						  page_address(s),
+						  sbio->bi_io_vec[j].bv_len))
+						++mismatch_counter[primary];
+				}
 			}
-		} else
-			j = 0;
-		if (j >= 0)
-			atomic64_add(r1_bio->sectors, &mddev->resync_mismatches);
-		if (j < 0 || (test_bit(MD_RECOVERY_CHECK, &mddev->recovery)
-			      && uptodate)) {
+			if (mismatch_counter[primary] > 0)
+				atomic64_add(r1_bio->sectors, &mddev->resync_mismatches);
+			if (!mismatch_counter[primary]
+			    || (test_bit(MD_RECOVERY_CHECK, &mddev->recovery)
+				&& uptodate)) {
+				mismatch_counter[primary] = 0;
+			}
+		}
+
+		if (min_mismatch_disk == -1) {
+			min_mismatch_disk = primary;
+		} else {
+			if (mismatch_counter[primary] < mismatch_counter[min_mismatch_disk])
+				min_mismatch_disk = primary;
+		}
+	}
+	r1_bio->read_disk = min_mismatch_disk;
+	/* We have compared everything now. */
+
+	/* If mismatches occurred, we try to fix them now */
+	for (primary = 0; primary < conf->raid_disks * 2; primary++) {
+		int uptodate = test_bit(BIO_UPTODATE, &r1_bio->bios[primary]->bi_flags);
+		struct bio *destination = r1_bio->bios[primary];
+		struct bio *source = r1_bio->bios[min_mismatch_disk];
+		/* take only valid disks */
+		if (mismatch_counter[primary] == -1
+		    || primary == min_mismatch_disk)
+			continue;
+
+		if (mismatch_counter[primary] == 0
+		    || (test_bit(MD_RECOVERY_CHECK, &mddev->recovery)
+			&& uptodate)) {
 			/* No need to write to this device. */
-			sbio->bi_end_io = NULL;
-			rdev_dec_pending(conf->mirrors[i].rdev, mddev);
+			r1_bio->bios[primary]->bi_end_io = NULL;
+			rdev_dec_pending(conf->mirrors[primary].rdev, mddev);
 			continue;
 		}
-
-		bio_copy_data(sbio, pbio);
+		/* Write the data from min_mismatch_disk to primary,
+		 * as the data in primary is probably corrupted
+		 */
+		bio_copy_data(destination, source);
 	}
+
 	return 0;
 }
 
-- 
1.8.3.2

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/