lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Date:   Thu,  7 Apr 2022 10:45:08 -0600
From:   Logan Gunthorpe <logang@...tatee.com>
To:     linux-kernel@...r.kernel.org, linux-raid@...r.kernel.org,
        Song Liu <song@...nel.org>
Cc:     Shaohua Li <shli@...nel.org>,
        Guoqing Jiang <guoqing.jiang@...ux.dev>,
        Stephen Bates <sbates@...thlin.com>,
        Martin Oliveira <Martin.Oliveira@...eticom.com>,
        David Sloan <David.Sloan@...eticom.com>,
        Logan Gunthorpe <logang@...tatee.com>
Subject: [PATCH v1 5/8] md/raid5: Keep a reference to last stripe_head for batch

When batching, every stripe head has to find the previous stripe head to
add to the batch list. This involves taking the hash lock which is
highly contended during IO.

Instead of finding the previous stripe_head each time, store a
reference to the previous stripe_head in a pointer so that it doesn't
require taking the contended lock another time.

The reference to the previous stripe must be released before scheduling
and waiting for work to get done. Otherwise, it can hold up
raid5_activate_delayed() and deadlock.

Signed-off-by: Logan Gunthorpe <logang@...tatee.com>
---
 drivers/md/raid5.c | 51 +++++++++++++++++++++++++++++++++++-----------
 1 file changed, 39 insertions(+), 12 deletions(-)

diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index f963ffb35484..b852b6439898 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -815,7 +815,8 @@ static bool stripe_can_batch(struct stripe_head *sh)
 }
 
 /* we only do back search */
-static void stripe_add_to_batch_list(struct r5conf *conf, struct stripe_head *sh)
+static void stripe_add_to_batch_list(struct r5conf *conf,
+		struct stripe_head *sh, struct stripe_head *last_sh)
 {
 	struct stripe_head *head;
 	sector_t head_sector, tmp_sec;
@@ -828,15 +829,20 @@ static void stripe_add_to_batch_list(struct r5conf *conf, struct stripe_head *sh
 		return;
 	head_sector = sh->sector - RAID5_STRIPE_SECTORS(conf);
 
-	hash = stripe_hash_locks_hash(conf, head_sector);
-	spin_lock_irq(conf->hash_locks + hash);
-	head = __find_stripe(conf, head_sector, conf->generation, hash);
-	spin_unlock_irq(conf->hash_locks + hash);
-
-	if (!head)
-		return;
-	if (!stripe_can_batch(head))
-		goto out;
+	if (last_sh && head_sector == last_sh->sector) {
+		head = last_sh;
+		atomic_inc(&head->count);
+	} else {
+		hash = stripe_hash_locks_hash(conf, head_sector);
+		spin_lock_irq(conf->hash_locks + hash);
+		head = __find_stripe(conf, head_sector, conf->generation,
+				     hash);
+		spin_unlock_irq(conf->hash_locks + hash);
+		if (!head)
+			return;
+		if (!stripe_can_batch(head))
+			goto out;
+	}
 
 	lock_two_stripes(head, sh);
 	/* clear_batch_ready clear the flag */
@@ -5735,6 +5741,7 @@ static void make_discard_request(struct mddev *mddev, struct bio *bi)
 
 static bool raid5_make_request(struct mddev *mddev, struct bio * bi)
 {
+	struct stripe_head *batch_last = NULL;
 	struct r5conf *conf = mddev->private;
 	int dd_idx;
 	sector_t new_sector;
@@ -5885,8 +5892,13 @@ static bool raid5_make_request(struct mddev *mddev, struct bio * bi)
 			goto schedule_and_retry;
 		}
 
-		if (stripe_can_batch(sh))
-			stripe_add_to_batch_list(conf, sh);
+		if (stripe_can_batch(sh)) {
+			stripe_add_to_batch_list(conf, sh, batch_last);
+			if (batch_last)
+				raid5_release_stripe(batch_last);
+			atomic_inc(&sh->count);
+			batch_last = sh;
+		}
 
 		if (do_flush) {
 			set_bit(STRIPE_R5C_PREFLUSH, &sh->state);
@@ -5905,6 +5917,18 @@ static bool raid5_make_request(struct mddev *mddev, struct bio * bi)
 		continue;
 
 schedule_and_retry:
+		/*
+		 * Must release the reference to batch_last before
+		 * scheduling and waiting for work to be done, otherwise
+		 * the batch_last stripe head could prevent
+		 * raid5_activate_delayed() from making progress
+		 * and thus deadlocking.
+		 */
+		if (batch_last) {
+			raid5_release_stripe(batch_last);
+			batch_last = NULL;
+		}
+
 		schedule();
 		prepare_to_wait(&conf->wait_for_overlap, &w,
 				TASK_UNINTERRUPTIBLE);
@@ -5912,6 +5936,9 @@ static bool raid5_make_request(struct mddev *mddev, struct bio * bi)
 	}
 	finish_wait(&conf->wait_for_overlap, &w);
 
+	if (batch_last)
+		raid5_release_stripe(batch_last);
+
 	if (rw == WRITE)
 		md_write_end(mddev);
 	bio_endio(bi);
-- 
2.30.2

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ