Message-Id: <20231021154806.4019417-8-yukuai1@huaweicloud.com>
Date:   Sat, 21 Oct 2023 23:48:05 +0800
From:   Yu Kuai <yukuai1@...weicloud.com>
To:     bvanassche@....org, hch@....de, kbusch@...nel.org,
        ming.lei@...hat.com, axboe@...nel.dk
Cc:     linux-block@...r.kernel.org, linux-kernel@...r.kernel.org,
        yukuai3@...wei.com, yukuai1@...weicloud.com, yi.zhang@...wei.com,
        yangerkun@...wei.com
Subject: [PATCH RFC v2 7/8] blk-mq-tag: delay tag sharing until failing to get a driver tag

From: Yu Kuai <yukuai3@...wei.com>

Before this patch, tags are shared as soon as a shared node starts to
handle IO. However, this wastes tags if some node doesn't need all of
its fair share, because those tags can't be used by any other node,
even if another node wants more than its fair share.

Prevent this problem by delaying tag sharing from IO issue time until
failing to get a driver tag. Note that the problem still exists once
all the tags are exhausted; the next patch will implement an algorithm
to allow a busy node to borrow tags from idle nodes.

Signed-off-by: Yu Kuai <yukuai3@...wei.com>
---
 block/blk-mq-tag.c | 67 ++++++++++++++++++++++++++--------------------
 1 file changed, 38 insertions(+), 29 deletions(-)
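
Not part of the patch: a minimal user-space sketch of the idea, for
readers unfamiliar with the tag-sharing accounting. All names below
(struct queue, get_tag(), fair_share(), busy_queues) are made up for
illustration; they only loosely mirror tags->ctl.busy_queues and
__blk_mq_driver_tag_busy() in the diff below, and the real fair-share
computation in blk-mq is more involved.

#include <stdbool.h>
#include <stdio.h>

#define NR_TAGS		8
#define NR_QUEUES	4

struct queue {
	bool busy;	/* joined fair sharing after a failed alloc */
	int used;	/* tags currently held by this queue */
};

static struct queue queues[NR_QUEUES];
static int busy_queues;	/* counterpart of tags->ctl.busy_queues */
static int tags_in_use;

/* Fair share for one queue once sharing has kicked in. */
static int fair_share(void)
{
	return busy_queues ? NR_TAGS / busy_queues : NR_TAGS;
}

static bool get_tag(struct queue *q)
{
	/* A queue that has joined sharing is capped at its fair share. */
	if (q->busy && q->used >= fair_share())
		return false;

	if (tags_in_use < NR_TAGS) {
		tags_in_use++;
		q->used++;
		return true;
	}

	/*
	 * Only on allocation failure does the queue count as busy,
	 * mirroring what __blk_mq_driver_tag_busy() does below.
	 */
	if (!q->busy) {
		q->busy = true;
		busy_queues++;
	}
	return false;
}

int main(void)
{
	struct queue *a = &queues[0], *b = &queues[1];
	int got = 0;

	/*
	 * Queue 1 handles a little IO but never exhausts the pool, so
	 * it never joins the busy accounting and queue 0 is not capped.
	 * With sharing done at IO issue time, queue 0 would already be
	 * limited to NR_TAGS / 2 here.
	 */
	get_tag(b);
	while (get_tag(a))
		got++;
	printf("queue 0 got %d of %d tags, busy_queues=%d\n",
	       got, NR_TAGS, busy_queues);
	return 0;
}

Running this prints that queue 0 took 7 of the 8 tags and was only
counted as busy once an allocation actually failed; under the old
scheme it would have been capped at half the pool as soon as queue 1
started handling IO, with the remaining tags sitting idle.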

diff --git a/block/blk-mq-tag.c b/block/blk-mq-tag.c
index cd13d8e512f7..a98b25c8d594 100644
--- a/block/blk-mq-tag.c
+++ b/block/blk-mq-tag.c
@@ -43,7 +43,7 @@ static void blk_mq_update_available_driver_tags(struct blk_mq_tags *tags,
 						struct shared_tag_info *info,
 						unsigned int users)
 {
-	unsigned int old = tags->ctl.active_queues;
+	unsigned int old = tags->ctl.busy_queues;
 	int nr_tags;
 	struct shared_tag_info *iter;
 
@@ -74,9 +74,7 @@ static void blk_mq_update_available_driver_tags(struct blk_mq_tags *tags,
  */
 void __blk_mq_tag_busy(struct blk_mq_hw_ctx *hctx)
 {
-	unsigned int users;
 	struct blk_mq_tags *tags = hctx->tags;
-	struct shared_tag_info *info;
 
 	/*
 	 * calling test_bit() prior to test_and_set_bit() is intentional,
@@ -88,22 +86,14 @@ void __blk_mq_tag_busy(struct blk_mq_hw_ctx *hctx)
 		if (test_bit(QUEUE_FLAG_HCTX_ACTIVE, &q->queue_flags) ||
 		    test_and_set_bit(QUEUE_FLAG_HCTX_ACTIVE, &q->queue_flags))
 			return;
-
-		info = &q->shared_tag_info;
 	} else {
 		if (test_bit(BLK_MQ_S_TAG_ACTIVE, &hctx->state) ||
 		    test_and_set_bit(BLK_MQ_S_TAG_ACTIVE, &hctx->state))
 			return;
-
-		info = &hctx->shared_tag_info;
 	}
 
 	spin_lock_irq(&tags->lock);
-	list_add(&info->node, &tags->ctl.head);
-	users = tags->ctl.active_queues + 1;
-	blk_mq_update_available_driver_tags(tags, info, users);
-	WRITE_ONCE(tags->ctl.active_queues, users);
-	blk_mq_update_wake_batch(tags, users);
+	WRITE_ONCE(tags->ctl.active_queues, tags->ctl.active_queues + 1);
 	spin_unlock_irq(&tags->lock);
 }
 
@@ -123,9 +113,7 @@ void blk_mq_tag_wakeup_all(struct blk_mq_tags *tags, bool include_reserve)
  */
 void __blk_mq_tag_idle(struct blk_mq_hw_ctx *hctx)
 {
-	unsigned int users;
 	struct blk_mq_tags *tags = hctx->tags;
-	struct shared_tag_info *info;
 
 	if (blk_mq_is_shared_tags(hctx->flags)) {
 		struct request_queue *q = hctx->queue;
@@ -137,8 +125,6 @@ void __blk_mq_tag_idle(struct blk_mq_hw_ctx *hctx)
 			spin_unlock_irq(&tags->lock);
 			return;
 		}
-
-		info = &q->shared_tag_info;
 	} else {
 		if (!test_bit(BLK_MQ_S_TAG_ACTIVE, &hctx->state))
 			return;
@@ -147,28 +133,21 @@ void __blk_mq_tag_idle(struct blk_mq_hw_ctx *hctx)
 			spin_unlock_irq(&tags->lock);
 			return;
 		}
-
-		info = &hctx->shared_tag_info;
 	}
 
-	list_del_init(&info->node);
-	users = tags->ctl.active_queues - 1;
-	blk_mq_update_available_driver_tags(tags, info, users);
-	WRITE_ONCE(tags->ctl.active_queues, users);
-	blk_mq_update_wake_batch(tags, users);
-
+	WRITE_ONCE(tags->ctl.active_queues, tags->ctl.active_queues - 1);
 	if (blk_mq_is_shared_tags(hctx->flags))
 		clear_bit(QUEUE_FLAG_HCTX_ACTIVE, &hctx->queue->queue_flags);
 	else
 		clear_bit(BLK_MQ_S_TAG_ACTIVE, &hctx->state);
 	spin_unlock_irq(&tags->lock);
-	blk_mq_tag_wakeup_all(tags, false);
 }
 
 void __blk_mq_driver_tag_busy(struct blk_mq_hw_ctx *hctx)
 {
 	unsigned int users;
 	struct blk_mq_tags *tags = hctx->tags;
+	struct shared_tag_info *info;
 
 	if (blk_mq_is_shared_tags(hctx->flags)) {
 		struct request_queue *q = hctx->queue;
@@ -176,14 +155,21 @@ void __blk_mq_driver_tag_busy(struct blk_mq_hw_ctx *hctx)
 		if (test_bit(QUEUE_FLAG_HCTX_BUSY, &q->queue_flags) ||
 		    test_and_set_bit(QUEUE_FLAG_HCTX_BUSY, &q->queue_flags))
 			return;
+
+		info = &q->shared_tag_info;
 	} else {
 		if (test_bit(BLK_MQ_S_DTAG_BUSY, &hctx->state) ||
 		    test_and_set_bit(BLK_MQ_S_DTAG_BUSY, &hctx->state))
 			return;
+
+		info = &hctx->shared_tag_info;
 	}
 
 	spin_lock_irq(&tags->lock);
+	list_add(&info->node, &tags->ctl.head);
 	users = tags->ctl.busy_queues + 1;
+	blk_mq_update_available_driver_tags(tags, info, users);
+	blk_mq_update_wake_batch(tags, users);
 	WRITE_ONCE(tags->ctl.busy_queues, users);
 	spin_unlock_irq(&tags->lock);
 }
@@ -192,22 +178,45 @@ void __blk_mq_driver_tag_idle(struct blk_mq_hw_ctx *hctx)
 {
 	unsigned int users;
 	struct blk_mq_tags *tags = hctx->tags;
+	struct shared_tag_info *info;
 
 	if (blk_mq_is_shared_tags(hctx->flags)) {
 		struct request_queue *q = hctx->queue;
 
-		if (!test_and_clear_bit(QUEUE_FLAG_HCTX_BUSY,
-					&q->queue_flags))
+		if (!test_bit(QUEUE_FLAG_HCTX_BUSY, &q->queue_flags))
 			return;
+
+		spin_lock_irq(&tags->lock);
+		if (!test_bit(QUEUE_FLAG_HCTX_BUSY, &q->queue_flags)) {
+			spin_unlock_irq(&tags->lock);
+			return;
+		}
+		info = &q->shared_tag_info;
 	} else {
-		if (!test_and_clear_bit(BLK_MQ_S_DTAG_BUSY, &hctx->state))
+		if (!test_bit(BLK_MQ_S_DTAG_BUSY, &hctx->state))
 			return;
+
+		spin_lock_irq(&tags->lock);
+		if (!test_bit(BLK_MQ_S_DTAG_BUSY, &hctx->state)) {
+			spin_unlock_irq(&tags->lock);
+			return;
+		}
+		info = &hctx->shared_tag_info;
 	}
 
-	spin_lock_irq(&tags->lock);
+	list_del_init(&info->node);
 	users = tags->ctl.busy_queues - 1;
+	blk_mq_update_available_driver_tags(tags, info, users);
+	blk_mq_update_wake_batch(tags, users);
 	WRITE_ONCE(tags->ctl.busy_queues, users);
+
+	if (blk_mq_is_shared_tags(hctx->flags))
+		clear_bit(QUEUE_FLAG_HCTX_BUSY, &hctx->queue->queue_flags);
+	else
+		clear_bit(BLK_MQ_S_DTAG_BUSY, &hctx->state);
+
 	spin_unlock_irq(&tags->lock);
+	blk_mq_tag_wakeup_all(tags, false);
 }
 
 static int __blk_mq_get_tag(struct blk_mq_alloc_data *data,
-- 
2.39.2
