lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Date:   Sat, 21 Oct 2023 23:48:03 +0800
From:   Yu Kuai <yukuai1@...weicloud.com>
To:     bvanassche@....org, hch@....de, kbusch@...nel.org,
        ming.lei@...hat.com, axboe@...nel.dk
Cc:     linux-block@...r.kernel.org, linux-kernel@...r.kernel.org,
        yukuai3@...wei.com, yukuai1@...weicloud.com, yi.zhang@...wei.com,
        yangerkun@...wei.com
Subject: [PATCH RFC v2 5/8] blk-mq: precalculate available tags for hctx_may_queue()

From: Yu Kuai <yukuai3@...wei.com>

Currently, hctx_may_queue() only needs to know how many queues are
sharing tags, and then calculates how many tags are available for each
queue by fair sharing.

Add a new field 'available_tags' to struct shared_tag_info to store how
many tags are available, as calculated in the slow path, so that
hctx_may_queue() doesn't need to do the calculation itself.

Currently tags are still shared fairly, and now that the calculation is
in the slow path, it's okay to refactor tag sharing with a more
complicated algorithm, which is implemented in the following patches.

Signed-off-by: Yu Kuai <yukuai3@...wei.com>
---
 block/blk-mq-debugfs.c |  3 ++-
 block/blk-mq-tag.c     | 35 ++++++++++++++++++++++++++++++++++-
 block/blk-mq.c         |  4 ++--
 block/blk-mq.h         | 19 ++++++++-----------
 include/linux/blkdev.h |  1 +
 5 files changed, 47 insertions(+), 15 deletions(-)

diff --git a/block/blk-mq-debugfs.c b/block/blk-mq-debugfs.c
index d6ebd8d9d3bb..1d460119f5b3 100644
--- a/block/blk-mq-debugfs.c
+++ b/block/blk-mq-debugfs.c
@@ -158,7 +158,8 @@ static ssize_t queue_state_write(void *data, const char __user *buf,
 static void shared_tag_info_show(struct shared_tag_info *info,
 				 struct seq_file *m)
 {
-	seq_printf(m, "%d\n", atomic_read(&info->active_tags));
+	seq_printf(m, "active tags %d\n", atomic_read(&info->active_tags));
+	seq_printf(m, "available tags %u\n", READ_ONCE(info->available_tags));
 }
 
 static int queue_shared_tag_info_show(void *data, struct seq_file *m)
diff --git a/block/blk-mq-tag.c b/block/blk-mq-tag.c
index 07d9b513990b..261769251282 100644
--- a/block/blk-mq-tag.c
+++ b/block/blk-mq-tag.c
@@ -14,6 +14,8 @@
 #include "blk-mq.h"
 #include "blk-mq-sched.h"
 
+#define shared_tags(tags, users) max((tags->nr_tags + users - 1) / users, 4U)
+
 /*
  * Recalculate wakeup batch when tag is shared by hctx.
  */
@@ -29,10 +31,39 @@ static void blk_mq_update_wake_batch(struct blk_mq_tags *tags,
 			users);
 }
 
-void blk_mq_init_shared_tag_info(struct shared_tag_info *info)
+void blk_mq_init_shared_tag_info(struct shared_tag_info *info,
+				 unsigned int nr_tags)
 {
 	atomic_set(&info->active_tags, 0);
 	INIT_LIST_HEAD(&info->node);
+	info->available_tags = nr_tags;
+}
+
+static void blk_mq_update_available_driver_tags(struct blk_mq_tags *tags,
+						struct shared_tag_info *info,
+						unsigned int users)
+{
+	unsigned int old = tags->ctl.active_queues;
+	int nr_tags;
+	struct shared_tag_info *iter;
+
+	if (!old || !users)
+		return;
+
+	nr_tags = (int)shared_tags(tags, users);
+	if (old < users)
+		WRITE_ONCE(info->available_tags, nr_tags);
+	else
+		WRITE_ONCE(info->available_tags, tags->nr_tags);
+
+	nr_tags -= (int)shared_tags(tags, old);
+	list_for_each_entry(iter, &tags->ctl.head, node) {
+		if (iter == info)
+			continue;
+
+		WRITE_ONCE(iter->available_tags,
+			   (unsigned int)((int)iter->available_tags + nr_tags));
+	}
 }
 
 /*
@@ -70,6 +101,7 @@ void __blk_mq_tag_busy(struct blk_mq_hw_ctx *hctx)
 	spin_lock_irq(&tags->lock);
 	list_add(&info->node, &tags->ctl.head);
 	users = tags->ctl.active_queues + 1;
+	blk_mq_update_available_driver_tags(tags, info, users);
 	WRITE_ONCE(tags->ctl.active_queues, users);
 	blk_mq_update_wake_batch(tags, users);
 	spin_unlock_irq(&tags->lock);
@@ -121,6 +153,7 @@ void __blk_mq_tag_idle(struct blk_mq_hw_ctx *hctx)
 
 	list_del_init(&info->node);
 	users = tags->ctl.active_queues - 1;
+	blk_mq_update_available_driver_tags(tags, info, users);
 	WRITE_ONCE(tags->ctl.active_queues, users);
 	blk_mq_update_wake_batch(tags, users);
 
diff --git a/block/blk-mq.c b/block/blk-mq.c
index de5859dd9f52..8775616bc85c 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -3652,7 +3652,7 @@ static int blk_mq_init_hctx(struct request_queue *q,
 	if (xa_insert(&q->hctx_table, hctx_idx, hctx, GFP_KERNEL))
 		goto exit_flush_rq;
 
-	blk_mq_init_shared_tag_info(&hctx->shared_tag_info);
+	blk_mq_init_shared_tag_info(&hctx->shared_tag_info, set->queue_depth);
 	return 0;
 
  exit_flush_rq:
@@ -4227,7 +4227,7 @@ int blk_mq_init_allocated_queue(struct blk_mq_tag_set *set,
 	if (blk_mq_alloc_ctxs(q))
 		goto err_exit;
 
-	blk_mq_init_shared_tag_info(&q->shared_tag_info);
+	blk_mq_init_shared_tag_info(&q->shared_tag_info, set->queue_depth);
 	/* init q->mq_kobj and sw queues' kobjects */
 	blk_mq_sysfs_init(q);
 
diff --git a/block/blk-mq.h b/block/blk-mq.h
index ac58f2e22f20..5c0d19562848 100644
--- a/block/blk-mq.h
+++ b/block/blk-mq.h
@@ -63,7 +63,8 @@ struct blk_mq_tags *blk_mq_alloc_map_and_rqs(struct blk_mq_tag_set *set,
 void blk_mq_free_map_and_rqs(struct blk_mq_tag_set *set,
 			     struct blk_mq_tags *tags,
 			     unsigned int hctx_idx);
-void blk_mq_init_shared_tag_info(struct shared_tag_info *info);
+void blk_mq_init_shared_tag_info(struct shared_tag_info *info,
+				 unsigned int nr_tags);
 
 /*
  * CPU -> queue mappings
@@ -416,7 +417,7 @@ static inline void blk_mq_free_requests(struct list_head *list)
 static inline bool hctx_may_queue(struct blk_mq_hw_ctx *hctx,
 				  struct sbitmap_queue *bt)
 {
-	unsigned int depth, users;
+	struct shared_tag_info *info;
 
 	if (!hctx || !(hctx->flags & BLK_MQ_F_TAG_QUEUE_SHARED))
 		return true;
@@ -432,20 +433,16 @@ static inline bool hctx_may_queue(struct blk_mq_hw_ctx *hctx,
 
 		if (!test_bit(QUEUE_FLAG_HCTX_ACTIVE, &q->queue_flags))
 			return true;
+
+		info = &q->shared_tag_info;
 	} else {
 		if (!test_bit(BLK_MQ_S_TAG_ACTIVE, &hctx->state))
 			return true;
-	}
 
-	users = READ_ONCE(hctx->tags->ctl.active_queues);
-	if (!users)
-		return true;
+		info = &hctx->shared_tag_info;
+	}
 
-	/*
-	 * Allow at least some tags
-	 */
-	depth = max((bt->sb.depth + users - 1) / users, 4U);
-	return __blk_mq_active_requests(hctx) < depth;
+	return atomic_read(&info->active_tags) < READ_ONCE(info->available_tags);
 }
 
 /* run the code block in @dispatch_ops with rcu/srcu read lock held */
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index f97bc2c7acc9..b364d65fe4e5 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -377,6 +377,7 @@ struct blk_independent_access_ranges {
 
 struct shared_tag_info {
 	atomic_t		active_tags;
+	unsigned int		available_tags;
 	struct list_head	node;
 };
 
-- 
2.39.2

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ