lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <YPbqMSjZIA8l0jHQ@T590>
Date:   Tue, 20 Jul 2021 23:22:25 +0800
From:   Ming Lei <ming.lei@...hat.com>
To:     John Garry <john.garry@...wei.com>
Cc:     axboe@...nel.dk, linux-block@...r.kernel.org,
        linux-kernel@...r.kernel.org, linux-scsi@...r.kernel.org,
        kashyap.desai@...adcom.com, hare@...e.de
Subject: Re: [PATCH 0/9] blk-mq: Reduce static requests memory footprint for
 shared sbitmap

On Wed, Jul 14, 2021 at 11:06:26PM +0800, John Garry wrote:
> Currently a full set of static requests are allocated per hw queue per
> tagset when shared sbitmap is used.
> 
> However, only tagset->queue_depth number of requests may be active at
> any given time. As such, only tagset->queue_depth number of static
> requests are required.
> 
> The same goes for using an IO scheduler, which allocates a full set of
> static requests per hw queue per request queue.
> 
> This series very significantly reduces memory usage in both scenarios by
> allocating static rqs per tagset and per request queue, respectively,
> rather than per hw queue per tagset and per request queue.
> 
> For megaraid sas driver on my 128-CPU arm64 system with 1x SATA disk, we
> save approx. 300MB(!) [370MB -> 60MB]
> 
> A couple of patches are marked as RFC, as maybe there is a better
> implementation approach.

There is another candidate for addressing this issue, and looks simpler:

 block/blk-mq-sched.c |  4 ++++
 block/blk-mq-tag.c   |  4 ++++
 block/blk-mq-tag.h   |  3 +++
 block/blk-mq.c       | 18 ++++++++++++++++++
 block/blk-mq.h       | 11 +++++++++++
 5 files changed, 40 insertions(+)

diff --git a/block/blk-mq-sched.c b/block/blk-mq-sched.c
index c838d81ac058..b9236ee0fe4e 100644
--- a/block/blk-mq-sched.c
+++ b/block/blk-mq-sched.c
@@ -538,6 +538,10 @@ static int blk_mq_sched_alloc_tags(struct request_queue *q,
 	if (!hctx->sched_tags)
 		return -ENOMEM;
 
+	blk_mq_set_master_tags(hctx->sched_tags,
+			q->queue_hw_ctx[0]->sched_tags, hctx->flags,
+			hctx_idx);
+
 	ret = blk_mq_alloc_rqs(set, hctx->sched_tags, hctx_idx, q->nr_requests);
 	if (ret)
 		blk_mq_sched_free_tags(set, hctx, hctx_idx);
diff --git a/block/blk-mq-tag.c b/block/blk-mq-tag.c
index 86f87346232a..c471a073234d 100644
--- a/block/blk-mq-tag.c
+++ b/block/blk-mq-tag.c
@@ -608,6 +608,10 @@ int blk_mq_tag_update_depth(struct blk_mq_hw_ctx *hctx,
 				tags->nr_reserved_tags, set->flags);
 		if (!new)
 			return -ENOMEM;
+
+		blk_mq_set_master_tags(new,
+			hctx->queue->queue_hw_ctx[0]->sched_tags, set->flags,
+			hctx->queue_num);
 		ret = blk_mq_alloc_rqs(set, new, hctx->queue_num, tdepth);
 		if (ret) {
 			blk_mq_free_rq_map(new, set->flags);
diff --git a/block/blk-mq-tag.h b/block/blk-mq-tag.h
index 8ed55af08427..0a3fbbc61e06 100644
--- a/block/blk-mq-tag.h
+++ b/block/blk-mq-tag.h
@@ -21,6 +21,9 @@ struct blk_mq_tags {
 	struct request **static_rqs;
 	struct list_head page_list;
 
+	/* only used for blk_mq_is_sbitmap_shared() */
+	struct blk_mq_tags	*master;
+
 	/*
 	 * used to clear request reference in rqs[] before freeing one
 	 * request pool
diff --git a/block/blk-mq.c b/block/blk-mq.c
index 2c4ac51e54eb..ef8a6a7e5f7c 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -2348,6 +2348,15 @@ void blk_mq_free_rqs(struct blk_mq_tag_set *set, struct blk_mq_tags *tags,
 {
 	struct page *page;
 
+	if (blk_mq_is_sbitmap_shared(set->flags)) {
+		if (tags->master)
+			tags = tags->master;
+		if (hctx_idx < set->nr_hw_queues - 1) {
+			blk_mq_clear_rq_mapping(set, tags, hctx_idx);
+			return;
+		}
+	}
+
 	if (tags->rqs && set->ops->exit_request) {
 		int i;
 
@@ -2444,6 +2453,12 @@ int blk_mq_alloc_rqs(struct blk_mq_tag_set *set, struct blk_mq_tags *tags,
 	size_t rq_size, left;
 	int node;
 
+	if (blk_mq_is_sbitmap_shared(set->flags) && tags->master) {
+		memcpy(tags->static_rqs, tags->master->static_rqs,
+		       sizeof(tags->static_rqs[0]) * tags->nr_tags);
+		return 0;
+	}
+
 	node = blk_mq_hw_queue_to_node(&set->map[HCTX_TYPE_DEFAULT], hctx_idx);
 	if (node == NUMA_NO_NODE)
 		node = set->numa_node;
@@ -2860,6 +2875,9 @@ static bool __blk_mq_alloc_map_and_request(struct blk_mq_tag_set *set,
 	if (!set->tags[hctx_idx])
 		return false;
 
+	blk_mq_set_master_tags(set->tags[hctx_idx], set->tags[0], flags,
+			       hctx_idx);
+
 	ret = blk_mq_alloc_rqs(set, set->tags[hctx_idx], hctx_idx,
 				set->queue_depth);
 	if (!ret)
diff --git a/block/blk-mq.h b/block/blk-mq.h
index d08779f77a26..a08b89be6acc 100644
--- a/block/blk-mq.h
+++ b/block/blk-mq.h
@@ -354,5 +354,16 @@ static inline bool hctx_may_queue(struct blk_mq_hw_ctx *hctx,
 	return __blk_mq_active_requests(hctx) < depth;
 }
 
+static inline void blk_mq_set_master_tags(struct blk_mq_tags *tags,
+		struct blk_mq_tags *master_tags, unsigned int flags,
+		unsigned int hctx_idx)
+{
+	if (blk_mq_is_sbitmap_shared(flags)) {
+		if (hctx_idx)
+			tags->master = master_tags;
+		else
+			tags->master = NULL;
+	}
+}
 
 #endif


Thanks,
Ming

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ