[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <1652181274-136198-3-git-send-email-john.garry@huawei.com>
Date: Tue, 10 May 2022 19:14:34 +0800
From: John Garry <john.garry@...wei.com>
To: <axboe@...nel.dk>, <linux-block@...r.kernel.org>
CC: <linux-kernel@...r.kernel.org>, <linux-scsi@...r.kernel.org>,
John Garry <john.garry@...wei.com>
Subject: [RFC PATCH 2/2] sbitmap: Spread sbitmap word allocation over NUMA nodes
Currently sbitmap words are allocated in a single array. That array is
flagged for allocating at the NUMA node passed in sbitmap_init_node().
However often the sbitmap will be accessed by all the CPUs in the system -
for example, when BLK_MQ_F_TAG_HCTX_SHARED is set for the blk-mq tagset.
This can lead to performance issues where all CPUs in the system are doing
cross-NUMA node accesses to read/set/unset sbitmap tags.
Improve this by spreading the word allocations across all NUMA nodes as
evenly as possible. We set the per-CPU hint to fall within range of words
allocated for the NUMA node to which that CPU belongs.
Known issues:
- sbitmap resize does not work well for this scheme
- Improve updating hint for sbitmap_get() failure and sbitmap_put() when
hint is outside range of CPU's NUMA node words.
- Add intelligence for sub-arrays to be allocated at a single node, e.g.
when numa != NUMA_NO_NODE in sbitmap_init_node()
Signed-off-by: John Garry <john.garry@...wei.com>
---
include/linux/sbitmap.h | 5 ++++
lib/sbitmap.c | 63 +++++++++++++++++++++++++++++++++--------
2 files changed, 56 insertions(+), 12 deletions(-)
diff --git a/include/linux/sbitmap.h b/include/linux/sbitmap.h
index 46268f391e32..6d897032dbc6 100644
--- a/include/linux/sbitmap.h
+++ b/include/linux/sbitmap.h
@@ -60,6 +60,11 @@ struct sbitmap {
*/
unsigned int map_nr;
+ /**
+ * @map_nr_per_node: Number of words being used per NUMA node.
+ */
+ unsigned int map_nr_per_node;
+
/**
* @round_robin: Allocate bits in strict round-robin order.
*/
diff --git a/lib/sbitmap.c b/lib/sbitmap.c
index 64fb9800ed8c..99c87fbfa1a1 100644
--- a/lib/sbitmap.c
+++ b/lib/sbitmap.c
@@ -8,6 +8,17 @@
#include <linux/random.h>
#include <linux/sbitmap.h>
#include <linux/seq_file.h>
+#include <linux/mm.h>
+
+static unsigned int sbitmap_get_new_hint(struct sbitmap *sb, int cpu)
+{
+ unsigned int shift = sb->shift;
+ unsigned int map_nr_per_node = sb->map_nr_per_node;
+ unsigned int bit_per_node = map_nr_per_node << shift;
+ unsigned int hint_base = bit_per_node * cpu_to_node(cpu);
+
+ return hint_base + (prandom_u32() % bit_per_node);
+}
static int init_alloc_hint(struct sbitmap *sb, gfp_t flags)
{
@@ -20,8 +31,10 @@ static int init_alloc_hint(struct sbitmap *sb, gfp_t flags)
if (depth && !sb->round_robin) {
int i;
- for_each_possible_cpu(i)
- *per_cpu_ptr(sb->alloc_hint, i) = prandom_u32() % depth;
+ for_each_possible_cpu(i) {
+ *per_cpu_ptr(sb->alloc_hint, i) =
+ sbitmap_get_new_hint(sb, i);
+ }
}
return 0;
}
@@ -86,7 +99,8 @@ int sbitmap_init_node(struct sbitmap *sb, unsigned int depth, int shift,
{
unsigned int bits_per_word;
struct sbitmap_word *map;
- int index;
+ int index, num_nodes = num_online_nodes();
+ int nid, map_nr_cnt;
if (shift < 0)
shift = sbitmap_calculate_shift(depth);
@@ -105,6 +119,11 @@ int sbitmap_init_node(struct sbitmap *sb, unsigned int depth, int shift,
return 0;
}
+ if (sb->map_nr < num_nodes) {
+ sb->map_nr_per_node = 1;
+ } else {
+ sb->map_nr_per_node = sb->map_nr / num_nodes;
+ }
if (alloc_hint) {
if (init_alloc_hint(sb, flags))
return -ENOMEM;
@@ -113,23 +132,43 @@ int sbitmap_init_node(struct sbitmap *sb, unsigned int depth, int shift,
}
sb->map = kvzalloc_node(sb->map_nr * sizeof(*sb->map), flags, node);
- if (!sb->map) {
- free_percpu(sb->alloc_hint);
- return -ENOMEM;
- }
-
- map = kvzalloc_node(sb->map_nr * sizeof(**sb->map), flags, node);
- if (!map)
- return -ENOMEM;
+ if (!sb->map)
+ goto err_map;
- for (index = 0; index < sb->map_nr; index++, map++) {
+ for (index = 0, nid = 0; index < sb->map_nr; index++, map++, map_nr_cnt++) {
struct sbitmap_word **_map;
+ if ((index % sb->map_nr_per_node) == 0) {
+ int cnt;
+
+ if (index == 0) {
+ cnt = sb->map_nr_per_node +
+ (sb->map_nr % sb->map_nr_per_node);
+ } else {
+ cnt = sb->map_nr_per_node;
+ }
+
+ map = kvzalloc_node(cnt * sizeof(**sb->map), flags, nid);
+ if (!map)
+ goto err_map_numa;
+ nid++;
+ }
+
_map = &sb->map[index];
*_map = map;
}
return 0;
+err_map_numa:
+ for (index = 0; index < sb->map_nr; index++, map++) {
+ if ((index % sb->map_nr_per_node) == 0) {
+ kfree(map);
+ }
+ }
+err_map:
+ free_percpu(sb->alloc_hint);
+
+ return -ENOMEM;
}
EXPORT_SYMBOL_GPL(sbitmap_init_node);
--
2.26.2
Powered by blists - more mailing lists