[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <20250910-slub-percpu-caches-v8-11-ca3099d8352c@suse.cz>
Date: Wed, 10 Sep 2025 10:01:13 +0200
From: Vlastimil Babka <vbabka@...e.cz>
To: Suren Baghdasaryan <surenb@...gle.com>,
"Liam R. Howlett" <Liam.Howlett@...cle.com>,
Christoph Lameter <cl@...two.org>, David Rientjes <rientjes@...gle.com>
Cc: Roman Gushchin <roman.gushchin@...ux.dev>,
Harry Yoo <harry.yoo@...cle.com>, Uladzislau Rezki <urezki@...il.com>,
Sidhartha Kumar <sidhartha.kumar@...cle.com>, linux-mm@...ck.org,
linux-kernel@...r.kernel.org, rcu@...r.kernel.org,
maple-tree@...ts.infradead.org, vbabka@...e.cz
Subject: [PATCH v8 11/23] maple_tree: Drop bulk insert support
From: "Liam R. Howlett" <Liam.Howlett@...cle.com>
Bulk insert mode was added to facilitate forking faster, but forking now
uses __mt_dup() to duplicate the tree.
The addition of sheaves has made the bulk allocations difficult to
maintain - since the expected entries would preallocate into the maple
state. A big part of the maple state node allocation was the ability to
push nodes back onto the state for later use, which was essential to the
bulk insert algorithm.
Remove mas_expected_entries() and mas_destroy_rebalance() functions as
well as the MA_STATE_BULK and MA_STATE_REBALANCE maple state flags since
there are no users anymore. Drop the associated testing as well.
Signed-off-by: Liam R. Howlett <Liam.Howlett@...cle.com>
Signed-off-by: Vlastimil Babka <vbabka@...e.cz>
---
lib/maple_tree.c | 270 +--------------------------------------
lib/test_maple_tree.c | 137 --------------------
tools/testing/radix-tree/maple.c | 36 ------
3 files changed, 4 insertions(+), 439 deletions(-)
diff --git a/lib/maple_tree.c b/lib/maple_tree.c
index 38fb68c082915211c80f473d313159599fe97e2c..4f0e30b57b0cef9e5cf791f3f64f5898752db402 100644
--- a/lib/maple_tree.c
+++ b/lib/maple_tree.c
@@ -83,13 +83,9 @@
/*
* Maple state flags
- * * MA_STATE_BULK - Bulk insert mode
- * * MA_STATE_REBALANCE - Indicate a rebalance during bulk insert
* * MA_STATE_PREALLOC - Preallocated nodes, WARN_ON allocation
*/
-#define MA_STATE_BULK 1
-#define MA_STATE_REBALANCE 2
-#define MA_STATE_PREALLOC 4
+#define MA_STATE_PREALLOC 1
#define ma_parent_ptr(x) ((struct maple_pnode *)(x))
#define mas_tree_parent(x) ((unsigned long)(x->tree) | MA_ROOT_PARENT)
@@ -1031,24 +1027,6 @@ static inline void mas_descend(struct ma_state *mas)
mas->node = mas_slot(mas, slots, mas->offset);
}
-/*
- * mte_set_gap() - Set a maple node gap.
- * @mn: The encoded maple node
- * @gap: The offset of the gap to set
- * @val: The gap value
- */
-static inline void mte_set_gap(const struct maple_enode *mn,
- unsigned char gap, unsigned long val)
-{
- switch (mte_node_type(mn)) {
- default:
- break;
- case maple_arange_64:
- mte_to_node(mn)->ma64.gap[gap] = val;
- break;
- }
-}
-
/*
* mas_ascend() - Walk up a level of the tree.
* @mas: The maple state
@@ -1878,21 +1856,7 @@ static inline int mab_calc_split(struct ma_state *mas,
* end on a NULL entry, with the exception of the left-most leaf. The
* limitation means that the split of a node must be checked for this condition
* and be able to put more data in one direction or the other.
- */
- if (unlikely((mas->mas_flags & MA_STATE_BULK))) {
- *mid_split = 0;
- split = b_end - mt_min_slots[bn->type];
-
- if (!ma_is_leaf(bn->type))
- return split;
-
- mas->mas_flags |= MA_STATE_REBALANCE;
- if (!bn->slot[split])
- split--;
- return split;
- }
-
- /*
+ *
* Although extremely rare, it is possible to enter what is known as the 3-way
* split scenario. The 3-way split comes about by means of a store of a range
* that overwrites the end and beginning of two full nodes. The result is a set
@@ -2039,27 +2003,6 @@ static inline void mab_mas_cp(struct maple_big_node *b_node,
}
}
-/*
- * mas_bulk_rebalance() - Rebalance the end of a tree after a bulk insert.
- * @mas: The maple state
- * @end: The maple node end
- * @mt: The maple node type
- */
-static inline void mas_bulk_rebalance(struct ma_state *mas, unsigned char end,
- enum maple_type mt)
-{
- if (!(mas->mas_flags & MA_STATE_BULK))
- return;
-
- if (mte_is_root(mas->node))
- return;
-
- if (end > mt_min_slots[mt]) {
- mas->mas_flags &= ~MA_STATE_REBALANCE;
- return;
- }
-}
-
/*
* mas_store_b_node() - Store an @entry into the b_node while also copying the
* data from a maple encoded node.
@@ -2109,9 +2052,6 @@ static noinline_for_kasan void mas_store_b_node(struct ma_wr_state *wr_mas,
/* Handle new range ending before old range ends */
piv = mas_safe_pivot(mas, wr_mas->pivots, offset_end, wr_mas->type);
if (piv > mas->last) {
- if (piv == ULONG_MAX)
- mas_bulk_rebalance(mas, b_node->b_end, wr_mas->type);
-
if (offset_end != slot)
wr_mas->content = mas_slot_locked(mas, wr_mas->slots,
offset_end);
@@ -3011,126 +2951,6 @@ static inline void mas_rebalance(struct ma_state *mas,
return mas_spanning_rebalance(mas, &mast, empty_count);
}
-/*
- * mas_destroy_rebalance() - Rebalance left-most node while destroying the maple
- * state.
- * @mas: The maple state
- * @end: The end of the left-most node.
- *
- * During a mass-insert event (such as forking), it may be necessary to
- * rebalance the left-most node when it is not sufficient.
- */
-static inline void mas_destroy_rebalance(struct ma_state *mas, unsigned char end)
-{
- enum maple_type mt = mte_node_type(mas->node);
- struct maple_node reuse, *newnode, *parent, *new_left, *left, *node;
- struct maple_enode *eparent, *old_eparent;
- unsigned char offset, tmp, split = mt_slots[mt] / 2;
- void __rcu **l_slots, **slots;
- unsigned long *l_pivs, *pivs, gap;
- bool in_rcu = mt_in_rcu(mas->tree);
- unsigned char new_height = mas_mt_height(mas);
-
- MA_STATE(l_mas, mas->tree, mas->index, mas->last);
-
- l_mas = *mas;
- mas_prev_sibling(&l_mas);
-
- /* set up node. */
- if (in_rcu) {
- newnode = mas_pop_node(mas);
- } else {
- newnode = &reuse;
- }
-
- node = mas_mn(mas);
- newnode->parent = node->parent;
- slots = ma_slots(newnode, mt);
- pivs = ma_pivots(newnode, mt);
- left = mas_mn(&l_mas);
- l_slots = ma_slots(left, mt);
- l_pivs = ma_pivots(left, mt);
- if (!l_slots[split])
- split++;
- tmp = mas_data_end(&l_mas) - split;
-
- memcpy(slots, l_slots + split + 1, sizeof(void *) * tmp);
- memcpy(pivs, l_pivs + split + 1, sizeof(unsigned long) * tmp);
- pivs[tmp] = l_mas.max;
- memcpy(slots + tmp, ma_slots(node, mt), sizeof(void *) * end);
- memcpy(pivs + tmp, ma_pivots(node, mt), sizeof(unsigned long) * end);
-
- l_mas.max = l_pivs[split];
- mas->min = l_mas.max + 1;
- old_eparent = mt_mk_node(mte_parent(l_mas.node),
- mas_parent_type(&l_mas, l_mas.node));
- tmp += end;
- if (!in_rcu) {
- unsigned char max_p = mt_pivots[mt];
- unsigned char max_s = mt_slots[mt];
-
- if (tmp < max_p)
- memset(pivs + tmp, 0,
- sizeof(unsigned long) * (max_p - tmp));
-
- if (tmp < mt_slots[mt])
- memset(slots + tmp, 0, sizeof(void *) * (max_s - tmp));
-
- memcpy(node, newnode, sizeof(struct maple_node));
- ma_set_meta(node, mt, 0, tmp - 1);
- mte_set_pivot(old_eparent, mte_parent_slot(l_mas.node),
- l_pivs[split]);
-
- /* Remove data from l_pivs. */
- tmp = split + 1;
- memset(l_pivs + tmp, 0, sizeof(unsigned long) * (max_p - tmp));
- memset(l_slots + tmp, 0, sizeof(void *) * (max_s - tmp));
- ma_set_meta(left, mt, 0, split);
- eparent = old_eparent;
-
- goto done;
- }
-
- /* RCU requires replacing both l_mas, mas, and parent. */
- mas->node = mt_mk_node(newnode, mt);
- ma_set_meta(newnode, mt, 0, tmp);
-
- new_left = mas_pop_node(mas);
- new_left->parent = left->parent;
- mt = mte_node_type(l_mas.node);
- slots = ma_slots(new_left, mt);
- pivs = ma_pivots(new_left, mt);
- memcpy(slots, l_slots, sizeof(void *) * split);
- memcpy(pivs, l_pivs, sizeof(unsigned long) * split);
- ma_set_meta(new_left, mt, 0, split);
- l_mas.node = mt_mk_node(new_left, mt);
-
- /* replace parent. */
- offset = mte_parent_slot(mas->node);
- mt = mas_parent_type(&l_mas, l_mas.node);
- parent = mas_pop_node(mas);
- slots = ma_slots(parent, mt);
- pivs = ma_pivots(parent, mt);
- memcpy(parent, mte_to_node(old_eparent), sizeof(struct maple_node));
- rcu_assign_pointer(slots[offset], mas->node);
- rcu_assign_pointer(slots[offset - 1], l_mas.node);
- pivs[offset - 1] = l_mas.max;
- eparent = mt_mk_node(parent, mt);
-done:
- gap = mas_leaf_max_gap(mas);
- mte_set_gap(eparent, mte_parent_slot(mas->node), gap);
- gap = mas_leaf_max_gap(&l_mas);
- mte_set_gap(eparent, mte_parent_slot(l_mas.node), gap);
- mas_ascend(mas);
-
- if (in_rcu) {
- mas_replace_node(mas, old_eparent, new_height);
- mas_adopt_children(mas, mas->node);
- }
-
- mas_update_gap(mas);
-}
-
/*
* mas_split_final_node() - Split the final node in a subtree operation.
* @mast: the maple subtree state
@@ -3837,8 +3657,6 @@ static inline void mas_wr_node_store(struct ma_wr_state *wr_mas,
if (mas->last == wr_mas->end_piv)
offset_end++; /* don't copy this offset */
- else if (unlikely(wr_mas->r_max == ULONG_MAX))
- mas_bulk_rebalance(mas, mas->end, wr_mas->type);
/* set up node. */
if (in_rcu) {
@@ -4255,7 +4073,7 @@ static inline enum store_type mas_wr_store_type(struct ma_wr_state *wr_mas)
new_end = mas_wr_new_end(wr_mas);
/* Potential spanning rebalance collapsing a node */
if (new_end < mt_min_slots[wr_mas->type]) {
- if (!mte_is_root(mas->node) && !(mas->mas_flags & MA_STATE_BULK))
+ if (!mte_is_root(mas->node))
return wr_rebalance;
return wr_node_store;
}
@@ -5562,25 +5380,7 @@ void mas_destroy(struct ma_state *mas)
struct maple_alloc *node;
unsigned long total;
- /*
- * When using mas_for_each() to insert an expected number of elements,
- * it is possible that the number inserted is less than the expected
- * number. To fix an invalid final node, a check is performed here to
- * rebalance the previous node with the final node.
- */
- if (mas->mas_flags & MA_STATE_REBALANCE) {
- unsigned char end;
- if (mas_is_err(mas))
- mas_reset(mas);
- mas_start(mas);
- mtree_range_walk(mas);
- end = mas->end + 1;
- if (end < mt_min_slot_count(mas->node) - 1)
- mas_destroy_rebalance(mas, end);
-
- mas->mas_flags &= ~MA_STATE_REBALANCE;
- }
- mas->mas_flags &= ~(MA_STATE_BULK|MA_STATE_PREALLOC);
+ mas->mas_flags &= ~MA_STATE_PREALLOC;
total = mas_allocated(mas);
while (total) {
@@ -5600,68 +5400,6 @@ void mas_destroy(struct ma_state *mas)
}
EXPORT_SYMBOL_GPL(mas_destroy);
-/*
- * mas_expected_entries() - Set the expected number of entries that will be inserted.
- * @mas: The maple state
- * @nr_entries: The number of expected entries.
- *
- * This will attempt to pre-allocate enough nodes to store the expected number
- * of entries. The allocations will occur using the bulk allocator interface
- * for speed. Please call mas_destroy() on the @mas after inserting the entries
- * to ensure any unused nodes are freed.
- *
- * Return: 0 on success, -ENOMEM if memory could not be allocated.
- */
-int mas_expected_entries(struct ma_state *mas, unsigned long nr_entries)
-{
- int nonleaf_cap = MAPLE_ARANGE64_SLOTS - 2;
- struct maple_enode *enode = mas->node;
- int nr_nodes;
- int ret;
-
- /*
- * Sometimes it is necessary to duplicate a tree to a new tree, such as
- * forking a process and duplicating the VMAs from one tree to a new
- * tree. When such a situation arises, it is known that the new tree is
- * not going to be used until the entire tree is populated. For
- * performance reasons, it is best to use a bulk load with RCU disabled.
- * This allows for optimistic splitting that favours the left and reuse
- * of nodes during the operation.
- */
-
- /* Optimize splitting for bulk insert in-order */
- mas->mas_flags |= MA_STATE_BULK;
-
- /*
- * Avoid overflow, assume a gap between each entry and a trailing null.
- * If this is wrong, it just means allocation can happen during
- * insertion of entries.
- */
- nr_nodes = max(nr_entries, nr_entries * 2 + 1);
- if (!mt_is_alloc(mas->tree))
- nonleaf_cap = MAPLE_RANGE64_SLOTS - 2;
-
- /* Leaves; reduce slots to keep space for expansion */
- nr_nodes = DIV_ROUND_UP(nr_nodes, MAPLE_RANGE64_SLOTS - 2);
- /* Internal nodes */
- nr_nodes += DIV_ROUND_UP(nr_nodes, nonleaf_cap);
- /* Add working room for split (2 nodes) + new parents */
- mas_node_count_gfp(mas, nr_nodes + 3, GFP_KERNEL);
-
- /* Detect if allocations run out */
- mas->mas_flags |= MA_STATE_PREALLOC;
-
- if (!mas_is_err(mas))
- return 0;
-
- ret = xa_err(mas->node);
- mas->node = enode;
- mas_destroy(mas);
- return ret;
-
-}
-EXPORT_SYMBOL_GPL(mas_expected_entries);
-
static void mas_may_activate(struct ma_state *mas)
{
if (!mas->node) {
diff --git a/lib/test_maple_tree.c b/lib/test_maple_tree.c
index cb3936595b0d56a9682ff100eba54693a1427829..14fbbee32046a13d54d60dcac2b45be2bd190ac4 100644
--- a/lib/test_maple_tree.c
+++ b/lib/test_maple_tree.c
@@ -2746,139 +2746,6 @@ static noinline void __init check_fuzzer(struct maple_tree *mt)
mtree_test_erase(mt, ULONG_MAX - 10);
}
-/* duplicate the tree with a specific gap */
-static noinline void __init check_dup_gaps(struct maple_tree *mt,
- unsigned long nr_entries, bool zero_start,
- unsigned long gap)
-{
- unsigned long i = 0;
- struct maple_tree newmt;
- int ret;
- void *tmp;
- MA_STATE(mas, mt, 0, 0);
- MA_STATE(newmas, &newmt, 0, 0);
- struct rw_semaphore newmt_lock;
-
- init_rwsem(&newmt_lock);
- mt_set_external_lock(&newmt, &newmt_lock);
-
- if (!zero_start)
- i = 1;
-
- mt_zero_nr_tallocated();
- for (; i <= nr_entries; i++)
- mtree_store_range(mt, i*10, (i+1)*10 - gap,
- xa_mk_value(i), GFP_KERNEL);
-
- mt_init_flags(&newmt, MT_FLAGS_ALLOC_RANGE | MT_FLAGS_LOCK_EXTERN);
- mt_set_non_kernel(99999);
- down_write(&newmt_lock);
- ret = mas_expected_entries(&newmas, nr_entries);
- mt_set_non_kernel(0);
- MT_BUG_ON(mt, ret != 0);
-
- rcu_read_lock();
- mas_for_each(&mas, tmp, ULONG_MAX) {
- newmas.index = mas.index;
- newmas.last = mas.last;
- mas_store(&newmas, tmp);
- }
- rcu_read_unlock();
- mas_destroy(&newmas);
-
- __mt_destroy(&newmt);
- up_write(&newmt_lock);
-}
-
-/* Duplicate many sizes of trees. Mainly to test expected entry values */
-static noinline void __init check_dup(struct maple_tree *mt)
-{
- int i;
- int big_start = 100010;
-
- /* Check with a value at zero */
- for (i = 10; i < 1000; i++) {
- mt_init_flags(mt, MT_FLAGS_ALLOC_RANGE);
- check_dup_gaps(mt, i, true, 5);
- mtree_destroy(mt);
- rcu_barrier();
- }
-
- cond_resched();
- mt_cache_shrink();
- /* Check with a value at zero, no gap */
- for (i = 1000; i < 2000; i++) {
- mt_init_flags(mt, MT_FLAGS_ALLOC_RANGE);
- check_dup_gaps(mt, i, true, 0);
- mtree_destroy(mt);
- rcu_barrier();
- }
-
- cond_resched();
- mt_cache_shrink();
- /* Check with a value at zero and unreasonably large */
- for (i = big_start; i < big_start + 10; i++) {
- mt_init_flags(mt, MT_FLAGS_ALLOC_RANGE);
- check_dup_gaps(mt, i, true, 5);
- mtree_destroy(mt);
- rcu_barrier();
- }
-
- cond_resched();
- mt_cache_shrink();
- /* Small to medium size not starting at zero*/
- for (i = 200; i < 1000; i++) {
- mt_init_flags(mt, MT_FLAGS_ALLOC_RANGE);
- check_dup_gaps(mt, i, false, 5);
- mtree_destroy(mt);
- rcu_barrier();
- }
-
- cond_resched();
- mt_cache_shrink();
- /* Unreasonably large not starting at zero*/
- for (i = big_start; i < big_start + 10; i++) {
- mt_init_flags(mt, MT_FLAGS_ALLOC_RANGE);
- check_dup_gaps(mt, i, false, 5);
- mtree_destroy(mt);
- rcu_barrier();
- cond_resched();
- mt_cache_shrink();
- }
-
- /* Check non-allocation tree not starting at zero */
- for (i = 1500; i < 3000; i++) {
- mt_init_flags(mt, 0);
- check_dup_gaps(mt, i, false, 5);
- mtree_destroy(mt);
- rcu_barrier();
- cond_resched();
- if (i % 2 == 0)
- mt_cache_shrink();
- }
-
- mt_cache_shrink();
- /* Check non-allocation tree starting at zero */
- for (i = 200; i < 1000; i++) {
- mt_init_flags(mt, 0);
- check_dup_gaps(mt, i, true, 5);
- mtree_destroy(mt);
- rcu_barrier();
- cond_resched();
- }
-
- mt_cache_shrink();
- /* Unreasonably large */
- for (i = big_start + 5; i < big_start + 10; i++) {
- mt_init_flags(mt, 0);
- check_dup_gaps(mt, i, true, 5);
- mtree_destroy(mt);
- rcu_barrier();
- mt_cache_shrink();
- cond_resched();
- }
-}
-
static noinline void __init check_bnode_min_spanning(struct maple_tree *mt)
{
int i = 50;
@@ -4077,10 +3944,6 @@ static int __init maple_tree_seed(void)
check_fuzzer(&tree);
mtree_destroy(&tree);
- mt_init_flags(&tree, MT_FLAGS_ALLOC_RANGE);
- check_dup(&tree);
- mtree_destroy(&tree);
-
mt_init_flags(&tree, MT_FLAGS_ALLOC_RANGE);
check_bnode_min_spanning(&tree);
mtree_destroy(&tree);
diff --git a/tools/testing/radix-tree/maple.c b/tools/testing/radix-tree/maple.c
index 172700fb7784d29f9403003b4484a5ebd7aa316b..c0543060dae2510477963331fb0ccdffd78ea965 100644
--- a/tools/testing/radix-tree/maple.c
+++ b/tools/testing/radix-tree/maple.c
@@ -35455,17 +35455,6 @@ static void check_dfs_preorder(struct maple_tree *mt)
MT_BUG_ON(mt, count != e);
mtree_destroy(mt);
- mt_init_flags(mt, MT_FLAGS_ALLOC_RANGE);
- mas_reset(&mas);
- mt_zero_nr_tallocated();
- mt_set_non_kernel(200);
- mas_expected_entries(&mas, max);
- for (count = 0; count <= max; count++) {
- mas.index = mas.last = count;
- mas_store(&mas, xa_mk_value(count));
- MT_BUG_ON(mt, mas_is_err(&mas));
- }
- mas_destroy(&mas);
rcu_barrier();
/*
* pr_info(" ->seq test of 0-%lu %luK in %d active (%d total)\n",
@@ -36454,27 +36443,6 @@ static inline int check_vma_modification(struct maple_tree *mt)
return 0;
}
-/*
- * test to check that bulk stores do not use wr_rebalance as the store
- * type.
- */
-static inline void check_bulk_rebalance(struct maple_tree *mt)
-{
- MA_STATE(mas, mt, ULONG_MAX, ULONG_MAX);
- int max = 10;
-
- build_full_tree(mt, 0, 2);
-
- /* erase every entry in the tree */
- do {
- /* set up bulk store mode */
- mas_expected_entries(&mas, max);
- mas_erase(&mas);
- MT_BUG_ON(mt, mas.store_type == wr_rebalance);
- } while (mas_prev(&mas, 0) != NULL);
-
- mas_destroy(&mas);
-}
void farmer_tests(void)
{
@@ -36487,10 +36455,6 @@ void farmer_tests(void)
check_vma_modification(&tree);
mtree_destroy(&tree);
- mt_init(&tree);
- check_bulk_rebalance(&tree);
- mtree_destroy(&tree);
-
tree.ma_root = xa_mk_value(0);
mt_dump(&tree, mt_dump_dec);
--
2.51.0
Powered by blists - more mailing lists