lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [day] [month] [year] [list]
Message-ID: <qadlxahyhhm3cihdnwjwhbolk36acybreawwgcy42rltxcpzwy@7xkxdv4ue5pp>
Date: Wed, 26 Feb 2025 10:30:55 -0500
From: "Liam R. Howlett" <Liam.Howlett@...cle.com>
To: Sidhartha Kumar <sidhartha.kumar@...cle.com>
Cc: linux-kernel@...r.kernel.org, maple-tree@...ts.infradead.org,
        linux-mm@...ck.org, akpm@...ux-foundation.org,
        richard.weiyang@...il.com
Subject: Re: [PATCH v2 3/6] maple_tree: use vacant nodes to reduce worst case
 allocations

* Sidhartha Kumar <sidhartha.kumar@...cle.com> [250225 11:22]:
> On 2/25/25 10:46 AM, Liam R. Howlett wrote:
> > * Sidhartha Kumar <sidhartha.kumar@...cle.com> [250221 11:36]:
> > > In order to determine the store type for a maple tree operation, a walk
> > > of the tree is done through mas_wr_walk(). This function descends the
> > > tree until a spanning write is detected or we reach a leaf node. While
> > > descending, keep track of the height at which we encounter a node with
> > > available space. This is done by checking if mas->end is less than the
> > > number of slots a given node type can fit.
> > > 
> > > Now that the height of the vacant node is tracked, we can use the
> > > difference between the height of the tree and the height of the vacant
> > > node to know how many levels we will have to propagate creating new
> > > nodes. Update mas_prealloc_calc() to consider the vacant height and
> > > reduce the number of worst-case allocations.
> > > 
> > > Rebalancing and spanning stores are not supported and fall back to using
> > > the full height of the tree for allocations.
> > > 
> > > Update preallocation testing assertions to take into account vacant
> > > height.
> > > 
> > > Signed-off-by: Sidhartha <sidhartha.kumar@...cle.com>
> > > ---
> > >   include/linux/maple_tree.h       |   2 +
> > >   lib/maple_tree.c                 |  13 ++--
> > >   tools/testing/radix-tree/maple.c | 102 ++++++++++++++++++++++++++++---
> > >   3 files changed, 105 insertions(+), 12 deletions(-)
> > > 
> > > diff --git a/include/linux/maple_tree.h b/include/linux/maple_tree.h
> > > index cbbcd18d4186..7d777aa2d9ed 100644
> > > --- a/include/linux/maple_tree.h
> > > +++ b/include/linux/maple_tree.h
> > > @@ -463,6 +463,7 @@ struct ma_wr_state {
> > >   	void __rcu **slots;		/* mas->node->slots pointer */
> > >   	void *entry;			/* The entry to write */
> > >   	void *content;			/* The existing entry that is being overwritten */
> > > +	unsigned char vacant_height;	/* Height of lowest node with free space */
> > >   };
> > >   #define mas_lock(mas)           spin_lock(&((mas)->tree->ma_lock))
> > > @@ -498,6 +499,7 @@ struct ma_wr_state {
> > >   		.mas = ma_state,					\
> > >   		.content = NULL,					\
> > >   		.entry = wr_entry,					\
> > > +		.vacant_height = 0					\
> > >   	}
> > >   #define MA_TOPIARY(name, tree)						\
> > > diff --git a/lib/maple_tree.c b/lib/maple_tree.c
> > > index d7dac3119748..ef71af0529f4 100644
> > > --- a/lib/maple_tree.c
> > > +++ b/lib/maple_tree.c
> > > @@ -3542,6 +3542,9 @@ static bool mas_wr_walk(struct ma_wr_state *wr_mas)
> > >   		if (ma_is_leaf(wr_mas->type))
> > >   			return true;
> > > +		if (mas->end < mt_slots[wr_mas->type] - 1)
> > > +			wr_mas->vacant_height = mas->depth + 1;
> > > +
> > >   		mas_wr_walk_traverse(wr_mas);
> > >   	}
> > > @@ -4157,7 +4160,9 @@ static inline void mas_wr_prealloc_setup(struct ma_wr_state *wr_mas)
> > >   static inline int mas_prealloc_calc(struct ma_wr_state *wr_mas, void *entry)
> > >   {
> > >   	struct ma_state *mas = wr_mas->mas;
> > > -	int ret = mas_mt_height(mas) * 3 + 1;
> > > +	unsigned char height = mas_mt_height(mas);
> > > +	int ret = height * 3 + 1;
> > > +	unsigned char delta = height - wr_mas->vacant_height;
> > >   	switch (mas->store_type) {
> > >   	case wr_invalid:
> > > @@ -4175,13 +4180,13 @@ static inline int mas_prealloc_calc(struct ma_wr_state *wr_mas, void *entry)
> > >   			ret = 0;
> > >   		break;
> > >   	case wr_spanning_store:
> > > -		ret =  mas_mt_height(mas) * 3 + 1;
> > > +		ret = height * 3 + 1;
> > 
> > ret is already height * 3 + 1, you could add a WARN_ON_ONCE or such to
> > ensure that's the case here and it's not missed in updates to the code.
> > 
> > >   		break;
> > >   	case wr_split_store:
> > > -		ret =  mas_mt_height(mas) * 2 + 1;
> > > +		ret = delta * 2 + 1;
> > >   		break;
> > >   	case wr_rebalance:
> > > -		ret =  mas_mt_height(mas) * 2 - 1;
> > > +		ret = height * 2 + 1;
> > >   		break;
> > >   	case wr_node_store:
> > >   		ret = mt_in_rcu(mas->tree) ? 1 : 0;
> > > diff --git a/tools/testing/radix-tree/maple.c b/tools/testing/radix-tree/maple.c
> > > index bc30050227fd..d22c1008dffe 100644
> > > --- a/tools/testing/radix-tree/maple.c
> > > +++ b/tools/testing/radix-tree/maple.c
> > > @@ -35475,12 +35475,85 @@ static void check_dfs_preorder(struct maple_tree *mt)
> > >   }
> > >   /* End of depth first search tests */
> > > +/* same implementation as mas_is_span_wr() in lib/maple_tree.c */
> > 
> > Is this needed then?  At the start of this file we have:
> > #include "../../../lib/maple_tree.c" so I would think you could use the
> > one already defined?
> 
> 
> I don't think we can use that one because the it takes in a maple write
> state as an argument which is not exposed externally. The check_prealloc()
> tests use a maple state and the maple write state is then defined internally
> in mas_preallocate() so I'm not sure how to get an external facing interface
> to the maple write state.

Ah, yes.  But you have everything it uses, so you can use it by setting:
wr_mas.mas = mas,
wr_mas.r_max = r_max,
wr_mas.type = type,
wr_mas.entry = entry

You can add a new MA_WR_STATE #define like in the maple_tree.h file, but
only to the testing side.

I think it's worth doing because, if the internal checking changes then
this test will still work (or will fail and force an update the wr_mas
setup, or whatever..).  If we have duplicated code that becomes out of
sync, we risk passing this test without actually getting the right answer
in the kernel where it matters most.

> 
> That's why a similar implementation is needed but one that takes in a maple
> state rather than a maple write state.
> 
> Thanks,
> Sid
> 
> 
> > 
> > > +static bool is_span_wr(struct ma_state *mas, unsigned long r_max,
> > > +				enum maple_type type, void *entry)
> > > +{
> > > +	unsigned long max = r_max;
> > > +	unsigned long last = mas->last;
> > > +
> > > +	/* Contained in this pivot, fast path */
> > > +	if (last < max)
> > > +		return false;
> > > +
> > > +	if (ma_is_leaf(type)) {
> > > +		max = mas->max;
> > > +		if (last < max)
> > > +			return false;
> > > +	}
> > > +
> > > +	if (last == max) {
> > > +		/*
> > > +		 * The last entry of leaf node cannot be NULL unless it is the
> > > +		 * rightmost node (writing ULONG_MAX), otherwise it spans slots.
> > > +		 */
> > > +		if (entry || last == ULONG_MAX)
> > > +			return false;
> > > +	}
> > > +
> > > +	return true;
> > > +}
> > > +
> > > +/* get height of the lowest non-leaf node with free space */
> > > +static unsigned char get_vacant_height(struct ma_state *mas, void *entry)
> > > +{
> > > +	char vacant_height = 0;
> > > +	enum maple_type type;
> > > +	unsigned long *pivots;
> > > +	unsigned long min = 0;
> > > +	unsigned long max = ULONG_MAX;
> > > +
> > > +	/* start traversal */
> > > +	mas_reset(mas);
> > > +	mas_start(mas);
> > > +	if (!xa_is_node(mas_root(mas)))
> > > +		return 0;
> > > +
> > > +	type = mte_node_type(mas->node);
> > > +	while (!ma_is_leaf(type)) {
> > > +		mas_node_walk(mas, mte_to_node(mas->node), type, &min, &max);
> > > +		mas->end = mas_data_end(mas);
> > > +		pivots = ma_pivots(mte_to_node(mas->node), type);
> > > +
> > > +		if (pivots) {
> > > +			if (mas->offset)
> > > +				min = pivots[mas->offset - 1];
> > > +			if (mas->offset < mas->end)
> > > +				max = pivots[mas->offset];
> > > +		}
> > > +
> > > +		/* detect spanning write */
> > > +		if (is_span_wr(mas, max, type, entry))
> > > +			break;
> > > +
> > > +		if (mas->end < mt_slot_count(mas->node) - 1)
> > > +			vacant_height = mas->depth + 1;
> > > +
> > > +		mas_descend(mas);
> > > +		type = mte_node_type(mas->node);
> > > +		mas->depth++;
> > > +	}
> > > +
> > > +	return vacant_height;
> > > +}
> > > +
> > >   /* Preallocation testing */
> > >   static noinline void __init check_prealloc(struct maple_tree *mt)
> > >   {
> > >   	unsigned long i, max = 100;
> > >   	unsigned long allocated;
> > >   	unsigned char height;
> > > +	unsigned char vacant_height;
> > >   	struct maple_node *mn;
> > >   	void *ptr = check_prealloc;
> > >   	MA_STATE(mas, mt, 10, 20);
> > > @@ -35494,8 +35567,9 @@ static noinline void __init check_prealloc(struct maple_tree *mt)
> > >   	MT_BUG_ON(mt, mas_preallocate(&mas, ptr, GFP_KERNEL) != 0);
> > >   	allocated = mas_allocated(&mas);
> > >   	height = mas_mt_height(&mas);
> > > +	vacant_height = get_vacant_height(&mas, ptr);
> > >   	MT_BUG_ON(mt, allocated == 0);
> > > -	MT_BUG_ON(mt, allocated != 1 + height * 3);
> > > +	MT_BUG_ON(mt, allocated != 1 + (height - vacant_height) * 3);
> > >   	mas_destroy(&mas);
> > >   	allocated = mas_allocated(&mas);
> > >   	MT_BUG_ON(mt, allocated != 0);
> > > @@ -35503,8 +35577,9 @@ static noinline void __init check_prealloc(struct maple_tree *mt)
> > >   	MT_BUG_ON(mt, mas_preallocate(&mas, ptr, GFP_KERNEL) != 0);
> > >   	allocated = mas_allocated(&mas);
> > >   	height = mas_mt_height(&mas);
> > > +	vacant_height = get_vacant_height(&mas, ptr);
> > >   	MT_BUG_ON(mt, allocated == 0);
> > > -	MT_BUG_ON(mt, allocated != 1 + height * 3);
> > > +	MT_BUG_ON(mt, allocated != 1 + (height - vacant_height) * 3);
> > >   	MT_BUG_ON(mt, mas_preallocate(&mas, ptr, GFP_KERNEL) != 0);
> > >   	mas_destroy(&mas);
> > >   	allocated = mas_allocated(&mas);
> > > @@ -35514,7 +35589,8 @@ static noinline void __init check_prealloc(struct maple_tree *mt)
> > >   	MT_BUG_ON(mt, mas_preallocate(&mas, ptr, GFP_KERNEL) != 0);
> > >   	allocated = mas_allocated(&mas);
> > >   	height = mas_mt_height(&mas);
> > > -	MT_BUG_ON(mt, allocated != 1 + height * 3);
> > > +	vacant_height = get_vacant_height(&mas, ptr);
> > > +	MT_BUG_ON(mt, allocated != 1 + (height - vacant_height) * 3);
> > >   	mn = mas_pop_node(&mas);
> > >   	MT_BUG_ON(mt, mas_allocated(&mas) != allocated - 1);
> > >   	mn->parent = ma_parent_ptr(mn);
> > > @@ -35527,7 +35603,8 @@ static noinline void __init check_prealloc(struct maple_tree *mt)
> > >   	MT_BUG_ON(mt, mas_preallocate(&mas, ptr, GFP_KERNEL) != 0);
> > >   	allocated = mas_allocated(&mas);
> > >   	height = mas_mt_height(&mas);
> > > -	MT_BUG_ON(mt, allocated != 1 + height * 3);
> > > +	vacant_height = get_vacant_height(&mas, ptr);
> > > +	MT_BUG_ON(mt, allocated != 1 + (height - vacant_height) * 3);
> > >   	mn = mas_pop_node(&mas);
> > >   	MT_BUG_ON(mt, mas_allocated(&mas) != allocated - 1);
> > >   	MT_BUG_ON(mt, mas_preallocate(&mas, ptr, GFP_KERNEL) != 0);
> > > @@ -35540,7 +35617,8 @@ static noinline void __init check_prealloc(struct maple_tree *mt)
> > >   	MT_BUG_ON(mt, mas_preallocate(&mas, ptr, GFP_KERNEL) != 0);
> > >   	allocated = mas_allocated(&mas);
> > >   	height = mas_mt_height(&mas);
> > > -	MT_BUG_ON(mt, allocated != 1 + height * 3);
> > > +	vacant_height = get_vacant_height(&mas, ptr);
> > > +	MT_BUG_ON(mt, allocated != 1 + (height - vacant_height) * 3);
> > >   	mn = mas_pop_node(&mas);
> > >   	MT_BUG_ON(mt, mas_allocated(&mas) != allocated - 1);
> > >   	mas_push_node(&mas, mn);
> > > @@ -35553,7 +35631,8 @@ static noinline void __init check_prealloc(struct maple_tree *mt)
> > >   	MT_BUG_ON(mt, mas_preallocate(&mas, ptr, GFP_KERNEL) != 0);
> > >   	allocated = mas_allocated(&mas);
> > >   	height = mas_mt_height(&mas);
> > > -	MT_BUG_ON(mt, allocated != 1 + height * 3);
> > > +	vacant_height = get_vacant_height(&mas, ptr);
> > > +	MT_BUG_ON(mt, allocated != 1 + (height - vacant_height) * 3);
> > >   	mas_store_prealloc(&mas, ptr);
> > >   	MT_BUG_ON(mt, mas_allocated(&mas) != 0);
> > > @@ -35578,7 +35657,8 @@ static noinline void __init check_prealloc(struct maple_tree *mt)
> > >   	MT_BUG_ON(mt, mas_preallocate(&mas, ptr, GFP_KERNEL) != 0);
> > >   	allocated = mas_allocated(&mas);
> > >   	height = mas_mt_height(&mas);
> > > -	MT_BUG_ON(mt, allocated != 1 + height * 2);
> > > +	vacant_height = get_vacant_height(&mas, ptr);
> > > +	MT_BUG_ON(mt, allocated != 1 + (height - vacant_height) * 2);
> > >   	mas_store_prealloc(&mas, ptr);
> > >   	MT_BUG_ON(mt, mas_allocated(&mas) != 0);
> > >   	mt_set_non_kernel(1);
> > > @@ -35595,8 +35675,14 @@ static noinline void __init check_prealloc(struct maple_tree *mt)
> > >   	MT_BUG_ON(mt, mas_preallocate(&mas, ptr, GFP_KERNEL) != 0);
> > >   	allocated = mas_allocated(&mas);
> > >   	height = mas_mt_height(&mas);
> > > +	vacant_height = get_vacant_height(&mas, ptr);
> > >   	MT_BUG_ON(mt, allocated == 0);
> > > -	MT_BUG_ON(mt, allocated != 1 + height * 3);
> > > +	/*
> > > +	 * vacant height cannot be used to compute the number of nodes needed
> > > +	 * as the root contains two entries which means it is on the verge of
> > > +	 * insufficiency. The worst case full height of the tree is needed.
> > > +	 */
> > > +	MT_BUG_ON(mt, allocated != height * 3 + 1);
> > >   	mas_store_prealloc(&mas, ptr);
> > >   	MT_BUG_ON(mt, mas_allocated(&mas) != 0);
> > >   	mas_set_range(&mas, 0, 200);
> > > -- 
> > > 2.43.0
> > > 
> 

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ