lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <c43bfd29-8dfb-4751-86d2-9eb31955d693@oracle.com>
Date: Tue, 25 Feb 2025 11:22:10 -0500
From: Sidhartha Kumar <sidhartha.kumar@...cle.com>
To: "Liam R. Howlett" <Liam.Howlett@...cle.com>, linux-kernel@...r.kernel.org,
        maple-tree@...ts.infradead.org, linux-mm@...ck.org,
        akpm@...ux-foundation.org, richard.weiyang@...il.com
Subject: Re: [PATCH v2 3/6] maple_tree: use vacant nodes to reduce worst case
 allocations

On 2/25/25 10:46 AM, Liam R. Howlett wrote:
> * Sidhartha Kumar <sidhartha.kumar@...cle.com> [250221 11:36]:
>> In order to determine the store type for a maple tree operation, a walk
>> of the tree is done through mas_wr_walk(). This function descends the
>> tree until a spanning write is detected or we reach a leaf node. While
>> descending, keep track of the height at which we encounter a node with
>> available space. This is done by checking if mas->end is less than the
>> number of slots a given node type can fit.
>>
>> Now that the height of the vacant node is tracked, we can use the
>> difference between the height of the tree and the height of the vacant
>> node to know how many levels we will have to propagate creating new
>> nodes. Update mas_prealloc_calc() to consider the vacant height and
>> reduce the number of worst-case allocations.
>>
>> Rebalancing and spanning stores are not supported and fall back to using
>> the full height of the tree for allocations.
>>
>> Update preallocation testing assertions to take into account vacant
>> height.
>>
>> Signed-off-by: Sidhartha <sidhartha.kumar@...cle.com>
>> ---
>>   include/linux/maple_tree.h       |   2 +
>>   lib/maple_tree.c                 |  13 ++--
>>   tools/testing/radix-tree/maple.c | 102 ++++++++++++++++++++++++++++---
>>   3 files changed, 105 insertions(+), 12 deletions(-)
>>
>> diff --git a/include/linux/maple_tree.h b/include/linux/maple_tree.h
>> index cbbcd18d4186..7d777aa2d9ed 100644
>> --- a/include/linux/maple_tree.h
>> +++ b/include/linux/maple_tree.h
>> @@ -463,6 +463,7 @@ struct ma_wr_state {
>>   	void __rcu **slots;		/* mas->node->slots pointer */
>>   	void *entry;			/* The entry to write */
>>   	void *content;			/* The existing entry that is being overwritten */
>> +	unsigned char vacant_height;	/* Height of lowest node with free space */
>>   };
>>   
>>   #define mas_lock(mas)           spin_lock(&((mas)->tree->ma_lock))
>> @@ -498,6 +499,7 @@ struct ma_wr_state {
>>   		.mas = ma_state,					\
>>   		.content = NULL,					\
>>   		.entry = wr_entry,					\
>> +		.vacant_height = 0					\
>>   	}
>>   
>>   #define MA_TOPIARY(name, tree)						\
>> diff --git a/lib/maple_tree.c b/lib/maple_tree.c
>> index d7dac3119748..ef71af0529f4 100644
>> --- a/lib/maple_tree.c
>> +++ b/lib/maple_tree.c
>> @@ -3542,6 +3542,9 @@ static bool mas_wr_walk(struct ma_wr_state *wr_mas)
>>   		if (ma_is_leaf(wr_mas->type))
>>   			return true;
>>   
>> +		if (mas->end < mt_slots[wr_mas->type] - 1)
>> +			wr_mas->vacant_height = mas->depth + 1;
>> +
>>   		mas_wr_walk_traverse(wr_mas);
>>   	}
>>   
>> @@ -4157,7 +4160,9 @@ static inline void mas_wr_prealloc_setup(struct ma_wr_state *wr_mas)
>>   static inline int mas_prealloc_calc(struct ma_wr_state *wr_mas, void *entry)
>>   {
>>   	struct ma_state *mas = wr_mas->mas;
>> -	int ret = mas_mt_height(mas) * 3 + 1;
>> +	unsigned char height = mas_mt_height(mas);
>> +	int ret = height * 3 + 1;
>> +	unsigned char delta = height - wr_mas->vacant_height;
>>   
>>   	switch (mas->store_type) {
>>   	case wr_invalid:
>> @@ -4175,13 +4180,13 @@ static inline int mas_prealloc_calc(struct ma_wr_state *wr_mas, void *entry)
>>   			ret = 0;
>>   		break;
>>   	case wr_spanning_store:
>> -		ret =  mas_mt_height(mas) * 3 + 1;
>> +		ret = height * 3 + 1;
> 
> ret is already height * 3 + 1, you could add a WARN_ON_ONCE or such to
> ensure that's the case here and it's not missed in updates to the code.
> 
>>   		break;
>>   	case wr_split_store:
>> -		ret =  mas_mt_height(mas) * 2 + 1;
>> +		ret = delta * 2 + 1;
>>   		break;
>>   	case wr_rebalance:
>> -		ret =  mas_mt_height(mas) * 2 - 1;
>> +		ret = height * 2 + 1;
>>   		break;
>>   	case wr_node_store:
>>   		ret = mt_in_rcu(mas->tree) ? 1 : 0;
>> diff --git a/tools/testing/radix-tree/maple.c b/tools/testing/radix-tree/maple.c
>> index bc30050227fd..d22c1008dffe 100644
>> --- a/tools/testing/radix-tree/maple.c
>> +++ b/tools/testing/radix-tree/maple.c
>> @@ -35475,12 +35475,85 @@ static void check_dfs_preorder(struct maple_tree *mt)
>>   }
>>   /* End of depth first search tests */
>>   
>> +/* same implementation as mas_is_span_wr() in lib/maple_tree.c */
> 
> Is this needed then?  At the start of this file we have:
> #include "../../../lib/maple_tree.c" so I would think you could use the
> one already defined?


I don't think we can use that one because the it takes in a maple write 
state as an argument which is not exposed externally. The 
check_prealloc() tests use a maple state and the maple write state is 
then defined internally in mas_preallocate() so I'm not sure how to get 
an external facing interface to the maple write state.

That's why a similar implementation is needed but one that takes in a 
maple state rather than a maple write state.

Thanks,
Sid


> 
>> +static bool is_span_wr(struct ma_state *mas, unsigned long r_max,
>> +				enum maple_type type, void *entry)
>> +{
>> +	unsigned long max = r_max;
>> +	unsigned long last = mas->last;
>> +
>> +	/* Contained in this pivot, fast path */
>> +	if (last < max)
>> +		return false;
>> +
>> +	if (ma_is_leaf(type)) {
>> +		max = mas->max;
>> +		if (last < max)
>> +			return false;
>> +	}
>> +
>> +	if (last == max) {
>> +		/*
>> +		 * The last entry of leaf node cannot be NULL unless it is the
>> +		 * rightmost node (writing ULONG_MAX), otherwise it spans slots.
>> +		 */
>> +		if (entry || last == ULONG_MAX)
>> +			return false;
>> +	}
>> +
>> +	return true;
>> +}
>> +
>> +/* get height of the lowest non-leaf node with free space */
>> +static unsigned char get_vacant_height(struct ma_state *mas, void *entry)
>> +{
>> +	char vacant_height = 0;
>> +	enum maple_type type;
>> +	unsigned long *pivots;
>> +	unsigned long min = 0;
>> +	unsigned long max = ULONG_MAX;
>> +
>> +	/* start traversal */
>> +	mas_reset(mas);
>> +	mas_start(mas);
>> +	if (!xa_is_node(mas_root(mas)))
>> +		return 0;
>> +
>> +	type = mte_node_type(mas->node);
>> +	while (!ma_is_leaf(type)) {
>> +		mas_node_walk(mas, mte_to_node(mas->node), type, &min, &max);
>> +		mas->end = mas_data_end(mas);
>> +		pivots = ma_pivots(mte_to_node(mas->node), type);
>> +
>> +		if (pivots) {
>> +			if (mas->offset)
>> +				min = pivots[mas->offset - 1];
>> +			if (mas->offset < mas->end)
>> +				max = pivots[mas->offset];
>> +		}
>> +
>> +		/* detect spanning write */
>> +		if (is_span_wr(mas, max, type, entry))
>> +			break;
>> +
>> +		if (mas->end < mt_slot_count(mas->node) - 1)
>> +			vacant_height = mas->depth + 1;
>> +
>> +		mas_descend(mas);
>> +		type = mte_node_type(mas->node);
>> +		mas->depth++;
>> +	}
>> +
>> +	return vacant_height;
>> +}
>> +
>>   /* Preallocation testing */
>>   static noinline void __init check_prealloc(struct maple_tree *mt)
>>   {
>>   	unsigned long i, max = 100;
>>   	unsigned long allocated;
>>   	unsigned char height;
>> +	unsigned char vacant_height;
>>   	struct maple_node *mn;
>>   	void *ptr = check_prealloc;
>>   	MA_STATE(mas, mt, 10, 20);
>> @@ -35494,8 +35567,9 @@ static noinline void __init check_prealloc(struct maple_tree *mt)
>>   	MT_BUG_ON(mt, mas_preallocate(&mas, ptr, GFP_KERNEL) != 0);
>>   	allocated = mas_allocated(&mas);
>>   	height = mas_mt_height(&mas);
>> +	vacant_height = get_vacant_height(&mas, ptr);
>>   	MT_BUG_ON(mt, allocated == 0);
>> -	MT_BUG_ON(mt, allocated != 1 + height * 3);
>> +	MT_BUG_ON(mt, allocated != 1 + (height - vacant_height) * 3);
>>   	mas_destroy(&mas);
>>   	allocated = mas_allocated(&mas);
>>   	MT_BUG_ON(mt, allocated != 0);
>> @@ -35503,8 +35577,9 @@ static noinline void __init check_prealloc(struct maple_tree *mt)
>>   	MT_BUG_ON(mt, mas_preallocate(&mas, ptr, GFP_KERNEL) != 0);
>>   	allocated = mas_allocated(&mas);
>>   	height = mas_mt_height(&mas);
>> +	vacant_height = get_vacant_height(&mas, ptr);
>>   	MT_BUG_ON(mt, allocated == 0);
>> -	MT_BUG_ON(mt, allocated != 1 + height * 3);
>> +	MT_BUG_ON(mt, allocated != 1 + (height - vacant_height) * 3);
>>   	MT_BUG_ON(mt, mas_preallocate(&mas, ptr, GFP_KERNEL) != 0);
>>   	mas_destroy(&mas);
>>   	allocated = mas_allocated(&mas);
>> @@ -35514,7 +35589,8 @@ static noinline void __init check_prealloc(struct maple_tree *mt)
>>   	MT_BUG_ON(mt, mas_preallocate(&mas, ptr, GFP_KERNEL) != 0);
>>   	allocated = mas_allocated(&mas);
>>   	height = mas_mt_height(&mas);
>> -	MT_BUG_ON(mt, allocated != 1 + height * 3);
>> +	vacant_height = get_vacant_height(&mas, ptr);
>> +	MT_BUG_ON(mt, allocated != 1 + (height - vacant_height) * 3);
>>   	mn = mas_pop_node(&mas);
>>   	MT_BUG_ON(mt, mas_allocated(&mas) != allocated - 1);
>>   	mn->parent = ma_parent_ptr(mn);
>> @@ -35527,7 +35603,8 @@ static noinline void __init check_prealloc(struct maple_tree *mt)
>>   	MT_BUG_ON(mt, mas_preallocate(&mas, ptr, GFP_KERNEL) != 0);
>>   	allocated = mas_allocated(&mas);
>>   	height = mas_mt_height(&mas);
>> -	MT_BUG_ON(mt, allocated != 1 + height * 3);
>> +	vacant_height = get_vacant_height(&mas, ptr);
>> +	MT_BUG_ON(mt, allocated != 1 + (height - vacant_height) * 3);
>>   	mn = mas_pop_node(&mas);
>>   	MT_BUG_ON(mt, mas_allocated(&mas) != allocated - 1);
>>   	MT_BUG_ON(mt, mas_preallocate(&mas, ptr, GFP_KERNEL) != 0);
>> @@ -35540,7 +35617,8 @@ static noinline void __init check_prealloc(struct maple_tree *mt)
>>   	MT_BUG_ON(mt, mas_preallocate(&mas, ptr, GFP_KERNEL) != 0);
>>   	allocated = mas_allocated(&mas);
>>   	height = mas_mt_height(&mas);
>> -	MT_BUG_ON(mt, allocated != 1 + height * 3);
>> +	vacant_height = get_vacant_height(&mas, ptr);
>> +	MT_BUG_ON(mt, allocated != 1 + (height - vacant_height) * 3);
>>   	mn = mas_pop_node(&mas);
>>   	MT_BUG_ON(mt, mas_allocated(&mas) != allocated - 1);
>>   	mas_push_node(&mas, mn);
>> @@ -35553,7 +35631,8 @@ static noinline void __init check_prealloc(struct maple_tree *mt)
>>   	MT_BUG_ON(mt, mas_preallocate(&mas, ptr, GFP_KERNEL) != 0);
>>   	allocated = mas_allocated(&mas);
>>   	height = mas_mt_height(&mas);
>> -	MT_BUG_ON(mt, allocated != 1 + height * 3);
>> +	vacant_height = get_vacant_height(&mas, ptr);
>> +	MT_BUG_ON(mt, allocated != 1 + (height - vacant_height) * 3);
>>   	mas_store_prealloc(&mas, ptr);
>>   	MT_BUG_ON(mt, mas_allocated(&mas) != 0);
>>   
>> @@ -35578,7 +35657,8 @@ static noinline void __init check_prealloc(struct maple_tree *mt)
>>   	MT_BUG_ON(mt, mas_preallocate(&mas, ptr, GFP_KERNEL) != 0);
>>   	allocated = mas_allocated(&mas);
>>   	height = mas_mt_height(&mas);
>> -	MT_BUG_ON(mt, allocated != 1 + height * 2);
>> +	vacant_height = get_vacant_height(&mas, ptr);
>> +	MT_BUG_ON(mt, allocated != 1 + (height - vacant_height) * 2);
>>   	mas_store_prealloc(&mas, ptr);
>>   	MT_BUG_ON(mt, mas_allocated(&mas) != 0);
>>   	mt_set_non_kernel(1);
>> @@ -35595,8 +35675,14 @@ static noinline void __init check_prealloc(struct maple_tree *mt)
>>   	MT_BUG_ON(mt, mas_preallocate(&mas, ptr, GFP_KERNEL) != 0);
>>   	allocated = mas_allocated(&mas);
>>   	height = mas_mt_height(&mas);
>> +	vacant_height = get_vacant_height(&mas, ptr);
>>   	MT_BUG_ON(mt, allocated == 0);
>> -	MT_BUG_ON(mt, allocated != 1 + height * 3);
>> +	/*
>> +	 * vacant height cannot be used to compute the number of nodes needed
>> +	 * as the root contains two entries which means it is on the verge of
>> +	 * insufficiency. The worst case full height of the tree is needed.
>> +	 */
>> +	MT_BUG_ON(mt, allocated != height * 3 + 1);
>>   	mas_store_prealloc(&mas, ptr);
>>   	MT_BUG_ON(mt, mas_allocated(&mas) != 0);
>>   	mas_set_range(&mas, 0, 200);
>> -- 
>> 2.43.0
>>


Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ