lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [day] [month] [year] [list]
Message-ID: <nim5zkf6h2x63ketliyde2plkkklg2khme5by7calcawpu7l2m@h7cydfhd6kqj>
Date: Thu, 10 Apr 2025 15:18:29 -0400
From: "Liam R. Howlett" <Liam.Howlett@...cle.com>
To: Sidhartha Kumar <sidhartha.kumar@...cle.com>
Cc: linux-kernel@...r.kernel.org, maple-tree@...ts.infradead.org,
        linux-mm@...ck.org, akpm@...ux-foundation.org, willy@...radead.org
Subject: Re: [PATCH v5 5/6] maple_tree: add sufficient height

* Sidhartha Kumar <sidhartha.kumar@...cle.com> [250410 15:14]:
> In order to support rebalancing and spanning stores using less than the
> worst case number of nodes, we need to track more than just the vacant
> height. Using only vacant height to reduce the worst case maple node
> allocation count can lead to a shortcoming of nodes in the following
> scenarios.
> 
> For rebalancing writes, when a leaf node becomes insufficient, it may be
> combined with a sibling into a single node. This means that the parent node
> which has entries for this children will lose one entry. If this parent node
> was just meeting the minimum entries, losing one entry will now cause this
> parent node to be insufficient. This leads to a cascading operation of
> rebalancing at different levels and can lead to more node allocations than
> simply using vacant height can return.
> 
> For spanning writes, a similar situation occurs. At the location at
> which a spanning write is detected, the number of ancestor nodes may
> similarly need to rebalanced into a smaller number of nodes and the same
> cascading situation could occur.
> 
> To use less than the full height of the tree for the number of
> allocations, we also need to track the height at which a non-leaf node
> cannot become insufficient. This means even if a rebalance occurs to a
> child of this node, it currently has enough entries that it can lose one
> without any further action. This field is stored in the maple write state
> as sufficient height. In mas_prealloc_calc() when figuring out how many
> nodes to allocate, we check if the vacant node is lower in the tree
> than a sufficient node (has a larger value). If it is, we cannot use the
> vacant height and must use the difference in the height and sufficient
> height as the basis for the number of nodes needed.
> 
> An off by one bug was also discovered in mast_overflow() where it is
> using >= rather than >. This caused extra iterations of the
> mas_spanning_rebalance() loop and lead to unneeded allocations. A test
> is also added to check the number of allocations is correct.
> 
> Signed-off-by: Sidhartha Kumar <sidhartha.kumar@...cle.com>

Reviewed-by: Liam R. Howlett <Liam.Howlett@...cle.com>

> ---
>  include/linux/maple_tree.h       |  4 +++-
>  lib/maple_tree.c                 | 19 ++++++++++++++++---
>  tools/testing/radix-tree/maple.c | 28 ++++++++++++++++++++++++++++
>  3 files changed, 47 insertions(+), 4 deletions(-)
> 
> diff --git a/include/linux/maple_tree.h b/include/linux/maple_tree.h
> index 657adb33e61e..9ef129038224 100644
> --- a/include/linux/maple_tree.h
> +++ b/include/linux/maple_tree.h
> @@ -464,6 +464,7 @@ struct ma_wr_state {
>  	void *entry;			/* The entry to write */
>  	void *content;			/* The existing entry that is being overwritten */
>  	unsigned char vacant_height;	/* Height of lowest node with free space */
> +	unsigned char sufficient_height;/* Height of lowest node with min sufficiency + 1 nodes */
>  };
>  
>  #define mas_lock(mas)           spin_lock(&((mas)->tree->ma_lock))
> @@ -499,7 +500,8 @@ struct ma_wr_state {
>  		.mas = ma_state,					\
>  		.content = NULL,					\
>  		.entry = wr_entry,					\
> -		.vacant_height = 0					\
> +		.vacant_height = 0,					\
> +		.sufficient_height = 0					\
>  	}
>  
>  #define MA_TOPIARY(name, tree)						\
> diff --git a/lib/maple_tree.c b/lib/maple_tree.c
> index 5610b3742a79..aa139668bcae 100644
> --- a/lib/maple_tree.c
> +++ b/lib/maple_tree.c
> @@ -2741,7 +2741,7 @@ static inline bool mast_sufficient(struct maple_subtree_state *mast)
>   */
>  static inline bool mast_overflow(struct maple_subtree_state *mast)
>  {
> -	if (mast->bn->b_end >= mt_slot_count(mast->orig_l->node))
> +	if (mast->bn->b_end > mt_slot_count(mast->orig_l->node))
>  		return true;
>  
>  	return false;
> @@ -3550,6 +3550,13 @@ static bool mas_wr_walk(struct ma_wr_state *wr_mas)
>  		if (mas->end < mt_slots[wr_mas->type] - 1)
>  			wr_mas->vacant_height = mas->depth + 1;
>  
> +		if (ma_is_root(mas_mn(mas))) {
> +			/* root needs more than 2 entries to be sufficient + 1 */
> +			if (mas->end > 2)
> +				wr_mas->sufficient_height = 1;
> +		} else if (mas->end > mt_min_slots[wr_mas->type] + 1)
> +			wr_mas->sufficient_height = mas->depth + 1;
> +
>  		mas_wr_walk_traverse(wr_mas);
>  	}
>  
> @@ -4185,13 +4192,19 @@ static inline int mas_prealloc_calc(struct ma_wr_state *wr_mas, void *entry)
>  			ret = 0;
>  		break;
>  	case wr_spanning_store:
> -		WARN_ON_ONCE(ret != height * 3 + 1);
> +		if (wr_mas->sufficient_height < wr_mas->vacant_height)
> +			ret = (height - wr_mas->sufficient_height) * 3 + 1;
> +		else
> +			ret = delta * 3 + 1;
>  		break;
>  	case wr_split_store:
>  		ret = delta * 2 + 1;
>  		break;
>  	case wr_rebalance:
> -		ret = height * 2 + 1;
> +		if (wr_mas->sufficient_height < wr_mas->vacant_height)
> +			ret = (height - wr_mas->sufficient_height) * 2 + 1;
> +		else
> +			ret = delta * 2 + 1;
>  		break;
>  	case wr_node_store:
>  		ret = mt_in_rcu(mas->tree) ? 1 : 0;
> diff --git a/tools/testing/radix-tree/maple.c b/tools/testing/radix-tree/maple.c
> index e37a3ab2e921..2c0b38301253 100644
> --- a/tools/testing/radix-tree/maple.c
> +++ b/tools/testing/radix-tree/maple.c
> @@ -36326,6 +36326,30 @@ static inline void check_spanning_store_height(struct maple_tree *mt)
>  	mas_unlock(&mas);
>  }
>  
> +/*
> + * Test to check the path of a spanning rebalance which results in
> + * a collapse where the rebalancing of the child node leads to
> + * insufficieny in the parent node.
> + */
> +static void check_collapsing_rebalance(struct maple_tree *mt)
> +{
> +	int i = 0;
> +	MA_STATE(mas, mt, ULONG_MAX, ULONG_MAX);
> +
> +	/* create a height 6 tree */
> +	while (mt_height(mt) < 6) {
> +		mtree_store_range(mt, i, i + 10, xa_mk_value(i), GFP_KERNEL);
> +		i += 9;
> +	}
> +
> +	/* delete all entries one at a time, starting from the right */
> +	do {
> +		mas_erase(&mas);
> +	} while (mas_prev(&mas, 0) != NULL);
> +
> +	mtree_unlock(mt);
> +}
> +
>  /* callback function used for check_nomem_writer_race() */
>  static void writer2(void *maple_tree)
>  {
> @@ -36496,6 +36520,10 @@ void farmer_tests(void)
>  	check_spanning_store_height(&tree);
>  	mtree_destroy(&tree);
>  
> +	mt_init_flags(&tree, MT_FLAGS_ALLOC_RANGE);
> +	check_collapsing_rebalance(&tree);
> +	mtree_destroy(&tree);
> +
>  	mt_init_flags(&tree, MT_FLAGS_ALLOC_RANGE);
>  	check_null_expand(&tree);
>  	mtree_destroy(&tree);
> -- 
> 2.43.0
> 

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ