lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20251117224701.1279139-4-ackerleytng@google.com>
Date: Mon, 17 Nov 2025 14:47:00 -0800
From: Ackerley Tng <ackerleytng@...gle.com>
To: willy@...radead.org, akpm@...ux-foundation.org, 
	linux-fsdevel@...r.kernel.org, linux-mm@...ck.org, 
	linux-kernel@...r.kernel.org
Cc: david@...hat.com, michael.roth@....com, vannapurve@...gle.com, 
	Ackerley Tng <ackerleytng@...gle.com>
Subject: [RFC PATCH 3/4] XArray: Support splitting for arbitrarily large entries

The existing xas_split() function primarily supports splitting an entry
into multiple siblings within the current node, or creating child nodes for
one level below.

It does not handle scenarios where the requested split order requires
wiring up multiple levels of new intermediate nodes to form a deeper
subtree. This prevents xas_split() from splitting arbitrarily large
entries.

This commit extends xas_split() to build subtrees of XArray nodes and then
set these subtrees as children of the node where the split is requested.

Signed-off-by: Ackerley Tng <ackerleytng@...gle.com>
---

I had a recursive implementation which I believe was easier to understand, but I
went with an iterative implementation using a stack because I was concerned
about stack depths in the kernel. Let me know if the recursive implementation is
preferred!

Feedback is always appreciated, and I'd specifically like feedback on:

+ RCU-related handling
+ Handling of xas_update() calls
+ Use of node_set_marks() - can/should that be refactored to be node-focused,
  rather than in some cases also updating the child?
+ Can/should xas_split_alloc() read entry using xas_load(), rather than rely on
  void *entry passed as an argument?


 lib/xarray.c | 158 +++++++++++++++++++++++++++++++++++++++------------
 1 file changed, 121 insertions(+), 37 deletions(-)

diff --git a/lib/xarray.c b/lib/xarray.c
index b7c44a75bb03..6fdace2e73df 100644
--- a/lib/xarray.c
+++ b/lib/xarray.c
@@ -1105,52 +1105,136 @@ EXPORT_SYMBOL_GPL(xas_split_alloc);
 void xas_split(struct xa_state *xas, void *entry, unsigned int order)
 {
 	unsigned int sibs = (1 << (order % XA_CHUNK_SHIFT)) - 1;
-	unsigned int offset, marks;
+	struct xa_node *node_to_split;
+	unsigned int offset_to_split;
+	struct xa_node *stack;
 	struct xa_node *node;
-	void *curr = xas_load(xas);
-	int values = 0;
+	unsigned int offset;
+	unsigned int marks;
+	unsigned int i;
+	void *sibling;

-	node = xas->xa_node;
-	if (xas_top(node))
+	xas_load(xas);
+	node_to_split = xas->xa_node;
+	if (xas_top(node_to_split))
 		return;

-	marks = node_get_marks(node, xas->xa_offset);
+	marks = node_get_marks(node_to_split, xas->xa_offset);

-	offset = xas->xa_offset + sibs;
-	do {
-		if (xas->xa_shift < node->shift) {
-			struct xa_node *child = xas->xa_alloc;
-
-			xas->xa_alloc = rcu_dereference_raw(child->parent);
-			__xas_init_node_for_split(xas, child, entry);
-			child->shift = node->shift - XA_CHUNK_SHIFT;
-			child->offset = offset;
-			child->count = XA_CHUNK_SIZE;
-			child->nr_values = xa_is_value(entry) ?
-					XA_CHUNK_SIZE : 0;
-			RCU_INIT_POINTER(child->parent, node);
-			node_set_marks(node, offset, child, xas->xa_sibs,
-					marks);
-			rcu_assign_pointer(node->slots[offset],
-					xa_mk_node(child));
-			if (xa_is_value(curr))
-				values--;
-			xas_update(xas, child);
+	/* Horizontal split: just fill in values in existing node. */
+	if (node_to_split->shift == xas->xa_shift) {
+		offset = xas->xa_offset;
+
+		for (i = offset; i < offset + sibs + 1; i++) {
+			if ((i & xas->xa_sibs) == 0) {
+				node_set_marks(node_to_split, i, NULL, 0, marks);
+				rcu_assign_pointer(node_to_split->slots[i], entry);
+
+				sibling = xa_mk_sibling(i);
+			} else {
+				rcu_assign_pointer(node_to_split->slots[i], sibling);
+			}
+		}
+
+		xas_update(xas, node_to_split);
+		return;
+	}
+
+	/*
+	 * Vertical split: build tree bottom-up, so that on any RCU lookup (on
+	 * the original tree), the tree is consistent.
+	 */
+	offset_to_split = get_offset(xas->xa_index, node_to_split);
+	stack = NULL;
+	offset = 0;
+	for (;;) {
+		unsigned int next_offset;
+
+		if (stack &&
+		    stack->shift == node_to_split->shift - XA_CHUNK_SHIFT &&
+		    stack->offset == offset_to_split + sibs)
+			break;
+
+		if (stack && stack->offset == XA_CHUNK_SIZE - 1) {
+			unsigned int child_shift;
+
+			node = xas->xa_alloc;
+			xas->xa_alloc = rcu_dereference_raw(node->parent);
+
+			child_shift = stack->shift;
+			for (i = 0; i < XA_CHUNK_SIZE; i++) {
+				struct xa_node *child = stack;
+
+				stack = child->parent;
+
+				node_set_marks(node, child->offset, NULL, 0, marks);
+
+				RCU_INIT_POINTER(child->parent, node);
+				RCU_INIT_POINTER(node->slots[child->offset], xa_mk_node(child));
+			}
+
+			node->array = xas->xa;
+			node->count = XA_CHUNK_SIZE;
+			node->nr_values = 0;
+			node->shift = child_shift + XA_CHUNK_SHIFT;
+			node->offset = 0;
+			if (node->shift == node_to_split->shift - XA_CHUNK_SHIFT)
+				node->offset = offset_to_split;
+			if (stack && stack->shift == node->shift)
+				node->offset = stack->offset + 1;
+
+			next_offset = 0;
+
+			xas_update(xas, node);
 		} else {
-			unsigned int canon = offset - xas->xa_sibs;
+			node = xas->xa_alloc;
+			xas->xa_alloc = rcu_dereference_raw(node->parent);

-			node_set_marks(node, canon, NULL, 0, marks);
-			rcu_assign_pointer(node->slots[canon], entry);
-			while (offset > canon)
-				rcu_assign_pointer(node->slots[offset--],
-						xa_mk_sibling(canon));
-			values += (xa_is_value(entry) - xa_is_value(curr)) *
-					(xas->xa_sibs + 1);
+			for (i = 0; i < XA_CHUNK_SIZE; i++) {
+				if ((i & xas->xa_sibs) == 0) {
+					node_set_marks(node, i, NULL, 0, marks);
+					RCU_INIT_POINTER(node->slots[i], entry);
+
+					sibling = xa_mk_sibling(i);
+				} else {
+					RCU_INIT_POINTER(node->slots[i], sibling);
+				}
+			}
+
+			node->array = xas->xa;
+			node->count = XA_CHUNK_SIZE;
+			node->nr_values = xa_is_value(entry) ? XA_CHUNK_SIZE : 0;
+			node->shift = xas->xa_shift;
+			node->offset = offset;
+			if (node->shift == node_to_split->shift - XA_CHUNK_SHIFT)
+				node->offset = offset_to_split;
+			if (stack && stack->shift == node->shift)
+				node->offset = stack->offset + 1;
+
+			next_offset = offset + 1;
 		}
-	} while (offset-- > xas->xa_offset);

-	node->nr_values += values;
-	xas_update(xas, node);
+		node->parent = stack;
+		stack = node;
+
+		offset = next_offset;
+	}
+
+	/* Combine all the new nodes on the stack into node_to_split. */
+	for (i = 0; i < sibs + 1; i++) {
+		node = stack;
+		stack = node->parent;
+
+		node_set_marks(node_to_split, node->offset, NULL, 0, marks);
+
+		rcu_assign_pointer(node->parent, node_to_split);
+		rcu_assign_pointer(node_to_split->slots[node->offset], xa_mk_node(node));
+	}
+
+	WARN_ON(stack);
+
+	node_to_split->nr_values -= xa_is_value(entry) ? sibs + 1 : 0;
+	xas_update(xas, node_to_split);
 }
 EXPORT_SYMBOL_GPL(xas_split);

--
2.52.0.rc1.455.g30608eb744-goog

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ