lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <20240212115500.2078463-9-max.kellermann@ionos.com>
Date: Mon, 12 Feb 2024 12:54:33 +0100
From: Max Kellermann <max.kellermann@...os.com>
To: linux-kernel@...r.kernel.org
Cc: Max Kellermann <max.kellermann@...os.com>
Subject: [PATCH v5 08/35] maple_tree.h: move declarations to maple_tree_types.h

By providing declarations in a lean header, we can reduce header
dependencies.

Signed-off-by: Max Kellermann <max.kellermann@...os.com>
---
 include/linux/maple_tree.h       | 323 +----------------------------
 include/linux/maple_tree_types.h | 341 +++++++++++++++++++++++++++++++
 include/linux/mm.h               |   1 +
 include/linux/mm_types.h         |   2 +-
 4 files changed, 345 insertions(+), 322 deletions(-)
 create mode 100644 include/linux/maple_tree_types.h

diff --git a/include/linux/maple_tree.h b/include/linux/maple_tree.h
index b3d63123b945..7f5dd33f4b94 100644
--- a/include/linux/maple_tree.h
+++ b/include/linux/maple_tree.h
@@ -8,147 +8,13 @@
  *              Matthew Wilcox <willy@...radead.org>
  */
 
+#include <linux/maple_tree_types.h>
+
 #include <linux/kernel.h>
 #include <linux/rcupdate.h>
 #include <linux/spinlock.h>
 /* #define CONFIG_MAPLE_RCU_DISABLED */
 
-/*
- * Allocated nodes are mutable until they have been inserted into the tree,
- * at which time they cannot change their type until they have been removed
- * from the tree and an RCU grace period has passed.
- *
- * Removed nodes have their ->parent set to point to themselves.  RCU readers
- * check ->parent before relying on the value that they loaded from the
- * slots array.  This lets us reuse the slots array for the RCU head.
- *
- * Nodes in the tree point to their parent unless bit 0 is set.
- */
-#if defined(CONFIG_64BIT) || defined(BUILD_VDSO32_64)
-/* 64bit sizes */
-#define MAPLE_NODE_SLOTS	31	/* 256 bytes including ->parent */
-#define MAPLE_RANGE64_SLOTS	16	/* 256 bytes */
-#define MAPLE_ARANGE64_SLOTS	10	/* 240 bytes */
-#define MAPLE_ALLOC_SLOTS	(MAPLE_NODE_SLOTS - 1)
-#else
-/* 32bit sizes */
-#define MAPLE_NODE_SLOTS	63	/* 256 bytes including ->parent */
-#define MAPLE_RANGE64_SLOTS	32	/* 256 bytes */
-#define MAPLE_ARANGE64_SLOTS	21	/* 240 bytes */
-#define MAPLE_ALLOC_SLOTS	(MAPLE_NODE_SLOTS - 2)
-#endif /* defined(CONFIG_64BIT) || defined(BUILD_VDSO32_64) */
-
-#define MAPLE_NODE_MASK		255UL
-
-/*
- * The node->parent of the root node has bit 0 set and the rest of the pointer
- * is a pointer to the tree itself.  No more bits are available in this pointer
- * (on m68k, the data structure may only be 2-byte aligned).
- *
- * Internal non-root nodes can only have maple_range_* nodes as parents.  The
- * parent pointer is 256B aligned like all other tree nodes.  When storing a 32
- * or 64 bit values, the offset can fit into 4 bits.  The 16 bit values need an
- * extra bit to store the offset.  This extra bit comes from a reuse of the last
- * bit in the node type.  This is possible by using bit 1 to indicate if bit 2
- * is part of the type or the slot.
- *
- * Once the type is decided, the decision of an allocation range type or a range
- * type is done by examining the immutable tree flag for the MAPLE_ALLOC_RANGE
- * flag.
- *
- *  Node types:
- *   0x??1 = Root
- *   0x?00 = 16 bit nodes
- *   0x010 = 32 bit nodes
- *   0x110 = 64 bit nodes
- *
- *  Slot size and location in the parent pointer:
- *   type  : slot location
- *   0x??1 : Root
- *   0x?00 : 16 bit values, type in 0-1, slot in 2-6
- *   0x010 : 32 bit values, type in 0-2, slot in 3-6
- *   0x110 : 64 bit values, type in 0-2, slot in 3-6
- */
-
-/*
- * This metadata is used to optimize the gap updating code and in reverse
- * searching for gaps or any other code that needs to find the end of the data.
- */
-struct maple_metadata {
-	unsigned char end;
-	unsigned char gap;
-};
-
-/*
- * Leaf nodes do not store pointers to nodes, they store user data.  Users may
- * store almost any bit pattern.  As noted above, the optimisation of storing an
- * entry at 0 in the root pointer cannot be done for data which have the bottom
- * two bits set to '10'.  We also reserve values with the bottom two bits set to
- * '10' which are below 4096 (ie 2, 6, 10 .. 4094) for internal use.  Some APIs
- * return errnos as a negative errno shifted right by two bits and the bottom
- * two bits set to '10', and while choosing to store these values in the array
- * is not an error, it may lead to confusion if you're testing for an error with
- * mas_is_err().
- *
- * Non-leaf nodes store the type of the node pointed to (enum maple_type in bits
- * 3-6), bit 2 is reserved.  That leaves bits 0-1 unused for now.
- *
- * In regular B-Tree terms, pivots are called keys.  The term pivot is used to
- * indicate that the tree is specifying ranges,  Pivots may appear in the
- * subtree with an entry attached to the value whereas keys are unique to a
- * specific position of a B-tree.  Pivot values are inclusive of the slot with
- * the same index.
- */
-
-struct maple_range_64 {
-	struct maple_pnode *parent;
-	unsigned long pivot[MAPLE_RANGE64_SLOTS - 1];
-	union {
-		void __rcu *slot[MAPLE_RANGE64_SLOTS];
-		struct {
-			void __rcu *pad[MAPLE_RANGE64_SLOTS - 1];
-			struct maple_metadata meta;
-		};
-	};
-};
-
-/*
- * At tree creation time, the user can specify that they're willing to trade off
- * storing fewer entries in a tree in return for storing more information in
- * each node.
- *
- * The maple tree supports recording the largest range of NULL entries available
- * in this node, also called gaps.  This optimises the tree for allocating a
- * range.
- */
-struct maple_arange_64 {
-	struct maple_pnode *parent;
-	unsigned long pivot[MAPLE_ARANGE64_SLOTS - 1];
-	void __rcu *slot[MAPLE_ARANGE64_SLOTS];
-	unsigned long gap[MAPLE_ARANGE64_SLOTS];
-	struct maple_metadata meta;
-};
-
-struct maple_alloc {
-	unsigned long total;
-	unsigned char node_count;
-	unsigned int request_count;
-	struct maple_alloc *slot[MAPLE_ALLOC_SLOTS];
-};
-
-struct maple_topiary {
-	struct maple_pnode *parent;
-	struct maple_enode *next; /* Overlaps the pivot */
-};
-
-enum maple_type {
-	maple_dense,
-	maple_leaf_64,
-	maple_range_64,
-	maple_arange_64,
-};
-
-
 /**
  * DOC: Maple tree flags
  *
@@ -181,7 +47,6 @@ enum maple_type {
 #define MAPLE_RESERVED_RANGE	4096
 
 #ifdef CONFIG_LOCKDEP
-typedef struct lockdep_map *lockdep_map_p;
 #define mt_lock_is_held(mt)                                             \
 	(!(mt)->ma_external_lock || lock_is_held((mt)->ma_external_lock))
 
@@ -194,36 +59,12 @@ typedef struct lockdep_map *lockdep_map_p;
 
 #define mt_on_stack(mt)			(mt).ma_external_lock = NULL
 #else
-typedef struct { /* nothing */ } lockdep_map_p;
 #define mt_lock_is_held(mt)		1
 #define mt_write_lock_is_held(mt)	1
 #define mt_set_external_lock(mt, lock)	do { } while (0)
 #define mt_on_stack(mt)			do { } while (0)
 #endif
 
-/*
- * If the tree contains a single entry at index 0, it is usually stored in
- * tree->ma_root.  To optimise for the page cache, an entry which ends in '00',
- * '01' or '11' is stored in the root, but an entry which ends in '10' will be
- * stored in a node.  Bits 3-6 are used to store enum maple_type.
- *
- * The flags are used both to store some immutable information about this tree
- * (set at tree creation time) and dynamic information set under the spinlock.
- *
- * Another use of flags are to indicate global states of the tree.  This is the
- * case with the MAPLE_USE_RCU flag, which indicates the tree is currently in
- * RCU mode.  This mode was added to allow the tree to reuse nodes instead of
- * re-allocating and RCU freeing nodes when there is a single user.
- */
-struct maple_tree {
-	union {
-		spinlock_t	ma_lock;
-		lockdep_map_p	ma_external_lock;
-	};
-	unsigned int	ma_flags;
-	void __rcu      *ma_root;
-};
-
 /**
  * MTREE_INIT() - Initialize a maple tree
  * @name: The maple tree name
@@ -260,56 +101,6 @@ struct maple_tree {
 		spin_lock_nested((&(mt)->ma_lock), subclass)
 #define mtree_unlock(mt)	spin_unlock((&(mt)->ma_lock))
 
-/*
- * The Maple Tree squeezes various bits in at various points which aren't
- * necessarily obvious.  Usually, this is done by observing that pointers are
- * N-byte aligned and thus the bottom log_2(N) bits are available for use.  We
- * don't use the high bits of pointers to store additional information because
- * we don't know what bits are unused on any given architecture.
- *
- * Nodes are 256 bytes in size and are also aligned to 256 bytes, giving us 8
- * low bits for our own purposes.  Nodes are currently of 4 types:
- * 1. Single pointer (Range is 0-0)
- * 2. Non-leaf Allocation Range nodes
- * 3. Non-leaf Range nodes
- * 4. Leaf Range nodes All nodes consist of a number of node slots,
- *    pivots, and a parent pointer.
- */
-
-struct maple_node {
-	union {
-		struct {
-			struct maple_pnode *parent;
-			void __rcu *slot[MAPLE_NODE_SLOTS];
-		};
-		struct {
-			void *pad;
-			struct rcu_head rcu;
-			struct maple_enode *piv_parent;
-			unsigned char parent_slot;
-			enum maple_type type;
-			unsigned char slot_len;
-			unsigned int ma_flags;
-		};
-		struct maple_range_64 mr64;
-		struct maple_arange_64 ma64;
-		struct maple_alloc alloc;
-	};
-};
-
-/*
- * More complicated stores can cause two nodes to become one or three and
- * potentially alter the height of the tree.  Either half of the tree may need
- * to be rebalanced against the other.  The ma_topiary struct is used to track
- * which nodes have been 'cut' from the tree so that the change can be done
- * safely at a later date.  This is done to support RCU.
- */
-struct ma_topiary {
-	struct maple_enode *head;
-	struct maple_enode *tail;
-	struct maple_tree *mtree;
-};
-
 void *mtree_load(struct maple_tree *mt, unsigned long index);
 
 int mtree_insert(struct maple_tree *mt, unsigned long index,
@@ -349,105 +140,6 @@ static inline bool mtree_empty(const struct maple_tree *mt)
 
 /* Advanced API */
 
-/*
- * Maple State Status
- * ma_active means the maple state is pointing to a node and offset and can
- * continue operating on the tree.
- * ma_start means we have not searched the tree.
- * ma_root means we have searched the tree and the entry we found lives in
- * the root of the tree (ie it has index 0, length 1 and is the only entry in
- * the tree).
- * ma_none means we have searched the tree and there is no node in the
- * tree for this entry.  For example, we searched for index 1 in an empty
- * tree.  Or we have a tree which points to a full leaf node and we
- * searched for an entry which is larger than can be contained in that
- * leaf node.
- * ma_pause means the data within the maple state may be stale, restart the
- * operation
- * ma_overflow means the search has reached the upper limit of the search
- * ma_underflow means the search has reached the lower limit of the search
- * ma_error means there was an error, check the node for the error number.
- */
-enum maple_status {
-	ma_active,
-	ma_start,
-	ma_root,
-	ma_none,
-	ma_pause,
-	ma_overflow,
-	ma_underflow,
-	ma_error,
-};
-
-/*
- * The maple state is defined in the struct ma_state and is used to keep track
- * of information during operations, and even between operations when using the
- * advanced API.
- *
- * If state->node has bit 0 set then it references a tree location which is not
- * a node (eg the root).  If bit 1 is set, the rest of the bits are a negative
- * errno.  Bit 2 (the 'unallocated slots' bit) is clear.  Bits 3-6 indicate the
- * node type.
- *
- * state->alloc either has a request number of nodes or an allocated node.  If
- * stat->alloc has a requested number of nodes, the first bit will be set (0x1)
- * and the remaining bits are the value.  If state->alloc is a node, then the
- * node will be of type maple_alloc.  maple_alloc has MAPLE_NODE_SLOTS - 1 for
- * storing more allocated nodes, a total number of nodes allocated, and the
- * node_count in this node.  node_count is the number of allocated nodes in this
- * node.  The scaling beyond MAPLE_NODE_SLOTS - 1 is handled by storing further
- * nodes into state->alloc->slot[0]'s node.  Nodes are taken from state->alloc
- * by removing a node from the state->alloc node until state->alloc->node_count
- * is 1, when state->alloc is returned and the state->alloc->slot[0] is promoted
- * to state->alloc.  Nodes are pushed onto state->alloc by putting the current
- * state->alloc into the pushed node's slot[0].
- *
- * The state also contains the implied min/max of the state->node, the depth of
- * this search, and the offset. The implied min/max are either from the parent
- * node or are 0-oo for the root node.  The depth is incremented or decremented
- * every time a node is walked down or up.  The offset is the slot/pivot of
- * interest in the node - either for reading or writing.
- *
- * When returning a value the maple state index and last respectively contain
- * the start and end of the range for the entry.  Ranges are inclusive in the
- * Maple Tree.
- *
- * The status of the state is used to determine how the next action should treat
- * the state.  For instance, if the status is ma_start then the next action
- * should start at the root of the tree and walk down.  If the status is
- * ma_pause then the node may be stale data and should be discarded.  If the
- * status is ma_overflow, then the last action hit the upper limit.
- *
- */
-struct ma_state {
-	struct maple_tree *tree;	/* The tree we're operating in */
-	unsigned long index;		/* The index we're operating on - range start */
-	unsigned long last;		/* The last index we're operating on - range end */
-	struct maple_enode *node;	/* The node containing this entry */
-	unsigned long min;		/* The minimum index of this node - implied pivot min */
-	unsigned long max;		/* The maximum index of this node - implied pivot max */
-	struct maple_alloc *alloc;	/* Allocated nodes for this operation */
-	enum maple_status status;	/* The status of the state (active, start, none, etc) */
-	unsigned char depth;		/* depth of tree descent during write */
-	unsigned char offset;
-	unsigned char mas_flags;
-	unsigned char end;		/* The end of the node */
-};
-
-struct ma_wr_state {
-	struct ma_state *mas;
-	struct maple_node *node;	/* Decoded mas->node */
-	unsigned long r_min;		/* range min */
-	unsigned long r_max;		/* range max */
-	enum maple_type type;		/* mas->node type */
-	unsigned char offset_end;	/* The offset where the write ends */
-	unsigned long *pivots;		/* mas->node->pivots pointer */
-	unsigned long end_piv;		/* The pivot at the offset end */
-	void __rcu **slots;		/* mas->node->slots pointer */
-	void *entry;			/* The entry to write */
-	void *content;			/* The existing entry that is being overwritten */
-};
-
 #define mas_lock(mas)           spin_lock(&((mas)->tree->ma_lock))
 #define mas_lock_nested(mas, subclass) \
 		spin_lock_nested(&((mas)->tree->ma_lock), subclass)
@@ -520,17 +212,6 @@ int mas_empty_area(struct ma_state *mas, unsigned long min, unsigned long max,
 int mas_empty_area_rev(struct ma_state *mas, unsigned long min,
 		       unsigned long max, unsigned long size);
 
-static inline void mas_init(struct ma_state *mas, struct maple_tree *tree,
-			    unsigned long addr)
-{
-	memset(mas, 0, sizeof(struct ma_state));
-	mas->tree = tree;
-	mas->index = mas->last = addr;
-	mas->max = ULONG_MAX;
-	mas->status = ma_start;
-	mas->node = NULL;
-}
-
 static inline bool mas_is_active(struct ma_state *mas)
 {
 	return mas->status == ma_active;
diff --git a/include/linux/maple_tree_types.h b/include/linux/maple_tree_types.h
new file mode 100644
index 000000000000..fe13414f239d
--- /dev/null
+++ b/include/linux/maple_tree_types.h
@@ -0,0 +1,341 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
+#ifndef _LINUX_MAPLE_TREE_TYPES_H
+#define _LINUX_MAPLE_TREE_TYPES_H
+/*
+ * Maple Tree - An RCU-safe adaptive tree for storing ranges
+ * Copyright (c) 2018-2022 Oracle
+ * Authors:     Liam R. Howlett <Liam.Howlett@...cle.com>
+ *              Matthew Wilcox <willy@...radead.org>
+ */
+
+#include <linux/spinlock_types.h>
+#include <linux/string.h> // for memset()
+#include <linux/limits.h> // for ULONG_MAX
+
+/*
+ * Allocated nodes are mutable until they have been inserted into the tree,
+ * at which time they cannot change their type until they have been removed
+ * from the tree and an RCU grace period has passed.
+ *
+ * Removed nodes have their ->parent set to point to themselves.  RCU readers
+ * check ->parent before relying on the value that they loaded from the
+ * slots array.  This lets us reuse the slots array for the RCU head.
+ *
+ * Nodes in the tree point to their parent unless bit 0 is set.
+ */
+#if defined(CONFIG_64BIT) || defined(BUILD_VDSO32_64)
+/* 64bit sizes */
+#define MAPLE_NODE_SLOTS	31	/* 256 bytes including ->parent */
+#define MAPLE_RANGE64_SLOTS	16	/* 256 bytes */
+#define MAPLE_ARANGE64_SLOTS	10	/* 240 bytes */
+#define MAPLE_ALLOC_SLOTS	(MAPLE_NODE_SLOTS - 1)
+#else
+/* 32bit sizes */
+#define MAPLE_NODE_SLOTS	63	/* 256 bytes including ->parent */
+#define MAPLE_RANGE64_SLOTS	32	/* 256 bytes */
+#define MAPLE_ARANGE64_SLOTS	21	/* 240 bytes */
+#define MAPLE_ALLOC_SLOTS	(MAPLE_NODE_SLOTS - 2)
+#endif /* defined(CONFIG_64BIT) || defined(BUILD_VDSO32_64) */
+
+#define MAPLE_NODE_MASK		255UL
+
+/*
+ * The node->parent of the root node has bit 0 set and the rest of the pointer
+ * is a pointer to the tree itself.  No more bits are available in this pointer
+ * (on m68k, the data structure may only be 2-byte aligned).
+ *
+ * Internal non-root nodes can only have maple_range_* nodes as parents.  The
+ * parent pointer is 256B aligned like all other tree nodes.  When storing a 32
+ * or 64 bit values, the offset can fit into 4 bits.  The 16 bit values need an
+ * extra bit to store the offset.  This extra bit comes from a reuse of the last
+ * bit in the node type.  This is possible by using bit 1 to indicate if bit 2
+ * is part of the type or the slot.
+ *
+ * Once the type is decided, the decision of an allocation range type or a range
+ * type is done by examining the immutable tree flag for the MAPLE_ALLOC_RANGE
+ * flag.
+ *
+ *  Node types:
+ *   0x??1 = Root
+ *   0x?00 = 16 bit nodes
+ *   0x010 = 32 bit nodes
+ *   0x110 = 64 bit nodes
+ *
+ *  Slot size and location in the parent pointer:
+ *   type  : slot location
+ *   0x??1 : Root
+ *   0x?00 : 16 bit values, type in 0-1, slot in 2-6
+ *   0x010 : 32 bit values, type in 0-2, slot in 3-6
+ *   0x110 : 64 bit values, type in 0-2, slot in 3-6
+ */
+
+/*
+ * This metadata is used to optimize the gap updating code and in reverse
+ * searching for gaps or any other code that needs to find the end of the data.
+ */
+struct maple_metadata {
+	unsigned char end;
+	unsigned char gap;
+};
+
+/*
+ * Leaf nodes do not store pointers to nodes, they store user data.  Users may
+ * store almost any bit pattern.  As noted above, the optimisation of storing an
+ * entry at 0 in the root pointer cannot be done for data which have the bottom
+ * two bits set to '10'.  We also reserve values with the bottom two bits set to
+ * '10' which are below 4096 (ie 2, 6, 10 .. 4094) for internal use.  Some APIs
+ * return errnos as a negative errno shifted right by two bits and the bottom
+ * two bits set to '10', and while choosing to store these values in the array
+ * is not an error, it may lead to confusion if you're testing for an error with
+ * mas_is_err().
+ *
+ * Non-leaf nodes store the type of the node pointed to (enum maple_type in bits
+ * 3-6), bit 2 is reserved.  That leaves bits 0-1 unused for now.
+ *
+ * In regular B-Tree terms, pivots are called keys.  The term pivot is used to
+ * indicate that the tree is specifying ranges,  Pivots may appear in the
+ * subtree with an entry attached to the value whereas keys are unique to a
+ * specific position of a B-tree.  Pivot values are inclusive of the slot with
+ * the same index.
+ */
+
+struct maple_range_64 {
+	struct maple_pnode *parent;
+	unsigned long pivot[MAPLE_RANGE64_SLOTS - 1];
+	union {
+		void __rcu *slot[MAPLE_RANGE64_SLOTS];
+		struct {
+			void __rcu *pad[MAPLE_RANGE64_SLOTS - 1];
+			struct maple_metadata meta;
+		};
+	};
+};
+
+/*
+ * At tree creation time, the user can specify that they're willing to trade off
+ * storing fewer entries in a tree in return for storing more information in
+ * each node.
+ *
+ * The maple tree supports recording the largest range of NULL entries available
+ * in this node, also called gaps.  This optimises the tree for allocating a
+ * range.
+ */
+struct maple_arange_64 {
+	struct maple_pnode *parent;
+	unsigned long pivot[MAPLE_ARANGE64_SLOTS - 1];
+	void __rcu *slot[MAPLE_ARANGE64_SLOTS];
+	unsigned long gap[MAPLE_ARANGE64_SLOTS];
+	struct maple_metadata meta;
+};
+
+struct maple_alloc {
+	unsigned long total;
+	unsigned char node_count;
+	unsigned int request_count;
+	struct maple_alloc *slot[MAPLE_ALLOC_SLOTS];
+};
+
+struct maple_topiary {
+	struct maple_pnode *parent;
+	struct maple_enode *next; /* Overlaps the pivot */
+};
+
+enum maple_type {
+	maple_dense,
+	maple_leaf_64,
+	maple_range_64,
+	maple_arange_64,
+};
+
+#ifdef CONFIG_LOCKDEP
+typedef struct lockdep_map *lockdep_map_p;
+#else
+typedef struct { /* nothing */ } lockdep_map_p;
+#endif
+
+/*
+ * If the tree contains a single entry at index 0, it is usually stored in
+ * tree->ma_root.  To optimise for the page cache, an entry which ends in '00',
+ * '01' or '11' is stored in the root, but an entry which ends in '10' will be
+ * stored in a node.  Bits 3-6 are used to store enum maple_type.
+ *
+ * The flags are used both to store some immutable information about this tree
+ * (set at tree creation time) and dynamic information set under the spinlock.
+ *
+ * Another use of flags are to indicate global states of the tree.  This is the
+ * case with the MAPLE_USE_RCU flag, which indicates the tree is currently in
+ * RCU mode.  This mode was added to allow the tree to reuse nodes instead of
+ * re-allocating and RCU freeing nodes when there is a single user.
+ */
+struct maple_tree {
+	union {
+		spinlock_t	ma_lock;
+		lockdep_map_p	ma_external_lock;
+	};
+	unsigned int	ma_flags;
+	void __rcu      *ma_root;
+};
+
+/*
+ * The Maple Tree squeezes various bits in at various points which aren't
+ * necessarily obvious.  Usually, this is done by observing that pointers are
+ * N-byte aligned and thus the bottom log_2(N) bits are available for use.  We
+ * don't use the high bits of pointers to store additional information because
+ * we don't know what bits are unused on any given architecture.
+ *
+ * Nodes are 256 bytes in size and are also aligned to 256 bytes, giving us 8
+ * low bits for our own purposes.  Nodes are currently of 4 types:
+ * 1. Single pointer (Range is 0-0)
+ * 2. Non-leaf Allocation Range nodes
+ * 3. Non-leaf Range nodes
+ * 4. Leaf Range nodes All nodes consist of a number of node slots,
+ *    pivots, and a parent pointer.
+ */
+
+struct maple_node {
+	union {
+		struct {
+			struct maple_pnode *parent;
+			void __rcu *slot[MAPLE_NODE_SLOTS];
+		};
+		struct {
+			void *pad;
+			struct rcu_head rcu;
+			struct maple_enode *piv_parent;
+			unsigned char parent_slot;
+			enum maple_type type;
+			unsigned char slot_len;
+			unsigned int ma_flags;
+		};
+		struct maple_range_64 mr64;
+		struct maple_arange_64 ma64;
+		struct maple_alloc alloc;
+	};
+};
+
+/*
+ * More complicated stores can cause two nodes to become one or three and
+ * potentially alter the height of the tree.  Either half of the tree may need
+ * to be rebalanced against the other.  The ma_topiary struct is used to track
+ * which nodes have been 'cut' from the tree so that the change can be done
+ * safely at a later date.  This is done to support RCU.
+ */
+struct ma_topiary {
+	struct maple_enode *head;
+	struct maple_enode *tail;
+	struct maple_tree *mtree;
+};
+
+/* Advanced API */
+
+/*
+ * Maple State Status
+ * ma_active means the maple state is pointing to a node and offset and can
+ * continue operating on the tree.
+ * ma_start means we have not searched the tree.
+ * ma_root means we have searched the tree and the entry we found lives in
+ * the root of the tree (ie it has index 0, length 1 and is the only entry in
+ * the tree).
+ * ma_none means we have searched the tree and there is no node in the
+ * tree for this entry.  For example, we searched for index 1 in an empty
+ * tree.  Or we have a tree which points to a full leaf node and we
+ * searched for an entry which is larger than can be contained in that
+ * leaf node.
+ * ma_pause means the data within the maple state may be stale, restart the
+ * operation
+ * ma_overflow means the search has reached the upper limit of the search
+ * ma_underflow means the search has reached the lower limit of the search
+ * ma_error means there was an error, check the node for the error number.
+ */
+enum maple_status {
+	ma_active,
+	ma_start,
+	ma_root,
+	ma_none,
+	ma_pause,
+	ma_overflow,
+	ma_underflow,
+	ma_error,
+};
+
+/*
+ * The maple state is defined in the struct ma_state and is used to keep track
+ * of information during operations, and even between operations when using the
+ * advanced API.
+ *
+ * If state->node has bit 0 set then it references a tree location which is not
+ * a node (eg the root).  If bit 1 is set, the rest of the bits are a negative
+ * errno.  Bit 2 (the 'unallocated slots' bit) is clear.  Bits 3-6 indicate the
+ * node type.
+ *
+ * state->alloc either has a request number of nodes or an allocated node.  If
+ * stat->alloc has a requested number of nodes, the first bit will be set (0x1)
+ * and the remaining bits are the value.  If state->alloc is a node, then the
+ * node will be of type maple_alloc.  maple_alloc has MAPLE_NODE_SLOTS - 1 for
+ * storing more allocated nodes, a total number of nodes allocated, and the
+ * node_count in this node.  node_count is the number of allocated nodes in this
+ * node.  The scaling beyond MAPLE_NODE_SLOTS - 1 is handled by storing further
+ * nodes into state->alloc->slot[0]'s node.  Nodes are taken from state->alloc
+ * by removing a node from the state->alloc node until state->alloc->node_count
+ * is 1, when state->alloc is returned and the state->alloc->slot[0] is promoted
+ * to state->alloc.  Nodes are pushed onto state->alloc by putting the current
+ * state->alloc into the pushed node's slot[0].
+ *
+ * The state also contains the implied min/max of the state->node, the depth of
+ * this search, and the offset. The implied min/max are either from the parent
+ * node or are 0-oo for the root node.  The depth is incremented or decremented
+ * every time a node is walked down or up.  The offset is the slot/pivot of
+ * interest in the node - either for reading or writing.
+ *
+ * When returning a value the maple state index and last respectively contain
+ * the start and end of the range for the entry.  Ranges are inclusive in the
+ * Maple Tree.
+ *
+ * The status of the state is used to determine how the next action should treat
+ * the state.  For instance, if the status is ma_start then the next action
+ * should start at the root of the tree and walk down.  If the status is
+ * ma_pause then the node may be stale data and should be discarded.  If the
+ * status is ma_overflow, then the last action hit the upper limit.
+ *
+ */
+struct ma_state {
+	struct maple_tree *tree;	/* The tree we're operating in */
+	unsigned long index;		/* The index we're operating on - range start */
+	unsigned long last;		/* The last index we're operating on - range end */
+	struct maple_enode *node;	/* The node containing this entry */
+	unsigned long min;		/* The minimum index of this node - implied pivot min */
+	unsigned long max;		/* The maximum index of this node - implied pivot max */
+	struct maple_alloc *alloc;	/* Allocated nodes for this operation */
+	enum maple_status status;	/* The status of the state (active, start, none, etc) */
+	unsigned char depth;		/* depth of tree descent during write */
+	unsigned char offset;
+	unsigned char mas_flags;
+	unsigned char end;		/* The end of the node */
+};
+
+struct ma_wr_state {
+	struct ma_state *mas;
+	struct maple_node *node;	/* Decoded mas->node */
+	unsigned long r_min;		/* range min */
+	unsigned long r_max;		/* range max */
+	enum maple_type type;		/* mas->node type */
+	unsigned char offset_end;	/* The offset where the write ends */
+	unsigned long *pivots;		/* mas->node->pivots pointer */
+	unsigned long end_piv;		/* The pivot at the offset end */
+	void __rcu **slots;		/* mas->node->slots pointer */
+	void *entry;			/* The entry to write */
+	void *content;			/* The existing entry that is being overwritten */
+};
+
+static inline void mas_init(struct ma_state *mas, struct maple_tree *tree,
+			    unsigned long addr)
+{
+	memset(mas, 0, sizeof(struct ma_state));
+	mas->tree = tree;
+	mas->index = mas->last = addr;
+	mas->max = ULONG_MAX;
+	mas->status = ma_start;
+	mas->node = NULL;
+}
+
+#endif /*_LINUX_MAPLE_TREE_TYPES_H */
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 95c29b8f573d..0e6bdce6e4e8 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -13,6 +13,7 @@
 #include <linux/debug_locks.h>
 #include <linux/mm_types.h>
 #include <linux/mmap_lock.h>
+#include <linux/maple_tree.h>
 #include <linux/range.h>
 #include <linux/pfn.h>
 #include <linux/bit_spinlock.h>
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index e44ef7852019..ff5d33ace0f9 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -11,7 +11,7 @@
 #include <linux/list.h>
 #include <linux/spinlock_types.h>
 #include <linux/rbtree_types.h>
-#include <linux/maple_tree.h>
+#include <linux/maple_tree_types.h>
 #include <linux/rwsem.h>
 #include <linux/cpumask.h>
 #include <linux/uprobes.h>
-- 
2.39.2


Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ