[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20260206182336.1397715-10-neelx@suse.com>
Date: Fri, 6 Feb 2026 19:22:41 +0100
From: Daniel Vacek <neelx@...e.com>
To: Chris Mason <clm@...com>,
Josef Bacik <josef@...icpanda.com>,
Eric Biggers <ebiggers@...nel.org>,
"Theodore Y. Ts'o" <tytso@....edu>,
Jaegeuk Kim <jaegeuk@...nel.org>,
Jens Axboe <axboe@...nel.dk>,
David Sterba <dsterba@...e.com>
Cc: linux-block@...r.kernel.org,
Daniel Vacek <neelx@...e.com>,
linux-fscrypt@...r.kernel.org,
linux-btrfs@...r.kernel.org,
linux-kernel@...r.kernel.org,
Boris Burkov <boris@....io>
Subject: [PATCH v6 09/43] btrfs: add infrastructure for safe em freeing
From: Josef Bacik <josef@...icpanda.com>
When we add fscrypt support we're going to have fscrypt objects hanging
off of extent_maps. This includes a block key, which we may have to
unregister from the block layer if we're the last one freeing it.
This requires taking a semaphore in the block layer, which means we
can't free ems under the extent map tree lock.
Thankfully we only do this in two places: where we're dropping a
range of extent maps, and where we're freeing logged extents. Add a
btrfs_free_extent_map_safe() which will add the em to a list in the
em_tree if we dropped the last reference to it. Currently this is
unconditional but will be changed to conditional on the fscrypt object
we will add in a later patch.
To process these delayed objects add a btrfs_free_pending_extent_maps()
that is called after the lock has been dropped on the em_tree. This will
process the extent maps on the freed list and do the appropriate freeing
work in a safe manner.
Signed-off-by: Josef Bacik <josef@...icpanda.com>
Reviewed-by: Boris Burkov <boris@....io>
Signed-off-by: Daniel Vacek <neelx@...e.com>
---
v5: https://lore.kernel.org/linux-btrfs/6cf44f7860e94de68df242e69f4c5250bd061cff.1706116485.git.josef@toxicpanda.com/
* No changes since (other than simple function renames).
---
fs/btrfs/extent_map.c | 76 +++++++++++++++++++++++++++++++++++++++++--
fs/btrfs/extent_map.h | 10 ++++++
fs/btrfs/tree-log.c | 6 ++--
3 files changed, 87 insertions(+), 5 deletions(-)
diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c
index 095a561d733f..58589fc11802 100644
--- a/fs/btrfs/extent_map.c
+++ b/fs/btrfs/extent_map.c
@@ -34,7 +34,9 @@ void __cold btrfs_extent_map_exit(void)
void btrfs_extent_map_tree_init(struct extent_map_tree *tree)
{
tree->root = RB_ROOT;
+ tree->flags = 0;
INIT_LIST_HEAD(&tree->modified_extents);
+ INIT_LIST_HEAD(&tree->freed_extents);
rwlock_init(&tree->lock);
}
@@ -51,9 +53,15 @@ struct extent_map *btrfs_alloc_extent_map(void)
RB_CLEAR_NODE(&em->rb_node);
refcount_set(&em->refs, 1);
INIT_LIST_HEAD(&em->list);
+ INIT_LIST_HEAD(&em->free_list);
return em;
}
+static void free_extent_map(struct extent_map *em)
+{
+ kmem_cache_free(extent_map_cache, em);
+}
+
/*
* Drop the reference out on @em by one and free the structure if the reference
* count hits zero.
@@ -65,10 +73,69 @@ void btrfs_free_extent_map(struct extent_map *em)
if (refcount_dec_and_test(&em->refs)) {
WARN_ON(btrfs_extent_map_in_tree(em));
WARN_ON(!list_empty(&em->list));
- kmem_cache_free(extent_map_cache, em);
+ free_extent_map(em);
+ }
+}
+
+/*
+ * Drop a ref for the extent map in the given tree.
+ *
+ * @tree: tree that the em is a part of.
+ * @em: the em to drop the reference to.
+ *
+ * Drop the reference count on @em by one, if the reference count hits 0 and
+ * there is an object on the em that can't be safely freed in the current
+ * context (if we are holding the extent_map_tree->lock for example), then add
+ * it to the freed_extents list on the extent_map_tree for later processing.
+ *
+ * This must be followed by a btrfs_free_pending_extent_maps() to clear
+ * the pending frees.
+ */
+void btrfs_free_extent_map_safe(struct extent_map_tree *tree,
+ struct extent_map *em)
+{
+ lockdep_assert_held_write(&tree->lock);
+
+ if (!em)
+ return;
+
+ if (refcount_dec_and_test(&em->refs)) {
+ WARN_ON(btrfs_extent_map_in_tree(em));
+ WARN_ON(!list_empty(&em->list));
+ list_add_tail(&em->free_list, &tree->freed_extents);
+ set_bit(EXTENT_MAP_TREE_PENDING_FREES, &tree->flags);
}
}
+/*
+ * Free the em objects that exist on the em tree
+ *
+ * @tree: the tree to free the objects from.
+ *
+ * If there are any objects on the tree->freed_extents list go ahead and
+ * free them here in a safe way. This is to be coupled with any uses of
+ * btrfs_free_extent_map_safe().
+ */
+void btrfs_free_pending_extent_maps(struct extent_map_tree *tree)
+{
+ struct extent_map *em;
+
+ /* Avoid taking the write lock if we don't have any pending frees. */
+ if (!test_and_clear_bit(EXTENT_MAP_TREE_PENDING_FREES, &tree->flags))
+ return;
+
+ write_lock(&tree->lock);
+ while ((em = list_first_entry_or_null(&tree->freed_extents,
+ struct extent_map, free_list))) {
+ list_del_init(&em->free_list);
+ write_unlock(&tree->lock);
+ free_extent_map(em);
+ cond_resched();
+ write_lock(&tree->lock);
+ }
+ write_unlock(&tree->lock);
+}
+
/* Do the math around the end of an extent, handling wrapping. */
static u64 range_end(u64 start, u64 len)
{
@@ -784,7 +851,7 @@ static void drop_all_extent_maps_fast(struct btrfs_inode *inode)
em = rb_entry(node, struct extent_map, rb_node);
em->flags &= ~(EXTENT_FLAG_PINNED | EXTENT_FLAG_LOGGING);
btrfs_remove_extent_mapping(inode, em);
- btrfs_free_extent_map(em);
+ btrfs_free_extent_map_safe(tree, em);
if (cond_resched_rwlock_write(&tree->lock))
node = rb_first(&tree->root);
@@ -792,6 +859,8 @@ static void drop_all_extent_maps_fast(struct btrfs_inode *inode)
node = next;
}
write_unlock(&tree->lock);
+
+ btrfs_free_pending_extent_maps(tree);
}
/*
@@ -986,13 +1055,14 @@ void btrfs_drop_extent_map_range(struct btrfs_inode *inode, u64 start, u64 end,
btrfs_free_extent_map(em);
next:
/* Once for us (for our lookup reference). */
- btrfs_free_extent_map(em);
+ btrfs_free_extent_map_safe(em_tree, em);
em = next_em;
}
write_unlock(&em_tree->lock);
+ btrfs_free_pending_extent_maps(em_tree);
btrfs_free_extent_map(split);
btrfs_free_extent_map(split2);
}
diff --git a/fs/btrfs/extent_map.h b/fs/btrfs/extent_map.h
index 6f685f3c9327..a962012be1c3 100644
--- a/fs/btrfs/extent_map.h
+++ b/fs/btrfs/extent_map.h
@@ -97,11 +97,18 @@ struct extent_map {
u32 flags;
refcount_t refs;
struct list_head list;
+ struct list_head free_list;
+};
+
+enum extent_map_flags {
+ EXTENT_MAP_TREE_PENDING_FREES,
};
struct extent_map_tree {
struct rb_root root;
+ unsigned long flags;
struct list_head modified_extents;
+ struct list_head freed_extents;
rwlock_t lock;
};
@@ -175,6 +182,9 @@ int btrfs_split_extent_map(struct btrfs_inode *inode, u64 start, u64 len, u64 pr
struct extent_map *btrfs_alloc_extent_map(void);
void btrfs_free_extent_map(struct extent_map *em);
+void btrfs_free_extent_map_safe(struct extent_map_tree *tree,
+ struct extent_map *em);
+void btrfs_free_pending_extent_maps(struct extent_map_tree *tree);
int __init btrfs_extent_map_init(void);
void __cold btrfs_extent_map_exit(void);
int btrfs_unpin_extent_cache(struct btrfs_inode *inode, u64 start, u64 len, u64 gen);
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index e1bd03ebfd98..4034c04d4d63 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -5383,7 +5383,7 @@ static int btrfs_log_changed_extents(struct btrfs_trans_handle *trans,
*/
if (ret) {
btrfs_clear_em_logging(inode, em);
- btrfs_free_extent_map(em);
+ btrfs_free_extent_map_safe(tree, em);
continue;
}
@@ -5392,11 +5392,13 @@ static int btrfs_log_changed_extents(struct btrfs_trans_handle *trans,
ret = log_one_extent(trans, inode, em, path, ctx);
write_lock(&tree->lock);
btrfs_clear_em_logging(inode, em);
- btrfs_free_extent_map(em);
+ btrfs_free_extent_map_safe(tree, em);
}
WARN_ON(!list_empty(&extents));
write_unlock(&tree->lock);
+ btrfs_free_pending_extent_maps(tree);
+
if (!ret)
ret = btrfs_log_prealloc_extents(trans, inode, path, ctx);
if (ret)
--
2.51.0
Powered by blists - more mailing lists