[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <CAL3q7H4n+7ZN228X9VGvgLX9S7cw5J27cJSGSiSCHjFbY9htLQ@mail.gmail.com>
Date: Mon, 25 Nov 2024 11:23:17 +0000
From: Filipe Manana <fdmanana@...nel.org>
To: Sasha Levin <sashal@...nel.org>
Cc: linux-kernel@...r.kernel.org, stable@...r.kernel.org,
Robbie Ko <robbieko@...ology.com>, Filipe Manana <fdmanana@...e.com>,
David Sterba <dsterba@...e.com>, clm@...com, josef@...icpanda.com,
linux-btrfs@...r.kernel.org
Subject: Re: [PATCH AUTOSEL 6.12 13/19] btrfs: reduce lock contention when eb
cache miss for btree search
On Sun, Nov 24, 2024 at 12:46 PM Sasha Levin <sashal@...nel.org> wrote:
>
> From: Robbie Ko <robbieko@...ology.com>
>
> [ Upstream commit 99785998ed1cea142e20f4904ced26537a37bf74 ]
Why is this being picked for stable?
It's not a bug fix or anything critical.
It's just a performance optimization, and it's not even one where we
know (AFAIK) of any workload where it would give very significant
gains to justify backporting to stable.
Thanks.
>
> When crawling btree, if an eb cache miss occurs, we change to use the eb
> read lock and release all previous locks (including the parent lock) to
> reduce lock contention.
>
> If an eb cache miss occurs in a leaf and needs to execute IO, before this
> change we released locks only from level 2 and up and we read a leaf's
> content from disk while holding a lock on its parent (level 1), causing
> the unnecessary lock contention on the parent, after this change we
> release locks from level 1 and up, but we lock level 0, and read leaf's
> content from disk.
>
> Because we have prepared the check parameters and the read lock of eb we
> hold, we can ensure that no race will occur during the check and cause
> unexpected errors.
>
> Reviewed-by: Filipe Manana <fdmanana@...e.com>
> Signed-off-by: Robbie Ko <robbieko@...ology.com>
> Signed-off-by: David Sterba <dsterba@...e.com>
> Signed-off-by: Sasha Levin <sashal@...nel.org>
> ---
> fs/btrfs/ctree.c | 101 ++++++++++++++++++++++++++++++++---------------
> 1 file changed, 70 insertions(+), 31 deletions(-)
>
> diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
> index 0cc919d15b144..dd92acd66624f 100644
> --- a/fs/btrfs/ctree.c
> +++ b/fs/btrfs/ctree.c
> @@ -1515,12 +1515,14 @@ read_block_for_search(struct btrfs_root *root, struct btrfs_path *p,
> struct btrfs_tree_parent_check check = { 0 };
> u64 blocknr;
> u64 gen;
> - struct extent_buffer *tmp;
> - int ret;
> + struct extent_buffer *tmp = NULL;
> + int ret = 0;
> int parent_level;
> - bool unlock_up;
> + int err;
> + bool read_tmp = false;
> + bool tmp_locked = false;
> + bool path_released = false;
>
> - unlock_up = ((level + 1 < BTRFS_MAX_LEVEL) && p->locks[level + 1]);
> blocknr = btrfs_node_blockptr(*eb_ret, slot);
> gen = btrfs_node_ptr_generation(*eb_ret, slot);
> parent_level = btrfs_header_level(*eb_ret);
> @@ -1551,68 +1553,105 @@ read_block_for_search(struct btrfs_root *root, struct btrfs_path *p,
> */
> if (btrfs_verify_level_key(tmp,
> parent_level - 1, &check.first_key, gen)) {
> - free_extent_buffer(tmp);
> - return -EUCLEAN;
> + ret = -EUCLEAN;
> + goto out;
> }
> *eb_ret = tmp;
> - return 0;
> + tmp = NULL;
> + ret = 0;
> + goto out;
> }
>
> if (p->nowait) {
> - free_extent_buffer(tmp);
> - return -EAGAIN;
> + ret = -EAGAIN;
> + goto out;
> }
>
> - if (unlock_up)
> + if (!p->skip_locking) {
> btrfs_unlock_up_safe(p, level + 1);
> -
> - /* now we're allowed to do a blocking uptodate check */
> - ret = btrfs_read_extent_buffer(tmp, &check);
> - if (ret) {
> - free_extent_buffer(tmp);
> + tmp_locked = true;
> + btrfs_tree_read_lock(tmp);
> btrfs_release_path(p);
> - return ret;
> + ret = -EAGAIN;
> + path_released = true;
> }
>
> - if (unlock_up)
> - ret = -EAGAIN;
> + /* Now we're allowed to do a blocking uptodate check. */
> + err = btrfs_read_extent_buffer(tmp, &check);
> + if (err) {
> + ret = err;
> + goto out;
> + }
>
> + if (ret == 0) {
> + ASSERT(!tmp_locked);
> + *eb_ret = tmp;
> + tmp = NULL;
> + }
> goto out;
> } else if (p->nowait) {
> - return -EAGAIN;
> + ret = -EAGAIN;
> + goto out;
> }
>
> - if (unlock_up) {
> + if (!p->skip_locking) {
> btrfs_unlock_up_safe(p, level + 1);
> ret = -EAGAIN;
> - } else {
> - ret = 0;
> }
>
> if (p->reada != READA_NONE)
> reada_for_search(fs_info, p, level, slot, key->objectid);
>
> - tmp = read_tree_block(fs_info, blocknr, &check);
> + tmp = btrfs_find_create_tree_block(fs_info, blocknr, check.owner_root, check.level);
> if (IS_ERR(tmp)) {
> + ret = PTR_ERR(tmp);
> + tmp = NULL;
> + goto out;
> + }
> + read_tmp = true;
> +
> + if (!p->skip_locking) {
> + ASSERT(ret == -EAGAIN);
> + tmp_locked = true;
> + btrfs_tree_read_lock(tmp);
> btrfs_release_path(p);
> - return PTR_ERR(tmp);
> + path_released = true;
> + }
> +
> + /* Now we're allowed to do a blocking uptodate check. */
> + err = btrfs_read_extent_buffer(tmp, &check);
> + if (err) {
> + ret = err;
> + goto out;
> }
> +
> /*
> * If the read above didn't mark this buffer up to date,
> * it will never end up being up to date. Set ret to EIO now
> * and give up so that our caller doesn't loop forever
> * on our EAGAINs.
> */
> - if (!extent_buffer_uptodate(tmp))
> + if (!extent_buffer_uptodate(tmp)) {
> ret = -EIO;
> + goto out;
> + }
>
> -out:
> if (ret == 0) {
> + ASSERT(!tmp_locked);
> *eb_ret = tmp;
> - } else {
> - free_extent_buffer(tmp);
> - btrfs_release_path(p);
> + tmp = NULL;
> + }
> +out:
> + if (tmp) {
> + if (tmp_locked)
> + btrfs_tree_read_unlock(tmp);
> + if (read_tmp && ret && ret != -EAGAIN)
> + free_extent_buffer_stale(tmp);
> + else
> + free_extent_buffer(tmp);
> }
> + if (ret && !path_released)
> + btrfs_release_path(p);
>
> return ret;
> }
> @@ -2198,7 +2237,7 @@ int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root *root,
> }
>
> err = read_block_for_search(root, p, &b, level, slot, key);
> - if (err == -EAGAIN)
> + if (err == -EAGAIN && !p->nowait)
> goto again;
> if (err) {
> ret = err;
> @@ -2325,7 +2364,7 @@ int btrfs_search_old_slot(struct btrfs_root *root, const struct btrfs_key *key,
> }
>
> err = read_block_for_search(root, p, &b, level, slot, key);
> - if (err == -EAGAIN)
> + if (err == -EAGAIN && !p->nowait)
> goto again;
> if (err) {
> ret = err;
> --
> 2.43.0
>
>
Powered by blists - more mailing lists