[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <6de87230-f981-411c-b173-55871e4d4720@gmx.com>
Date: Thu, 14 Sep 2023 18:48:17 +0930
From: Qu Wenruo <quwenruo.btrfs@....com>
To: Johannes Thumshirn <johannes.thumshirn@....com>,
Chris Mason <clm@...com>, Josef Bacik <josef@...icpanda.com>,
David Sterba <dsterba@...e.com>
Cc: Christoph Hellwig <hch@....de>,
Naohiro Aota <naohiro.aota@....com>, Qu Wenruo <wqu@...e.com>,
Damien Le Moal <dlemoal@...nel.org>,
linux-btrfs@...r.kernel.org, linux-kernel@...r.kernel.org
Subject: Re: [PATCH v8 05/11] btrfs: lookup physical address from stripe
extent
On 2023/9/11 22:22, Johannes Thumshirn wrote:
> Lookup the physical address from the raid stripe tree when a read on an
> RAID volume formatted with the raid stripe tree was attempted.
>
> If the requested logical address was not found in the stripe tree, it may
> still be in the in-memory ordered stripe tree, so fallback to searching
> the ordered stripe tree in this case.
>
> Signed-off-by: Johannes Thumshirn <johannes.thumshirn@....com>
> ---
> fs/btrfs/raid-stripe-tree.c | 159 ++++++++++++++++++++++++++++++++++++++++++++
> fs/btrfs/raid-stripe-tree.h | 11 +++
> fs/btrfs/volumes.c | 37 ++++++++---
> 3 files changed, 198 insertions(+), 9 deletions(-)
>
> diff --git a/fs/btrfs/raid-stripe-tree.c b/fs/btrfs/raid-stripe-tree.c
> index 5b12f40877b5..7ed02e4b79ec 100644
> --- a/fs/btrfs/raid-stripe-tree.c
> +++ b/fs/btrfs/raid-stripe-tree.c
> @@ -324,3 +324,162 @@ int btrfs_insert_raid_extent(struct btrfs_trans_handle *trans,
>
> return ret;
> }
> +
> +static bool btrfs_check_for_extent(struct btrfs_fs_info *fs_info, u64 logical,
> + u64 length, struct btrfs_path *path)
> +{
> + struct btrfs_root *extent_root = btrfs_extent_root(fs_info, logical);
> + struct btrfs_key key;
> + int ret;
> +
> + btrfs_release_path(path);
> +
> + key.objectid = logical;
> + key.type = BTRFS_EXTENT_ITEM_KEY;
> + key.offset = length;
> +
> + ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0);
> +
> + return ret;
> +}
> +
> +int btrfs_get_raid_extent_offset(struct btrfs_fs_info *fs_info,
> + u64 logical, u64 *length, u64 map_type,
> + u32 stripe_index,
> + struct btrfs_io_stripe *stripe)
> +{
> + struct btrfs_root *stripe_root = btrfs_stripe_tree_root(fs_info);
> + struct btrfs_stripe_extent *stripe_extent;
> + struct btrfs_key stripe_key;
> + struct btrfs_key found_key;
> + struct btrfs_path *path;
> + struct extent_buffer *leaf;
> + int num_stripes;
> + u8 encoding;
> + u64 offset;
> + u64 found_logical;
> + u64 found_length;
> + u64 end;
> + u64 found_end;
> + int slot;
> + int ret;
> + int i;
> +
> + stripe_key.objectid = logical;
> + stripe_key.type = BTRFS_RAID_STRIPE_KEY;
> + stripe_key.offset = 0;
> +
> + path = btrfs_alloc_path();
> + if (!path)
> + return -ENOMEM;
> +
> + ret = btrfs_search_slot(NULL, stripe_root, &stripe_key, path, 0, 0);
> + if (ret < 0)
> + goto free_path;
> + if (ret) {
> + if (path->slots[0] != 0)
> + path->slots[0]--;
IIRC we have btrfs_previous_item() to do the backward search.
> + }
> +
> + end = logical + *length;
IMHO we can make it const and initialize it at its definition.
> +
> + while (1) {
Here we can hit at most one RST item, so I'd recommend removing
the while() loop.
This would mean we need an if () to handle the (ret > 0) case,
but it may still be a little easier to read than a loop.
You may want to refer to btrfs_lookup_csum() for a non-loop
implementation.
> + leaf = path->nodes[0];
> + slot = path->slots[0];
> +
> + btrfs_item_key_to_cpu(leaf, &found_key, slot);
> + found_logical = found_key.objectid;
> + found_length = found_key.offset;
> + found_end = found_logical + found_length;
> +
> + if (found_logical > end) {
> + ret = -ENOENT;
> + goto out;
> + }
> +
> + if (in_range(logical, found_logical, found_length))
> + break;
> +
> + ret = btrfs_next_item(stripe_root, path);
> + if (ret)
> + goto out;
> + }
> +
> + offset = logical - found_logical;
> +
> + /*
> + * If we have a logically contiguous, but physically noncontinuous
> + * range, we need to split the bio. Record the length after which we
> + * must split the bio.
> + */
> + if (end > found_end)
> + *length -= end - found_end;
> +
> + num_stripes = btrfs_num_raid_stripes(btrfs_item_size(leaf, slot));
> + stripe_extent = btrfs_item_ptr(leaf, slot, struct btrfs_stripe_extent);
> + encoding = btrfs_stripe_extent_encoding(leaf, stripe_extent);
> +
> + if (encoding != btrfs_bg_type_to_raid_encoding(map_type)) {
> + ret = -ENOENT;
> + goto out;
This looks like a very weird situation: we have a bg with a different type.
Should we emit a warning here, or is there a valid situation for this?
> + }
> +
> + for (i = 0; i < num_stripes; i++) {
> + struct btrfs_raid_stride *stride = &stripe_extent->strides[i];
> + u64 devid = btrfs_raid_stride_devid(leaf, stride);
> + u64 len = btrfs_raid_stride_length(leaf, stride);
> + u64 physical = btrfs_raid_stride_physical(leaf, stride);
> +
> + if (offset >= len) {
> + offset -= len;
> +
> + if (offset >= BTRFS_STRIPE_LEN)
> + continue;
> + }
> +
> + if (devid != stripe->dev->devid)
> + continue;
> +
> + if ((map_type & BTRFS_BLOCK_GROUP_DUP) && stripe_index != i)
> + continue;
> +
> + stripe->physical = physical + offset;
> +
> + ret = 0;
> + goto free_path;
> + }
> +
> + /*
> + * If we're here, we haven't found the requested devid in the stripe.
> + */
> + ret = -ENOENT;
> +out:
> + if (ret > 0)
> + ret = -ENOENT;
> + if (ret && ret != -EIO) {
> + /*
> + * Check if the range we're looking for is actually backed by
> + * an extent. This can happen, e.g. when scrub is running on a
> + * block-group and the extent it is trying to scrub gets
> + * deleted in the meantime. Although scrub is setting the
> + * block-group to read-only, deletion of extents is still
> + * allowed. If the extent is gone, simply return ENOENT and be
> + * good.
> + */
As mentioned in the next patch (sorry for the reversed order), this
should be handled in a different way (by only searching the commit root
for scrub usage).
> + if (btrfs_check_for_extent(fs_info, logical, *length, path)) {
> + ret = -ENOENT;
> + goto free_path;
> + }
> +
> + if (IS_ENABLED(CONFIG_BTRFS_DEBUG))
> + btrfs_print_tree(leaf, 1);
> + btrfs_err(fs_info,
> + "cannot find raid-stripe for logical [%llu, %llu] devid %llu, profile %s",
> + logical, logical + *length, stripe->dev->devid,
> + btrfs_bg_type_to_raid_name(map_type));
> + }
> +free_path:
> + btrfs_free_path(path);
> +
> + return ret;
> +}
> diff --git a/fs/btrfs/raid-stripe-tree.h b/fs/btrfs/raid-stripe-tree.h
> index 7560dc501a65..40aa553ae8aa 100644
> --- a/fs/btrfs/raid-stripe-tree.h
> +++ b/fs/btrfs/raid-stripe-tree.h
> @@ -13,6 +13,10 @@ struct btrfs_io_stripe;
>
> int btrfs_delete_raid_extent(struct btrfs_trans_handle *trans, u64 start,
> u64 length);
> +int btrfs_get_raid_extent_offset(struct btrfs_fs_info *fs_info,
> + u64 logical, u64 *length, u64 map_type,
> + u32 stripe_index,
> + struct btrfs_io_stripe *stripe);
> int btrfs_insert_raid_extent(struct btrfs_trans_handle *trans,
> struct btrfs_ordered_extent *ordered_extent);
>
> @@ -33,4 +37,11 @@ static inline bool btrfs_need_stripe_tree_update(struct btrfs_fs_info *fs_info,
>
> return false;
> }
> +
> +static inline int btrfs_num_raid_stripes(u32 item_size)
> +{
> + return (item_size - offsetof(struct btrfs_stripe_extent, strides)) /
> + sizeof(struct btrfs_raid_stride);
> +}
> +
> #endif
> diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
> index 0c0fd4eb4848..7c25f5c77788 100644
> --- a/fs/btrfs/volumes.c
> +++ b/fs/btrfs/volumes.c
> @@ -35,6 +35,7 @@
> #include "relocation.h"
> #include "scrub.h"
> #include "super.h"
> +#include "raid-stripe-tree.h"
>
> #define BTRFS_BLOCK_GROUP_STRIPE_MASK (BTRFS_BLOCK_GROUP_RAID0 | \
> BTRFS_BLOCK_GROUP_RAID10 | \
> @@ -6206,12 +6207,22 @@ static u64 btrfs_max_io_len(struct map_lookup *map, enum btrfs_map_op op,
> return U64_MAX;
> }
>
> -static void set_io_stripe(struct btrfs_io_stripe *dst, const struct map_lookup *map,
> - u32 stripe_index, u64 stripe_offset, u32 stripe_nr)
> +static int set_io_stripe(struct btrfs_fs_info *fs_info, enum btrfs_map_op op,
> + u64 logical, u64 *length, struct btrfs_io_stripe *dst,
> + struct map_lookup *map, u32 stripe_index,
> + u64 stripe_offset, u64 stripe_nr)
Do we need @length to be a pointer?
IIRC we can return the number of bytes we mapped, or <0 for errors.
Thus at least @length doesn't need to be a pointer.
Thanks,
Qu
> {
> dst->dev = map->stripes[stripe_index].dev;
> +
> + if (op == BTRFS_MAP_READ &&
> + btrfs_need_stripe_tree_update(fs_info, map->type))
> + return btrfs_get_raid_extent_offset(fs_info, logical, length,
> + map->type, stripe_index,
> + dst);
> +
> dst->physical = map->stripes[stripe_index].physical +
> stripe_offset + btrfs_stripe_nr_to_offset(stripe_nr);
> + return 0;
> }
>
> /*
> @@ -6428,11 +6439,11 @@ int btrfs_map_block(struct btrfs_fs_info *fs_info, enum btrfs_map_op op,
> */
> if (smap && num_alloc_stripes == 1 &&
> !((map->type & BTRFS_BLOCK_GROUP_RAID56_MASK) && mirror_num > 1)) {
> - set_io_stripe(smap, map, stripe_index, stripe_offset, stripe_nr);
> + ret = set_io_stripe(fs_info, op, logical, length, smap, map,
> + stripe_index, stripe_offset, stripe_nr);
> if (mirror_num_ret)
> *mirror_num_ret = mirror_num;
> *bioc_ret = NULL;
> - ret = 0;
> goto out;
> }
>
> @@ -6463,21 +6474,29 @@ int btrfs_map_block(struct btrfs_fs_info *fs_info, enum btrfs_map_op op,
> bioc->full_stripe_logical = em->start +
> btrfs_stripe_nr_to_offset(stripe_nr * data_stripes);
> for (i = 0; i < num_stripes; i++)
> - set_io_stripe(&bioc->stripes[i], map,
> - (i + stripe_nr) % num_stripes,
> - stripe_offset, stripe_nr);
> + ret = set_io_stripe(fs_info, op, logical, length,
> + &bioc->stripes[i], map,
> + (i + stripe_nr) % num_stripes,
> + stripe_offset, stripe_nr);
> } else {
> /*
> * For all other non-RAID56 profiles, just copy the target
> * stripe into the bioc.
> */
> for (i = 0; i < num_stripes; i++) {
> - set_io_stripe(&bioc->stripes[i], map, stripe_index,
> - stripe_offset, stripe_nr);
> + ret = set_io_stripe(fs_info, op, logical, length,
> + &bioc->stripes[i], map, stripe_index,
> + stripe_offset, stripe_nr);
> stripe_index++;
> }
> }
>
> + if (ret) {
> + *bioc_ret = NULL;
> + btrfs_put_bioc(bioc);
> + goto out;
> + }
> +
> if (op != BTRFS_MAP_READ)
> max_errors = btrfs_chunk_max_errors(map);
>
>
Powered by blists - more mailing lists