lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [day] [month] [year] [list]
Message-ID: <CAL3q7H6s=3avNUCCQK9W7AH+U_82eq0LaQ5XEL28m9X8k+rHkQ@mail.gmail.com>
Date: Fri, 24 May 2024 15:07:46 +0100
From: Filipe Manana <fdmanana@...nel.org>
To: Johannes Thumshirn <jth@...nel.org>
Cc: Chris Mason <clm@...com>, Josef Bacik <josef@...icpanda.com>, David Sterba <dsterba@...e.com>, 
	Hans Holmberg <Hans.Holmberg@....com>, linux-btrfs@...r.kernel.org, 
	linux-kernel@...r.kernel.org, Naohiro Aota <naohiro.aota@....com>, 
	Johannes Thumshirn <johannes.thumshirn@....com>
Subject: Re: [PATCH v4 1/2] btrfs: zoned: reserve relocation block-group on mount

On Thu, May 23, 2024 at 4:32 PM Johannes Thumshirn <jth@...nel.org> wrote:
>
> From: Johannes Thumshirn <johannes.thumshirn@....com>
>
> Reserve one zone as a data relocation target on each mount. If an empty
> data block group already exists, there's no need to force a chunk
> allocation; we can use that empty block group as our relocation target.
>
> Signed-off-by: Johannes Thumshirn <johannes.thumshirn@....com>
> ---
>  fs/btrfs/block-group.c | 17 +++++++++++++
>  fs/btrfs/disk-io.c     |  2 ++
>  fs/btrfs/zoned.c       | 67 ++++++++++++++++++++++++++++++++++++++++++++++++++
>  fs/btrfs/zoned.h       |  3 +++
>  4 files changed, 89 insertions(+)
>
> diff --git a/fs/btrfs/block-group.c b/fs/btrfs/block-group.c
> index 9910bae89966..1195f6721c90 100644
> --- a/fs/btrfs/block-group.c
> +++ b/fs/btrfs/block-group.c
> @@ -1500,6 +1500,15 @@ void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info)
>                         btrfs_put_block_group(block_group);
>                         continue;
>                 }
> +
> +               spin_lock(&fs_info->relocation_bg_lock);
> +               if (block_group->start == fs_info->data_reloc_bg) {
> +                       btrfs_put_block_group(block_group);
> +                       spin_unlock(&fs_info->relocation_bg_lock);
> +                       continue;
> +               }
> +               spin_unlock(&fs_info->relocation_bg_lock);
> +
>                 spin_unlock(&fs_info->unused_bgs_lock);
>
>                 btrfs_discard_cancel_work(&fs_info->discard_ctl, block_group);
> @@ -1835,6 +1844,14 @@ void btrfs_reclaim_bgs_work(struct work_struct *work)
>                                       bg_list);
>                 list_del_init(&bg->bg_list);
>
> +               spin_lock(&fs_info->relocation_bg_lock);
> +               if (bg->start == fs_info->data_reloc_bg) {
> +                       btrfs_put_block_group(bg);
> +                       spin_unlock(&fs_info->relocation_bg_lock);
> +                       continue;
> +               }
> +               spin_unlock(&fs_info->relocation_bg_lock);

Ok, so the reclaim task and cleaner kthread will not remove the
reserved block group.

But there's nothing preventing someone from running balance manually,
which will delete the block group.

E.g. block group X is empty and reserved as the data relocation bg.
The balance ioctl is invoked and goes through all block groups for relocation.
It happens to find bg X first and deletes it.

Now there's no more reserved bg for data relocation, and other tasks
can come in and use the freed space and fill all of it or most of it.

Shouldn't we prevent the data reloc bg from being a target of a manual
relocation too?
E.g. have btrfs_relocate_chunk() do nothing if the bg is the data reloc bg.

Thanks.

> +
>                 space_info = bg->space_info;
>                 spin_unlock(&fs_info->unused_bgs_lock);
>
> diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
> index 78d3966232ae..16bb52bcb69e 100644
> --- a/fs/btrfs/disk-io.c
> +++ b/fs/btrfs/disk-io.c
> @@ -3547,6 +3547,8 @@ int __cold open_ctree(struct super_block *sb, struct btrfs_fs_devices *fs_device
>         }
>         btrfs_discard_resume(fs_info);
>
> +       btrfs_reserve_relocation_bg(fs_info);
> +
>         if (fs_info->uuid_root &&
>             (btrfs_test_opt(fs_info, RESCAN_UUID_TREE) ||
>              fs_info->generation != btrfs_super_uuid_tree_generation(disk_super))) {
> diff --git a/fs/btrfs/zoned.c b/fs/btrfs/zoned.c
> index c52a0063f7db..d291cf4f565e 100644
> --- a/fs/btrfs/zoned.c
> +++ b/fs/btrfs/zoned.c
> @@ -17,6 +17,7 @@
>  #include "fs.h"
>  #include "accessors.h"
>  #include "bio.h"
> +#include "transaction.h"
>
>  /* Maximum number of zones to report per blkdev_report_zones() call */
>  #define BTRFS_REPORT_NR_ZONES   4096
> @@ -2637,3 +2638,69 @@ void btrfs_check_active_zone_reservation(struct btrfs_fs_info *fs_info)
>         }
>         spin_unlock(&fs_info->zone_active_bgs_lock);
>  }
> +
> +static u64 find_empty_block_group(struct btrfs_space_info *sinfo, u64 flags)
> +{
> +       struct btrfs_block_group *bg;
> +
> +       for (int i = 0; i < BTRFS_NR_RAID_TYPES; i++) {
> +               list_for_each_entry(bg, &sinfo->block_groups[i], list) {
> +                       if (bg->flags != flags)
> +                               continue;
> +                       if (bg->used == 0)
> +                               return bg->start;
> +               }
> +       }
> +
> +       return 0;
> +}
> +
> +void btrfs_reserve_relocation_bg(struct btrfs_fs_info *fs_info)
> +{
> +       struct btrfs_root *tree_root = fs_info->tree_root;
> +       struct btrfs_space_info *sinfo = fs_info->data_sinfo;
> +       struct btrfs_trans_handle *trans;
> +       struct btrfs_block_group *bg;
> +       u64 flags = btrfs_get_alloc_profile(fs_info, sinfo->flags);
> +       u64 bytenr = 0;
> +
> +       lockdep_assert_not_held(&fs_info->relocation_bg_lock);
> +
> +       if (!btrfs_is_zoned(fs_info))
> +               return;
> +
> +       if (fs_info->data_reloc_bg)
> +               return;
> +
> +       bytenr = find_empty_block_group(sinfo, flags);
> +       if (!bytenr) {
> +               int ret;
> +
> +               trans = btrfs_join_transaction(tree_root);
> +               if (IS_ERR(trans))
> +                       return;
> +
> +               ret = btrfs_chunk_alloc(trans, flags, CHUNK_ALLOC_FORCE);
> +               btrfs_end_transaction(trans);
> +               if (ret)
> +                       return;
> +
> +               bytenr = find_empty_block_group(sinfo, flags);
> +               if (!bytenr)
> +                       return;
> +
> +       }
> +
> +       bg = btrfs_lookup_block_group(fs_info, bytenr);
> +       if (!bg)
> +               return;
> +
> +       if (!btrfs_zone_activate(bg))
> +               bytenr = 0;
> +
> +       btrfs_put_block_group(bg);
> +
> +       spin_lock(&fs_info->relocation_bg_lock);
> +       fs_info->data_reloc_bg = bytenr;
> +       spin_unlock(&fs_info->relocation_bg_lock);
> +}
> diff --git a/fs/btrfs/zoned.h b/fs/btrfs/zoned.h
> index ff605beb84ef..56c1c19d52bc 100644
> --- a/fs/btrfs/zoned.h
> +++ b/fs/btrfs/zoned.h
> @@ -95,6 +95,7 @@ int btrfs_zone_finish_one_bg(struct btrfs_fs_info *fs_info);
>  int btrfs_zoned_activate_one_bg(struct btrfs_fs_info *fs_info,
>                                 struct btrfs_space_info *space_info, bool do_finish);
>  void btrfs_check_active_zone_reservation(struct btrfs_fs_info *fs_info);
> +void btrfs_reserve_relocation_bg(struct btrfs_fs_info *fs_info);
>  #else /* CONFIG_BLK_DEV_ZONED */
>
>  static inline int btrfs_get_dev_zone_info_all_devices(struct btrfs_fs_info *fs_info)
> @@ -264,6 +265,8 @@ static inline int btrfs_zoned_activate_one_bg(struct btrfs_fs_info *fs_info,
>
>  static inline void btrfs_check_active_zone_reservation(struct btrfs_fs_info *fs_info) { }
>
> +static inline void btrfs_reserve_relocation_bg(struct btrfs_fs_info *fs_info) { }
> +
>  #endif
>
>  static inline bool btrfs_dev_is_sequential(struct btrfs_device *device, u64 pos)
>
> --
> 2.43.0
>
>

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ