lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Date: Wed, 19 Jun 2024 18:21:15 +0800
From: Chao Yu <chao@...nel.org>
To: daejun7.park@...sung.com, "jaegeuk@...nel.org" <jaegeuk@...nel.org>,
 "corbet@....net" <corbet@....net>,
 "linux-f2fs-devel@...ts.sourceforge.net"
 <linux-f2fs-devel@...ts.sourceforge.net>,
 "linux-doc@...r.kernel.org" <linux-doc@...r.kernel.org>,
 "linux-kernel@...r.kernel.org" <linux-kernel@...r.kernel.org>
Cc: Seokhwan Kim <sukka.kim@...sung.com>,
 Dongjin Kim <dongjin_.kim@...sung.com>,
 Yonggil Song <yonggil.song@...sung.com>,
 Jaeyoon Choi <j_yoon.choi@...sung.com>, Nayeon Kim
 <nayeoni.kim@...sung.com>, Siwoo Jung <siu.jung@...sung.com>
Subject: Re: [PATCH v2] f2fs: add support single node mode

On 2024/6/17 12:51, Daejun Park wrote:
> The amount of node writes is small compared to the amount of user data
> writes in most workloads. Therefore, even if there is enough free space
> in the node section, it cannot be used by another type because the type
> for its section is fixed. When using zoned storage, the free space in
> node section issue can be a problem due to the large section.
> 
> This patch can avoid the problem by using a single node section without
> considering the hotness of the node section. For particularly high file
> system usage, two sections can be used as free sections, which makes it
> more efficient.
> 
> To use single node section, add the 'single_node_sec' in mount option.
> The single node section mode can be activated when the number of active
> logs is equal to 6.

This patch is malformed because tabs have been replaced with spaces.

> 
> Signed-off-by: Daejun Park <daejun7.park@...sung.com>
> ---
>   Documentation/filesystems/f2fs.rst |  2 +
>   fs/f2fs/f2fs.h                     |  3 ++
>   fs/f2fs/recovery.c                 |  3 ++
>   fs/f2fs/segment.c                  | 78 ++++++++++++++++++++++++++++++
>   fs/f2fs/segment.h                  |  2 +
>   fs/f2fs/super.c                    | 12 +++++
>   6 files changed, 100 insertions(+)
> 
> diff --git a/Documentation/filesystems/f2fs.rst b/Documentation/filesystems/f2fs.rst
> index 68a0885fb5e6..ba26b2ce4fa4 100644
> --- a/Documentation/filesystems/f2fs.rst
> +++ b/Documentation/filesystems/f2fs.rst
> @@ -134,6 +134,8 @@ noacl                        Disable POSIX Access Control List. Note: acl is enabled
>   active_logs=%u          Support configuring the number of active logs. In the
>                           current design, f2fs supports only 2, 4, and 6 logs.
>                           Default number is 6.
> +single_node_sec         Support single node section mode, it enables single active
> +                        log for hot/warm/cold nodes. This is disabled by default.

Do we allow changing this config during remount?

>   disable_ext_identify    Disable the extension list configured by mkfs, so f2fs
>                           is not aware of cold files such as media files.
>   inline_xattr            Enable the inline xattrs feature.
> diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
> index 1974b6aff397..90f13a6b64ce 100644
> --- a/fs/f2fs/f2fs.h
> +++ b/fs/f2fs/f2fs.h
> @@ -116,6 +116,7 @@ extern const char *f2fs_fault_name[FAULT_MAX];
>   #define        F2FS_MOUNT_GC_MERGE             0x02000000
>   #define F2FS_MOUNT_COMPRESS_CACHE      0x04000000
>   #define F2FS_MOUNT_AGE_EXTENT_CACHE    0x08000000
> +#define F2FS_MOUNT_SINGLE_NODE_SEC     0x10000000
> 
>   #define F2FS_OPTION(sbi)       ((sbi)->mount_opt)
>   #define clear_opt(sbi, option) (F2FS_OPTION(sbi).opt &= ~F2FS_MOUNT_##option)
> @@ -1655,6 +1656,8 @@ struct f2fs_sb_info {
> 
>          struct f2fs_mount_info mount_opt;       /* mount options */
> 
> +       bool single_node_sec;                   /* single node section */

Better to move it into struct f2fs_mount_info?

> +
>          /* for cleaning operations */
>          struct f2fs_rwsem gc_lock;              /*
>                                                   * semaphore for GC, avoid
> diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c
> index 496aee53c38a..b5cdb0845ac7 100644
> --- a/fs/f2fs/recovery.c
> +++ b/fs/f2fs/recovery.c
> @@ -414,6 +414,9 @@ static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head,
> 
>          /* get node pages in the current segment */
>          curseg = CURSEG_I(sbi, CURSEG_WARM_NODE);
> +       /* check hot node if single node section mode is enabled */
> +       if (sbi->single_node_sec && curseg->segno == NULL_SEGNO)
> +               curseg = CURSEG_I(sbi, CURSEG_HOT_NODE);

If we turn off single_node_sec mode, fsynced data may be lost since the warm
node chain is empty?

>          blkaddr = NEXT_FREE_BLKADDR(sbi, curseg);
>          blkaddr_fast = blkaddr;
> 
> diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
> index a0ce3d080f80..81b4d52b25c0 100644
> --- a/fs/f2fs/segment.c
> +++ b/fs/f2fs/segment.c
> @@ -394,6 +394,9 @@ int f2fs_commit_atomic_write(struct inode *inode)
>          return err;
>   }
> 
> +static int new_curseg(struct f2fs_sb_info *sbi, int type, bool new_sec);
> +static void locate_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno);
> +
>   /*
>    * This function balances dirty node and dentry pages.
>    * In addition, it controls garbage collection.
> @@ -420,6 +423,59 @@ void f2fs_balance_fs(struct f2fs_sb_info *sbi, bool need)
>          if (has_enough_free_secs(sbi, 0, 0))
>                  return;
> 
> +       if (test_opt(sbi, SINGLE_NODE_SEC) && !sbi->single_node_sec &&
> +           F2FS_OPTION(sbi).active_logs == 6) {
> +               int type, segno, left_blocks = 0;
> +
> +               for (type = CURSEG_HOT_NODE; type <= CURSEG_COLD_NODE; type++) {
> +                       segno = CURSEG_I(sbi, type)->segno;
> +                       left_blocks += CAP_BLKS_PER_SEC(sbi) -
> +                                       get_ckpt_valid_blocks(sbi, segno, true);
> +               }
> +
> +               /* enable single node section mode if we get 2 free sections */
> +               if (left_blocks < CAP_BLKS_PER_SEC(sbi) * 2)
> +                       goto do_gc;
> +
> +               f2fs_down_read(&SM_I(sbi)->curseg_lock);
> +               down_write(&SIT_I(sbi)->sentry_lock);
> +
> +               /* it can be enabled by others */
> +               if (sbi->single_node_sec)
> +                       goto unlock;
> +
> +               /* leave current zone by allocating new section */
> +               for (type = CURSEG_WARM_NODE; type <= CURSEG_COLD_NODE; type++) {
> +                       struct curseg_info *curseg = CURSEG_I(sbi, type);
> +
> +                       mutex_lock(&curseg->curseg_mutex);
> +                       segno = curseg->segno;
> +                       if (new_curseg(sbi, type, true)) {

It looks dangerous to allocate a new section here in the case where there
is heavy fragmentation and no available user space, because free sections
may be exhausted during a later FGGC.

> +                               mutex_unlock(&curseg->curseg_mutex);
> +                               goto unlock;
> +                       }
> +                       locate_dirty_segment(sbi, segno);
> +                       mutex_unlock(&curseg->curseg_mutex);
> +               }
> +
> +               /* clear warm node, cold node information */
> +               for (type = CURSEG_WARM_NODE; type <= CURSEG_COLD_NODE; type++) {
> +                       struct curseg_info *curseg = CURSEG_I(sbi, type);
> +
> +                       mutex_lock(&curseg->curseg_mutex);
> +                       segno = curseg->segno;
> +                       curseg->segno = NULL_SEGNO;
> +                       curseg->inited = false;
> +                       __set_test_and_free(sbi, segno, false);
> +                       mutex_unlock(&curseg->curseg_mutex);
> +               }
> +               f2fs_notice(sbi, "single node section mode enabled");
> +               sbi->single_node_sec = true;

It looks complicated to enable single_node_sec mode dynamically. What do
you think of making this a feature which can only be enabled by mkfs?

Thanks,

> +unlock:
> +               up_write(&SIT_I(sbi)->sentry_lock);
> +               f2fs_up_read(&SM_I(sbi)->curseg_lock);
> +       }
> +do_gc:
>          if (test_opt(sbi, GC_MERGE) && sbi->gc_thread &&
>                                  sbi->gc_thread->f2fs_gc_task) {
>                  DEFINE_WAIT(wait);
> @@ -3502,6 +3558,9 @@ static int __get_segment_type_6(struct f2fs_io_info *fio)
>                  return f2fs_rw_hint_to_seg_type(F2FS_I_SB(inode),
>                                                  inode->i_write_hint);
>          } else {
> +               if (fio->sbi->single_node_sec)
> +                       return CURSEG_HOT_NODE;
> +
>                  if (IS_DNODE(fio->page))
>                          return is_cold_node(fio->page) ? CURSEG_WARM_NODE :
>                                                  CURSEG_HOT_NODE;
> @@ -4116,6 +4175,15 @@ static int read_normal_summaries(struct f2fs_sb_info *sbi, int type)
>                                                          CURSEG_HOT_NODE]);
>                  blk_off = le16_to_cpu(ckpt->cur_node_blkoff[type -
>                                                          CURSEG_HOT_NODE]);
> +               if (segno == NULL_SEGNO && type != CURSEG_HOT_NODE) {
> +                       if (!test_opt(sbi, SINGLE_NODE_SEC)) {
> +                               f2fs_err(sbi, "single_node_sec option required");
> +                               return -EFAULT;
> +                       }
> +                       sbi->single_node_sec = true;
> +                       return 0;
> +               }
> +
>                  if (__exist_node_summaries(sbi))
>                          blk_addr = sum_blk_addr(sbi, NR_CURSEG_NODE_TYPE,
>                                                          type - CURSEG_HOT_NODE);
> @@ -4884,6 +4952,8 @@ static void init_free_segmap(struct f2fs_sb_info *sbi)
>                  struct curseg_info *curseg_t = CURSEG_I(sbi, type);
> 
>                  __set_test_and_inuse(sbi, curseg_t->segno);
> +               if (sbi->single_node_sec && type == CURSEG_HOT_NODE)
> +                       break;
>          }
>   }
> 
> @@ -5027,6 +5097,10 @@ static int sanity_check_curseg(struct f2fs_sb_info *sbi)
>                          f2fs_handle_error(sbi, ERROR_INVALID_CURSEG);
>                          return -EFSCORRUPTED;
>                  }
> +
> +               /* in single node section mode, WARM/COLD NODE are invalid */
> +               if (sbi->single_node_sec && i == CURSEG_HOT_NODE)
> +                       break;
>          }
>          return 0;
>   }
> @@ -5153,6 +5227,10 @@ static int fix_curseg_write_pointer(struct f2fs_sb_info *sbi, int type)
>          if (!zbd)
>                  return 0;
> 
> +       /* in single node section mode, WARM/COLD node are not valid */
> +       if (sbi->single_node_sec && type > CURSEG_HOT_NODE)
> +               return 0;
> +
>          /* report zone for the sector the curseg points to */
>          zone_sector = (sector_t)(cs_zone_block - zbd->start_blk)
>                  << log_sectors_per_block;
> diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h
> index e1c0f418aa11..152a07e61b5f 100644
> --- a/fs/f2fs/segment.h
> +++ b/fs/f2fs/segment.h
> @@ -570,6 +570,8 @@ static inline bool has_curseg_enough_space(struct f2fs_sb_info *sbi,
>                                  get_ckpt_valid_blocks(sbi, segno, true);
>                  if (node_blocks > left_blocks)
>                          return false;
> +               if (sbi->single_node_sec) /* check only hot node */
> +                       break;
>          }
> 
>          /* check current data section for dentry blocks. */
> diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
> index 1f1b3647a998..7e1e80fe58dd 100644
> --- a/fs/f2fs/super.c
> +++ b/fs/f2fs/super.c
> @@ -129,6 +129,7 @@ enum {
>          Opt_acl,
>          Opt_noacl,
>          Opt_active_logs,
> +       Opt_single_node_sec,
>          Opt_disable_ext_identify,
>          Opt_inline_xattr,
>          Opt_noinline_xattr,
> @@ -207,6 +208,7 @@ static match_table_t f2fs_tokens = {
>          {Opt_acl, "acl"},
>          {Opt_noacl, "noacl"},
>          {Opt_active_logs, "active_logs=%u"},
> +       {Opt_single_node_sec, "single_node_sec"},
>          {Opt_disable_ext_identify, "disable_ext_identify"},
>          {Opt_inline_xattr, "inline_xattr"},
>          {Opt_noinline_xattr, "noinline_xattr"},
> @@ -803,6 +805,9 @@ static int parse_options(struct super_block *sb, char *options, bool is_remount)
>                                  return -EINVAL;
>                          F2FS_OPTION(sbi).active_logs = arg;
>                          break;
> +               case Opt_single_node_sec:
> +                       set_opt(sbi, SINGLE_NODE_SEC);
> +                       break;
>                  case Opt_disable_ext_identify:
>                          set_opt(sbi, DISABLE_EXT_IDENTIFY);
>                          break;
> @@ -2039,6 +2044,8 @@ static int f2fs_show_options(struct seq_file *seq, struct dentry *root)
>                                          F2FS_OPTION(sbi).s_resuid),
>                                  from_kgid_munged(&init_user_ns,
>                                          F2FS_OPTION(sbi).s_resgid));
> +       if (test_opt(sbi, SINGLE_NODE_SEC))
> +               seq_puts(seq, ",single_node_sec");
>   #ifdef CONFIG_F2FS_FAULT_INJECTION
>          if (test_opt(sbi, FAULT_INJECTION)) {
>                  seq_printf(seq, ",fault_injection=%u",
> @@ -3675,6 +3682,9 @@ int f2fs_sanity_check_ckpt(struct f2fs_sb_info *sbi)
>          blocks_per_seg = BLKS_PER_SEG(sbi);
> 
>          for (i = 0; i < NR_CURSEG_NODE_TYPE; i++) {
> +               /* bypass single node section mode */
> +               if (le32_to_cpu(ckpt->cur_node_segno[i]) == NULL_SEGNO)
> +                       goto check_data;
>                  if (le32_to_cpu(ckpt->cur_node_segno[i]) >= main_segs ||
>                          le16_to_cpu(ckpt->cur_node_blkoff[i]) >= blocks_per_seg)
>                          return 1;
> @@ -3823,6 +3833,8 @@ static void init_sb_info(struct f2fs_sb_info *sbi)
>          init_f2fs_rwsem(&sbi->io_order_lock);
>          spin_lock_init(&sbi->cp_lock);
> 
> +       sbi->single_node_sec = false;
> +
>          sbi->dirty_device = 0;
>          spin_lock_init(&sbi->dev_lock);
> 
> --
> 2.25.1

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ