lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite for Android: free password hash cracker in your pocket
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Date: Mon, 3 Jun 2024 14:37:12 +0800
From: Yongpeng Yang <yangyongpeng1@...o.com>
To: daejun7.park@...sung.com, "jaegeuk@...nel.org" <jaegeuk@...nel.org>,
 "chao@...nel.org" <chao@...nel.org>, "corbet@....net" <corbet@....net>,
 "linux-f2fs-devel@...ts.sourceforge.net"
 <linux-f2fs-devel@...ts.sourceforge.net>,
 "linux-doc@...r.kernel.org" <linux-doc@...r.kernel.org>,
 "linux-kernel@...r.kernel.org" <linux-kernel@...r.kernel.org>
Cc: Nayeon Kim <nayeoni.kim@...sung.com>, Siwoo Jung <siu.jung@...sung.com>,
 Seokhwan Kim <sukka.kim@...sung.com>, Dongjin Kim <dongjin_.kim@...sung.com>
Subject: Re: [f2fs-dev] [RFC PATCH] f2fs: add support single node section mode



On 5/31/2024 3:46 PM, Daejun Park wrote:
> The amount of node writes is small compared to the amount of user data
> writes in most workloads. However, even if there is enough free space
> in a node section, it cannot be used by another type because the type
> of a section is fixed. When using zoned storage, this unused free space
> in node sections can be a problem because sections are large.
> 
> This patch can avoid the problem by using a single node section without
> considering the hotness of the node section. For particularly high file
> system usage, two sections can be used as free sections, which makes it
> more efficient.
> 
> To use single node section mode, add the 'single_node_sec' mount option.
> 
> Signed-off-by: Daejun Park <daejun7.park@...sung.com>
> ---
>   Documentation/filesystems/f2fs.rst |  2 +
>   fs/f2fs/f2fs.h                     |  3 ++
>   fs/f2fs/recovery.c                 |  3 ++
>   fs/f2fs/segment.c                  | 77 ++++++++++++++++++++++++++++++
>   fs/f2fs/segment.h                  |  2 +
>   fs/f2fs/super.c                    | 12 +++++
>   6 files changed, 99 insertions(+)
> 
> diff --git a/Documentation/filesystems/f2fs.rst b/Documentation/filesystems/f2fs.rst
> index 68a0885fb5e6..ba26b2ce4fa4 100644
> --- a/Documentation/filesystems/f2fs.rst
> +++ b/Documentation/filesystems/f2fs.rst
> @@ -134,6 +134,8 @@ noacl			 Disable POSIX Access Control List. Note: acl is enabled
>   active_logs=%u		 Support configuring the number of active logs. In the
>   			 current design, f2fs supports only 2, 4, and 6 logs.
>   			 Default number is 6.
> +single_node_sec	 Support single node section mode, it enables single active
> +			 log for hot/warm/cold nodes. This is disabled by default.
>   disable_ext_identify	 Disable the extension list configured by mkfs, so f2fs
>   			 is not aware of cold files such as media files.
>   inline_xattr		 Enable the inline xattrs feature.
> diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
> index 1974b6aff397..90f13a6b64ce 100644
> --- a/fs/f2fs/f2fs.h
> +++ b/fs/f2fs/f2fs.h
> @@ -116,6 +116,7 @@ extern const char *f2fs_fault_name[FAULT_MAX];
>   #define	F2FS_MOUNT_GC_MERGE		0x02000000
>   #define F2FS_MOUNT_COMPRESS_CACHE	0x04000000
>   #define F2FS_MOUNT_AGE_EXTENT_CACHE	0x08000000
> +#define F2FS_MOUNT_SINGLE_NODE_SEC	0x10000000
>   
>   #define F2FS_OPTION(sbi)	((sbi)->mount_opt)
>   #define clear_opt(sbi, option)	(F2FS_OPTION(sbi).opt &= ~F2FS_MOUNT_##option)
> @@ -1655,6 +1656,8 @@ struct f2fs_sb_info {
>   
>   	struct f2fs_mount_info mount_opt;	/* mount options */
>   
> +	bool single_node_sec;			/* single node section */
> +
>   	/* for cleaning operations */
>   	struct f2fs_rwsem gc_lock;		/*
>   						 * semaphore for GC, avoid
> diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c
> index 496aee53c38a..b5cdb0845ac7 100644
> --- a/fs/f2fs/recovery.c
> +++ b/fs/f2fs/recovery.c
> @@ -414,6 +414,9 @@ static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head,
>   
>   	/* get node pages in the current segment */
>   	curseg = CURSEG_I(sbi, CURSEG_WARM_NODE);
> +	/* check hot node if single node section mode is enabled */
> +	if (sbi->single_node_sec && curseg->segno == NULL_SEGNO)
> +		curseg = CURSEG_I(sbi, CURSEG_HOT_NODE);
>   	blkaddr = NEXT_FREE_BLKADDR(sbi, curseg);
>   	blkaddr_fast = blkaddr;
>   
> diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
> index a0ce3d080f80..c1fe5c92bdfb 100644
> --- a/fs/f2fs/segment.c
> +++ b/fs/f2fs/segment.c
> @@ -394,6 +394,9 @@ int f2fs_commit_atomic_write(struct inode *inode)
>   	return err;
>   }
>   
> +static int new_curseg(struct f2fs_sb_info *sbi, int type, bool new_sec);
> +static void locate_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno);
> +
>   /*
>    * This function balances dirty node and dentry pages.
>    * In addition, it controls garbage collection.
> @@ -420,6 +423,58 @@ void f2fs_balance_fs(struct f2fs_sb_info *sbi, bool need)
>   	if (has_enough_free_secs(sbi, 0, 0))
>   		return;
>   
> +	if (test_opt(sbi, SINGLE_NODE_SEC) && !sbi->single_node_sec) {
> +		int type, segno, left_blocks = 0;
> +
> +		for (type = CURSEG_HOT_NODE; type <= CURSEG_COLD_NODE; type++) {
> +			segno = CURSEG_I(sbi, type)->segno;
> +			left_blocks += CAP_BLKS_PER_SEC(sbi) -
> +					get_ckpt_valid_blocks(sbi, segno, true);
> +		}
> +
> +		/* enable single node section mode if we get 2 free sections */
> +		if (left_blocks < CAP_BLKS_PER_SEC(sbi) * 2)
> +			goto do_gc;
> +
> +		f2fs_down_read(&SM_I(sbi)->curseg_lock);
> +		down_write(&SIT_I(sbi)->sentry_lock);
> +
> +		/* it can be enabled by others */
> +		if (sbi->single_node_sec)
> +			goto unlock;
> +
> +		/* leave current zone by allocating new section */
> +		for (type = CURSEG_WARM_NODE; type <= CURSEG_COLD_NODE; type++) {
> +			struct curseg_info *curseg = CURSEG_I(sbi, type);
> +
> +			mutex_lock(&curseg->curseg_mutex);
> +			segno = curseg->segno;
> +			if (new_curseg(sbi, type, true)) {
> +				mutex_unlock(&curseg->curseg_mutex);
> +				goto unlock;
> +			}
> +			locate_dirty_segment(sbi, segno);
> +			mutex_unlock(&curseg->curseg_mutex);
> +		}
Hi Daejun,
1. This is not compatible with "F2FS_OPTION(sbi).active_logs == 2".
2. Once has_enough_free_secs is false, F2FS can never switch back to 
multiple node sections, even after has_enough_free_secs becomes true 
again and the filesystem is unmounted and remounted. This seems unreasonable.
> +
> +		/* clear warm node, cold node information */
> +		for (type = CURSEG_WARM_NODE; type <= CURSEG_COLD_NODE; type++) {
> +			struct curseg_info *curseg = CURSEG_I(sbi, type);
> +
> +			mutex_lock(&curseg->curseg_mutex);
> +			segno = curseg->segno;
> +			curseg->segno = NULL_SEGNO;
> +			curseg->inited = false;
> +			__set_test_and_free(sbi, segno, false);
> +			mutex_unlock(&curseg->curseg_mutex);
> +		}
> +		f2fs_notice(sbi, "single node section mode enabled");
> +		sbi->single_node_sec = true;
> +unlock:
> +		up_write(&SIT_I(sbi)->sentry_lock);
> +		f2fs_up_read(&SM_I(sbi)->curseg_lock);
> +	}
> +do_gc:
>   	if (test_opt(sbi, GC_MERGE) && sbi->gc_thread &&
>   				sbi->gc_thread->f2fs_gc_task) {
>   		DEFINE_WAIT(wait);
> @@ -3502,6 +3557,9 @@ static int __get_segment_type_6(struct f2fs_io_info *fio)
>   		return f2fs_rw_hint_to_seg_type(F2FS_I_SB(inode),
>   						inode->i_write_hint);
>   	} else {
> +		if (fio->sbi->single_node_sec)
> +			return CURSEG_HOT_NODE;
> +
>   		if (IS_DNODE(fio->page))
>   			return is_cold_node(fio->page) ? CURSEG_WARM_NODE :
>   						CURSEG_HOT_NODE;
> @@ -4116,6 +4174,15 @@ static int read_normal_summaries(struct f2fs_sb_info *sbi, int type)
>   							CURSEG_HOT_NODE]);
>   		blk_off = le16_to_cpu(ckpt->cur_node_blkoff[type -
>   							CURSEG_HOT_NODE]);
> +		if (segno == NULL_SEGNO && type != CURSEG_HOT_NODE) {
> +			if (!test_opt(sbi, SINGLE_NODE_SEC)) {
> +				f2fs_err(sbi, "single_node_sec option required");
> +				return -EFAULT;
> +			}
> +			sbi->single_node_sec = true;
> +			return 0;
> +		}
> +
>   		if (__exist_node_summaries(sbi))
>   			blk_addr = sum_blk_addr(sbi, NR_CURSEG_NODE_TYPE,
>   							type - CURSEG_HOT_NODE);
> @@ -4884,6 +4951,8 @@ static void init_free_segmap(struct f2fs_sb_info *sbi)
>   		struct curseg_info *curseg_t = CURSEG_I(sbi, type);
>   
>   		__set_test_and_inuse(sbi, curseg_t->segno);
> +		if (sbi->single_node_sec && type == CURSEG_HOT_NODE)
> +			break;
>   	}
>   }
>   
> @@ -5027,6 +5096,10 @@ static int sanity_check_curseg(struct f2fs_sb_info *sbi)
>   			f2fs_handle_error(sbi, ERROR_INVALID_CURSEG);
>   			return -EFSCORRUPTED;
>   		}
> +
> +		/* in single node section mode, WARM/COLD NODE are invalid */
> +		if (sbi->single_node_sec && i == CURSEG_HOT_NODE)
> +			break;
>   	}
>   	return 0;
>   }
> @@ -5153,6 +5226,10 @@ static int fix_curseg_write_pointer(struct f2fs_sb_info *sbi, int type)
>   	if (!zbd)
>   		return 0;
>   
> +	/* in single node section mode, WARM/COLD node are not valid */
> +	if (sbi->single_node_sec && type > CURSEG_HOT_NODE)
> +		return 0;
> +
>   	/* report zone for the sector the curseg points to */
>   	zone_sector = (sector_t)(cs_zone_block - zbd->start_blk)
>   		<< log_sectors_per_block;
> diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h
> index e1c0f418aa11..152a07e61b5f 100644
> --- a/fs/f2fs/segment.h
> +++ b/fs/f2fs/segment.h
> @@ -570,6 +570,8 @@ static inline bool has_curseg_enough_space(struct f2fs_sb_info *sbi,
>   				get_ckpt_valid_blocks(sbi, segno, true);
>   		if (node_blocks > left_blocks)
>   			return false;
> +		if (sbi->single_node_sec) /* check only hot node */
> +			break;
>   	}
>   
>   	/* check current data section for dentry blocks. */
> diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
> index 1f1b3647a998..c21eeca86b0a 100644
> --- a/fs/f2fs/super.c
> +++ b/fs/f2fs/super.c
> @@ -129,6 +129,7 @@ enum {
>   	Opt_acl,
>   	Opt_noacl,
>   	Opt_active_logs,
> +	Opt_single_node_sec,
>   	Opt_disable_ext_identify,
>   	Opt_inline_xattr,
>   	Opt_noinline_xattr,
> @@ -207,6 +208,7 @@ static match_table_t f2fs_tokens = {
>   	{Opt_acl, "acl"},
>   	{Opt_noacl, "noacl"},
>   	{Opt_active_logs, "active_logs=%u"},
> +	{Opt_single_node_sec, "single_node_sec"},
>   	{Opt_disable_ext_identify, "disable_ext_identify"},
>   	{Opt_inline_xattr, "inline_xattr"},
>   	{Opt_noinline_xattr, "noinline_xattr"},
> @@ -803,6 +805,9 @@ static int parse_options(struct super_block *sb, char *options, bool is_remount)
>   				return -EINVAL;
>   			F2FS_OPTION(sbi).active_logs = arg;
>   			break;
> +		case Opt_single_node_sec:
> +			set_opt(sbi, SINGLE_NODE_SEC);
> +			break;
>   		case Opt_disable_ext_identify:
>   			set_opt(sbi, DISABLE_EXT_IDENTIFY);
>   			break;
> @@ -2039,6 +2044,8 @@ static int f2fs_show_options(struct seq_file *seq, struct dentry *root)
>   					F2FS_OPTION(sbi).s_resuid),
>   				from_kgid_munged(&init_user_ns,
>   					F2FS_OPTION(sbi).s_resgid));
> +	if (test_opt(sbi, SINGLE_NODE_SEC))
> +		seq_puts(seq, ",single_node_sec");
>   #ifdef CONFIG_F2FS_FAULT_INJECTION
>   	if (test_opt(sbi, FAULT_INJECTION)) {
>   		seq_printf(seq, ",fault_injection=%u",
> @@ -3675,6 +3682,9 @@ int f2fs_sanity_check_ckpt(struct f2fs_sb_info *sbi)
>   	blocks_per_seg = BLKS_PER_SEG(sbi);
>   
>   	for (i = 0; i < NR_CURSEG_NODE_TYPE; i++) {
> +		/* bypass single node section mode */
> +		if (le32_to_cpu(ckpt->cur_node_segno[i] == NULL_SEGNO))
> +			goto check_data;
>   		if (le32_to_cpu(ckpt->cur_node_segno[i]) >= main_segs ||
>   			le16_to_cpu(ckpt->cur_node_blkoff[i]) >= blocks_per_seg)
>   			return 1;
> @@ -3823,6 +3833,8 @@ static void init_sb_info(struct f2fs_sb_info *sbi)
>   	init_f2fs_rwsem(&sbi->io_order_lock);
>   	spin_lock_init(&sbi->cp_lock);
>   
> +	sbi->single_node_sec = false;
> +
>   	sbi->dirty_device = 0;
>   	spin_lock_init(&sbi->dev_lock);
>   

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ