lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <a17c2c2d-6bfb-4c5a-8b6d-1e2dd5f80f54@kernel.org>
Date: Fri, 23 Feb 2024 11:52:54 +0800
From: Chao Yu <chao@...nel.org>
To: Daeho Jeong <daeho43@...il.com>, linux-kernel@...r.kernel.org,
 linux-f2fs-devel@...ts.sourceforge.net, kernel-team@...roid.com
Cc: Jaegeuk Kim <jaegeuk@...nel.org>, Daeho Jeong <daehojeong@...gle.com>
Subject: Re: [f2fs-dev] [PATCH v3 2/2] f2fs: support file pinning for zoned
 devices

On 2024/2/14 1:38, Daeho Jeong wrote:
> From: Daeho Jeong <daehojeong@...gle.com>
> 
> Support file pinning with conventional storage area for zoned devices
> 
> Signed-off-by: Daeho Jeong <daehojeong@...gle.com>
> Signed-off-by: Jaegeuk Kim <jaegeuk@...nel.org>
> ---
> v3: check the hole when migrating blocks for swap.
>      do not use the remainder of cold pin section.
> v2: flush previous dirty pages before swapon.
>      do not re-check for the last extent of swap area.
>      merge this patch with swap file pinning support patch.
> ---
>   fs/f2fs/data.c    | 58 ++++++++++++++++++++++++++-------------
>   fs/f2fs/f2fs.h    | 17 +++++++++++-
>   fs/f2fs/file.c    | 24 ++++++++++++-----
>   fs/f2fs/gc.c      | 14 +++++++---
>   fs/f2fs/segment.c | 69 +++++++++++++++++++++++++++++++++++++++++------
>   fs/f2fs/segment.h | 10 +++++++
>   6 files changed, 154 insertions(+), 38 deletions(-)
> 
> diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
> index 828c797cd47c..0c9aa3082fcf 100644
> --- a/fs/f2fs/data.c
> +++ b/fs/f2fs/data.c
> @@ -3839,25 +3839,34 @@ static int f2fs_migrate_blocks(struct inode *inode, block_t start_blk,
>   	unsigned int blkofs;
>   	unsigned int blk_per_sec = BLKS_PER_SEC(sbi);
>   	unsigned int secidx = start_blk / blk_per_sec;
> -	unsigned int end_sec = secidx + blkcnt / blk_per_sec;
> +	unsigned int end_sec;
>   	int ret = 0;
>   
> +	if (!blkcnt)
> +		return 0;
> +	end_sec = secidx + (blkcnt - 1) / blk_per_sec;
> +
>   	f2fs_down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
>   	filemap_invalidate_lock(inode->i_mapping);
>   
>   	set_inode_flag(inode, FI_ALIGNED_WRITE);
>   	set_inode_flag(inode, FI_OPU_WRITE);
>   
> -	for (; secidx < end_sec; secidx++) {
> +	for (; secidx <= end_sec; secidx++) {
> +		unsigned int blkofs_end = secidx == end_sec ?
> +			(blkcnt - 1) % blk_per_sec : blk_per_sec - 1;
> +
>   		f2fs_down_write(&sbi->pin_sem);
>   
> -		f2fs_lock_op(sbi);
> -		f2fs_allocate_new_section(sbi, CURSEG_COLD_DATA_PINNED, false);
> -		f2fs_unlock_op(sbi);
> +		ret = f2fs_allocate_pinning_section(sbi);
> +		if (ret) {
> +			f2fs_up_write(&sbi->pin_sem);
> +			break;
> +		}
>   
>   		set_inode_flag(inode, FI_SKIP_WRITES);
>   
> -		for (blkofs = 0; blkofs < blk_per_sec; blkofs++) {
> +		for (blkofs = 0; blkofs <= blkofs_end; blkofs++) {
>   			struct page *page;
>   			unsigned int blkidx = secidx * blk_per_sec + blkofs;
>   
> @@ -3946,27 +3955,34 @@ static int check_swap_activate(struct swap_info_struct *sis,
>   		nr_pblocks = map.m_len;
>   
>   		if ((pblock - SM_I(sbi)->main_blkaddr) & sec_blks_mask ||
> -				nr_pblocks & sec_blks_mask) {
> +				nr_pblocks & sec_blks_mask ||
> +				!f2fs_valid_pinned_area(sbi, pblock)) {
> +			bool last_extent = false;
> +
>   			not_aligned++;
>   
>   			nr_pblocks = roundup(nr_pblocks, blks_per_sec);
>   			if (cur_lblock + nr_pblocks > sis->max)
>   				nr_pblocks -= blks_per_sec;
>   
> +			/* this extent is last one */
>   			if (!nr_pblocks) {
> -				/* this extent is last one */
> -				nr_pblocks = map.m_len;
> -				f2fs_warn(sbi, "Swapfile: last extent is not aligned to section");
> -				goto next;
> +				nr_pblocks = last_lblock - cur_lblock;
> +				last_extent = true;
>   			}
>   
>   			ret = f2fs_migrate_blocks(inode, cur_lblock,
>   							nr_pblocks);
> -			if (ret)
> +			if (ret) {
> +				if (ret == -ENOENT)
> +					ret = -EINVAL;
>   				goto out;
> -			goto retry;
> +			}
> +
> +			if (!last_extent)
> +				goto retry;
>   		}
> -next:
> +
>   		if (cur_lblock + nr_pblocks >= sis->max)
>   			nr_pblocks = sis->max - cur_lblock;
>   
> @@ -4004,17 +4020,17 @@ static int f2fs_swap_activate(struct swap_info_struct *sis, struct file *file,
>   				sector_t *span)
>   {
>   	struct inode *inode = file_inode(file);
> +	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
>   	int ret;
>   
>   	if (!S_ISREG(inode->i_mode))
>   		return -EINVAL;
>   
> -	if (f2fs_readonly(F2FS_I_SB(inode)->sb))
> +	if (f2fs_readonly(sbi->sb))
>   		return -EROFS;
>   
> -	if (f2fs_lfs_mode(F2FS_I_SB(inode))) {
> -		f2fs_err(F2FS_I_SB(inode),
> -			"Swapfile not supported in LFS mode");
> +	if (f2fs_lfs_mode(sbi) && !f2fs_sb_has_blkzoned(sbi)) {
> +		f2fs_err(sbi, "Swapfile not supported in LFS mode");
>   		return -EINVAL;
>   	}
>   
> @@ -4027,13 +4043,17 @@ static int f2fs_swap_activate(struct swap_info_struct *sis, struct file *file,
>   
>   	f2fs_precache_extents(inode);
>   
> +	ret = filemap_fdatawrite(inode->i_mapping);
> +	if (ret < 0)
> +		return ret;
> +
>   	ret = check_swap_activate(sis, file, span);
>   	if (ret < 0)
>   		return ret;
>   
>   	stat_inc_swapfile_inode(inode);
>   	set_inode_flag(inode, FI_PIN_FILE);
> -	f2fs_update_time(F2FS_I_SB(inode), REQ_TIME);
> +	f2fs_update_time(sbi, REQ_TIME);
>   	return ret;
>   }
>   
> diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
> index 40eb590ed646..351133a11518 100644
> --- a/fs/f2fs/f2fs.h
> +++ b/fs/f2fs/f2fs.h
> @@ -3696,7 +3696,8 @@ void f2fs_get_new_segment(struct f2fs_sb_info *sbi,
>   			unsigned int *newseg, bool new_sec, int dir);
>   void f2fs_allocate_segment_for_resize(struct f2fs_sb_info *sbi, int type,
>   					unsigned int start, unsigned int end);
> -void f2fs_allocate_new_section(struct f2fs_sb_info *sbi, int type, bool force);
> +int f2fs_allocate_new_section(struct f2fs_sb_info *sbi, int type, bool force);
> +int f2fs_allocate_pinning_section(struct f2fs_sb_info *sbi);
>   void f2fs_allocate_new_segments(struct f2fs_sb_info *sbi);
>   int f2fs_trim_fs(struct f2fs_sb_info *sbi, struct fstrim_range *range);
>   bool f2fs_exist_trim_candidates(struct f2fs_sb_info *sbi,
> @@ -3870,6 +3871,9 @@ void f2fs_stop_gc_thread(struct f2fs_sb_info *sbi);
>   block_t f2fs_start_bidx_of_node(unsigned int node_ofs, struct inode *inode);
>   int f2fs_gc(struct f2fs_sb_info *sbi, struct f2fs_gc_control *gc_control);
>   void f2fs_build_gc_manager(struct f2fs_sb_info *sbi);
> +int f2fs_gc_range(struct f2fs_sb_info *sbi,
> +		unsigned int start_seg, unsigned int end_seg,
> +		bool dry_run, unsigned int dry_run_sections);
>   int f2fs_resize_fs(struct file *filp, __u64 block_count);
>   int __init f2fs_create_garbage_collection_cache(void);
>   void f2fs_destroy_garbage_collection_cache(void);
> @@ -4524,6 +4528,17 @@ static inline bool f2fs_lfs_mode(struct f2fs_sb_info *sbi)
>   	return F2FS_OPTION(sbi).fs_mode == FS_MODE_LFS;
>   }
>   
> +static inline bool f2fs_valid_pinned_area(struct f2fs_sb_info *sbi,
> +					  block_t blkaddr)
> +{
> +	if (f2fs_sb_has_blkzoned(sbi)) {
> +		int devi = f2fs_target_device_index(sbi, blkaddr);
> +
> +		return !bdev_is_zoned(FDEV(devi).bdev);
> +	}
> +	return true;
> +}
> +
>   static inline bool f2fs_low_mem_mode(struct f2fs_sb_info *sbi)
>   {
>   	return F2FS_OPTION(sbi).memory_mode == MEMORY_MODE_LOW;
> diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
> index 2c13b340c8a0..21c3aa93a8db 100644
> --- a/fs/f2fs/file.c
> +++ b/fs/f2fs/file.c
> @@ -1733,9 +1733,11 @@ static int f2fs_expand_inode_data(struct inode *inode, loff_t offset,
>   
>   		f2fs_down_write(&sbi->pin_sem);
>   
> -		f2fs_lock_op(sbi);
> -		f2fs_allocate_new_section(sbi, CURSEG_COLD_DATA_PINNED, false);
> -		f2fs_unlock_op(sbi);
> +		err = f2fs_allocate_pinning_section(sbi);
> +		if (err) {
> +			f2fs_up_write(&sbi->pin_sem);
> +			goto out_err;
> +		}
>   
>   		map.m_seg_type = CURSEG_COLD_DATA_PINNED;
>   		err = f2fs_map_blocks(inode, &map, F2FS_GET_BLOCK_PRE_DIO);
> @@ -3185,6 +3187,7 @@ int f2fs_pin_file_control(struct inode *inode, bool inc)
>   static int f2fs_ioc_set_pin_file(struct file *filp, unsigned long arg)
>   {
>   	struct inode *inode = file_inode(filp);
> +	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
>   	__u32 pin;
>   	int ret = 0;
>   
> @@ -3194,7 +3197,7 @@ static int f2fs_ioc_set_pin_file(struct file *filp, unsigned long arg)
>   	if (!S_ISREG(inode->i_mode))
>   		return -EINVAL;
>   
> -	if (f2fs_readonly(F2FS_I_SB(inode)->sb))
> +	if (f2fs_readonly(sbi->sb))
>   		return -EROFS;
>   
>   	ret = mnt_want_write_file(filp);
> @@ -3207,9 +3210,18 @@ static int f2fs_ioc_set_pin_file(struct file *filp, unsigned long arg)
>   		clear_inode_flag(inode, FI_PIN_FILE);
>   		f2fs_i_gc_failures_write(inode, 0);
>   		goto done;
> +	} else if (f2fs_is_pinned_file(inode)) {
> +		goto done;
>   	}
>   
> -	if (f2fs_should_update_outplace(inode, NULL)) {
> +	if (f2fs_sb_has_blkzoned(sbi) && F2FS_HAS_BLOCKS(inode)) {
> +		ret = -EFBIG;
> +		goto out;
> +	}
> +
> +	/* Let's allow file pinning on zoned device. */
> +	if (!f2fs_sb_has_blkzoned(sbi) &&
> +	    f2fs_should_update_outplace(inode, NULL)) {
>   		ret = -EINVAL;
>   		goto out;
>   	}
> @@ -3231,7 +3243,7 @@ static int f2fs_ioc_set_pin_file(struct file *filp, unsigned long arg)
>   	set_inode_flag(inode, FI_PIN_FILE);
>   	ret = F2FS_I(inode)->i_gc_failures[GC_FAILURE_PIN];
>   done:
> -	f2fs_update_time(F2FS_I_SB(inode), REQ_TIME);
> +	f2fs_update_time(sbi, REQ_TIME);
>   out:
>   	inode_unlock(inode);
>   	mnt_drop_write_file(filp);
> diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c
> index a089a938355b..3ff126316d42 100644
> --- a/fs/f2fs/gc.c
> +++ b/fs/f2fs/gc.c
> @@ -1961,10 +1961,12 @@ void f2fs_build_gc_manager(struct f2fs_sb_info *sbi)
>   	init_atgc_management(sbi);
>   }
>   
> -static int f2fs_gc_range(struct f2fs_sb_info *sbi,
> -		unsigned int start_seg, unsigned int end_seg, bool dry_run)
> +int f2fs_gc_range(struct f2fs_sb_info *sbi,
> +		unsigned int start_seg, unsigned int end_seg,
> +		bool dry_run, unsigned int dry_run_sections)
>   {
>   	unsigned int segno;
> +	unsigned int gc_secs = dry_run_sections;
>   
>   	for (segno = start_seg; segno <= end_seg; segno += SEGS_PER_SEC(sbi)) {
>   		struct gc_inode_list gc_list = {
> @@ -1972,11 +1974,15 @@ static int f2fs_gc_range(struct f2fs_sb_info *sbi,
>   			.iroot = RADIX_TREE_INIT(gc_list.iroot, GFP_NOFS),
>   		};
>   
> -		do_garbage_collect(sbi, segno, &gc_list, FG_GC, true);
> +		do_garbage_collect(sbi, segno, &gc_list, FG_GC,
> +						dry_run_sections == 0);
>   		put_gc_inode(&gc_list);
>   
>   		if (!dry_run && get_valid_blocks(sbi, segno, true))
>   			return -EAGAIN;
> +		if (dry_run && dry_run_sections &&
> +		    !get_valid_blocks(sbi, segno, true) && --gc_secs == 0)
> +			break;
>   
>   		if (fatal_signal_pending(current))
>   			return -ERESTARTSYS;
> @@ -2014,7 +2020,7 @@ static int free_segment_range(struct f2fs_sb_info *sbi,
>   		f2fs_allocate_segment_for_resize(sbi, type, start, end);
>   
>   	/* do GC to move out valid blocks in the range */
> -	err = f2fs_gc_range(sbi, start, end, dry_run);
> +	err = f2fs_gc_range(sbi, start, end, dry_run, 0);
>   	if (err || dry_run)
>   		goto out;
>   
> diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
> index 4e985750c938..0b72c8536ccf 100644
> --- a/fs/f2fs/segment.c
> +++ b/fs/f2fs/segment.c
> @@ -2632,7 +2632,7 @@ static int is_next_segment_free(struct f2fs_sb_info *sbi,
>    * This function should be returned with success, otherwise BUG
>    */
>   static void get_new_segment(struct f2fs_sb_info *sbi,
> -			unsigned int *newseg, bool new_sec)
> +			unsigned int *newseg, bool new_sec, bool pinning)
>   {
>   	struct free_segmap_info *free_i = FREE_I(sbi);
>   	unsigned int segno, secno, zoneno;
> @@ -2650,6 +2650,16 @@ static void get_new_segment(struct f2fs_sb_info *sbi,
>   		if (segno < GET_SEG_FROM_SEC(sbi, hint + 1))
>   			goto got_it;
>   	}
> +
> +	/*
> +	 * If we format f2fs on zoned storage, let's try to get pinned sections
> +	 * from beginning of the storage, which should be a conventional one.
> +	 */
> +	if (f2fs_sb_has_blkzoned(sbi)) {
> +		segno = pinning ? 0 : max(first_zoned_segno(sbi), *newseg);
> +		hint = GET_SEC_FROM_SEG(sbi, segno);
> +	}
> +
>   find_other_zone:
>   	secno = find_next_zero_bit(free_i->free_secmap, MAIN_SECS(sbi), hint);
>   	if (secno >= MAIN_SECS(sbi)) {
> @@ -2749,21 +2759,30 @@ static unsigned int __get_next_segno(struct f2fs_sb_info *sbi, int type)
>    * Allocate a current working segment.
>    * This function always allocates a free segment in LFS manner.
>    */
> -static void new_curseg(struct f2fs_sb_info *sbi, int type, bool new_sec)
> +static int new_curseg(struct f2fs_sb_info *sbi, int type, bool new_sec)
>   {
>   	struct curseg_info *curseg = CURSEG_I(sbi, type);
>   	unsigned int segno = curseg->segno;
> +	bool pinning = type == CURSEG_COLD_DATA_PINNED;
>   
>   	if (curseg->inited)
>   		write_sum_page(sbi, curseg->sum_blk, GET_SUM_BLOCK(sbi, segno));
> +
>   	segno = __get_next_segno(sbi, type);
> -	get_new_segment(sbi, &segno, new_sec);
> +	get_new_segment(sbi, &segno, new_sec, pinning);
> +	if (new_sec && pinning &&
> +	    !f2fs_valid_pinned_area(sbi, START_BLOCK(sbi, segno))) {
> +		__set_free(sbi, segno);
> +		return -EAGAIN;
> +	}
> +
>   	curseg->next_segno = segno;
>   	reset_curseg(sbi, type, 1);
>   	curseg->alloc_type = LFS;
>   	if (F2FS_OPTION(sbi).fs_mode == FS_MODE_FRAGMENT_BLK)
>   		curseg->fragment_remained_chunk =
>   				get_random_u32_inclusive(1, sbi->max_fragment_chunk);
> +	return 0;
>   }
>   
>   static int __next_free_blkoff(struct f2fs_sb_info *sbi,
> @@ -3036,7 +3055,7 @@ void f2fs_allocate_segment_for_resize(struct f2fs_sb_info *sbi, int type,
>   	f2fs_up_read(&SM_I(sbi)->curseg_lock);
>   }
>   
> -static void __allocate_new_segment(struct f2fs_sb_info *sbi, int type,
> +static int __allocate_new_segment(struct f2fs_sb_info *sbi, int type,
>   						bool new_sec, bool force)
>   {
>   	struct curseg_info *curseg = CURSEG_I(sbi, type);
> @@ -3046,21 +3065,49 @@ static void __allocate_new_segment(struct f2fs_sb_info *sbi, int type,
>   	    !curseg->next_blkoff &&
>   	    !get_valid_blocks(sbi, curseg->segno, new_sec) &&
>   	    !get_ckpt_valid_blocks(sbi, curseg->segno, new_sec))
> -		return;
> +		return 0;
>   
>   	old_segno = curseg->segno;
> -	new_curseg(sbi, type, true);
> +	if (new_curseg(sbi, type, true))
> +		return -EAGAIN;
>   	stat_inc_seg_type(sbi, curseg);
>   	locate_dirty_segment(sbi, old_segno);
> +	return 0;
>   }
>   
> -void f2fs_allocate_new_section(struct f2fs_sb_info *sbi, int type, bool force)
> +int f2fs_allocate_new_section(struct f2fs_sb_info *sbi, int type, bool force)
>   {
> +	int ret;
> +
>   	f2fs_down_read(&SM_I(sbi)->curseg_lock);
>   	down_write(&SIT_I(sbi)->sentry_lock);
> -	__allocate_new_segment(sbi, type, true, force);
> +	ret = __allocate_new_segment(sbi, type, true, force);
>   	up_write(&SIT_I(sbi)->sentry_lock);
>   	f2fs_up_read(&SM_I(sbi)->curseg_lock);
> +
> +	return ret;
> +}
> +
> +int f2fs_allocate_pinning_section(struct f2fs_sb_info *sbi)
> +{
> +	int err;
> +	bool gc_required = true;
> +
> +retry:
> +	f2fs_lock_op(sbi);
> +	err = f2fs_allocate_new_section(sbi, CURSEG_COLD_DATA_PINNED, false);
> +	f2fs_unlock_op(sbi);
> +
> +	if (f2fs_sb_has_blkzoned(sbi) && err && gc_required) {
> +		f2fs_down_write(&sbi->gc_lock);
> +		f2fs_gc_range(sbi, 0, GET_SEGNO(sbi, FDEV(0).end_blk), true, 1);
> +		f2fs_up_write(&sbi->gc_lock);
> +
> +		gc_required = false;
> +		goto retry;
> +	}
> +
> +	return err;
>   }
>   
>   void f2fs_allocate_new_segments(struct f2fs_sb_info *sbi)
> @@ -3426,6 +3473,10 @@ void f2fs_allocate_data_block(struct f2fs_sb_info *sbi, struct page *page,
>   	 * new segment.
>   	 */
>   	if (segment_full) {
> +		if (type == CURSEG_COLD_DATA_PINNED &&
> +		    !((curseg->segno + 1) % sbi->segs_per_sec))
> +			goto skip_new_segment;

Before we skip allocating a new segment for the pinned log, how about
tagging curseg as uninitialized via curseg->inited = false and
curseg->segno = NULL_SEGNO? That way we can keep
__f2fs_save_inmem_curseg() from touching this log, and avoid showing an
incorrect segno for the pinned log in /sys/kernel/debug/f2fs/status.

Thanks,

> +
>   		if (from_gc) {
>   			get_atssr_segment(sbi, type, se->type,
>   						AT_SSR, se->mtime);
> @@ -3437,6 +3488,8 @@ void f2fs_allocate_data_block(struct f2fs_sb_info *sbi, struct page *page,
>   			stat_inc_seg_type(sbi, curseg);
>   		}
>   	}
> +
> +skip_new_segment:
>   	/*
>   	 * segment dirty status should be updated after segment allocation,
>   	 * so we just need to update status only one time after previous
> diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h
> index 60d93a16f2ac..953af072915f 100644
> --- a/fs/f2fs/segment.h
> +++ b/fs/f2fs/segment.h
> @@ -942,3 +942,13 @@ static inline void wake_up_discard_thread(struct f2fs_sb_info *sbi, bool force)
>   	dcc->discard_wake = true;
>   	wake_up_interruptible_all(&dcc->discard_wait_queue);
>   }
> +
> +static inline unsigned int first_zoned_segno(struct f2fs_sb_info *sbi)
> +{
> +	int devi;
> +
> +	for (devi = 0; devi < sbi->s_ndevs; devi++)
> +		if (bdev_is_zoned(FDEV(devi).bdev))
> +			return GET_SEGNO(sbi, FDEV(devi).start_blk);
> +	return 0;
> +}

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ