lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <136bad1c-f91c-4730-88e6-3186d9c7337e@kernel.org>
Date: Wed, 15 Jan 2025 10:12:51 +0800
From: Chao Yu <chao@...nel.org>
To: Jaegeuk Kim <jaegeuk@...nel.org>
Cc: chao@...nel.org, linux-kernel@...r.kernel.org,
 linux-f2fs-devel@...ts.sourceforge.net
Subject: Re: [f2fs-dev] [PATCH 1/2] f2fs: register inodes which is able to
 donate pages

On 1/15/25 01:15, Jaegeuk Kim wrote:
> On 01/14, Chao Yu wrote:
>> On 1/14/25 02:39, Jaegeuk Kim via Linux-f2fs-devel wrote:
>>> This patch introduces an inode list to keep the page cache ranges that users
>>> can donate pages together.
>>>
>>>    #define F2FS_IOC_DONATE_RANGE		_IOW(F2FS_IOCTL_MAGIC, 27,	\
>>> 						struct f2fs_donate_range)
>>>    struct f2fs_donate_range {
>>> 	__u64 start;
>>> 	__u64 len;
>>>    };
>>>
>>> e.g., ioctl(F2FS_IOC_DONATE_RANGE, &range);
>>
>> I guess we need to add documentation for all ioctls including this one, maybe
>> later? :)
> 
> Yeah, later.
> 
>>
>>>
>>> Signed-off-by: Jaegeuk Kim <jaegeuk@...nel.org>
>>> ---
>>>    fs/f2fs/debug.c           |  3 +++
>>>    fs/f2fs/f2fs.h            |  9 +++++++-
>>>    fs/f2fs/file.c            | 48 +++++++++++++++++++++++++++++++++++++++
>>>    fs/f2fs/inode.c           | 14 ++++++++++++
>>>    fs/f2fs/super.c           |  1 +
>>>    include/uapi/linux/f2fs.h |  7 ++++++
>>>    6 files changed, 81 insertions(+), 1 deletion(-)
>>>
>>> diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c
>>> index 468828288a4a..1b099c123670 100644
>>> --- a/fs/f2fs/debug.c
>>> +++ b/fs/f2fs/debug.c
>>> @@ -164,6 +164,7 @@ static void update_general_status(struct f2fs_sb_info *sbi)
>>>    	si->ndirty_imeta = get_pages(sbi, F2FS_DIRTY_IMETA);
>>>    	si->ndirty_dirs = sbi->ndirty_inode[DIR_INODE];
>>>    	si->ndirty_files = sbi->ndirty_inode[FILE_INODE];
>>> +	si->ndonate_files = sbi->ndirty_inode[DONATE_INODE];
>>>    	si->nquota_files = sbi->nquota_files;
>>>    	si->ndirty_all = sbi->ndirty_inode[DIRTY_META];
>>>    	si->aw_cnt = atomic_read(&sbi->atomic_files);
>>> @@ -501,6 +502,8 @@ static int stat_show(struct seq_file *s, void *v)
>>>    			   si->compr_inode, si->compr_blocks);
>>>    		seq_printf(s, "  - Swapfile Inode: %u\n",
>>>    			   si->swapfile_inode);
>>> +		seq_printf(s, "  - Donate Inode: %d\n",
>>
>> %u instead of %d due to si->ndonate_files is type of unsigned int.
>>
>>> +			   si->ndonate_files);
>>>    		seq_printf(s, "  - Orphan/Append/Update Inode: %u, %u, %u\n",
>>>    			   si->orphans, si->append, si->update);
>>>    		seq_printf(s, "\nMain area: %d segs, %d secs %d zones\n",
>>> diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
>>> index 4bfe162eefd3..7ce3e3eab17a 100644
>>> --- a/fs/f2fs/f2fs.h
>>> +++ b/fs/f2fs/f2fs.h
>>> @@ -850,6 +850,11 @@ struct f2fs_inode_info {
>>>    #endif
>>>    	struct list_head dirty_list;	/* dirty list for dirs and files */
>>>    	struct list_head gdirty_list;	/* linked in global dirty list */
>>> +
>>> +	/* linked in global inode list for cache donation */
>>> +	struct list_head gdonate_list;
>>> +	loff_t donate_start, donate_end; /* inclusive */
>>> +
>>>    	struct task_struct *atomic_write_task;	/* store atomic write task */
>>>    	struct extent_tree *extent_tree[NR_EXTENT_CACHES];
>>>    					/* cached extent_tree entry */
>>> @@ -1274,6 +1279,7 @@ enum inode_type {
>>>    	DIR_INODE,			/* for dirty dir inode */
>>>    	FILE_INODE,			/* for dirty regular/symlink inode */
>>>    	DIRTY_META,			/* for all dirtied inode metadata */
>>> +	DONATE_INODE,			/* for all inode to donate pages */
>>>    	NR_INODE_TYPE,
>>>    };
>>> @@ -3984,7 +3990,8 @@ struct f2fs_stat_info {
>>>    	unsigned long long allocated_data_blocks;
>>>    	int ndirty_node, ndirty_dent, ndirty_meta, ndirty_imeta;
>>>    	int ndirty_data, ndirty_qdata;
>>> -	unsigned int ndirty_dirs, ndirty_files, nquota_files, ndirty_all;
>>> +	unsigned int ndirty_dirs, ndirty_files, ndirty_all;
>>> +	unsigned int nquota_files, ndonate_files;
>>>    	int nats, dirty_nats, sits, dirty_sits;
>>>    	int free_nids, avail_nids, alloc_nids;
>>>    	int total_count, utilization;
>>> diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
>>> index 9980d17ef9f5..d6dea6258c2d 100644
>>> --- a/fs/f2fs/file.c
>>> +++ b/fs/f2fs/file.c
>>> @@ -2493,6 +2493,51 @@ static int f2fs_ioc_shutdown(struct file *filp, unsigned long arg)
>>>    	return ret;
>>>    }
>>> +static int f2fs_ioc_donate_range(struct file *filp, unsigned long arg)
>>> +{
>>> +	struct inode *inode = file_inode(filp);
>>> +	struct mnt_idmap *idmap = file_mnt_idmap(filp);
>>> +	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
>>> +	struct f2fs_donate_range range;
>>> +	int ret;
>>> +
>>> +	if (copy_from_user(&range, (struct f2fs_donate_range __user *)arg,
>>> +							sizeof(range)))
>>> +		return -EFAULT;
>>
>> What about doing sanity check on donate range here? in order to avoid overflow
>> during fi->donate_end calculation.
>>
>> F2FS_I(inode)->donate_end = range.start + range.len - 1;
>>
>>> +
>>> +	if (!inode_owner_or_capable(idmap, inode))
>>> +		return -EACCES;
>>> +
>>> +	if (!S_ISREG(inode->i_mode))
>>> +		return -EINVAL;
>>> +
>>> +	ret = mnt_want_write_file(filp);
>>> +	if (ret)
>>> +		return ret;
>>> +
>>> +	inode_lock(inode);
>>> +
>>> +	if (f2fs_is_atomic_file(inode))
>>> +		goto out;
>>> +
>>> +	spin_lock(&sbi->inode_lock[DONATE_INODE]);
>>> +	if (list_empty(&F2FS_I(inode)->gdonate_list)) {
>>> +		list_add_tail(&F2FS_I(inode)->gdonate_list,
>>> +				&sbi->inode_list[DONATE_INODE]);
>>> +		stat_inc_dirty_inode(sbi, DONATE_INODE);
>>> +	} else {
>>> +		list_move_tail(&F2FS_I(inode)->gdonate_list,
>>> +				&sbi->inode_list[DONATE_INODE]);
>>> +	}
>>> +	F2FS_I(inode)->donate_start = range.start;
>>> +	F2FS_I(inode)->donate_end = range.start + range.len - 1;
>>> +	spin_unlock(&sbi->inode_lock[DONATE_INODE]);
>>> +out:
>>> +	inode_unlock(inode);
>>> +	mnt_drop_write_file(filp);
>>> +	return ret;
>>> +}
>>> +
>>>    static int f2fs_ioc_fitrim(struct file *filp, unsigned long arg)
>>>    {
>>>    	struct inode *inode = file_inode(filp);
>>> @@ -4522,6 +4567,8 @@ static long __f2fs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
>>>    		return -EOPNOTSUPP;
>>>    	case F2FS_IOC_SHUTDOWN:
>>>    		return f2fs_ioc_shutdown(filp, arg);
>>> +	case F2FS_IOC_DONATE_RANGE:
>>> +		return f2fs_ioc_donate_range(filp, arg);
>>>    	case FITRIM:
>>>    		return f2fs_ioc_fitrim(filp, arg);
>>>    	case FS_IOC_SET_ENCRYPTION_POLICY:
>>> @@ -5273,6 +5320,7 @@ long f2fs_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
>>>    	case F2FS_IOC_RELEASE_VOLATILE_WRITE:
>>>    	case F2FS_IOC_ABORT_ATOMIC_WRITE:
>>>    	case F2FS_IOC_SHUTDOWN:
>>> +	case F2FS_IOC_DONATE_RANGE:
>>>    	case FITRIM:
>>>    	case FS_IOC_SET_ENCRYPTION_POLICY:
>>>    	case FS_IOC_GET_ENCRYPTION_PWSALT:
>>> diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c
>>> index 7de33da8b3ea..e38dc5fe2f2e 100644
>>> --- a/fs/f2fs/inode.c
>>> +++ b/fs/f2fs/inode.c
>>> @@ -804,6 +804,19 @@ int f2fs_write_inode(struct inode *inode, struct writeback_control *wbc)
>>>    	return 0;
>>>    }
>>> +static void f2fs_remove_donate_inode(struct inode *inode)
>>> +{
>>> +	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
>>> +
>>> +	if (list_empty(&F2FS_I(inode)->gdonate_list))
>>
>> It will be more safe to access gdonate_list w/ inode_lock[DONATE_INODE]?
> 
> It's unnecessary as this is called from evict_inode.

I just concerned about the case fi->gdonate_list's prev and next pointer can
be updated in race condition due to insertion or deletion of its adjacent entry.

No risk now as I checked. :)

Thanks,

> 
>>
>> Thanks,
>>
>>> +		return;
>>> +
>>> +	spin_lock(&sbi->inode_lock[DONATE_INODE]);
>>> +	list_del_init(&F2FS_I(inode)->gdonate_list);
>>> +	stat_dec_dirty_inode(sbi, DONATE_INODE);
>>> +	spin_unlock(&sbi->inode_lock[DONATE_INODE]);
>>> +}
>>> +
>>>    /*
>>>     * Called at the last iput() if i_nlink is zero
>>>     */
>>> @@ -838,6 +851,7 @@ void f2fs_evict_inode(struct inode *inode)
>>>    	f2fs_bug_on(sbi, get_dirty_pages(inode));
>>>    	f2fs_remove_dirty_inode(inode);
>>> +	f2fs_remove_donate_inode(inode);
>>>    	if (!IS_DEVICE_ALIASING(inode))
>>>    		f2fs_destroy_extent_tree(inode);
>>> diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
>>> index fc7d463dee15..ef639a6d82e5 100644
>>> --- a/fs/f2fs/super.c
>>> +++ b/fs/f2fs/super.c
>>> @@ -1441,6 +1441,7 @@ static struct inode *f2fs_alloc_inode(struct super_block *sb)
>>>    	spin_lock_init(&fi->i_size_lock);
>>>    	INIT_LIST_HEAD(&fi->dirty_list);
>>>    	INIT_LIST_HEAD(&fi->gdirty_list);
>>> +	INIT_LIST_HEAD(&fi->gdonate_list);
>>>    	init_f2fs_rwsem(&fi->i_gc_rwsem[READ]);
>>>    	init_f2fs_rwsem(&fi->i_gc_rwsem[WRITE]);
>>>    	init_f2fs_rwsem(&fi->i_xattr_sem);
>>> diff --git a/include/uapi/linux/f2fs.h b/include/uapi/linux/f2fs.h
>>> index f7aaf8d23e20..cd38a7c166e6 100644
>>> --- a/include/uapi/linux/f2fs.h
>>> +++ b/include/uapi/linux/f2fs.h
>>> @@ -44,6 +44,8 @@
>>>    #define F2FS_IOC_COMPRESS_FILE		_IO(F2FS_IOCTL_MAGIC, 24)
>>>    #define F2FS_IOC_START_ATOMIC_REPLACE	_IO(F2FS_IOCTL_MAGIC, 25)
>>>    #define F2FS_IOC_GET_DEV_ALIAS_FILE	_IOR(F2FS_IOCTL_MAGIC, 26, __u32)
>>> +#define F2FS_IOC_DONATE_RANGE		_IOW(F2FS_IOCTL_MAGIC, 27,	\
>>> +						struct f2fs_donate_range)
>>>    /*
>>>     * should be same as XFS_IOC_GOINGDOWN.
>>> @@ -97,4 +99,9 @@ struct f2fs_comp_option {
>>>    	__u8 log_cluster_size;
>>>    };
>>> +struct f2fs_donate_range {
>>> +	__u64 start;
>>> +	__u64 len;
>>> +};
>>> +
>>>    #endif /* _UAPI_LINUX_F2FS_H */


Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ