linux-kernel - Re: [PATCH] f2fs: let fstrim issue discard commands in lower priority

lists.openwall.net		lists / announce owl-users owl-dev john-users john-dev passwdqc-users yescrypt popa3d-users / oss-security kernel-hardening musl sabotage tlsify passwords / crypt-dev xvendor / Bugtraq Full-Disclosure linux-kernel linux-netdev linux-ext4 linux-hardening linux-cve-announce PHC
Open Source and information security mailing list archives
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Date:   Fri, 25 May 2018 14:47:12 +0800
From:   Chao Yu <yuchao0@...wei.com>
To:     Jaegeuk Kim <jaegeuk@...nel.org>
CC:     <linux-kernel@...r.kernel.org>,
        <linux-f2fs-devel@...ts.sourceforge.net>
Subject: Re: [PATCH] f2fs: let fstrim issue discard commands in lower priority

On 2018/5/25 14:18, Jaegeuk Kim wrote:
> On 05/25, Chao Yu wrote:
>> Hi Jaegeuk,
>>
>> On 2018/5/25 13:10, Jaegeuk Kim wrote:
>>> The fstrim gathers huge number of large discard commands, and tries to issue
>>> without IO awareness, which results in long user-perceive IO latencies on
>>> READ, WRITE, and FLUSH in UFS. We've observed some of commands take several
>>> seconds due to long discard latency.
>>>
>>> This patch limits the maximum size to 2MB per candidate, and check IO congestion
>>> when issuing them to disk.
>>>
>>> Signed-off-by: Jaegeuk Kim <jaegeuk@...nel.org>
>>> ---
>>>  fs/f2fs/f2fs.h    |   4 +-
>>>  fs/f2fs/segment.c | 123 +++++++++++++++++++++++-----------------------
>>>  2 files changed, 64 insertions(+), 63 deletions(-)
>>>
>>> diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
>>> index 3bddf13794d9..75ae7fc86ae8 100644
>>> --- a/fs/f2fs/f2fs.h
>>> +++ b/fs/f2fs/f2fs.h
>>> @@ -178,6 +178,7 @@ enum {
>>>  
>>>  #define MAX_DISCARD_BLOCKS(sbi)		BLKS_PER_SEC(sbi)
>>>  #define DEF_MAX_DISCARD_REQUEST		8	/* issue 8 discards per round */
>>> +#define DEF_MAX_DISCARD_LEN		512	/* Max. 2MB per discard */
>>>  #define DEF_MIN_DISCARD_ISSUE_TIME	50	/* 50 ms, if exists */
>>>  #define DEF_MID_DISCARD_ISSUE_TIME	500	/* 500 ms, if device busy */
>>>  #define DEF_MAX_DISCARD_ISSUE_TIME	60000	/* 60 s, if no candidates */
>>> @@ -698,7 +699,8 @@ static inline void set_extent_info(struct extent_info *ei, unsigned int fofs,
>>>  static inline bool __is_discard_mergeable(struct discard_info *back,
>>>  						struct discard_info *front)
>>>  {
>>> -	return back->lstart + back->len == front->lstart;
>>> +	return (back->lstart + back->len == front->lstart) &&
>>> +		(back->len + front->len < DEF_MAX_DISCARD_LEN);
>>>  }
>>>  
>>>  static inline bool __is_discard_back_mergeable(struct discard_info *cur,
>>> diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
>>> index 843fc2e6d41c..ba996d4091bc 100644
>>> --- a/fs/f2fs/segment.c
>>> +++ b/fs/f2fs/segment.c
>>> @@ -1139,68 +1139,6 @@ static int __queue_discard_cmd(struct f2fs_sb_info *sbi,
>>>  	return 0;
>>>  }
>>>  
>>> -static void __issue_discard_cmd_range(struct f2fs_sb_info *sbi,
>>> -					struct discard_policy *dpolicy,
>>> -					unsigned int start, unsigned int end)
>>> -{
>>> -	struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
>>> -	struct discard_cmd *prev_dc = NULL, *next_dc = NULL;
>>> -	struct rb_node **insert_p = NULL, *insert_parent = NULL;
>>> -	struct discard_cmd *dc;
>>> -	struct blk_plug plug;
>>> -	int issued;
>>> -
>>> -next:
>>> -	issued = 0;
>>> -
>>> -	mutex_lock(&dcc->cmd_lock);
>>> -	f2fs_bug_on(sbi, !__check_rb_tree_consistence(sbi, &dcc->root));
>>> -
>>> -	dc = (struct discard_cmd *)__lookup_rb_tree_ret(&dcc->root,
>>> -					NULL, start,
>>> -					(struct rb_entry **)&prev_dc,
>>> -					(struct rb_entry **)&next_dc,
>>> -					&insert_p, &insert_parent, true);
>>> -	if (!dc)
>>> -		dc = next_dc;
>>> -
>>> -	blk_start_plug(&plug);
>>> -
>>> -	while (dc && dc->lstart <= end) {
>>> -		struct rb_node *node;
>>> -
>>> -		if (dc->len < dpolicy->granularity)
>>> -			goto skip;
>>> -
>>> -		if (dc->state != D_PREP) {
>>> -			list_move_tail(&dc->list, &dcc->fstrim_list);
>>> -			goto skip;
>>> -		}
>>> -
>>> -		__submit_discard_cmd(sbi, dpolicy, dc);
>>> -
>>> -		if (++issued >= dpolicy->max_requests) {
>>> -			start = dc->lstart + dc->len;
>>> -
>>> -			blk_finish_plug(&plug);
>>> -			mutex_unlock(&dcc->cmd_lock);
>>> -
>>> -			schedule();
>>> -
>>> -			goto next;
>>> -		}
>>> -skip:
>>> -		node = rb_next(&dc->rb_node);
>>> -		dc = rb_entry_safe(node, struct discard_cmd, rb_node);
>>> -
>>> -		if (fatal_signal_pending(current))
>>> -			break;
>>> -	}
>>> -
>>> -	blk_finish_plug(&plug);
>>> -	mutex_unlock(&dcc->cmd_lock);
>>> -}
>>> -
>>>  static int __issue_discard_cmd(struct f2fs_sb_info *sbi,
>>>  					struct discard_policy *dpolicy)
>>>  {
>>> @@ -2397,6 +2335,67 @@ bool exist_trim_candidates(struct f2fs_sb_info *sbi, struct cp_control *cpc)
>>>  	return has_candidate;
>>>  }
>>>  
>>> +static void __issue_discard_cmd_range(struct f2fs_sb_info *sbi,
>>> +					struct discard_policy *dpolicy,
>>> +					unsigned int start, unsigned int end)
>>> +{
>>> +	struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
>>> +	struct discard_cmd *prev_dc = NULL, *next_dc = NULL;
>>> +	struct rb_node **insert_p = NULL, *insert_parent = NULL;
>>> +	struct discard_cmd *dc;
>>> +	struct blk_plug plug;
>>> +	int issued;
>>
>> unsigned int cur = start;
>>
>>> +
>>> +next:
>>> +	issued = 0;
>>> +
>>> +	mutex_lock(&dcc->cmd_lock);
>>> +	f2fs_bug_on(sbi, !__check_rb_tree_consistence(sbi, &dcc->root));
>>> +
>>> +	dc = (struct discard_cmd *)__lookup_rb_tree_ret(&dcc->root,
>>> +					NULL, start,
>>
>> NULL, cur,
>>
>>> +					(struct rb_entry **)&prev_dc,
>>> +					(struct rb_entry **)&next_dc,
>>> +					&insert_p, &insert_parent, true);
>>> +	if (!dc)
>>> +		dc = next_dc;
>>> +
>>> +	blk_start_plug(&plug);
>>> +
>>> +	while (dc && dc->lstart <= end) {
>>> +		struct rb_node *node;
>>> +
>>> +		if (dc->len < dpolicy->granularity)
>>> +			goto skip;
>>> +
>>> +		if (dc->state != D_PREP) {
>>> +			list_move_tail(&dc->list, &dcc->fstrim_list);
>>> +			goto skip;
>>> +		}
>>> +
>>> +		__submit_discard_cmd(sbi, dpolicy, dc);
>>> +
>>> +		if (++issued >= dpolicy->max_requests) {
>>> +			start = dc->lstart + dc->len;
>>
>> cur = dc->lstart + dc->len;
>>
>>> +
>>> +			blk_finish_plug(&plug);
>>> +			mutex_unlock(&dcc->cmd_lock);
>>> +			__wait_all_discard_cmd(sbi, dpolicy);
>>
>> How about moving __wait_discard_cmd_range(sbi, &dpolicy, start, end) from
>> f2fs_trim_fs here, this can avoid wait non-fstrim discard commands.

Sorry, in fstrim, __wait_all_discard_cmd already just wait discard commands
which are issued by itself.

> 
> The key here is to avoid any outstanding discard commands.

You want to wait all in-flight discard commands? So we need to wrap one other
function for that.

static void wait_all_discard_cmd(struct f2fs_sb_info *sbi,
				struct discard_policy *dpolicy, bool wait_all)
{
	if (dpolicy->type == DPOLICY_FSTRIM || wait_all)
		__wait_discard_cmd_range(sbi, dpolicy, dcc->fstrim_list);
	if (dpolicy->type != DPOLICY_FSTRIM || wait_all)
		__wait_discard_cmd_range(sbi, dpolicy, dcc->wait_list);
}

> 
>>
>>> +			congestion_wait(BLK_RW_ASYNC, HZ/50);
>>
>> What about using msleep(dpolicy->min_interval) instead?
> 
> We need to detect congestion in block layer.

OK.

Thanks,

> 
> Thanks,
> 
>>
>> Thanks,
>>
>>> +			goto next;
>>> +		}
>>> +skip:
>>> +		node = rb_next(&dc->rb_node);
>>> +		dc = rb_entry_safe(node, struct discard_cmd, rb_node);
>>> +
>>> +		if (fatal_signal_pending(current))
>>> +			break;
>>> +	}
>>> +
>>> +	blk_finish_plug(&plug);
>>> +	mutex_unlock(&dcc->cmd_lock);
>>> +}
>>> +
>>>  int f2fs_trim_fs(struct f2fs_sb_info *sbi, struct fstrim_range *range)
>>>  {
>>>  	__u64 start = F2FS_BYTES_TO_BLK(range->start);
>>>
> 
> .
>