lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [day] [month] [year] [list]
Message-ID: <20241121114950.5ie64l3lmi3dkoz5@quack3>
Date: Thu, 21 Nov 2024 12:49:50 +0100
From: Jan Kara <jack@...e.cz>
To: Jim Zhao <jimzhao.ai@...il.com>
Cc: jack@...e.cz, akpm@...ux-foundation.org, linux-fsdevel@...r.kernel.org,
	linux-kernel@...r.kernel.org, linux-mm@...ck.org,
	willy@...radead.org
Subject: Re: [PATCH v2] mm/page-writeback: raise wb_thresh to prevent write
 blocking with strictlimit

On Tue 19-11-24 19:44:42, Jim Zhao wrote:
> With the strictlimit flag, wb_thresh acts as a hard limit in
> balance_dirty_pages() and wb_position_ratio().  When device write
> operations are inactive, wb_thresh can drop to 0, causing writes to be
> blocked.  The issue occasionally occurs in fuse fs, particularly with
> network backends, where the write thread is frequently blocked for a period.
> To address it, this patch raises the minimum wb_thresh to a controllable
> level, similar to the non-strictlimit case.
> 
> Signed-off-by: Jim Zhao <jimzhao.ai@...il.com>
> ---
> Changes in v2:
> 1. Consolidate all wb_thresh bumping logic in __wb_calc_thresh for consistency;
> 2. Replace the limit variable with thresh for calculating the bump value,
> as __wb_calc_thresh is also used to calculate the background threshold;
> 3. Add domain_dirty_avail in wb_calc_thresh to get dtc->dirty.

Since the odd value of BdiDirtyThresh got explained (independent cosmetic
bug), feel free to add:

Reviewed-by: Jan Kara <jack@...e.cz>

								Honza

> ---
>  mm/page-writeback.c | 48 ++++++++++++++++++++++-----------------------
>  1 file changed, 23 insertions(+), 25 deletions(-)
> 
> diff --git a/mm/page-writeback.c b/mm/page-writeback.c
> index e5a9eb795f99..8b13bcb42de3 100644
> --- a/mm/page-writeback.c
> +++ b/mm/page-writeback.c
> @@ -917,7 +917,9 @@ static unsigned long __wb_calc_thresh(struct dirty_throttle_control *dtc,
>  				      unsigned long thresh)
>  {
>  	struct wb_domain *dom = dtc_dom(dtc);
> +	struct bdi_writeback *wb = dtc->wb;
>  	u64 wb_thresh;
> +	u64 wb_max_thresh;
>  	unsigned long numerator, denominator;
>  	unsigned long wb_min_ratio, wb_max_ratio;
>  
> @@ -931,11 +933,27 @@ static unsigned long __wb_calc_thresh(struct dirty_throttle_control *dtc,
>  	wb_thresh *= numerator;
>  	wb_thresh = div64_ul(wb_thresh, denominator);
>  
> -	wb_min_max_ratio(dtc->wb, &wb_min_ratio, &wb_max_ratio);
> +	wb_min_max_ratio(wb, &wb_min_ratio, &wb_max_ratio);
>  
>  	wb_thresh += (thresh * wb_min_ratio) / (100 * BDI_RATIO_SCALE);
> -	if (wb_thresh > (thresh * wb_max_ratio) / (100 * BDI_RATIO_SCALE))
> -		wb_thresh = thresh * wb_max_ratio / (100 * BDI_RATIO_SCALE);
> +
> +	/*
> +	 * It's very possible that wb_thresh is close to 0 not because the
> +	 * device is slow, but that it has remained inactive for long time.
> +	 * Honour such devices a reasonable good (hopefully IO efficient)
> +	 * threshold, so that the occasional writes won't be blocked and active
> +	 * writes can rampup the threshold quickly.
> +	 */
> +	if (thresh > dtc->dirty) {
> +		if (unlikely(wb->bdi->capabilities & BDI_CAP_STRICTLIMIT))
> +			wb_thresh = max(wb_thresh, (thresh - dtc->dirty) / 100);
> +		else
> +			wb_thresh = max(wb_thresh, (thresh - dtc->dirty) / 8);
> +	}
> +
> +	wb_max_thresh = thresh * wb_max_ratio / (100 * BDI_RATIO_SCALE);
> +	if (wb_thresh > wb_max_thresh)
> +		wb_thresh = wb_max_thresh;
>  
>  	return wb_thresh;
>  }
> @@ -944,6 +962,7 @@ unsigned long wb_calc_thresh(struct bdi_writeback *wb, unsigned long thresh)
>  {
>  	struct dirty_throttle_control gdtc = { GDTC_INIT(wb) };
>  
> +	domain_dirty_avail(&gdtc, true);
>  	return __wb_calc_thresh(&gdtc, thresh);
>  }
>  
> @@ -1120,12 +1139,6 @@ static void wb_position_ratio(struct dirty_throttle_control *dtc)
>  	if (unlikely(wb->bdi->capabilities & BDI_CAP_STRICTLIMIT)) {
>  		long long wb_pos_ratio;
>  
> -		if (dtc->wb_dirty < 8) {
> -			dtc->pos_ratio = min_t(long long, pos_ratio * 2,
> -					   2 << RATELIMIT_CALC_SHIFT);
> -			return;
> -		}
> -
>  		if (dtc->wb_dirty >= wb_thresh)
>  			return;
>  
> @@ -1196,14 +1209,6 @@ static void wb_position_ratio(struct dirty_throttle_control *dtc)
>  	 */
>  	if (unlikely(wb_thresh > dtc->thresh))
>  		wb_thresh = dtc->thresh;
> -	/*
> -	 * It's very possible that wb_thresh is close to 0 not because the
> -	 * device is slow, but that it has remained inactive for long time.
> -	 * Honour such devices a reasonable good (hopefully IO efficient)
> -	 * threshold, so that the occasional writes won't be blocked and active
> -	 * writes can rampup the threshold quickly.
> -	 */
> -	wb_thresh = max(wb_thresh, (limit - dtc->dirty) / 8);
>  	/*
>  	 * scale global setpoint to wb's:
>  	 *	wb_setpoint = setpoint * wb_thresh / thresh
> @@ -1459,17 +1464,10 @@ static void wb_update_dirty_ratelimit(struct dirty_throttle_control *dtc,
>  	 * balanced_dirty_ratelimit = task_ratelimit * write_bw / dirty_rate).
>  	 * Hence, to calculate "step" properly, we have to use wb_dirty as
>  	 * "dirty" and wb_setpoint as "setpoint".
> -	 *
> -	 * We rampup dirty_ratelimit forcibly if wb_dirty is low because
> -	 * it's possible that wb_thresh is close to zero due to inactivity
> -	 * of backing device.
>  	 */
>  	if (unlikely(wb->bdi->capabilities & BDI_CAP_STRICTLIMIT)) {
>  		dirty = dtc->wb_dirty;
> -		if (dtc->wb_dirty < 8)
> -			setpoint = dtc->wb_dirty + 1;
> -		else
> -			setpoint = (dtc->wb_thresh + dtc->wb_bg_thresh) / 2;
> +		setpoint = (dtc->wb_thresh + dtc->wb_bg_thresh) / 2;
>  	}
>  
>  	if (dirty < setpoint) {
> -- 
> 2.20.1
> 
-- 
Jan Kara <jack@...e.com>
SUSE Labs, CR

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ