[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <5584d4d5-73c8-2a12-f11e-6f19c216656b@huaweicloud.com>
Date: Wed, 20 Nov 2024 16:03:16 +0800
From: Kemeng Shi <shikemeng@...weicloud.com>
To: Jim Zhao <jimzhao.ai@...il.com>, jack@...e.cz
Cc: akpm@...ux-foundation.org, linux-fsdevel@...r.kernel.org,
linux-kernel@...r.kernel.org, linux-mm@...ck.org, willy@...radead.org
Subject: Re: [PATCH v2] mm/page-writeback: Raise wb_thresh to prevent write
blocking with strictlimit
on 11/19/2024 8:29 PM, Jim Zhao wrote:
> Thanks, Jan. I just sent patch v2; could you please review it?
>
> And I found the debug info in the bdi stats.
> The BdiDirtyThresh value may be greater than DirtyThresh, and after applying this patch, the value of BdiDirtyThresh could become even larger.
>
> without patch:
> ---
> root@...ntu:/sys/kernel/debug/bdi/8:0# cat stats
> BdiWriteback: 0 kB
> BdiReclaimable: 96 kB
> BdiDirtyThresh: 1346824 kB
> DirtyThresh: 673412 kB
> BackgroundThresh: 336292 kB
> BdiDirtied: 19872 kB
> BdiWritten: 19776 kB
> BdiWriteBandwidth: 0 kBps
> b_dirty: 0
> b_io: 0
> b_more_io: 0
> b_dirty_time: 0
> bdi_list: 1
> state: 1
>
> with patch:
> ---
> root@...ntu:/sys/kernel/debug/bdi/8:0# cat stats
> BdiWriteback: 96 kB
> BdiReclaimable: 192 kB
> BdiDirtyThresh: 3090736 kB
> DirtyThresh: 650716 kB
> BackgroundThresh: 324960 kB
> BdiDirtied: 472512 kB
> BdiWritten: 470592 kB
> BdiWriteBandwidth: 106268 kBps
> b_dirty: 2
> b_io: 0
> b_more_io: 0
> b_dirty_time: 0
> bdi_list: 1
> state: 1
>
>
> @kemeng, is this normal behavior or an issue?
Hello, this is not normal behavior. Could you also send the content of
wb_stats and the configured bdi_min_ratio?
I think improper use of bdi_min_ratio may be causing the issue.
Thanks,
Kemeng
>
> Thanks,
> Jim Zhao
>
>
>> With the strictlimit flag, wb_thresh acts as a hard limit in
>> balance_dirty_pages() and wb_position_ratio(). When device write
>> operations are inactive, wb_thresh can drop to 0, causing writes to be
>> blocked. The issue occasionally occurs in fuse fs, particularly with
>> network backends, where the write thread is frequently blocked for a period.
>> To address it, this patch raises the minimum wb_thresh to a controllable
>> level, similar to the non-strictlimit case.
>>
>> Signed-off-by: Jim Zhao <jimzhao.ai@...il.com>
>> ---
>> Changes in v2:
>> 1. Consolidate all wb_thresh bumping logic in __wb_calc_thresh for consistency;
>> 2. Replace the limit variable with thresh for calculating the bump value,
>> as __wb_calc_thresh is also used to calculate the background threshold;
>> 3. Add domain_dirty_avail in wb_calc_thresh to get dtc->dirty.
>> ---
>> mm/page-writeback.c | 48 ++++++++++++++++++++++-----------------------
>> 1 file changed, 23 insertions(+), 25 deletions(-)
>>
>> diff --git a/mm/page-writeback.c b/mm/page-writeback.c
>> index e5a9eb795f99..8b13bcb42de3 100644
>> --- a/mm/page-writeback.c
>> +++ b/mm/page-writeback.c
>> @@ -917,7 +917,9 @@ static unsigned long __wb_calc_thresh(struct dirty_throttle_control *dtc,
>> unsigned long thresh)
>> {
>> struct wb_domain *dom = dtc_dom(dtc);
>> + struct bdi_writeback *wb = dtc->wb;
>> u64 wb_thresh;
>> + u64 wb_max_thresh;
>> unsigned long numerator, denominator;
>> unsigned long wb_min_ratio, wb_max_ratio;
>>
>> @@ -931,11 +933,27 @@ static unsigned long __wb_calc_thresh(struct dirty_throttle_control *dtc,
>> wb_thresh *= numerator;
>> wb_thresh = div64_ul(wb_thresh, denominator);
>>
>> - wb_min_max_ratio(dtc->wb, &wb_min_ratio, &wb_max_ratio);
>> + wb_min_max_ratio(wb, &wb_min_ratio, &wb_max_ratio);
>>
>> wb_thresh += (thresh * wb_min_ratio) / (100 * BDI_RATIO_SCALE);
>> - if (wb_thresh > (thresh * wb_max_ratio) / (100 * BDI_RATIO_SCALE))
>> - wb_thresh = thresh * wb_max_ratio / (100 * BDI_RATIO_SCALE);
>> +
>> + /*
>> + * It's very possible that wb_thresh is close to 0 not because the
>> + * device is slow, but that it has remained inactive for long time.
>> + * Honour such devices a reasonable good (hopefully IO efficient)
>> + * threshold, so that the occasional writes won't be blocked and active
>> + * writes can rampup the threshold quickly.
>> + */
>> + if (thresh > dtc->dirty) {
>> + if (unlikely(wb->bdi->capabilities & BDI_CAP_STRICTLIMIT))
>> + wb_thresh = max(wb_thresh, (thresh - dtc->dirty) / 100);
>> + else
>> + wb_thresh = max(wb_thresh, (thresh - dtc->dirty) / 8);
>> + }
>> +
>> + wb_max_thresh = thresh * wb_max_ratio / (100 * BDI_RATIO_SCALE);
>> + if (wb_thresh > wb_max_thresh)
>> + wb_thresh = wb_max_thresh;
>>
>> return wb_thresh;
>> }
>> @@ -944,6 +962,7 @@ unsigned long wb_calc_thresh(struct bdi_writeback *wb, unsigned long thresh)
>> {
>> struct dirty_throttle_control gdtc = { GDTC_INIT(wb) };
>>
>> + domain_dirty_avail(&gdtc, true);
>> return __wb_calc_thresh(&gdtc, thresh);
>> }
>>
>> @@ -1120,12 +1139,6 @@ static void wb_position_ratio(struct dirty_throttle_control *dtc)
>> if (unlikely(wb->bdi->capabilities & BDI_CAP_STRICTLIMIT)) {
>> long long wb_pos_ratio;
>>
>> - if (dtc->wb_dirty < 8) {
>> - dtc->pos_ratio = min_t(long long, pos_ratio * 2,
>> - 2 << RATELIMIT_CALC_SHIFT);
>> - return;
>> - }
>> -
>> if (dtc->wb_dirty >= wb_thresh)
>> return;
>>
>> @@ -1196,14 +1209,6 @@ static void wb_position_ratio(struct dirty_throttle_control *dtc)
>> */
>> if (unlikely(wb_thresh > dtc->thresh))
>> wb_thresh = dtc->thresh;
>> - /*
>> - * It's very possible that wb_thresh is close to 0 not because the
>> - * device is slow, but that it has remained inactive for long time.
>> - * Honour such devices a reasonable good (hopefully IO efficient)
>> - * threshold, so that the occasional writes won't be blocked and active
>> - * writes can rampup the threshold quickly.
>> - */
>> - wb_thresh = max(wb_thresh, (limit - dtc->dirty) / 8);
>> /*
>> * scale global setpoint to wb's:
>> * wb_setpoint = setpoint * wb_thresh / thresh
>> @@ -1459,17 +1464,10 @@ static void wb_update_dirty_ratelimit(struct dirty_throttle_control *dtc,
>> * balanced_dirty_ratelimit = task_ratelimit * write_bw / dirty_rate).
>> * Hence, to calculate "step" properly, we have to use wb_dirty as
>> * "dirty" and wb_setpoint as "setpoint".
>> - *
>> - * We rampup dirty_ratelimit forcibly if wb_dirty is low because
>> - * it's possible that wb_thresh is close to zero due to inactivity
>> - * of backing device.
>> */
>> if (unlikely(wb->bdi->capabilities & BDI_CAP_STRICTLIMIT)) {
>> dirty = dtc->wb_dirty;
>> - if (dtc->wb_dirty < 8)
>> - setpoint = dtc->wb_dirty + 1;
>> - else
>> - setpoint = (dtc->wb_thresh + dtc->wb_bg_thresh) / 2;
>> + setpoint = (dtc->wb_thresh + dtc->wb_bg_thresh) / 2;
>> }
>>
>> if (dirty < setpoint) {
>> --
>> 2.20.1
>
Powered by blists - more mailing lists