[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <d78ec9ea-006a-4317-b0df-99ad31994b0d@bytedance.com>
Date: Fri, 12 Apr 2024 16:47:30 +0800
From: Qi Zheng <zhengqi.arch@...edance.com>
To: lipeifeng@...o.com
Cc: akpm@...ux-foundation.org, david@...morbit.com, roman.gushchin@...ux.dev,
muchun.song@...ux.dev, linux-mm@...ck.org, linux-kernel@...r.kernel.org
Subject: Re: [PATCH] mm/shrinker: add SHRINKER_NO_DIRECT_RECLAIM
Hi Peifeng,
On 2024/4/12 16:07, lipeifeng@...o.com wrote:
> From: Peifeng Li <lipeifeng@...o.com>
>
> In the case of insufficient memory, threads will be in direct_reclaim to
> reclaim memory, direct_reclaim will call shrink_slab to run sequentially
> each shrinker callback. If there is a lock-contention in the shrinker
> callback,such as spinlock,mutex_lock and so on, threads may be likely to
> be stuck in direct_reclaim for a long time, even if the memfree has reached
> the high watermarks of the zone, resulting in poor performance of threads.
>
> Example 1: shrinker callback may wait for spinlock
> static unsigned long mb_cache_shrink(struct mb_cache *cache,
> unsigned long nr_to_scan)
> {
> struct mb_cache_entry *entry;
> unsigned long shrunk = 0;
>
> spin_lock(&cache->c_list_lock);
> while (nr_to_scan-- && !list_empty(&cache->c_list)) {
> entry = list_first_entry(&cache->c_list,
> struct mb_cache_entry, e_list);
> if (test_bit(MBE_REFERENCED_B, &entry->e_flags) ||
> atomic_cmpxchg(&entry->e_refcnt, 1, 0) != 1) {
> clear_bit(MBE_REFERENCED_B, &entry->e_flags);
> list_move_tail(&entry->e_list, &cache->c_list);
> continue;
> }
> list_del_init(&entry->e_list);
> cache->c_entry_count--;
> spin_unlock(&cache->c_list_lock);
> __mb_cache_entry_free(cache, entry);
> shrunk++;
> cond_resched();
> spin_lock(&cache->c_list_lock);
> }
> spin_unlock(&cache->c_list_lock);
>
> return shrunk;
> }
> Example 2: shrinker callback may wait for mutex lock
> static
> unsigned long kbase_mem_evictable_reclaim_scan_objects(struct shrinker *s,
> struct shrink_control *sc)
> {
> struct kbase_context *kctx;
> struct kbase_mem_phy_alloc *alloc;
> struct kbase_mem_phy_alloc *tmp;
> unsigned long freed = 0;
>
> kctx = container_of(s, struct kbase_context, reclaim);
>
> // MTK add to prevent false alarm
> lockdep_off();
>
> mutex_lock(&kctx->jit_evict_lock);
>
> list_for_each_entry_safe(alloc, tmp, &kctx->evict_list, evict_node) {
> int err;
>
> err = kbase_mem_shrink_gpu_mapping(kctx, alloc->reg,
> 0, alloc->nents);
> if (err != 0) {
> freed = -1;
> goto out_unlock;
> }
>
> alloc->evicted = alloc->nents;
>
> kbase_free_phy_pages_helper(alloc, alloc->evicted);
> freed += alloc->evicted;
> list_del_init(&alloc->evict_node);
>
> kbase_jit_backing_lost(alloc->reg);
>
> if (freed > sc->nr_to_scan)
> break;
> }
> out_unlock:
> mutex_unlock(&kctx->jit_evict_lock);
>
> // MTK add to prevent false alarm
> lockdep_on();
>
> return freed;
> }
>
> In mobile-phone,threads are likely to be stuck in shrinker callback during
> direct_reclaim, with example like the following:
> <...>-2806 [004] ..... 866458.339840: mm_shrink_slab_start:
> dynamic_mem_shrink_scan+0x0/0xb8 ... priority 2
> <...>-2806 [004] ..... 866459.339933: mm_shrink_slab_end:
> dynamic_mem_shrink_scan+0x0/0xb8 ...
>
> For the above reason, the patch introduces SHRINKER_NO_DIRECT_RECLAIM that
> allows driver to set shrinker callback not to be called in direct_reclaim
> unless sc->priority is 0.
Hmm, this is just a workaround, no shrinker will want to set this flag.
If a shrinker has a lock contention problem, then it needs to be fixed.
Perhaps executing do_shrink_slab() asynchronously may be a more
fundamental solution, but this may result in untimely reclamation.
>
> The reason why sc->priority=0 allows shrinker callback to be called in
> direct_reclaim is for two reasons:
> 1.Always call all shrinker callback in drop_slab that priority is 0.
> 2.sc->priority is 0 during direct_reclaim, allow direct_reclaim to call
> shrinker callback, to reclaim memory timely.
>
> Note:
> 1.Register_shrinker_prepared() default not to set
This API is no longer included in the latest upstream code. Please
submit a patch based on the latest code.
Thanks,
Qi
> SHRINKER_NO_DIRECT_RECLAIM, to maintain the current behavior of the code.
> 2.Logic of kswapd and drop_slab to call shrinker callback isn't affected.
>
> Signed-off-by: Peifeng Li <lipeifeng@...o.com>
> ---
> include/linux/shrinker.h | 5 +++++
> mm/shrinker.c | 36 ++++++++++++++++++++++++++++++++++--
> 2 files changed, 39 insertions(+), 2 deletions(-)
>
> diff --git a/include/linux/shrinker.h b/include/linux/shrinker.h
> index 1a00be90d93a..2d5a8b3a720b 100644
> --- a/include/linux/shrinker.h
> +++ b/include/linux/shrinker.h
> @@ -130,6 +130,11 @@ struct shrinker {
> * non-MEMCG_AWARE shrinker should not have this flag set.
> */
> #define SHRINKER_NONSLAB BIT(4)
> +/*
> + * Can shrinker callback be called in direct_relcaim unless
> + * sc->priority is 0?
> + */
> +#define SHRINKER_NO_DIRECT_RECLAIM BIT(5)
>
> __printf(2, 3)
> struct shrinker *shrinker_alloc(unsigned int flags, const char *fmt, ...);
> diff --git a/mm/shrinker.c b/mm/shrinker.c
> index dc5d2a6fcfc4..3ac50da72494 100644
> --- a/mm/shrinker.c
> +++ b/mm/shrinker.c
> @@ -544,7 +544,23 @@ static unsigned long shrink_slab_memcg(gfp_t gfp_mask, int nid,
> if (!memcg_kmem_online() &&
> !(shrinker->flags & SHRINKER_NONSLAB))
> continue;
> -
> + /*
> + * SHRINKER_NO_DIRECT_RECLAIM, mean that shrinker callback
> + * should not be called in direct_reclaim unless priority
> + * is 0.
> + */
> + if ((shrinker->flags & SHRINKER_NO_DIRECT_RECLAIM) &&
> + !current_is_kswapd()) {
> + /*
> + * 1.Always call shrinker callback in drop_slab that
> + * priority is 0.
> + * 2.sc->priority is 0 during direct_reclaim, allow
> + * direct_reclaim to call shrinker callback, to reclaim
> + * memory timely.
> + */
> + if (priority)
> + continue;
> + }
> ret = do_shrink_slab(&sc, shrinker, priority);
> if (ret == SHRINK_EMPTY) {
> clear_bit(offset, unit->map);
> @@ -658,7 +674,23 @@ unsigned long shrink_slab(gfp_t gfp_mask, int nid, struct mem_cgroup *memcg,
> continue;
>
> rcu_read_unlock();
> -
> + /*
> + * SHRINKER_NO_DIRECT_RECLAIM, mean that shrinker callback
> + * should not be called in direct_reclaim unless priority
> + * is 0.
> + */
> + if ((shrinker->flags & SHRINKER_NO_DIRECT_RECLAIM) &&
> + !current_is_kswapd()) {
> + /*
> + * 1.Always call shrinker callback in drop_slab that
> + * priority is 0.
> + * 2.sc->priority is 0 during direct_reclaim, allow
> + * direct_reclaim to call shrinker callback, to reclaim
> + * memory timely.
> + */
> + if (priority)
> + continue;
> + }
> ret = do_shrink_slab(&sc, shrinker, priority);
> if (ret == SHRINK_EMPTY)
> ret = 0;
Powered by blists - more mailing lists