linux-kernel - Re: [PATCH] mm/shrinker: add SHRINKER_NO_DIRECT

lists.openwall.net		lists / announce owl-users owl-dev john-users john-dev passwdqc-users yescrypt popa3d-users / oss-security kernel-hardening musl sabotage tlsify passwords / crypt-dev xvendor / Bugtraq Full-Disclosure linux-kernel linux-netdev linux-ext4 linux-hardening linux-cve-announce PHC
Open Source and information security mailing list archives
Hash Suite for Android: free password hash cracker in your pocket
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <d78ec9ea-006a-4317-b0df-99ad31994b0d@bytedance.com>
Date: Fri, 12 Apr 2024 16:47:30 +0800
From: Qi Zheng <zhengqi.arch@...edance.com>
To: lipeifeng@...o.com
Cc: akpm@...ux-foundation.org, david@...morbit.com, roman.gushchin@...ux.dev,
 muchun.song@...ux.dev, linux-mm@...ck.org, linux-kernel@...r.kernel.org
Subject: Re: [PATCH] mm/shrinker: add SHRINKER_NO_DIRECT_RECLAIM

Hi Peifeng,

On 2024/4/12 16:07, lipeifeng@...o.com wrote:
> From: Peifeng Li <lipeifeng@...o.com>
> 
> In the case of insufficient memory, threads will be in direct_reclaim to
> reclaim memory, direct_reclaim will call shrink_slab to run sequentially
> each shrinker callback. If there is a lock-contention in the shrinker
> callback,such as spinlock,mutex_lock and so on, threads may be likely to
> be stuck in direct_reclaim for a long time, even if the memfree has reached
> the high watermarks of the zone, resulting in poor performance of threads.
> 
> Example 1: shrinker callback may wait for spinlock
> static unsigned long mb_cache_shrink(struct mb_cache *cache,
>                                       unsigned long nr_to_scan)
> {
>          struct mb_cache_entry *entry;
>          unsigned long shrunk = 0;
> 
>          spin_lock(&cache->c_list_lock);
>          while (nr_to_scan-- && !list_empty(&cache->c_list)) {
>                  entry = list_first_entry(&cache->c_list,
>                                           struct mb_cache_entry, e_list);
>                  if (test_bit(MBE_REFERENCED_B, &entry->e_flags) ||
>                      atomic_cmpxchg(&entry->e_refcnt, 1, 0) != 1) {
>                          clear_bit(MBE_REFERENCED_B, &entry->e_flags);
>                          list_move_tail(&entry->e_list, &cache->c_list);
>                          continue;
>                  }
>                  list_del_init(&entry->e_list);
>                  cache->c_entry_count--;
>                  spin_unlock(&cache->c_list_lock);
>                  __mb_cache_entry_free(cache, entry);
>                  shrunk++;
>                  cond_resched();
>                  spin_lock(&cache->c_list_lock);
>          }
>          spin_unlock(&cache->c_list_lock);
> 
>          return shrunk;
> }
> Example 2: shrinker callback may wait for mutex lock
> static
> unsigned long kbase_mem_evictable_reclaim_scan_objects(struct shrinker *s,
> 		struct shrink_control *sc)
> {
> 	struct kbase_context *kctx;
> 	struct kbase_mem_phy_alloc *alloc;
> 	struct kbase_mem_phy_alloc *tmp;
> 	unsigned long freed = 0;
> 
> 	kctx = container_of(s, struct kbase_context, reclaim);
> 
> 	// MTK add to prevent false alarm
> 	lockdep_off();
> 
> 	mutex_lock(&kctx->jit_evict_lock);
> 
> 	list_for_each_entry_safe(alloc, tmp, &kctx->evict_list, evict_node) {
> 		int err;
> 
> 		err = kbase_mem_shrink_gpu_mapping(kctx, alloc->reg,
> 				0, alloc->nents);
> 		if (err != 0) {
> 			freed = -1;
> 			goto out_unlock;
> 		}
> 
> 		alloc->evicted = alloc->nents;
> 
> 		kbase_free_phy_pages_helper(alloc, alloc->evicted);
> 		freed += alloc->evicted;
> 		list_del_init(&alloc->evict_node);
> 
> 		kbase_jit_backing_lost(alloc->reg);
> 
> 		if (freed > sc->nr_to_scan)
> 			break;
> 	}
> out_unlock:
> 	mutex_unlock(&kctx->jit_evict_lock);
> 
> 	// MTK add to prevent false alarm
> 	lockdep_on();
> 
> 	return freed;
> }
> 
> In mobile-phone,threads are likely to be stuck in shrinker callback during
> direct_reclaim, with example like the following:
> <...>-2806    [004] ..... 866458.339840: mm_shrink_slab_start:
> 			dynamic_mem_shrink_scan+0x0/0xb8 ... priority 2
> <...>-2806    [004] ..... 866459.339933: mm_shrink_slab_end:
> 			dynamic_mem_shrink_scan+0x0/0xb8 ...
> 
> For the above reason, the patch introduces SHRINKER_NO_DIRECT_RECLAIM that
> allows driver to set shrinker callback not to be called in direct_reclaim
> unless sc->priority is 0.

Hmm, this is just a workaround, no shrinker will want to set this flag.
If a shrinker has a lock contention problem, then it needs to be fixed.

Perhaps executing do_shrink_slab() asynchronously may be a more
fundamental solution, but this may result in untimely reclamation.

> 
> The reason why sc->priority=0 allows shrinker callback to be called in
> direct_reclaim is for two reasons:
> 1.Always call all shrinker callback in drop_slab that priority is 0.
> 2.sc->priority is 0 during direct_reclaim, allow direct_reclaim to call
> shrinker callback, to reclaim memory timely.
> 
> Note:
> 1.Register_shrinker_prepared() default not to set

This API is no longer included in the latest upstream code. Please
submit a patch based on the latest code.

Thanks,
Qi

> SHRINKER_NO_DIRECT_RECLAIM, to maintain the current behavior of the code.
> 2.Logic of kswapd and drop_slab to call shrinker callback isn't affected.
> 
> Signed-off-by: Peifeng Li <lipeifeng@...o.com>
> ---
>   include/linux/shrinker.h |  5 +++++
>   mm/shrinker.c            | 36 ++++++++++++++++++++++++++++++++++--
>   2 files changed, 39 insertions(+), 2 deletions(-)
> 
> diff --git a/include/linux/shrinker.h b/include/linux/shrinker.h
> index 1a00be90d93a..2d5a8b3a720b 100644
> --- a/include/linux/shrinker.h
> +++ b/include/linux/shrinker.h
> @@ -130,6 +130,11 @@ struct shrinker {
>    * non-MEMCG_AWARE shrinker should not have this flag set.
>    */
>   #define SHRINKER_NONSLAB	BIT(4)
> +/*
> + * Can shrinker callback be called in direct_relcaim unless
> + * sc->priority is 0?
> + */
> +#define SHRINKER_NO_DIRECT_RECLAIM	BIT(5)
>   
>   __printf(2, 3)
>   struct shrinker *shrinker_alloc(unsigned int flags, const char *fmt, ...);
> diff --git a/mm/shrinker.c b/mm/shrinker.c
> index dc5d2a6fcfc4..3ac50da72494 100644
> --- a/mm/shrinker.c
> +++ b/mm/shrinker.c
> @@ -544,7 +544,23 @@ static unsigned long shrink_slab_memcg(gfp_t gfp_mask, int nid,
>   			if (!memcg_kmem_online() &&
>   			    !(shrinker->flags & SHRINKER_NONSLAB))
>   				continue;
> -
> +			/*
> +			 * SHRINKER_NO_DIRECT_RECLAIM, mean that shrinker callback
> +			 * should not be called in direct_reclaim unless priority
> +			 * is 0.
> +			 */
> +			if ((shrinker->flags & SHRINKER_NO_DIRECT_RECLAIM) &&
> +					!current_is_kswapd()) {
> +				/*
> +				 * 1.Always call shrinker callback in drop_slab that
> +				 * priority is 0.
> +				 * 2.sc->priority is 0 during direct_reclaim, allow
> +				 * direct_reclaim to call shrinker callback, to reclaim
> +				 * memory timely.
> +				 */
> +				if (priority)
> +					continue;
> +			}
>   			ret = do_shrink_slab(&sc, shrinker, priority);
>   			if (ret == SHRINK_EMPTY) {
>   				clear_bit(offset, unit->map);
> @@ -658,7 +674,23 @@ unsigned long shrink_slab(gfp_t gfp_mask, int nid, struct mem_cgroup *memcg,
>   			continue;
>   
>   		rcu_read_unlock();
> -
> +		/*
> +		 * SHRINKER_NO_DIRECT_RECLAIM, mean that shrinker callback
> +		 * should not be called in direct_reclaim unless priority
> +		 * is 0.
> +		 */
> +		if ((shrinker->flags & SHRINKER_NO_DIRECT_RECLAIM) &&
> +				!current_is_kswapd()) {
> +			/*
> +			 * 1.Always call shrinker callback in drop_slab that
> +			 * priority is 0.
> +			 * 2.sc->priority is 0 during direct_reclaim, allow
> +			 * direct_reclaim to call shrinker callback, to reclaim
> +			 * memory timely.
> +			 */
> +			if (priority)
> +				continue;
> +		}
>   		ret = do_shrink_slab(&sc, shrinker, priority);
>   		if (ret == SHRINK_EMPTY)
>   			ret = 0;