lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <CAGsJ_4yGTcgMnK_81hMUAL0xG06Nmu0kn3bwdQHhSiDV8HQ+ZA@mail.gmail.com>
Date: Sat, 13 Apr 2024 17:19:44 +1200
From: Barry Song <21cnbao@...il.com>
To: lipeifeng@...o.com
Cc: akpm@...ux-foundation.org, david@...morbit.com, zhengqi.arch@...edance.com, 
	roman.gushchin@...ux.dev, muchun.song@...ux.dev, linux-mm@...ck.org, 
	linux-kernel@...r.kernel.org
Subject: Re: [PATCH v2] mm/shrinker: add SHRINKER_NO_DIRECT_RECLAIM

On Sat, Apr 13, 2024 at 1:54 PM <lipeifeng@...o.com> wrote:
>
> From: Peifeng Li <lipeifeng@...o.com>
>
> In the case of insufficient memory, threads will be in direct_reclaim to
> reclaim memory, direct_reclaim will call shrink_slab to run sequentially
> each shrinker callback. If there is a lock-contention in the shrinker
> callback, such as spinlock, mutex_lock and so on, threads may be likely to
> be stuck in direct_reclaim for a long time, even if the memfree has reached
> the high watermarks of the zone, resulting in poor performance of threads.
>
> Example 1: shrinker callback may wait for spinlock
> static unsigned long mb_cache_shrink(struct mb_cache *cache,
>                                      unsigned long nr_to_scan)
> {
>         struct mb_cache_entry *entry;
>         unsigned long shrunk = 0;
>
>         spin_lock(&cache->c_list_lock);
>         while (nr_to_scan-- && !list_empty(&cache->c_list)) {
>                 entry = list_first_entry(&cache->c_list,
>                                          struct mb_cache_entry, e_list);
>                 if (test_bit(MBE_REFERENCED_B, &entry->e_flags) ||
>                     atomic_cmpxchg(&entry->e_refcnt, 1, 0) != 1) {
>                         clear_bit(MBE_REFERENCED_B, &entry->e_flags);
>                         list_move_tail(&entry->e_list, &cache->c_list);
>                         continue;
>                 }
>                 list_del_init(&entry->e_list);
>                 cache->c_entry_count--;
>                 spin_unlock(&cache->c_list_lock);
>                 __mb_cache_entry_free(cache, entry);
>                 shrunk++;
>                 cond_resched();
>                 spin_lock(&cache->c_list_lock);
>         }
>         spin_unlock(&cache->c_list_lock);
>
>         return shrunk;
> }
> Example 2: shrinker callback may wait for mutex lock
> static
> unsigned long kbase_mem_evictable_reclaim_scan_objects(struct shrinker *s,
>                 struct shrink_control *sc)
> {
>         struct kbase_context *kctx;
>         struct kbase_mem_phy_alloc *alloc;
>         struct kbase_mem_phy_alloc *tmp;
>         unsigned long freed = 0;
>
>         kctx = container_of(s, struct kbase_context, reclaim);
>
>         // MTK add to prevent false alarm
>         lockdep_off();
>
>         mutex_lock(&kctx->jit_evict_lock);
>
>         list_for_each_entry_safe(alloc, tmp, &kctx->evict_list, evict_node) {
>                 int err;
>
>                 err = kbase_mem_shrink_gpu_mapping(kctx, alloc->reg,
>                                 0, alloc->nents);
>                 if (err != 0) {
>                         freed = -1;
>                         goto out_unlock;
>                 }
>
>                 alloc->evicted = alloc->nents;
>
>                 kbase_free_phy_pages_helper(alloc, alloc->evicted);
>                 freed += alloc->evicted;
>                 list_del_init(&alloc->evict_node);
>
>                 kbase_jit_backing_lost(alloc->reg);
>
>                 if (freed > sc->nr_to_scan)
>                         break;
>         }
> out_unlock:
>         mutex_unlock(&kctx->jit_evict_lock);
>
>         // MTK add to prevent false alarm
>         lockdep_on();
>
>         return freed;
> }
>
> In mobile phones, threads are likely to be stuck in shrinker callbacks during
> direct_reclaim, with example like the following:
> <...>-2806    [004] ..... 866458.339840: mm_shrink_slab_start:
>                         dynamic_mem_shrink_scan+0x0/0xb8 ... priority 2
> <...>-2806    [004] ..... 866459.339933: mm_shrink_slab_end:
>                         dynamic_mem_shrink_scan+0x0/0xb8 ...
>
> For the above reason, the patch introduces SHRINKER_NO_DIRECT_RECLAIM that
> allows driver to set shrinker callback not to be called in direct_reclaim
> unless sc->priority is 0.
>
> The reason why sc->priority=0 allows shrinker callback to be called in
> direct_reclaim is for two reasons:
> 1.Always call all shrinker callback in drop_slab that priority is 0.
> 2.sc->priority is 0 during direct_reclaim, allow direct_reclaim to call
> shrinker callback, to reclaim memory timely.
>
> Note:
> 1.shrinker_register() default not to set SHRINKER_NO_DIRECT_RECLAIM, to
> maintain the current behavior of the code.
> 2.Logic of kswapd and drop_slab to call shrinker callback isn't affected.
>
> Signed-off-by: Peifeng Li <lipeifeng@...o.com>
> ---
> -v2: fix the commit message
>  include/linux/shrinker.h |  5 +++++
>  mm/shrinker.c            | 38 +++++++++++++++++++++++++++++++++++---
>  2 files changed, 40 insertions(+), 3 deletions(-)
>
> diff --git a/include/linux/shrinker.h b/include/linux/shrinker.h
> index 1a00be90d93a..2d5a8b3a720b 100644
> --- a/include/linux/shrinker.h
> +++ b/include/linux/shrinker.h
> @@ -130,6 +130,11 @@ struct shrinker {
>   * non-MEMCG_AWARE shrinker should not have this flag set.
>   */
>  #define SHRINKER_NONSLAB       BIT(4)
> +/*
> + * Can shrinker callback be called in direct_reclaim unless
> + * sc->priority is 0?
> + */
> +#define SHRINKER_NO_DIRECT_RECLAIM     BIT(5)
>

My point is, drivers won't voluntarily stay unreclaimed during direct
reclamation. Hence, this approach is unlikely to succeed. Those
drivers can't be trusted. Had they been aware of their slowness,
they wouldn't have written code in this manner.

Detecting problematic driver shrinkers and marking them as skipped
might prove challenging. I concur with Zhengqi; the priority should
be fixing the driver whose shrinker is slow. Do you have a list of
slow drivers?


>  __printf(2, 3)
>  struct shrinker *shrinker_alloc(unsigned int flags, const char *fmt, ...);
> diff --git a/mm/shrinker.c b/mm/shrinker.c
> index dc5d2a6fcfc4..7a5dffd166cd 100644
> --- a/mm/shrinker.c
> +++ b/mm/shrinker.c
> @@ -4,7 +4,7 @@
>  #include <linux/shrinker.h>
>  #include <linux/rculist.h>
>  #include <trace/events/vmscan.h>
> -
> +#include <linux/swap.h>
>  #include "internal.h"
>
>  LIST_HEAD(shrinker_list);
> @@ -544,7 +544,23 @@ static unsigned long shrink_slab_memcg(gfp_t gfp_mask, int nid,
>                         if (!memcg_kmem_online() &&
>                             !(shrinker->flags & SHRINKER_NONSLAB))
>                                 continue;
> -
> +                       /*
> +                        * SHRINKER_NO_DIRECT_RECLAIM means that the shrinker callback
> +                        * should not be called in direct_reclaim unless priority
> +                        * is 0.
> +                        */
> +                       if ((shrinker->flags & SHRINKER_NO_DIRECT_RECLAIM) &&
> +                                       !current_is_kswapd()) {
> +                               /*
> +                                * 1.Always call shrinker callback in drop_slab that
> +                                * priority is 0.
> +                                * 2.sc->priority is 0 during direct_reclaim, allow
> +                                * direct_reclaim to call shrinker callback, to reclaim
> +                                * memory timely.
> +                                */
> +                               if (priority)
> +                                       continue;
> +                       }
>                         ret = do_shrink_slab(&sc, shrinker, priority);
>                         if (ret == SHRINK_EMPTY) {
>                                 clear_bit(offset, unit->map);
> @@ -658,7 +674,23 @@ unsigned long shrink_slab(gfp_t gfp_mask, int nid, struct mem_cgroup *memcg,
>                         continue;
>
>                 rcu_read_unlock();
> -
> +               /*
> +                * SHRINKER_NO_DIRECT_RECLAIM means that the shrinker callback
> +                * should not be called in direct_reclaim unless priority
> +                * is 0.
> +                */
> +               if ((shrinker->flags & SHRINKER_NO_DIRECT_RECLAIM) &&
> +                               !current_is_kswapd()) {
> +                       /*
> +                        * 1.Always call shrinker callback in drop_slab that
> +                        * priority is 0.
> +                        * 2.sc->priority is 0 during direct_reclaim, allow
> +                        * direct_reclaim to call shrinker callback, to reclaim
> +                        * memory timely.
> +                        */
> +                       if (priority)
> +                               continue;
> +               }
>                 ret = do_shrink_slab(&sc, shrinker, priority);
>                 if (ret == SHRINK_EMPTY)
>                         ret = 0;
> --
> 2.34.1

Thanks
Barry

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ