lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [day] [month] [year] [list]
Date:   Wed, 24 Nov 2021 17:21:53 +0100
From:   Mike Galbraith <efault@....de>
To:     Mel Gorman <mgorman@...hsingularity.net>
Cc:     lkml <linux-kernel@...r.kernel.org>
Subject: Re: mm: LTP/memcg testcase regression induced by
 8cd7c588decf..66ce520bb7c2 series

On Wed, 2021-11-24 at 14:56 +0000, Mel Gorman wrote:
> On Tue, Nov 23, 2021 at 12:18:01PM +0100, Mike Galbraith wrote:
> > On Tue, 2021-11-23 at 09:13 +0000, Mel Gorman wrote:
> > > 
> > > I'll see if I can reproduce this...
> > 
> > You likely already know this, but just in case, just plunk the below
> > into $LTPROOT/runtest/foo, and $LTPROOT/runltp -f foo.
> > 
> > #DESCRIPTION:Resource Management testing
> > memcg_regression        memcg_regression_test.sh
> > 
> 
> Thanks. Can you try the following patch please?
> 
> The test will still take longer to reach OOM and complete, because it
> still stalls, but not as severely as before.

Yeah, way better.  test1() is still to be avoided, but everything else
seems fine, including stress (tweaked so that it is not a swap storm from hell).

> 
> diff --git a/mm/vmscan.c b/mm/vmscan.c
> index 07db03883062..d9166e94eb95 100644
> --- a/mm/vmscan.c
> +++ b/mm/vmscan.c
> @@ -1057,7 +1057,17 @@ void reclaim_throttle(pg_data_t *pgdat, enum vmscan_throttle_state reason)
>  
>                 break;
>         case VMSCAN_THROTTLE_NOPROGRESS:
> -               timeout = HZ/2;
> +               timeout = 1;
> +
> +               /*
> +                * If kswapd is disabled, reschedule if necessary but do not
> +                * throttle as the system is likely near OOM.
> +                */
> +               if (pgdat->kswapd_failures >= MAX_RECLAIM_RETRIES) {
> +                       cond_resched();
> +                       return;
> +               }
> +
>                 break;
>         case VMSCAN_THROTTLE_ISOLATED:
>                 timeout = HZ/50;
> @@ -3395,7 +3405,7 @@ static void consider_reclaim_throttle(pg_data_t *pgdat, struct scan_control *sc)
>                 return;
>  
>         /* Throttle if making no progress at high prioities. */
> -       if (sc->priority < DEF_PRIORITY - 2)
> +       if (sc->priority < DEF_PRIORITY - 2 && !sc->nr_reclaimed)
>                 reclaim_throttle(pgdat, VMSCAN_THROTTLE_NOPROGRESS);
>  }
>  
> @@ -3415,6 +3425,7 @@ static void shrink_zones(struct zonelist *zonelist, struct scan_control *sc)
>         unsigned long nr_soft_scanned;
>         gfp_t orig_mask;
>         pg_data_t *last_pgdat = NULL;
> +       pg_data_t *first_pgdat = NULL;
>  
>         /*
>          * If the number of buffer_heads in the machine exceeds the maximum
> @@ -3478,14 +3489,18 @@ static void shrink_zones(struct zonelist *zonelist, struct scan_control *sc)
>                         /* need some check for avoid more shrink_zone() */
>                 }
>  
> +               if (!first_pgdat)
> +                       first_pgdat = zone->zone_pgdat;
> +
>                 /* See comment about same check for global reclaim above */
>                 if (zone->zone_pgdat == last_pgdat)
>                         continue;
>                 last_pgdat = zone->zone_pgdat;
>                 shrink_node(zone->zone_pgdat, sc);
> -               consider_reclaim_throttle(zone->zone_pgdat, sc);
>         }
>  
> +       consider_reclaim_throttle(first_pgdat, sc);
> +
>         /*
>          * Restore to original mask to avoid the impact on the caller if we
>          * promoted it to __GFP_HIGHMEM.

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ