linux-kernel - Re: v3.4-rc2 out-of-memory problems (was Re: 3.4-rc1 sticks-and-crashs)

lists.openwall.net		lists / announce owl-users owl-dev john-users john-dev passwdqc-users yescrypt popa3d-users / oss-security kernel-hardening musl sabotage tlsify passwords / crypt-dev xvendor / Bugtraq Full-Disclosure linux-kernel linux-netdev linux-ext4 linux-hardening linux-cve-announce PHC
Open Source and information security mailing list archives
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <CAMbhsRSV9Oy5n-GBjyhCtfCd=3_S9iif+qzEk0Oyy0EBC9AF6w@mail.gmail.com>
Date:	Mon, 9 Apr 2012 17:23:16 -0700
From:	Colin Cross <ccross@...gle.com>
To:	David Rientjes <rientjes@...gle.com>
Cc:	Linus Torvalds <torvalds@...ux-foundation.org>,
	Andrew Morton <akpm@...ux-foundation.org>,
	werner <w.landgraf@...ru>, Rik van Riel <riel@...hat.com>,
	Hugh Dickins <hughd@...gle.com>, linux-kernel@...r.kernel.org,
	Oleg Nesterov <oleg@...hat.com>,
	Rabin Vincent <rabin.vincent@...ricsson.com>,
	Christian Bejram <christian.bejram@...ricsson.com>,
	"Paul E. McKenney" <paulmck@...ux.vnet.ibm.com>,
	Anton Vorontsov <anton.vorontsov@...aro.org>,
	Greg Kroah-Hartman <gregkh@...uxfoundation.org>,
	stable@...r.kernel.org
Subject: Re: v3.4-rc2 out-of-memory problems (was Re: 3.4-rc1 sticks-and-crashs)

On Mon, Apr 9, 2012 at 4:37 PM, David Rientjes <rientjes@...gle.com> wrote:
> On Mon, 9 Apr 2012, Colin Cross wrote:
>
>> This was a known issue in 2010, in the android tree the use of
>> task_handoff_register was dropped one day after it was added and
>> replaced with a new task_free_register hook.
>
> Why can't you just do this?  Are you concerned about the possibility of
> depleting all memory reserves?

The point of the lowmem_deathpending patch was to avoid a stutter
where the cpu would spend its time looping through the tasks due to
repeated calls to lowmem_shrink instead of processing the kill signal
to the selected thread.  With this patch, it will still loop through
tasks until it finds the one that was previously killed and then
abort.  It's possible that the improvements Anton made to the task
loop reduce the performance impact enough that this whole mess could
just be dropped (by reverting 1eda516, e5d7965, and 4755b72).

This may have also been impacted by another bug that is on my list of
things to look at: when asked the size of it's "cache", lowmemkiller
returns something on the order of all memory used by userspace, but
under some conditions will refuse to kill any of it due to the current
lowmem_minfree settings.  Due to the large size of the "cache", the
shrinker can call lowmem_shrink hundreds of times for a single
allocation, each time asking to reduce the size of the cache by 128
pages.  The original lowmem_deathpending patch may have been a
misguided "fix" for this bug.

> ---
>  drivers/staging/android/lowmemorykiller.c |   47 ++++-------------------------
>  1 file changed, 6 insertions(+), 41 deletions(-)
>
> diff --git a/drivers/staging/android/lowmemorykiller.c b/drivers/staging/android/lowmemorykiller.c
> --- a/drivers/staging/android/lowmemorykiller.c
> +++ b/drivers/staging/android/lowmemorykiller.c
> @@ -55,7 +55,6 @@ static int lowmem_minfree[6] = {
>  };
>  static int lowmem_minfree_size = 4;
>
> -static struct task_struct *lowmem_deathpending;
>  static unsigned long lowmem_deathpending_timeout;
>
>  #define lowmem_print(level, x...)                      \
> @@ -64,24 +63,6 @@ static unsigned long lowmem_deathpending_timeout;
>                        printk(x);                      \
>        } while (0)
>
> -static int
> -task_notify_func(struct notifier_block *self, unsigned long val, void *data);
> -
> -static struct notifier_block task_nb = {
> -       .notifier_call  = task_notify_func,
> -};
> -
> -static int
> -task_notify_func(struct notifier_block *self, unsigned long val, void *data)
> -{
> -       struct task_struct *task = data;
> -
> -       if (task == lowmem_deathpending)
> -               lowmem_deathpending = NULL;
> -
> -       return NOTIFY_OK;
> -}
> -
>  static int lowmem_shrink(struct shrinker *s, struct shrink_control *sc)
>  {
>        struct task_struct *tsk;
> @@ -97,19 +78,6 @@ static int lowmem_shrink(struct shrinker *s, struct shrink_control *sc)
>        int other_file = global_page_state(NR_FILE_PAGES) -
>                                                global_page_state(NR_SHMEM);
>
> -       /*
> -        * If we already have a death outstanding, then
> -        * bail out right away; indicating to vmscan
> -        * that we have nothing further to offer on
> -        * this pass.
> -        *
> -        * Note: Currently you need CONFIG_PROFILING
> -        * for this to work correctly.
> -        */
> -       if (lowmem_deathpending &&
> -           time_before_eq(jiffies, lowmem_deathpending_timeout))
> -               return 0;
> -
>        if (lowmem_adj_size < array_size)
>                array_size = lowmem_adj_size;
>        if (lowmem_minfree_size < array_size)
> @@ -148,6 +116,11 @@ static int lowmem_shrink(struct shrinker *s, struct shrink_control *sc)
>                if (!p)
>                        continue;
>
> +               if (test_tsk_thread_flag(p, TIF_MEMDIE) &&
> +                   time_before_eq(jiffies, lowmem_deathpending_timeout)) {
> +                       task_unlock(p);
> +                       return 0;
> +               }
>                oom_score_adj = p->signal->oom_score_adj;
>                if (oom_score_adj < min_score_adj) {
>                        task_unlock(p);
> @@ -174,15 +147,9 @@ static int lowmem_shrink(struct shrinker *s, struct shrink_control *sc)
>                lowmem_print(1, "send sigkill to %d (%s), adj %d, size %d\n",
>                             selected->pid, selected->comm,
>                             selected_oom_score_adj, selected_tasksize);
> -               /*
> -                * If CONFIG_PROFILING is off, then we don't want to stall
> -                * the killer by setting lowmem_deathpending.
> -                */
> -#ifdef CONFIG_PROFILING
> -               lowmem_deathpending = selected;
>                lowmem_deathpending_timeout = jiffies + HZ;
> -#endif
>                send_sig(SIGKILL, selected, 0);
> +               set_tsk_thread_flag(selected, TIF_MEMDIE);
>                rem -= selected_tasksize;
>        }
>        lowmem_print(4, "lowmem_shrink %lu, %x, return %d\n",
> @@ -198,7 +165,6 @@ static struct shrinker lowmem_shrinker = {
>
>  static int __init lowmem_init(void)
>  {
> -       task_handoff_register(&task_nb);
>        register_shrinker(&lowmem_shrinker);
>        return 0;
>  }
> @@ -206,7 +172,6 @@ static int __init lowmem_init(void)
>  static void __exit lowmem_exit(void)
>  {
>        unregister_shrinker(&lowmem_shrinker);
> -       task_handoff_unregister(&task_nb);
>  }
>
>  module_param_named(cost, lowmem_shrinker.seeks, int, S_IRUGO | S_IWUSR);
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/