lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [day] [month] [year] [list]
Message-ID: <5251d336-4ad0-ccf0-e31f-35d9c832b0be@i-love.sakura.ne.jp>
Date:   Tue, 23 Oct 2018 19:23:00 +0900
From:   Tetsuo Handa <penguin-kernel@...ove.sakura.ne.jp>
To:     Petr Mladek <pmladek@...e.com>
Cc:     Sergey Senozhatsky <sergey.senozhatsky.work@...il.com>,
        Michal Hocko <mhocko@...nel.org>,
        Johannes Weiner <hannes@...xchg.org>, linux-mm@...ck.org,
        syzkaller-bugs@...glegroups.com, guro@...com,
        kirill.shutemov@...ux.intel.com, linux-kernel@...r.kernel.org,
        rientjes@...gle.com, yang.s@...baba-inc.com,
        Andrew Morton <akpm@...ux-foundation.org>,
        Sergey Senozhatsky <sergey.senozhatsky@...il.com>,
        Steven Rostedt <rostedt@...dmis.org>,
        syzbot <syzbot+77e6b28a7a7106ad0def@...kaller.appspotmail.com>
Subject: Re: [PATCH v3] mm: memcontrol: Don't flood OOM messages with no
 eligible task.

On 2018/10/23 17:21, Petr Mladek wrote:
> On Fri 2018-10-19 09:18:16, Tetsuo Handa wrote:
>> I assumed we calculate the average dynamically, for the amount of
>> messages printed by an OOM event is highly unstable (depends on
>> hardware configuration such as number of nodes, number of zones,
>> and how many processes are there as a candidate for OOM victim).
> 
> Is there any idea how the average length can be counted dynamically?

I don't have one. Maybe sum up return values of printk() from OOM context?



> This reminds me another problem. We would need to use the same
> decision for all printk() calls that logically belongs to each
> other. Otherwise we might get mixed lines that might confuse
> poeple. I mean that OOM messages might look like:
> 
>   OOM: A
>   OOM: B
>   OOM: C
> 
> If we do not synchronize the rateliting, we might see:
> 
>   OOM: A
>   OOM: B
>   OOM: C
>   OOM: B
>   OOM: B
>   OOM: A
>   OOM: C
>   OOM: C

Messages from out_of_memory() are serialized by oom_lock mutex.
Messages from warn_alloc() are not serialized, and thus cause confusion.



>> I wish that memcg OOM events do not use printk(). Since memcg OOM is not
>> out of physical memory, we can dynamically allocate physical memory for
>> holding memcg OOM messages and let the userspace poll it via some interface.
> 
> Would the userspace work when the system gets blocked on allocations?

Yes for memcg OOM events. No for global OOM events.
You can try reproducers shown below from your environment.

Regarding case 2, we can solve the problem by checking tsk_is_oom_victim(current) == true.
But regarding case 1, Michal's patch is not sufficient for allowing administrators
to enter commands for recovery from console.

---------- Case 1: Flood of memcg OOM events caused by misconfiguration. ----------

#include <stdio.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <unistd.h>
#include <stdlib.h>

int main(int argc, char *argv[])
{
	FILE *fp;
	const unsigned long size = 1048576 * 200;
	char *buf = malloc(size);
	mkdir("/sys/fs/cgroup/memory/test1", 0755);
	fp = fopen("/sys/fs/cgroup/memory/test1/memory.limit_in_bytes", "w");
	fprintf(fp, "%lu\n", size / 2);
	fclose(fp);
	fp = fopen("/sys/fs/cgroup/memory/test1/tasks", "w");
	fprintf(fp, "%u\n", getpid());
	fclose(fp);
	fp = fopen("/proc/self/oom_score_adj", "w");
	fprintf(fp, "-1000\n");
	fclose(fp);
	fp = fopen("/dev/zero", "r");
	fread(buf, 1, size, fp);
	fclose(fp);
	return 0;
}

---------- Case 2: Flood of memcg OOM events caused by MMF_OOM_SKIP race. ----------

#define _GNU_SOURCE
#include <stdio.h>
#include <stdlib.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <unistd.h>
#include <sched.h>
#include <sys/mman.h>

#define NUMTHREADS 256
#define MMAPSIZE 4 * 10485760
#define STACKSIZE 4096
static int pipe_fd[2] = { EOF, EOF };
static int memory_eater(void *unused)
{
	int fd = open("/dev/zero", O_RDONLY);
	char *buf = mmap(NULL, MMAPSIZE, PROT_WRITE | PROT_READ,
			 MAP_ANONYMOUS | MAP_SHARED, EOF, 0);
	read(pipe_fd[0], buf, 1);
	read(fd, buf, MMAPSIZE);
	pause();
	return 0;
}
int main(int argc, char *argv[])
{
	int i;
	char *stack;
	FILE *fp;
	const unsigned long size = 1048576 * 200;
	mkdir("/sys/fs/cgroup/memory/test1", 0755);
	fp = fopen("/sys/fs/cgroup/memory/test1/memory.limit_in_bytes", "w");
	fprintf(fp, "%lu\n", size);
	fclose(fp);
	fp = fopen("/sys/fs/cgroup/memory/test1/tasks", "w");
	fprintf(fp, "%u\n", getpid());
	fclose(fp);
	if (setgid(-2) || setuid(-2))
		return 1;
	stack = mmap(NULL, STACKSIZE * NUMTHREADS, PROT_WRITE | PROT_READ,
		     MAP_ANONYMOUS | MAP_SHARED, EOF, 0);
	for (i = 0; i < NUMTHREADS; i++)
		if (clone(memory_eater, stack + (i + 1) * STACKSIZE,
			  CLONE_SIGHAND | CLONE_THREAD | CLONE_VM | CLONE_FS | CLONE_FILES, NULL) == -1)
			break;
	sleep(1);
	close(pipe_fd[1]);
	pause();
	return 0;
}

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ