lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <ZFIgUfPrinUKLIVI@dhcp22.suse.cz>
Date:   Wed, 3 May 2023 10:50:25 +0200
From:   Michal Hocko <mhocko@...e.com>
To:     Yosry Ahmed <yosryahmed@...gle.com>
Cc:     Johannes Weiner <hannes@...xchg.org>,
        Roman Gushchin <roman.gushchin@...ux.dev>,
        Shakeel Butt <shakeelb@...gle.com>,
        Andrew Morton <akpm@...ux-foundation.org>,
        Muchun Song <muchun.song@...ux.dev>,
        Sergey Senozhatsky <senozhatsky@...omium.org>,
        Steven Rostedt <rostedt@...dmis.org>,
        Petr Mladek <pmladek@...e.com>, Chris Li <chrisl@...nel.org>,
        cgroups@...r.kernel.org, linux-mm@...ck.org,
        linux-kernel@...r.kernel.org
Subject: Re: [PATCH v2 2/2] memcg: dump memory.stat during cgroup OOM for v1

On Fri 28-04-23 13:24:06, Yosry Ahmed wrote:
> Commit c8713d0b2312 ("mm: memcontrol: dump memory.stat during cgroup
> OOM") made sure we dump all the stats in memory.stat during a cgroup
> OOM, but it also introduced a slight behavioral change. The code used to
> print the non-hierarchical v1 cgroup stats for the entire cgroup
> subtree; now it only prints the v2 cgroup stats for the cgroup under
> OOM.
> 
> For cgroup v1 users, this introduces a few problems:
> (a) The non-hierarchical stats of the memcg under OOM are no longer
> shown.
> (b) A couple of v1-only stats (e.g. pgpgin, pgpgout) are no longer
> shown.
> (c) We show the list of cgroup v2 stats, even in cgroup v1. This list of
> stats is not tracked with v1 in mind. While most of the stats seem to be
> working on v1, there may be some stats that are not fully or correctly
> tracked.
> 
> Although the OOM log is not set in stone, we should not change it for no
> reason. When upgrading to a kernel version that includes
> commit c8713d0b2312 ("mm: memcontrol: dump memory.stat during cgroup
> OOM"), these behavioral changes are noticed in cgroup v1.
> 
> The fix is simple. Commit c8713d0b2312 ("mm: memcontrol: dump memory.stat
> during cgroup OOM") separated stats formatting from stats display for
> v2, to reuse the stats formatting in the OOM logs. Do the same for v1.
> 
> Move the v2 specific formatting from memory_stat_format() to
> memcg_stat_format(), add memcg1_stat_format() for v1, and make
> memory_stat_format() select between them based on cgroup version.
> Since memory_stat_show() now works for both v1 & v2, drop
> memcg_stat_show().
> 
> Signed-off-by: Yosry Ahmed <yosryahmed@...gle.com>

Acked-by: Michal Hocko <mhocko@...e.com>
Thanks

> ---
>  mm/memcontrol.c | 60 ++++++++++++++++++++++++++++---------------------
>  1 file changed, 35 insertions(+), 25 deletions(-)
> 
> diff --git a/mm/memcontrol.c b/mm/memcontrol.c
> index 5922940f92c9..2b492f8d540c 100644
> --- a/mm/memcontrol.c
> +++ b/mm/memcontrol.c
> @@ -1551,7 +1551,7 @@ static inline unsigned long memcg_page_state_output(struct mem_cgroup *memcg,
>  	return memcg_page_state(memcg, item) * memcg_page_state_unit(item);
>  }
>  
> -static void memory_stat_format(struct mem_cgroup *memcg, struct seq_buf *s)
> +static void memcg_stat_format(struct mem_cgroup *memcg, struct seq_buf *s)
>  {
>  	int i;
>  
> @@ -1604,6 +1604,17 @@ static void memory_stat_format(struct mem_cgroup *memcg, struct seq_buf *s)
>  	WARN_ON_ONCE(seq_buf_has_overflowed(s));
>  }
>  
> +static void memcg1_stat_format(struct mem_cgroup *memcg, struct seq_buf *s);
> +
> +static void memory_stat_format(struct mem_cgroup *memcg, struct seq_buf *s)
> +{
> +	if (cgroup_subsys_on_dfl(memory_cgrp_subsys))
> +		memcg_stat_format(memcg, s);
> +	else
> +		memcg1_stat_format(memcg, s);
> +	WARN_ON_ONCE(seq_buf_has_overflowed(s));
> +}
> +
>  #define K(x) ((x) << (PAGE_SHIFT-10))
>  /**
>   * mem_cgroup_print_oom_context: Print OOM information relevant to
> @@ -4078,9 +4089,8 @@ static const unsigned int memcg1_events[] = {
>  	PGMAJFAULT,
>  };
>  
> -static int memcg_stat_show(struct seq_file *m, void *v)
> +static void memcg1_stat_format(struct mem_cgroup *memcg, struct seq_buf *s)
>  {
> -	struct mem_cgroup *memcg = mem_cgroup_from_seq(m);
>  	unsigned long memory, memsw;
>  	struct mem_cgroup *mi;
>  	unsigned int i;
> @@ -4095,18 +4105,18 @@ static int memcg_stat_show(struct seq_file *m, void *v)
>  		if (memcg1_stats[i] == MEMCG_SWAP && !do_memsw_account())
>  			continue;
>  		nr = memcg_page_state_local(memcg, memcg1_stats[i]);
> -		seq_printf(m, "%s %lu\n", memcg1_stat_names[i],
> +		seq_buf_printf(s, "%s %lu\n", memcg1_stat_names[i],
>  			   nr * memcg_page_state_unit(memcg1_stats[i]));
>  	}
>  
>  	for (i = 0; i < ARRAY_SIZE(memcg1_events); i++)
> -		seq_printf(m, "%s %lu\n", vm_event_name(memcg1_events[i]),
> -			   memcg_events_local(memcg, memcg1_events[i]));
> +		seq_buf_printf(s, "%s %lu\n", vm_event_name(memcg1_events[i]),
> +			       memcg_events_local(memcg, memcg1_events[i]));
>  
>  	for (i = 0; i < NR_LRU_LISTS; i++)
> -		seq_printf(m, "%s %lu\n", lru_list_name(i),
> -			   memcg_page_state_local(memcg, NR_LRU_BASE + i) *
> -			   PAGE_SIZE);
> +		seq_buf_printf(s, "%s %lu\n", lru_list_name(i),
> +			       memcg_page_state_local(memcg, NR_LRU_BASE + i) *
> +			       PAGE_SIZE);
>  
>  	/* Hierarchical information */
>  	memory = memsw = PAGE_COUNTER_MAX;
> @@ -4114,11 +4124,11 @@ static int memcg_stat_show(struct seq_file *m, void *v)
>  		memory = min(memory, READ_ONCE(mi->memory.max));
>  		memsw = min(memsw, READ_ONCE(mi->memsw.max));
>  	}
> -	seq_printf(m, "hierarchical_memory_limit %llu\n",
> -		   (u64)memory * PAGE_SIZE);
> +	seq_buf_printf(s, "hierarchical_memory_limit %llu\n",
> +		       (u64)memory * PAGE_SIZE);
>  	if (do_memsw_account())
> -		seq_printf(m, "hierarchical_memsw_limit %llu\n",
> -			   (u64)memsw * PAGE_SIZE);
> +		seq_buf_printf(s, "hierarchical_memsw_limit %llu\n",
> +			       (u64)memsw * PAGE_SIZE);
>  
>  	for (i = 0; i < ARRAY_SIZE(memcg1_stats); i++) {
>  		unsigned long nr;
> @@ -4126,19 +4136,19 @@ static int memcg_stat_show(struct seq_file *m, void *v)
>  		if (memcg1_stats[i] == MEMCG_SWAP && !do_memsw_account())
>  			continue;
>  		nr = memcg_page_state(memcg, memcg1_stats[i]);
> -		seq_printf(m, "total_%s %llu\n", memcg1_stat_names[i],
> +		seq_buf_printf(s, "total_%s %llu\n", memcg1_stat_names[i],
>  			   (u64)nr * memcg_page_state_unit(memcg1_stats[i]));
>  	}
>  
>  	for (i = 0; i < ARRAY_SIZE(memcg1_events); i++)
> -		seq_printf(m, "total_%s %llu\n",
> -			   vm_event_name(memcg1_events[i]),
> -			   (u64)memcg_events(memcg, memcg1_events[i]));
> +		seq_buf_printf(s, "total_%s %llu\n",
> +			       vm_event_name(memcg1_events[i]),
> +			       (u64)memcg_events(memcg, memcg1_events[i]));
>  
>  	for (i = 0; i < NR_LRU_LISTS; i++)
> -		seq_printf(m, "total_%s %llu\n", lru_list_name(i),
> -			   (u64)memcg_page_state(memcg, NR_LRU_BASE + i) *
> -			   PAGE_SIZE);
> +		seq_buf_printf(s, "total_%s %llu\n", lru_list_name(i),
> +			       (u64)memcg_page_state(memcg, NR_LRU_BASE + i) *
> +			       PAGE_SIZE);
>  
>  #ifdef CONFIG_DEBUG_VM
>  	{
> @@ -4153,12 +4163,10 @@ static int memcg_stat_show(struct seq_file *m, void *v)
>  			anon_cost += mz->lruvec.anon_cost;
>  			file_cost += mz->lruvec.file_cost;
>  		}
> -		seq_printf(m, "anon_cost %lu\n", anon_cost);
> -		seq_printf(m, "file_cost %lu\n", file_cost);
> +		seq_buf_printf(s, "anon_cost %lu\n", anon_cost);
> +		seq_buf_printf(s, "file_cost %lu\n", file_cost);
>  	}
>  #endif
> -
> -	return 0;
>  }
>  
>  static u64 mem_cgroup_swappiness_read(struct cgroup_subsys_state *css,
> @@ -4998,6 +5006,8 @@ static int mem_cgroup_slab_show(struct seq_file *m, void *p)
>  }
>  #endif
>  
> +static int memory_stat_show(struct seq_file *m, void *v);
> +
>  static struct cftype mem_cgroup_legacy_files[] = {
>  	{
>  		.name = "usage_in_bytes",
> @@ -5030,7 +5040,7 @@ static struct cftype mem_cgroup_legacy_files[] = {
>  	},
>  	{
>  		.name = "stat",
> -		.seq_show = memcg_stat_show,
> +		.seq_show = memory_stat_show,
>  	},
>  	{
>  		.name = "force_empty",
> -- 
> 2.40.1.495.gc816e09b53d-goog

-- 
Michal Hocko
SUSE Labs

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ