lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [day] [month] [year] [list]
Date:   Fri, 5 Jun 2020 19:53:08 +0000
From:   Dennis Zhou <dennis@...nel.org>
To:     Roman Gushchin <guro@...com>
Cc:     Andrew Morton <akpm@...ux-foundation.org>,
        Tejun Heo <tj@...nel.org>, Christoph Lameter <cl@...ux.com>,
        Johannes Weiner <hannes@...xchg.org>,
        Michal Hocko <mhocko@...nel.org>,
        Shakeel Butt <shakeelb@...gle.com>, linux-mm@...ck.org,
        kernel-team@...com, linux-kernel@...r.kernel.org
Subject: Re: [PATCH v1 3/5] mm: memcg/percpu: per-memcg percpu memory
 statistics

On Thu, May 28, 2020 at 04:25:06PM -0700, Roman Gushchin wrote:
> Percpu memory can represent a noticeable chunk of the total
> memory consumption, especially on big machines with many CPUs.
> Let's track percpu memory usage for each memcg and display
> it in memory.stat.
> 
> A percpu allocation is usually scattered over multiple pages
> (and nodes), and can be significantly smaller than a page.
> So let's add a byte-sized counter on the memcg level:
> MEMCG_PERCPU_B. Byte-sized vmstat infra created for slabs
> can be perfectly reused for percpu case.
> 
> Signed-off-by: Roman Gushchin <guro@...com>
> ---
>  Documentation/admin-guide/cgroup-v2.rst |  4 ++++
>  include/linux/memcontrol.h              |  8 ++++++++
>  mm/memcontrol.c                         |  4 +++-
>  mm/percpu.c                             | 10 ++++++++++
>  4 files changed, 25 insertions(+), 1 deletion(-)
> 
> diff --git a/Documentation/admin-guide/cgroup-v2.rst b/Documentation/admin-guide/cgroup-v2.rst
> index fed4e1d2a343..aa8cb6dadadc 100644
> --- a/Documentation/admin-guide/cgroup-v2.rst
> +++ b/Documentation/admin-guide/cgroup-v2.rst
> @@ -1276,6 +1276,10 @@ PAGE_SIZE multiple when read back.
>  		Amount of memory used for storing in-kernel data
>  		structures.
>  
> +	  percpu
> +		Amount of memory used for storing per-cpu kernel
> +		data structures.
> +
>  	  sock
>  		Amount of memory used in network transmission buffers
>  
> diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
> index 7a84d9164449..f62a95d472f7 100644
> --- a/include/linux/memcontrol.h
> +++ b/include/linux/memcontrol.h
> @@ -32,11 +32,19 @@ struct kmem_cache;
>  enum memcg_stat_item {
>  	MEMCG_SWAP = NR_VM_NODE_STAT_ITEMS,
>  	MEMCG_SOCK,
> +	MEMCG_PERCPU_B,
>  	/* XXX: why are these zone and not node counters? */
>  	MEMCG_KERNEL_STACK_KB,
>  	MEMCG_NR_STAT,
>  };
>  
> +static __always_inline bool memcg_stat_item_in_bytes(enum memcg_stat_item item)
> +{
> +	if (item == MEMCG_PERCPU_B)
> +		return true;
> +	return vmstat_item_in_bytes(item);
> +}
> +
>  enum memcg_memory_event {
>  	MEMCG_LOW,
>  	MEMCG_HIGH,
> diff --git a/mm/memcontrol.c b/mm/memcontrol.c
> index 7bc3fd196210..5007d1585a4a 100644
> --- a/mm/memcontrol.c
> +++ b/mm/memcontrol.c
> @@ -783,7 +783,7 @@ void __mod_memcg_state(struct mem_cgroup *memcg, int idx, int val)
>  	if (mem_cgroup_disabled())
>  		return;
>  
> -	if (vmstat_item_in_bytes(idx))
> +	if (memcg_stat_item_in_bytes(idx))
>  		threshold <<= PAGE_SHIFT;
>  
>  	x = val + __this_cpu_read(memcg->vmstats_percpu->stat[idx]);
> @@ -1490,6 +1490,8 @@ static char *memory_stat_format(struct mem_cgroup *memcg)
>  	seq_buf_printf(&s, "slab %llu\n",
>  		       (u64)(memcg_page_state(memcg, NR_SLAB_RECLAIMABLE_B) +
>  			     memcg_page_state(memcg, NR_SLAB_UNRECLAIMABLE_B)));
> +	seq_buf_printf(&s, "percpu %llu\n",
> +		       (u64)memcg_page_state(memcg, MEMCG_PERCPU_B));
>  	seq_buf_printf(&s, "sock %llu\n",
>  		       (u64)memcg_page_state(memcg, MEMCG_SOCK) *
>  		       PAGE_SIZE);
> diff --git a/mm/percpu.c b/mm/percpu.c
> index 85f5755c9114..b4b3e9c8a6d1 100644
> --- a/mm/percpu.c
> +++ b/mm/percpu.c
> @@ -1608,6 +1608,11 @@ static void pcpu_memcg_post_alloc_hook(struct obj_cgroup *objcg,
>  
>  	if (chunk) {
>  		chunk->obj_cgroups[off >> PCPU_MIN_ALLOC_SHIFT] = objcg;
> +
> +		rcu_read_lock();
> +		mod_memcg_state(obj_cgroup_memcg(objcg), MEMCG_PERCPU_B,
> +				size * num_possible_cpus());
> +		rcu_read_unlock();
>  	} else {
>  		obj_cgroup_uncharge(objcg, size * num_possible_cpus());
>  		obj_cgroup_put(objcg);
> @@ -1626,6 +1631,11 @@ static void pcpu_memcg_free_hook(struct pcpu_chunk *chunk, int off, size_t size)
>  
>  	obj_cgroup_uncharge(objcg, size * num_possible_cpus());
>  
> +	rcu_read_lock();
> +	mod_memcg_state(obj_cgroup_memcg(objcg), MEMCG_PERCPU_B,
> +			-(size * num_possible_cpus()));
> +	rcu_read_unlock();
> +
>  	obj_cgroup_put(objcg);
>  }
>  
> -- 
> 2.25.4
> 

Acked-by: Dennis Zhou <dennis@...nel.org>

Thanks,
Dennis

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ