lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Date:   Tue, 30 Aug 2022 09:01:12 +0200
From:   Michal Hocko <mhocko@...e.com>
To:     Kairui Song <kasong@...cent.com>
Cc:     cgroups@...r.kernel.org, linux-mm@...ck.org,
        Johannes Weiner <hannes@...xchg.org>,
        Roman Gushchin <roman.gushchin@...ux.dev>,
        Shakeel Butt <shakeelb@...gle.com>,
        Muchun Song <songmuchun@...edance.com>,
        Andrew Morton <akpm@...ux-foundation.org>,
        linux-kernel@...r.kernel.org
Subject: Re: [PATCH 2/2] mm: memcontrol: make cgroup_memory_noswap a static
 key

On Tue 30-08-22 13:59:49, Kairui Song wrote:
> From: Kairui Song <kasong@...cent.com>
> 
> cgroup_memory_noswap is used in many hot paths, so make it a static key
> to lower the kernel overhead.
> 
> Using 8G of ZRAM as SWAP, benchmark using `perf stat -d -d -d --repeat 100`
> with the following code snippet in a non-root cgroup:
> 
>    #include <stdio.h>
>    #include <string.h>
>    #include <linux/mman.h>
>    #include <sys/mman.h>
>    #define MB 1024UL * 1024UL
>    int main(int argc, char **argv){
>       void *p = mmap(NULL, 8000 * MB, PROT_READ | PROT_WRITE,
>                      MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
>       memset(p, 0xff, 8000 * MB);
>       madvise(p, 8000 * MB, MADV_PAGEOUT);
>       memset(p, 0xff, 8000 * MB);
>       return 0;
>    }
> 
> Before:
>           7,021.43 msec task-clock                #    0.967 CPUs utilized            ( +-  0.03% )
>              4,010      context-switches          #  573.853 /sec                     ( +-  0.01% )
>                  0      cpu-migrations            #    0.000 /sec
>          2,052,057      page-faults               #  293.661 K/sec                    ( +-  0.00% )
>     12,616,546,027      cycles                    #    1.805 GHz                      ( +-  0.06% )  (39.92%)
>        156,823,666      stalled-cycles-frontend   #    1.25% frontend cycles idle     ( +-  0.10% )  (40.25%)
>        310,130,812      stalled-cycles-backend    #    2.47% backend cycles idle      ( +-  4.39% )  (40.73%)
>     18,692,516,591      instructions              #    1.49  insn per cycle
>                                                   #    0.01  stalled cycles per insn  ( +-  0.04% )  (40.75%)
>      4,907,447,976      branches                  #  702.283 M/sec                    ( +-  0.05% )  (40.30%)
>         13,002,578      branch-misses             #    0.26% of all branches          ( +-  0.08% )  (40.48%)
>      7,069,786,296      L1-dcache-loads           #    1.012 G/sec                    ( +-  0.03% )  (40.32%)
>        649,385,847      L1-dcache-load-misses     #    9.13% of all L1-dcache accesses  ( +-  0.07% )  (40.10%)
>      1,485,448,688      L1-icache-loads           #  212.576 M/sec                    ( +-  0.15% )  (39.49%)
>         31,628,457      L1-icache-load-misses     #    2.13% of all L1-icache accesses  ( +-  0.40% )  (39.57%)
>          6,667,311      dTLB-loads                #  954.129 K/sec                    ( +-  0.21% )  (39.50%)
>          5,668,555      dTLB-load-misses          #   86.40% of all dTLB cache accesses  ( +-  0.12% )  (39.03%)
>                765      iTLB-loads                #  109.476 /sec                     ( +- 21.81% )  (39.44%)
>          4,370,351      iTLB-load-misses          # 214320.09% of all iTLB cache accesses  ( +-  1.44% )  (39.86%)
>        149,207,254      L1-dcache-prefetches      #   21.352 M/sec                    ( +-  0.13% )  (40.27%)
> 
>            7.25869 +- 0.00203 seconds time elapsed  ( +-  0.03% )
> 
> After:
>           6,576.16 msec task-clock                #    0.953 CPUs utilized            ( +-  0.10% )
>              4,020      context-switches          #  605.595 /sec                     ( +-  0.01% )
>                  0      cpu-migrations            #    0.000 /sec
>          2,052,056      page-faults               #  309.133 K/sec                    ( +-  0.00% )
>     11,967,619,180      cycles                    #    1.803 GHz                      ( +-  0.36% )  (38.76%)
>        161,259,240      stalled-cycles-frontend   #    1.38% frontend cycles idle     ( +-  0.27% )  (36.58%)
>        253,605,302      stalled-cycles-backend    #    2.16% backend cycles idle      ( +-  4.45% )  (34.78%)
>     19,328,171,892      instructions              #    1.65  insn per cycle
>                                                   #    0.01  stalled cycles per insn  ( +-  0.10% )  (31.46%)
>      5,213,967,902      branches                  #  785.461 M/sec                    ( +-  0.18% )  (30.68%)
>         12,385,170      branch-misses             #    0.24% of all branches          ( +-  0.26% )  (34.13%)
>      7,271,687,822      L1-dcache-loads           #    1.095 G/sec                    ( +-  0.12% )  (35.29%)
>        649,873,045      L1-dcache-load-misses     #    8.93% of all L1-dcache accesses  ( +-  0.11% )  (41.41%)
>      1,950,037,608      L1-icache-loads           #  293.764 M/sec                    ( +-  0.33% )  (43.11%)
>         31,365,566      L1-icache-load-misses     #    1.62% of all L1-icache accesses  ( +-  0.39% )  (45.89%)
>          6,767,809      dTLB-loads                #    1.020 M/sec                    ( +-  0.47% )  (48.42%)
>          6,339,590      dTLB-load-misses          #   95.43% of all dTLB cache accesses  ( +-  0.50% )  (46.60%)
>                736      iTLB-loads                #  110.875 /sec                     ( +-  1.79% )  (48.60%)
>          4,314,836      iTLB-load-misses          # 518653.73% of all iTLB cache accesses  ( +-  0.63% )  (42.91%)
>        144,950,156      L1-dcache-prefetches      #   21.836 M/sec                    ( +-  0.37% )  (41.39%)
> 
>            6.89935 +- 0.00703 seconds time elapsed  ( +-  0.10% )

Do you happen to have a perf profile before and after to see which of
the paths really benefits from this?

> The performance is clearly better.
> 
> Signed-off-by: Kairui Song <kasong@...cent.com>

Anyway, this looks good to me. I like memcg_swap_enabled() better than
!cgroup_memory_noswap. The double negative was quite confusing.

Acked-by: Michal Hocko <mhocko@...e.com>

Thanks!

> ---
>  mm/memcontrol.c | 27 +++++++++++++++++++--------
>  1 file changed, 19 insertions(+), 8 deletions(-)
> 
> diff --git a/mm/memcontrol.c b/mm/memcontrol.c
> index 20e26ccd7dddc..8ea5589345a14 100644
> --- a/mm/memcontrol.c
> +++ b/mm/memcontrol.c
> @@ -90,9 +90,18 @@ static bool cgroup_memory_nokmem __initdata;
>  
>  /* Whether the swap controller is active */
>  #ifdef CONFIG_MEMCG_SWAP
> -static bool cgroup_memory_noswap __ro_after_init;
> +static bool cgroup_memory_noswap __initdata;
> +
> +static DEFINE_STATIC_KEY_FALSE(memcg_swap_enabled_key);
> +static inline bool memcg_swap_enabled(void)
> +{
> +	return static_branch_likely(&memcg_swap_enabled_key);
> +}
>  #else
> -#define cgroup_memory_noswap		1
> +static inline bool memcg_swap_enabled(void)
> +{
> +	return false;
> +}
>  #endif
>  
>  #ifdef CONFIG_CGROUP_WRITEBACK
> @@ -102,7 +111,7 @@ static DECLARE_WAIT_QUEUE_HEAD(memcg_cgwb_frn_waitq);
>  /* Whether legacy memory+swap accounting is active */
>  static bool do_memsw_account(void)
>  {
> -	return !cgroup_subsys_on_dfl(memory_cgrp_subsys) && !cgroup_memory_noswap;
> +	return !cgroup_subsys_on_dfl(memory_cgrp_subsys) && memcg_swap_enabled();
>  }
>  
>  #define THRESHOLDS_EVENTS_TARGET 128
> @@ -7264,7 +7273,7 @@ void mem_cgroup_swapout(struct folio *folio, swp_entry_t entry)
>  	if (!mem_cgroup_is_root(memcg))
>  		page_counter_uncharge(&memcg->memory, nr_entries);
>  
> -	if (!cgroup_memory_noswap && memcg != swap_memcg) {
> +	if (memcg_swap_enabled() && memcg != swap_memcg) {
>  		if (!mem_cgroup_is_root(swap_memcg))
>  			page_counter_charge(&swap_memcg->memsw, nr_entries);
>  		page_counter_uncharge(&memcg->memsw, nr_entries);
> @@ -7316,7 +7325,7 @@ int __mem_cgroup_try_charge_swap(struct folio *folio, swp_entry_t entry)
>  
>  	memcg = mem_cgroup_id_get_online(memcg);
>  
> -	if (!cgroup_memory_noswap && !mem_cgroup_is_root(memcg) &&
> +	if (memcg_swap_enabled() && !mem_cgroup_is_root(memcg) &&
>  	    !page_counter_try_charge(&memcg->swap, nr_pages, &counter)) {
>  		memcg_memory_event(memcg, MEMCG_SWAP_MAX);
>  		memcg_memory_event(memcg, MEMCG_SWAP_FAIL);
> @@ -7348,7 +7357,7 @@ void __mem_cgroup_uncharge_swap(swp_entry_t entry, unsigned int nr_pages)
>  	rcu_read_lock();
>  	memcg = mem_cgroup_from_id(id);
>  	if (memcg) {
> -		if (!cgroup_memory_noswap && !mem_cgroup_is_root(memcg)) {
> +		if (memcg_swap_enabled() && !mem_cgroup_is_root(memcg)) {
>  			if (cgroup_subsys_on_dfl(memory_cgrp_subsys))
>  				page_counter_uncharge(&memcg->swap, nr_pages);
>  			else
> @@ -7364,7 +7373,7 @@ long mem_cgroup_get_nr_swap_pages(struct mem_cgroup *memcg)
>  {
>  	long nr_swap_pages = get_nr_swap_pages();
>  
> -	if (cgroup_memory_noswap || !cgroup_subsys_on_dfl(memory_cgrp_subsys))
> +	if (!memcg_swap_enabled() || !cgroup_subsys_on_dfl(memory_cgrp_subsys))
>  		return nr_swap_pages;
>  	for (; memcg != root_mem_cgroup; memcg = parent_mem_cgroup(memcg))
>  		nr_swap_pages = min_t(long, nr_swap_pages,
> @@ -7381,7 +7390,7 @@ bool mem_cgroup_swap_full(struct page *page)
>  
>  	if (vm_swap_full())
>  		return true;
> -	if (cgroup_memory_noswap || !cgroup_subsys_on_dfl(memory_cgrp_subsys))
> +	if (!memcg_swap_enabled() || !cgroup_subsys_on_dfl(memory_cgrp_subsys))
>  		return false;
>  
>  	memcg = page_memcg(page);
> @@ -7689,6 +7698,8 @@ static int __init mem_cgroup_swap_init(void)
>  	if (cgroup_memory_noswap)
>  		return 0;
>  
> +	static_branch_enable(&memcg_swap_enabled_key);
> +
>  	WARN_ON(cgroup_add_dfl_cftypes(&memory_cgrp_subsys, swap_files));
>  	WARN_ON(cgroup_add_legacy_cftypes(&memory_cgrp_subsys, memsw_files));
>  #if defined(CONFIG_MEMCG_KMEM) && defined(CONFIG_ZSWAP)
> -- 
> 2.35.2

-- 
Michal Hocko
SUSE Labs

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ