lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <20231123082941.21122-1-00107082@163.com>
Date:   Thu, 23 Nov 2023 16:29:41 +0800
From:   David Wang <00107082@....com>
To:     bagasdotme@...il.com
Cc:     Liam.Howlett@...cle.com, akpm@...ux-foundation.org,
        ankitag@...dia.com, chunn@...dia.com, linux-kernel@...r.kernel.org,
        linux-mm@...ck.org, regressions@...ts.linux.dev
Subject: Re: [REGRESSION]: mmap performance regression starting with k-6.1

Hi, 

Just contributing some information I recently collected for this thread:

I ran a profiler, and it shows a fundamental difference between 6.0 and 6.1:


v6.0:
```
__x64_sys_munmap(60.544% 6474/10693)
    __vm_munmap(98.749% 6393/6474)
        __do_munmap(97.982% 6264/6393)
            __split_vma(53.975% 3381/6264)
                vm_area_dup(59.036% 1996/3381)
                __vma_adjust(32.121% 1086/3381)
                anon_vma_clone(5.915% 200/3381)
                vma_dup_policy(0.769% 26/3381)
            unmap_region(16.699% 1046/6264)
            find_vma(10.361% 649/6264)
            remove_vma(7.822% 490/6264)
            percpu_counter_add_batch(2.011% 126/6264)
            __vma_rb_erase(1.405% 88/6264)
            userfaultfd_unmap_prep(0.798% 50/6264)
            downgrade_write(0.511% 32/6264)
```

v6.1:
```
__x64_sys_munmap(68.024% 24741/36371)
    __vm_munmap(99.681% 24662/24741)
        do_mas_munmap(99.015% 24419/24662)
            do_mas_align_munmap(98.243% 23990/24419)
                __split_vma(58.966% 14146/23990)
                    __vma_adjust(83.755% 11848/14146)
                    vm_area_dup(13.191% 1866/14146)
                    anon_vma_clone(2.050% 290/14146)
                    vma_dup_policy(0.254% 36/14146)
                mas_store_prealloc(11.709% 2809/23990)
                mas_preallocate(9.579% 2298/23990)
                unmap_region(5.523% 1325/23990)
```

v6.1 introduced the maple tree data structure, and mmap/munmap performance has degraded since then.
Based on this observation, I tested two commits:
9832fb87834e2bd925d30020962c81b05948fa7b GOOD (same as v6.0, about 20 seconds) (This is before the patch series "Introducing the Maple Tree".)
11f9a21ab65542189372b7d64bb2d2937dfdc9dc BAD  (about 51 seconds) (This one is somewhere in the middle of the patch series for the maple tree.)
With v6.1, the test runs for about 56 seconds.

For v6.7, the profiler shows further fundamental changes (some vmi stuff), and performance is worse (~70 seconds).
```
__x64_sys_munmap(63.873% 30725/48103)
    __vm_munmap(99.456% 30558/30725)
        do_vmi_munmap(97.670% 29846/30558)
            do_vmi_align_munmap(97.196% 29009/29846)
                __split_vma(63.701% 18479/29009)
                    vma_complete(34.417% 6360/18479)
                    vm_area_dup(33.681% 6224/18479)
                    mas_preallocate(11.835% 2187/18479)
                    down_write(5.173% 956/18479)
                    up_write(3.815% 705/18479)
                    asm_sysvec_apic_timer_interrupt(1.153% 213/18479)
                    anon_vma_clone(0.974% 180/18479)
                    vma_adjust_trans_huge(0.622% 115/18479)
                    mas_next_slot(0.498% 92/18479)
                    vma_dup_policy(0.465% 86/18479)
                    vma_prepare(0.357% 66/18479)
                    srso_return_thunk(0.336% 62/18479)
                    mas_find(0.114% 21/18479)
                unmap_region.constprop.0(12.196% 3538/29009)
                mas_store_gfp(10.548% 3060/29009)
                __call_rcu_common.constprop.0(1.992% 578/29009)
```

I used the following test code and timed it:
```
/*
 * mmap/munmap churn benchmark.
 *
 * Keeps MAXN anonymous private mappings alive and, for a fixed number of
 * iterations, unmaps a randomly chosen one and replaces it with a fresh
 * mapping of random size (1..32 units of 1024 bytes).  rand() is never
 * seeded, so every run uses the same pseudo-random sequence.
 */

/* NOTE(review): MAP_ANONYMOUS is hidden by glibc under strict -std=c11
 * unless a feature-test macro is set before the first header -- confirm
 * for other C libraries. */
#ifndef _DEFAULT_SOURCE
#define _DEFAULT_SOURCE
#endif

#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/mman.h>

#define MAXN 1024

enum {
	ITERATIONS = 10000000,	/* number of munmap+mmap replacement rounds */
	SIZE_UNIT  = 1024,	/* granularity of requested mapping lengths */
	SIZE_STEPS = 32		/* lengths range over 1..SIZE_STEPS units */
};

/* Live mappings: start address and the length that was requested. */
struct { void* addr; size_t n; } maps[MAXN];

/*
 * Create one anonymous read/write private mapping of random length and
 * record it in maps[slot].  Consumes exactly one rand() value.
 * Returns 0 on success; on failure reports via perror() and returns -1,
 * leaving maps[slot] untouched.
 */
static int map_slot(size_t slot)
{
	size_t len = (size_t)SIZE_UNIT * (size_t)((rand() % SIZE_STEPS) + 1);
	void *p = mmap(NULL, len, PROT_READ | PROT_WRITE,
		       MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);

	if (p == MAP_FAILED) {
		perror("fail to mmap");
		return -1;
	}
	maps[slot].addr = p;
	maps[slot].n = len;
	return 0;
}

/*
 * Release the mapping recorded in maps[slot].
 * Returns 0 on success; on failure reports via perror() and returns -1.
 */
static int unmap_slot(size_t slot)
{
	if (munmap(maps[slot].addr, maps[slot].n) != 0) {
		perror("fail to munmap");
		return -1;
	}
	return 0;
}

int main(void)
{
	size_t slot;
	int i;

	/* Populate every slot once. */
	for (slot = 0; slot < MAXN; slot++) {
		if (map_slot(slot) != 0)
			return -1;
	}

	/*
	 * Churn: the victim index is drawn first, then the replacement
	 * size (inside map_slot), so the rand() sequence is consumed in
	 * the same order on every run.
	 */
	for (i = 0; i < ITERATIONS; i++) {
		slot = (size_t)(rand() % MAXN);
		if (unmap_slot(slot) != 0)
			return -1;
		if (map_slot(slot) != 0)
			return -1;
	}

	/* Tear down whatever is still mapped. */
	for (slot = 0; slot < MAXN; slot++) {
		if (unmap_slot(slot) != 0)
			return -1;
	}
	return 0;
}
```

Thanks
David Wang

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ