lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <282731.1700168945@warthog.procyon.org.uk>
Date:   Thu, 16 Nov 2023 21:09:05 +0000
From:   David Howells <dhowells@...hat.com>
To:     Linus Torvalds <torvalds@...ux-foundation.org>
Cc:     dhowells@...hat.com, Borislav Petkov <bp@...en8.de>,
        kernel test robot <oliver.sang@...el.com>,
        oe-lkp@...ts.linux.dev, lkp@...el.com,
        linux-kernel@...r.kernel.org,
        Christian Brauner <brauner@...nel.org>,
        Alexander Viro <viro@...iv.linux.org.uk>,
        Jens Axboe <axboe@...nel.dk>, Christoph Hellwig <hch@....de>,
        Christian Brauner <christian@...uner.io>,
        Matthew Wilcox <willy@...radead.org>,
        David Laight <David.Laight@...lab.com>, ying.huang@...el.com,
        feng.tang@...el.com, fengwei.yin@...el.com
Subject: Re: [linus:master] [iov_iter] c9eec08bac: vm-scalability.throughput -16.9% regression

Linus Torvalds <torvalds@...ux-foundation.org> wrote:

> You could try building the kernel without mitigations (or booting with them
> off, which isn't quite as good) to verify.

Okay, I disabled RETPOLINE, which seems like it should be the important one.
With inlined memcpy:

 iov_kunit_benchmark_bvec: avg 3160 uS, stddev 17 uS
 iov_kunit_benchmark_bvec_split: avg 3380 uS, stddev 29 uS
 iov_kunit_benchmark_kvec: avg 2940 uS, stddev 978 uS
 iov_kunit_benchmark_xarray: avg 3599 uS, stddev 8 uS
 iov_kunit_benchmark_xarray_to_bvec: avg 3964 uS, stddev 16 uS

Directly calling __memcpy():

 iov_kunit_benchmark_bvec: avg 9947 uS, stddev 61 uS
 iov_kunit_benchmark_bvec_split: avg 9790 uS, stddev 13 uS
 iov_kunit_benchmark_kvec: avg 9565 uS, stddev 758 uS
 iov_kunit_benchmark_xarray: avg 10498 uS, stddev 24 uS
 iov_kunit_benchmark_xarray_to_bvec: avg 10459 uS, stddev 188 uS

I created a duplicate of __memcpy() (called __movsb_memcpy()) without the
"alternative" statement and made memcpy_to_iter() and memcpy_from_iter() call
that:

 iov_kunit_benchmark_bvec: avg 3177 uS, stddev 7 uS
 iov_kunit_benchmark_bvec_split: avg 3393 uS, stddev 10 uS
 iov_kunit_benchmark_kvec: avg 2813 uS, stddev 385 uS
 iov_kunit_benchmark_xarray: avg 3651 uS, stddev 7 uS
 iov_kunit_benchmark_xarray_to_bvec: avg 3946 uS, stddev 8 uS

And then I made memcpy_to_iter() and memcpy_from_iter() call memcpy_orig() directly:

 iov_kunit_benchmark_bvec: avg 9942 uS, stddev 17 uS
 iov_kunit_benchmark_bvec_split: avg 9802 uS, stddev 29 uS
 iov_kunit_benchmark_kvec: avg 9547 uS, stddev 598 uS
 iov_kunit_benchmark_xarray: avg 10486 uS, stddev 13 uS
 iov_kunit_benchmark_xarray_to_bvec: avg 10438 uS, stddev 12 uS

(See attached patch)

David
---
diff --git a/arch/x86/lib/memcpy_64.S b/arch/x86/lib/memcpy_64.S
index 0ae2e1712e2e..df1ebbe345e2 100644
--- a/arch/x86/lib/memcpy_64.S
+++ b/arch/x86/lib/memcpy_64.S
@@ -43,7 +43,7 @@ EXPORT_SYMBOL(__memcpy)
 SYM_FUNC_ALIAS_MEMFUNC(memcpy, __memcpy)
 EXPORT_SYMBOL(memcpy)
 
-SYM_FUNC_START_LOCAL(memcpy_orig)
+SYM_TYPED_FUNC_START(memcpy_orig)
 	movq %rdi, %rax
 
 	cmpq $0x20, %rdx
@@ -169,4 +169,12 @@ SYM_FUNC_START_LOCAL(memcpy_orig)
 .Lend:
 	RET
 SYM_FUNC_END(memcpy_orig)
+EXPORT_SYMBOL(memcpy_orig)
 
+SYM_TYPED_FUNC_START(__movsb_memcpy)
+	movq %rdi, %rax
+	movq %rdx, %rcx
+	rep movsb
+	RET
+SYM_FUNC_END(__movsb_memcpy)
+EXPORT_SYMBOL(__movsb_memcpy)
diff --git a/lib/iov_iter.c b/lib/iov_iter.c
index de7d11cf4c63..620cd6356a5b 100644
--- a/lib/iov_iter.c
+++ b/lib/iov_iter.c
@@ -58,11 +58,18 @@ size_t copy_from_user_iter(void __user *iter_from, size_t progress,
 	return res;
 }
 
+extern void *__movsb_memcpy(void *, const void *, size_t);
+extern void *memcpy_orig(void *, const void *, size_t);
+
 static __always_inline
 size_t memcpy_to_iter(void *iter_to, size_t progress,
 		      size_t len, void *from, void *priv2)
 {
-	memcpy(iter_to, from + progress, len);
+#if 0
+	__movsb_memcpy(iter_to, from + progress, len);
+#else
+	memcpy_orig(iter_to, from + progress, len);
+#endif	
 	return 0;
 }
 
@@ -70,7 +77,11 @@ static __always_inline
 size_t memcpy_from_iter(void *iter_from, size_t progress,
 			size_t len, void *to, void *priv2)
 {
-	memcpy(to + progress, iter_from, len);
+#if 0
+	__movsb_memcpy(to + progress, iter_from, len);
+#else
+	memcpy_orig(to + progress, iter_from, len);
+#endif
 	return 0;
 }
 

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ