[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <282731.1700168945@warthog.procyon.org.uk>
Date: Thu, 16 Nov 2023 21:09:05 +0000
From: David Howells <dhowells@...hat.com>
To: Linus Torvalds <torvalds@...ux-foundation.org>
Cc: dhowells@...hat.com, Borislav Petkov <bp@...en8.de>,
kernel test robot <oliver.sang@...el.com>,
oe-lkp@...ts.linux.dev, lkp@...el.com,
linux-kernel@...r.kernel.org,
Christian Brauner <brauner@...nel.org>,
Alexander Viro <viro@...iv.linux.org.uk>,
Jens Axboe <axboe@...nel.dk>, Christoph Hellwig <hch@....de>,
Christian Brauner <christian@...uner.io>,
Matthew Wilcox <willy@...radead.org>,
David Laight <David.Laight@...lab.com>, ying.huang@...el.com,
feng.tang@...el.com, fengwei.yin@...el.com
Subject: Re: [linus:master] [iov_iter] c9eec08bac: vm-scalability.throughput -16.9% regression
Linus Torvalds <torvalds@...ux-foundation.org> wrote:
> You could try building the kernel without mitigations (or booting with them
> off, which isn't quite as good) to verify.
Okay, I disabled RETPOLINE, which seems like it should be the important one.
With inlined memcpy:
iov_kunit_benchmark_bvec: avg 3160 uS, stddev 17 uS
iov_kunit_benchmark_bvec_split: avg 3380 uS, stddev 29 uS
iov_kunit_benchmark_kvec: avg 2940 uS, stddev 978 uS
iov_kunit_benchmark_xarray: avg 3599 uS, stddev 8 uS
iov_kunit_benchmark_xarray_to_bvec: avg 3964 uS, stddev 16 uS
Directly calling __memcpy():
iov_kunit_benchmark_bvec: avg 9947 uS, stddev 61 uS
iov_kunit_benchmark_bvec_split: avg 9790 uS, stddev 13 uS
iov_kunit_benchmark_kvec: avg 9565 uS, stddev 758 uS
iov_kunit_benchmark_xarray: avg 10498 uS, stddev 24 uS
iov_kunit_benchmark_xarray_to_bvec: avg 10459 uS, stddev 188 uS
I created a duplicate of __memcpy() (called __movsb_memcpy) without the
"alternative" statement and made memcpy_to_iter()/memcpy_from_iter() call that:
iov_kunit_benchmark_bvec: avg 3177 uS, stddev 7 uS
iov_kunit_benchmark_bvec_split: avg 3393 uS, stddev 10 uS
iov_kunit_benchmark_kvec: avg 2813 uS, stddev 385 uS
iov_kunit_benchmark_xarray: avg 3651 uS, stddev 7 uS
iov_kunit_benchmark_xarray_to_bvec: avg 3946 uS, stddev 8 uS
And then I made memcpy_to_iter()/memcpy_from_iter() call memcpy_orig() directly:
iov_kunit_benchmark_bvec: avg 9942 uS, stddev 17 uS
iov_kunit_benchmark_bvec_split: avg 9802 uS, stddev 29 uS
iov_kunit_benchmark_kvec: avg 9547 uS, stddev 598 uS
iov_kunit_benchmark_xarray: avg 10486 uS, stddev 13 uS
iov_kunit_benchmark_xarray_to_bvec: avg 10438 uS, stddev 12 uS
(See attached patch)
David
---
diff --git a/arch/x86/lib/memcpy_64.S b/arch/x86/lib/memcpy_64.S
index 0ae2e1712e2e..df1ebbe345e2 100644
--- a/arch/x86/lib/memcpy_64.S
+++ b/arch/x86/lib/memcpy_64.S
@@ -43,7 +43,7 @@ EXPORT_SYMBOL(__memcpy)
SYM_FUNC_ALIAS_MEMFUNC(memcpy, __memcpy)
EXPORT_SYMBOL(memcpy)
-SYM_FUNC_START_LOCAL(memcpy_orig)
+SYM_TYPED_FUNC_START(memcpy_orig)
movq %rdi, %rax
cmpq $0x20, %rdx
@@ -169,4 +169,12 @@ SYM_FUNC_START_LOCAL(memcpy_orig)
.Lend:
RET
SYM_FUNC_END(memcpy_orig)
+EXPORT_SYMBOL(memcpy_orig)
+SYM_TYPED_FUNC_START(__movsb_memcpy)
+ movq %rdi, %rax
+ movq %rdx, %rcx
+ rep movsb
+ RET
+SYM_FUNC_END(__movsb_memcpy)
+EXPORT_SYMBOL(__movsb_memcpy)
diff --git a/lib/iov_iter.c b/lib/iov_iter.c
index de7d11cf4c63..620cd6356a5b 100644
--- a/lib/iov_iter.c
+++ b/lib/iov_iter.c
@@ -58,11 +58,18 @@ size_t copy_from_user_iter(void __user *iter_from, size_t progress,
return res;
}
+extern void *__movsb_memcpy(void *, const void *, size_t);
+extern void *memcpy_orig(void *, const void *, size_t);
+
static __always_inline
size_t memcpy_to_iter(void *iter_to, size_t progress,
size_t len, void *from, void *priv2)
{
- memcpy(iter_to, from + progress, len);
+#if 0
+ __movsb_memcpy(iter_to, from + progress, len);
+#else
+ memcpy_orig(iter_to, from + progress, len);
+#endif
return 0;
}
@@ -70,7 +77,11 @@ static __always_inline
size_t memcpy_from_iter(void *iter_from, size_t progress,
size_t len, void *to, void *priv2)
{
- memcpy(to + progress, iter_from, len);
+#if 0
+ __movsb_memcpy(to + progress, iter_from, len);
+#else
+ memcpy_orig(to + progress, iter_from, len);
+#endif
return 0;
}
Powered by blists - more mailing lists