Message-ID: <aA-3OwNum9gzHLH1@google.com>
Date: Mon, 28 Apr 2025 10:13:31 -0700
From: Sean Christopherson <seanjc@...gle.com>
To: Peter Zijlstra <peterz@...radead.org>
Cc: Josh Poimboeuf <jpoimboe@...nel.org>, x86@...nel.org, kys@...rosoft.com,
haiyangz@...rosoft.com, wei.liu@...nel.org, decui@...rosoft.com,
tglx@...utronix.de, mingo@...hat.com, bp@...en8.de,
dave.hansen@...ux.intel.com, hpa@...or.com, pawan.kumar.gupta@...ux.intel.com,
pbonzini@...hat.com, ardb@...nel.org, kees@...nel.org,
Arnd Bergmann <arnd@...db.de>, gregkh@...uxfoundation.org, linux-hyperv@...r.kernel.org,
linux-kernel@...r.kernel.org, kvm@...r.kernel.org, linux-efi@...r.kernel.org,
samitolvanen@...gle.com, ojeda@...nel.org
Subject: Re: [PATCH 3/6] x86/kvm/emulate: Avoid RET for fastops

On Sat, Apr 26, 2025, Peter Zijlstra wrote:
> On Wed, Apr 16, 2025 at 10:38:59AM +0200, Peter Zijlstra wrote:
>
> > Yeah, I finally got there. I'll go cook up something else.
>
> Sean, Paolo, can I once again ask how best to test this fastop crud?

Apply the below, build KVM selftests, enable forced emulation in KVM, and then
run fastops_test. It's well past time we had a selftest for this. It won't
detect bugs that are specific to 32-bit kernels, e.g. b63f20a778c8 ("x86/retpoline:
Don't clobber RFLAGS during CALL_NOSPEC on i386"), since KVM selftests are 64-bit
only, but for what you're doing, it should suffice.

For 32-bit kernels, it requires a 32-bit QEMU and KVM-Unit-Tests (or maybe even
a full-blown 32-bit guest image; I forget how much coverage KUT provides).
Regardless, I don't see any reason to put you through that pain; I can do that
sanity testing.

I'll post a proper patch for the new selftest after testing on AMD. The test
relies on hardware providing deterministic behavior for architecturally
undefined output (RFLAGS and GPRs); I don't know whether that holds true on AMD.
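
For example, BSF with a zero source leaves its destination architecturally
undefined (only ZF is defined for that case), so the comparison only works if
the CPU gives the same answer on the native path and on the emulated path. A
standalone host-side sketch, illustrative only and not part of the patch, of
the kind of case in play:

	#include <stdio.h>
	#include <stdint.h>

	int main(void)
	{
		uint64_t dst = 0xdeadbeefull, src = 0;

		/*
		 * ZF is architecturally defined here (set, because src == 0),
		 * but the value left in dst is not; the test assumes the CPU
		 * produces a deterministic result for inputs like this.
		 */
		__asm__ __volatile__("bsfq %1, %0" : "+r"(dst) : "r"(src) : "cc");

		printf("bsf of 0 left the destination as 0x%lx\n",
		       (unsigned long)dst);
		return 0;
	}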

To enable forced emulation, set /sys/module/kvm/parameters/force_emulation_prefix
to '1' (for the purposes of this test, the value doesn't matter). The param is
writable at runtime, so it doesn't matter whether KVM is built-in or a module.
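
If it helps to script the setup, here's a minimal userspace sketch, just the C
equivalent of 'echo 1 > /sys/module/kvm/parameters/force_emulation_prefix' (and
like the echo, it needs root):

	#include <fcntl.h>
	#include <stdio.h>
	#include <unistd.h>

	int main(void)
	{
		const char *path = "/sys/module/kvm/parameters/force_emulation_prefix";
		int fd = open(path, O_WRONLY);

		/* Write '1' to flip the module param at runtime. */
		if (fd < 0 || write(fd, "1", 1) != 1) {
			perror(path);
			return 1;
		}
		close(fd);
		return 0;
	}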
---
From: Sean Christopherson <seanjc@...gle.com>
Date: Mon, 28 Apr 2025 08:55:44 -0700
Subject: [PATCH] KVM: selftests: Add a test for x86's fastops emulation
Signed-off-by: Sean Christopherson <seanjc@...gle.com>
---
tools/testing/selftests/kvm/Makefile.kvm | 1 +
.../testing/selftests/kvm/x86/fastops_test.c | 165 ++++++++++++++++++
2 files changed, 166 insertions(+)
create mode 100644 tools/testing/selftests/kvm/x86/fastops_test.c
diff --git a/tools/testing/selftests/kvm/Makefile.kvm b/tools/testing/selftests/kvm/Makefile.kvm
index f62b0a5aba35..411c3d5eb5b1 100644
--- a/tools/testing/selftests/kvm/Makefile.kvm
+++ b/tools/testing/selftests/kvm/Makefile.kvm
@@ -66,6 +66,7 @@ TEST_GEN_PROGS_x86 += x86/cr4_cpuid_sync_test
TEST_GEN_PROGS_x86 += x86/dirty_log_page_splitting_test
TEST_GEN_PROGS_x86 += x86/feature_msrs_test
TEST_GEN_PROGS_x86 += x86/exit_on_emulation_failure_test
+TEST_GEN_PROGS_x86 += x86/fastops_test
TEST_GEN_PROGS_x86 += x86/fix_hypercall_test
TEST_GEN_PROGS_x86 += x86/hwcr_msr_test
TEST_GEN_PROGS_x86 += x86/hyperv_clock
diff --git a/tools/testing/selftests/kvm/x86/fastops_test.c b/tools/testing/selftests/kvm/x86/fastops_test.c
new file mode 100644
index 000000000000..c3799edb5d0c
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86/fastops_test.c
@@ -0,0 +1,165 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#include "test_util.h"
+#include "kvm_util.h"
+#include "processor.h"
+
+/*
+ * Execute a fastop() instruction, with or without forced emulation. BT bit 0
+ * to set RFLAGS.CF based on whether or not the input is even or odd, so that
+ * instructions like ADC and SBB are deterministic.
+ */
+#define guest_execute_fastop_1(FEP, insn, type_t, __val, __flags) \
+do { \
+ __asm__ __volatile__("bt $0, %[val]\n\t" \
+ FEP insn " %[val]\n\t" \
+ "pushfq\n\t" \
+ "pop %[flags]\n\t" \
+ : [val]"+r"(__val), [flags]"=r"(__flags) \
+ : : "cc", "memory"); \
+} while (0)
+
+#define guest_test_fastop_1(insn, type_t, __val) \
+do { \
+ type_t val = __val, ex_val = __val, input = __val; \
+ uint64_t flags, ex_flags; \
+ \
+ guest_execute_fastop_1("", insn, type_t, ex_val, ex_flags); \
+ guest_execute_fastop_1(KVM_FEP, insn, type_t, val, flags); \
+ \
+ __GUEST_ASSERT(val == ex_val, \
+ "Wanted 0x%lx for '%s 0x%lx', got 0x%lx", \
+ (uint64_t)ex_val, insn, (uint64_t)input, (uint64_t)val); \
+ __GUEST_ASSERT(flags == ex_flags, \
+ "Wanted flags 0x%lx for '%s 0x%lx', got 0x%lx", \
+ ex_flags, insn, (uint64_t)input, flags); \
+} while (0)
+
+#define guest_execute_fastop_2(FEP, insn, type_t, __input, __output, __flags) \
+do { \
+ __asm__ __volatile__("bt $0, %[output]\n\t" \
+ FEP insn " %[input], %[output]\n\t" \
+ "pushfq\n\t" \
+ "pop %[flags]\n\t" \
+ : [output]"+r"(__output), [flags]"=r"(__flags) \
+ : [input]"r"(__input) : "cc", "memory"); \
+} while (0)
+
+#define guest_test_fastop_2(insn, type_t, __val1, __val2) \
+do { \
+ type_t input = __val1, input2 = __val2, output = __val2, ex_output = __val2; \
+ uint64_t flags, ex_flags; \
+ \
+ guest_execute_fastop_2("", insn, type_t, input, ex_output, ex_flags); \
+ guest_execute_fastop_2(KVM_FEP, insn, type_t, input, output, flags); \
+ \
+ __GUEST_ASSERT(output == ex_output, \
+ "Wanted 0x%lx for '%s 0x%lx 0x%lx', got 0x%lx", \
+ (uint64_t)ex_output, insn, (uint64_t)input, \
+ (uint64_t)input2, (uint64_t)output); \
+ __GUEST_ASSERT(flags == ex_flags, \
+ "Wanted flags 0x%lx for '%s 0x%lx, 0x%lx', got 0x%lx", \
+ ex_flags, insn, (uint64_t)input, (uint64_t)input2, flags); \
+} while (0)
+
+#define guest_execute_fastop_cl(FEP, insn, type_t, __shift, __output, __flags) \
+do { \
+ __asm__ __volatile__("bt $0, %[output]\n\t" \
+ FEP insn " %%cl, %[output]\n\t" \
+ "pushfq\n\t" \
+ "pop %[flags]\n\t" \
+ : [output]"+r"(__output), [flags]"=r"(__flags) \
+ : "c"(__shift) : "cc", "memory"); \
+} while (0)
+
+#define guest_test_fastop_cl(insn, type_t, __val1, __val2) \
+do { \
+ type_t output = __val2, ex_output = __val2, input = __val2; \
+ uint8_t shift = __val1; \
+ uint64_t flags, ex_flags; \
+ \
+ guest_execute_fastop_cl("", insn, type_t, shift, ex_output, ex_flags); \
+ guest_execute_fastop_cl(KVM_FEP, insn, type_t, shift, output, flags); \
+ \
+ __GUEST_ASSERT(output == ex_output, \
+ "Wanted 0x%lx for '%s 0x%x, 0x%lx', got 0x%lx", \
+ (uint64_t)ex_output, insn, shift, (uint64_t)input, \
+ (uint64_t)output); \
+ __GUEST_ASSERT(flags == ex_flags, \
+ "Wanted flags 0x%lx for '%s 0x%x, 0x%lx', got 0x%lx", \
+ ex_flags, insn, shift, (uint64_t)input, flags); \
+} while (0)
+
+static const uint64_t vals[] = {
+ 0,
+ 1,
+ 2,
+ 4,
+ 7,
+ 0x5555555555555555,
+ 0xaaaaaaaaaaaaaaaa,
+ 0xfefefefefefefefe,
+ 0xffffffffffffffff,
+};
+
+#define guest_test_fastops(type_t, suffix) \
+do { \
+ int i, j; \
+ \
+ for (i = 0; i < ARRAY_SIZE(vals); i++) { \
+ guest_test_fastop_1("dec" suffix, type_t, vals[i]); \
+ guest_test_fastop_1("inc" suffix, type_t, vals[i]); \
+ guest_test_fastop_1("neg" suffix, type_t, vals[i]); \
+ guest_test_fastop_1("not" suffix, type_t, vals[i]); \
+ \
+ for (j = 0; j < ARRAY_SIZE(vals); j++) { \
+ guest_test_fastop_2("add" suffix, type_t, vals[i], vals[j]); \
+ guest_test_fastop_2("adc" suffix, type_t, vals[i], vals[j]); \
+ guest_test_fastop_2("and" suffix, type_t, vals[i], vals[j]); \
+ guest_test_fastop_2("bsf" suffix, type_t, vals[i], vals[j]); \
+ guest_test_fastop_2("bsr" suffix, type_t, vals[i], vals[j]); \
+ guest_test_fastop_2("bt" suffix, type_t, vals[i], vals[j]); \
+ guest_test_fastop_2("btc" suffix, type_t, vals[i], vals[j]); \
+ guest_test_fastop_2("btr" suffix, type_t, vals[i], vals[j]); \
+ guest_test_fastop_2("bts" suffix, type_t, vals[i], vals[j]); \
+ guest_test_fastop_2("cmp" suffix, type_t, vals[i], vals[j]); \
+ guest_test_fastop_2("imul" suffix, type_t, vals[i], vals[j]); \
+ guest_test_fastop_2("or" suffix, type_t, vals[i], vals[j]); \
+ guest_test_fastop_2("sbb" suffix, type_t, vals[i], vals[j]); \
+ guest_test_fastop_2("sub" suffix, type_t, vals[i], vals[j]); \
+ guest_test_fastop_2("test" suffix, type_t, vals[i], vals[j]); \
+ guest_test_fastop_2("xor" suffix, type_t, vals[i], vals[j]); \
+ \
+ guest_test_fastop_cl("rol" suffix, type_t, vals[i], vals[j]); \
+ guest_test_fastop_cl("ror" suffix, type_t, vals[i], vals[j]); \
+ guest_test_fastop_cl("rcl" suffix, type_t, vals[i], vals[j]); \
+ guest_test_fastop_cl("rcr" suffix, type_t, vals[i], vals[j]); \
+ guest_test_fastop_cl("sar" suffix, type_t, vals[i], vals[j]); \
+ guest_test_fastop_cl("shl" suffix, type_t, vals[i], vals[j]); \
+ guest_test_fastop_cl("shr" suffix, type_t, vals[i], vals[j]); \
+ } \
+ } \
+} while (0)
+
+static void guest_code(void)
+{
+ guest_test_fastops(uint16_t, "w");
+ guest_test_fastops(uint32_t, "l");
+ guest_test_fastops(uint64_t, "q");
+
+ GUEST_DONE();
+}
+
+int main(int argc, char *argv[])
+{
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm;
+
+ TEST_REQUIRE(is_forced_emulation_enabled);
+
+ vm = vm_create_with_one_vcpu(&vcpu, guest_code);
+
+ vcpu_run(vcpu);
+ TEST_ASSERT_EQ(get_ucall(vcpu, NULL), UCALL_DONE);
+
+ kvm_vm_free(vm);
+}
base-commit: 661b7ddb2d10258b53106d7c39c309806b00a99c