[<prev] [next>] [<thread-prev] [day] [month] [year] [list]
Message-ID: <3061f5f8-cef0-b7b1-c4de-f2ceea29af9a@huawei.com>
Date: Thu, 11 Dec 2025 21:02:27 +0800
From: Zenghui Yu <yuzenghui@...wei.com>
To: Jiaqi Yan <jiaqiyan@...gle.com>
CC: <maz@...nel.org>, <oliver.upton@...ux.dev>, <duenwen@...gle.com>,
<rananta@...gle.com>, <jthoughton@...gle.com>, <vsethi@...dia.com>,
<jgg@...dia.com>, <joey.gouly@....com>, <suzuki.poulose@....com>,
<catalin.marinas@....com>, <will@...nel.org>, <pbonzini@...hat.com>,
<corbet@....net>, <shuah@...nel.org>, <kvm@...r.kernel.org>,
<kvmarm@...ts.linux.dev>, <linux-arm-kernel@...ts.infradead.org>,
<linux-kernel@...r.kernel.org>, <linux-doc@...r.kernel.org>,
<linux-kselftest@...r.kernel.org>
Subject: Re: [PATCH v4 2/3] KVM: selftests: Test for KVM_EXIT_ARM_SEA
Hi Jiaqi,
I had run into several problems when testing it on different servers. I
haven't figured them out yet but post it early for discussion.
On 2025/10/14 2:59, Jiaqi Yan wrote:
> Test how KVM handles guest SEA when APEI is unable to claim it, and
> KVM_CAP_ARM_SEA_TO_USER is enabled.
>
> The behavior is triggered by consuming recoverable memory error (UER)
> injected via EINJ. The test asserts two major things:
> 1. KVM returns to userspace with KVM_EXIT_ARM_SEA exit reason, and
> has provided expected fault information, e.g. esr, flags, gva, gpa.
> 2. Userspace is able to handle KVM_EXIT_ARM_SEA by injecting SEA to
> guest and KVM injects expected SEA into the VCPU.
>
> Tested on a data center server running Siryn AmpereOne processor
> that has RAS support.
>
> Several things to notice before attempting to run this selftest:
> - The test relies on EINJ support in both firmware and kernel to
> inject UER. Otherwise the test will be skipped.
> - The under-test platform's APEI should be unable to claim the SEA.
> Otherwise the test will be skipped.
> - Some platform doesn't support notrigger in EINJ, which may cause
> APEI and GHES to offline the memory before guest can consume
> injected UER, and making test unable to trigger SEA.
>
> Signed-off-by: Jiaqi Yan <jiaqiyan@...gle.com>
[...]
> +static void inject_uer(uint64_t paddr)
> +{
> + if (access("/sys/firmware/acpi/tables/EINJ", R_OK) == -1)
> + ksft_test_result_skip("EINJ table no available in firmware");
Missing '\n'.
We should return early (to actually skip the test) if the file can not
be accessed, right?
> +
> + if (access(EINJ_ETYPE, R_OK | W_OK) == -1)
> + ksft_test_result_skip("EINJ module probably not loaded?");
> +
> + write_einj_entry(EINJ_ETYPE, ERROR_TYPE_MEMORY_UER);
> + write_einj_entry(EINJ_FLAGS, MASK_MEMORY_UER);
> + write_einj_entry(EINJ_ADDR, paddr);
> + write_einj_entry(EINJ_MASK, ~0x0UL);
> + write_einj_entry(EINJ_NOTRIGGER, 1);
> + write_einj_entry(EINJ_DOIT, 1);
> +}
> +
> +/*
> + * When host APEI successfully claims the SEA caused by guest_code, kernel
> + * will send SIGBUS signal with BUS_MCEERR_AR to test thread.
> + *
> + * We set up this SIGBUS handler to skip the test for that case.
> + */
> +static void sigbus_signal_handler(int sig, siginfo_t *si, void *v)
> +{
> + ksft_print_msg("SIGBUS (%d) received, dumping siginfo...\n", sig);
> + ksft_print_msg("si_signo=%d, si_errno=%d, si_code=%d, si_addr=%p\n",
> + si->si_signo, si->si_errno, si->si_code, si->si_addr);
> + if (si->si_code == BUS_MCEERR_AR)
> + ksft_test_result_skip("SEA is claimed by host APEI\n");
> + else
> + ksft_test_result_fail("Exit with signal unhandled\n");
> +
> + exit(0);
> +}
> +
> +static void setup_sigbus_handler(void)
> +{
> + struct sigaction act;
> +
> + memset(&act, 0, sizeof(act));
> + sigemptyset(&act.sa_mask);
> + act.sa_sigaction = sigbus_signal_handler;
> + act.sa_flags = SA_SIGINFO;
> + TEST_ASSERT(sigaction(SIGBUS, &act, NULL) == 0,
> + "Failed to setup SIGBUS handler");
> +}
> +
> +static void guest_code(void)
> +{
> + uint64_t guest_data;
> +
> + /* Consumes error will cause a SEA. */
> + guest_data = *(uint64_t *)EINJ_GVA;
> +
> + GUEST_FAIL("Poison not protected by SEA: gva=%#lx, guest_data=%#lx\n",
> + EINJ_GVA, guest_data);
> +}
> +
> +static void expect_sea_handler(struct ex_regs *regs)
> +{
> + u64 esr = read_sysreg(esr_el1);
> + u64 far = read_sysreg(far_el1);
> + bool expect_far_invalid = far_invalid;
> +
> + GUEST_PRINTF("Handling Guest SEA\n");
> + GUEST_PRINTF("ESR_EL1=%#lx, FAR_EL1=%#lx\n", esr, far);
> +
> + GUEST_ASSERT_EQ(ESR_ELx_EC(esr), ESR_ELx_EC_DABT_CUR);
> + GUEST_ASSERT_EQ(esr & ESR_ELx_FSC_TYPE, ESR_ELx_FSC_EXTABT);
> +
> + if (expect_far_invalid) {
> + GUEST_ASSERT_EQ(esr & ESR_ELx_FnV, ESR_ELx_FnV);
I hit this ASSERT with:
# Mapped 0x40000 pages: gva=0x80000000 to gpa=0xff80000000
# Before EINJect: data=0xbaadcafe
# EINJ_GVA=0x81234bad, einj_gpa=0xff81234bad, einj_hva=0xffff41234bad,
einj_hpa=0x202841234bad
# echo 0x10 > /sys/kernel/debug/apei/einj/error_type - done
# echo 0x2 > /sys/kernel/debug/apei/einj/flags - done
# echo 0x202841234bad > /sys/kernel/debug/apei/einj/param1 - done
# echo 0xffffffffffffffff > /sys/kernel/debug/apei/einj/param2 - done
# echo 0x1 > /sys/kernel/debug/apei/einj/notrigger - done
# echo 0x1 > /sys/kernel/debug/apei/einj/error_inject - done
# Memory UER EINJected
# Dump kvm_run info about KVM_EXIT_ARM_SEA
# kvm_run.arm_sea: esr=0x92000610, flags=0
# kvm_run.arm_sea: gva=0, gpa=0
# From guest: Handling Guest SEA
# From guest: ESR_EL1=0x96000010, FAR_EL1=0xaaaadf254828
# Guest aborted!
==== Test Assertion Failure ====
arm64/sea_to_user.c:172: esr & ESR_ELx_FnV == ESR_ELx_FnV
pid=38112 tid=38112 errno=4 - Interrupted system call
1 0x0000000000402f9b: run_vm at sea_to_user.c:246
2 0x0000000000402467: main at sea_to_user.c:330
3 0x0000ffff8e22b03f: ?? ??:0
4 0x0000ffff8e22b117: ?? ??:0
5 0x00000000004026ef: _start at ??:?
0x0 != 0x400 (esr & ESR_ELx_FnV != ESR_ELx_FnV)
It seems that KVM doesn't emulate FnV when injecting an abort.
> + GUEST_PRINTF("Guest observed garbage value in FAR\n");
> + } else {
> + GUEST_ASSERT_EQ(esr & ESR_ELx_FnV, 0);
> + GUEST_ASSERT_EQ(far, EINJ_GVA);
> + }
> +
> + GUEST_DONE();
> +}
> +
> +static void vcpu_inject_sea(struct kvm_vcpu *vcpu)
> +{
> + struct kvm_vcpu_events events = {};
> +
> + events.exception.ext_dabt_pending = true;
> + vcpu_events_set(vcpu, &events);
> +}
> +
> +static void run_vm(struct kvm_vm *vm, struct kvm_vcpu *vcpu)
> +{
> + struct ucall uc;
> + bool guest_done = false;
> + struct kvm_run *run = vcpu->run;
> + u64 esr;
> +
> + /* Resume the vCPU after error injection to consume the error. */
> + vcpu_run(vcpu);
> +
> + ksft_print_msg("Dump kvm_run info about KVM_EXIT_%s\n",
> + exit_reason_str(run->exit_reason));
> + ksft_print_msg("kvm_run.arm_sea: esr=%#llx, flags=%#llx\n",
> + run->arm_sea.esr, run->arm_sea.flags);
> + ksft_print_msg("kvm_run.arm_sea: gva=%#llx, gpa=%#llx\n",
> + run->arm_sea.gva, run->arm_sea.gpa);
> +
> + TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_ARM_SEA);
I can also hit this ASSERT with:
Random seed: 0x6b8b4567
# Mapped 0x40000 pages: gva=0x80000000 to gpa=0xff80000000
# Before EINJect: data=0xbaadcafe
# EINJ_GVA=0x81234bad, einj_gpa=0xff81234bad, einj_hva=0xffff41234bad,
einj_hpa=0x2841234bad
# echo 0x10 > /sys/kernel/debug/apei/einj/error_type - done
# echo 0x2 > /sys/kernel/debug/apei/einj/flags - done
# echo 0x2841234bad > /sys/kernel/debug/apei/einj/param1 - done
# echo 0xffffffffffffffff > /sys/kernel/debug/apei/einj/param2 - done
# echo 0x1 > /sys/kernel/debug/apei/einj/notrigger - done
# echo 0x1 > /sys/kernel/debug/apei/einj/error_inject - done
# Memory UER EINJected
# Dump kvm_run info about KVM_EXIT_MMIO
# kvm_run.arm_sea: esr=0xffff90ba0040, flags=0x691000
# kvm_run.arm_sea: gva=0x100000008, gpa=0
==== Test Assertion Failure ====
arm64/sea_to_user.c:207: exit_reason == (41)
pid=38023 tid=38023 errno=4 - Interrupted system call
1 0x0000000000402d1b: run_vm at sea_to_user.c:207
2 0x0000000000402467: main at sea_to_user.c:330
3 0x0000ffff9122b03f: ?? ??:0
4 0x0000ffff9122b117: ?? ??:0
5 0x00000000004026ef: _start at ??:?
Wanted KVM exit reason: 41 (ARM_SEA), got: 6 (MMIO)
Not sure what's wrong it..
> +
> + esr = run->arm_sea.esr;
> + TEST_ASSERT_EQ(ESR_ELx_EC(esr), ESR_ELx_EC_DABT_LOW);
> + TEST_ASSERT_EQ(esr & ESR_ELx_FSC_TYPE, ESR_ELx_FSC_EXTABT);
> + TEST_ASSERT_EQ(ESR_ELx_ISS2(esr), 0);
> + TEST_ASSERT_EQ((esr & ESR_ELx_INST_SYNDROME), 0);
> + TEST_ASSERT_EQ(esr & ESR_ELx_VNCR, 0);
> +
> + if (!(esr & ESR_ELx_FnV)) {
> + ksft_print_msg("Expect gva to match given FnV bit is 0\n");
> + TEST_ASSERT_EQ(run->arm_sea.gva, EINJ_GVA);
> + }
> +
> + if (run->arm_sea.flags & KVM_EXIT_ARM_SEA_FLAG_GPA_VALID) {
> + ksft_print_msg("Expect gpa to match given KVM_EXIT_ARM_SEA_FLAG_GPA_VALID is set\n");
> + TEST_ASSERT_EQ(run->arm_sea.gpa, einj_gpa & PAGE_ADDR_MASK);
> + }
> +
> + far_invalid = esr & ESR_ELx_FnV;
Missing sync_global_to_guest()?
Thanks,
Zenghui
Powered by blists - more mailing lists