Message-ID: <20241118123948.4796-7-kalyazin@amazon.com>
Date: Mon, 18 Nov 2024 12:39:48 +0000
From: Nikita Kalyazin <kalyazin@...zon.com>
To: <pbonzini@...hat.com>, <seanjc@...gle.com>, <corbet@....net>,
<tglx@...utronix.de>, <mingo@...hat.com>, <bp@...en8.de>,
<dave.hansen@...ux.intel.com>, <hpa@...or.com>, <rostedt@...dmis.org>,
<mhiramat@...nel.org>, <mathieu.desnoyers@...icios.com>,
<kvm@...r.kernel.org>, <linux-doc@...r.kernel.org>,
<linux-kernel@...r.kernel.org>, <linux-trace-kernel@...r.kernel.org>
CC: <jthoughton@...gle.com>, <david@...hat.com>, <peterx@...hat.com>,
<oleg@...hat.com>, <vkuznets@...hat.com>, <gshan@...hat.com>,
<graf@...zon.de>, <jgowans@...zon.com>, <roypat@...zon.co.uk>,
<derekmn@...zon.com>, <nsaenz@...zon.es>, <xmarcalx@...zon.com>,
<kalyazin@...zon.com>
Subject: [RFC PATCH 6/6] KVM: x86: async_pf_user: hook to fault handling and add ioctl
This patch adds interception in __kvm_faultin_pfn() for handling faults
that cause an exit to userspace and can be processed asynchronously. If
the kernel expects userspace to handle the fault asynchronously (i.e. it
can resume the vCPU while the fault is being processed), it sets the
KVM_MEMORY_EXIT_FLAG_ASYNC_PF_USER flag and supplies the async PF token
in struct memory_fault in the VM exit info.
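For illustration, a VMM's vCPU loop could consume the new exit
information roughly as follows. This is a minimal sketch against the
uAPI added by this patch; queue_async_fault() and resolve_fault() are
hypothetical VMM helpers, not part of this series:

#include <linux/kvm.h>

/* Hypothetical VMM helpers, not part of this series. */
extern void queue_async_fault(__u64 gpa, __u64 size, __u32 token);
extern void resolve_fault(__u64 gpa, __u64 size);

/* On KVM_EXIT_MEMORY_FAULT, pick the async or the sync path. */
static void handle_memory_fault_exit(struct kvm_run *run)
{
	__u64 gpa = run->memory_fault.gpa;
	__u64 size = run->memory_fault.size;

	if (run->memory_fault.flags & KVM_MEMORY_EXIT_FLAG_ASYNC_PF_USER) {
		/*
		 * The fault can be completed while the vCPU keeps running:
		 * stash the token for KVM_ASYNC_PF_USER_READY (see below),
		 * queue the fault to a worker thread and re-enter the guest.
		 */
		queue_async_fault(gpa, size,
				  run->memory_fault.async_pf_user_token);
	} else {
		/* No async PF was set up: resolve the fault before resuming. */
		resolve_fault(gpa, size);
	}
}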
The patch also adds the KVM_ASYNC_PF_USER_READY ioctl, which userspace
should use to notify the kernel that the fault has been processed,
passing the token corresponding to the fault.
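Once the page is in place, the worker thread would complete the fault
along these lines. Again a sketch: struct kvm_async_pf_user_ready is
introduced earlier in the series, and it is assumed here to carry just
the token:

#include <sys/ioctl.h>
#include <linux/kvm.h>

/* Tell KVM that the fault identified by @token has been resolved. */
static int notify_async_pf_ready(int vcpu_fd, __u32 token)
{
	struct kvm_async_pf_user_ready apf_ready = {
		.token = token,
	};

	return ioctl(vcpu_fd, KVM_ASYNC_PF_USER_READY, &apf_ready);
}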
Signed-off-by: Nikita Kalyazin <kalyazin@...zon.com>
---
arch/x86/kvm/mmu/mmu.c | 45 ++++++++++++++++++++++++++++++++++++++++
arch/x86/kvm/x86.c | 16 +++++++++++++-
arch/x86/kvm/x86.h | 2 ++
include/uapi/linux/kvm.h | 4 +++-
4 files changed, 65 insertions(+), 2 deletions(-)
diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index adf0161af894..a2b024ccbbe1 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -4282,6 +4282,22 @@ static bool kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu,
kvm_vcpu_gfn_to_hva(vcpu, fault->gfn), &arch);
}
+static bool kvm_arch_setup_async_pf_user(struct kvm_vcpu *vcpu,
+					 struct kvm_page_fault *fault, u32 *token)
+{
+	struct kvm_arch_async_pf arch;
+
+	arch.token = alloc_apf_token(vcpu);
+	arch.gfn = fault->gfn;
+	arch.error_code = fault->error_code;
+	arch.direct_map = vcpu->arch.mmu->root_role.direct;
+	arch.cr3 = kvm_mmu_get_guest_pgd(vcpu, vcpu->arch.mmu);
+
+	*token = arch.token;
+
+	return kvm_setup_async_pf_user(vcpu, 0, fault->addr, &arch);
+}
+
void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu, struct kvm_async_pf *work)
{
int r;
@@ -4396,6 +4412,35 @@ static int __kvm_faultin_pfn(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault
{
bool async;
+	/* Pre-check for userfault and bail out early. */
+	if (gfn_has_userfault(fault->slot->kvm, fault->gfn)) {
+		bool report_async = false;
+		u32 token = 0;
+
+		if (vcpu->kvm->arch.vm_type == KVM_X86_SW_PROTECTED_VM &&
+		    !fault->prefetch && kvm_can_do_async_pf(vcpu)) {
+			trace_kvm_try_async_get_page(fault->addr, fault->gfn, 1);
+			if (kvm_find_async_pf_gfn(vcpu, fault->gfn)) {
+				trace_kvm_async_pf_repeated_fault(fault->addr, fault->gfn, 1);
+				kvm_make_request(KVM_REQ_APF_HALT, vcpu);
+				return RET_PF_RETRY;
+			} else if (kvm_can_deliver_async_pf(vcpu) &&
+				   kvm_arch_setup_async_pf_user(vcpu, fault, &token)) {
+				report_async = true;
+			}
+		}
+
+		fault->pfn = KVM_PFN_ERR_USERFAULT;
+		kvm_mmu_prepare_memory_fault_exit(vcpu, fault);
+
+		if (report_async) {
+			vcpu->run->memory_fault.flags |= KVM_MEMORY_EXIT_FLAG_ASYNC_PF_USER;
+			vcpu->run->memory_fault.async_pf_user_token = token;
+		}
+
+		return -EFAULT;
+	}
+
if (fault->is_private)
return kvm_faultin_pfn_private(vcpu, fault);
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 2b8cd3af326b..30b22904859f 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -13372,7 +13372,7 @@ static inline bool apf_pageready_slot_free(struct kvm_vcpu *vcpu)
return !val;
}
-static bool kvm_can_deliver_async_pf(struct kvm_vcpu *vcpu)
+bool kvm_can_deliver_async_pf(struct kvm_vcpu *vcpu)
{
if (!kvm_pv_async_pf_enabled(vcpu))
@@ -13697,6 +13697,20 @@ void kvm_arch_gmem_invalidate(kvm_pfn_t start, kvm_pfn_t end)
long kvm_arch_vcpu_async_ioctl(struct file *filp,
unsigned int ioctl, unsigned long arg)
{
+	void __user *argp = (void __user *)arg;
+	struct kvm_vcpu *vcpu = filp->private_data;
+
+#ifdef CONFIG_KVM_ASYNC_PF_USER
+	if (ioctl == KVM_ASYNC_PF_USER_READY) {
+		struct kvm_async_pf_user_ready apf_ready;
+
+		if (copy_from_user(&apf_ready, argp, sizeof(apf_ready)))
+			return -EFAULT;
+
+		return kvm_async_pf_user_ready(vcpu, &apf_ready);
+	}
+#endif
+
return -ENOIOCTLCMD;
}
diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h
index d80a4c6b5a38..66ece51ee94b 100644
--- a/arch/x86/kvm/x86.h
+++ b/arch/x86/kvm/x86.h
@@ -325,6 +325,8 @@ int x86_emulate_instruction(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa,
int emulation_type, void *insn, int insn_len);
fastpath_t handle_fastpath_set_msr_irqoff(struct kvm_vcpu *vcpu);
+bool kvm_can_deliver_async_pf(struct kvm_vcpu *vcpu);
+
extern u64 host_xcr0;
extern u64 host_xss;
extern u64 host_arch_capabilities;
diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index ef3840a1c5e9..8aa5ce347bdf 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -430,12 +430,14 @@ struct kvm_run {
struct {
#define KVM_MEMORY_EXIT_FLAG_PRIVATE (1ULL << 3)
#define KVM_MEMORY_EXIT_FLAG_USERFAULT (1ULL << 4)
+#define KVM_MEMORY_EXIT_FLAG_ASYNC_PF_USER (1ULL << 5)
__u64 flags;
__u64 gpa;
__u64 size;
+ __u32 async_pf_user_token;
} memory_fault;
/* Fix the size of the union. */
- char padding[256];
+ char padding[252];
};
/* 2048 is the size of the char array used to bound/pad the size
--
2.40.1