[<prev] [next>] [thread-next>] [day] [month] [year] [list]
Message-Id: <20211126123145.2772-1-jiangshanlai@gmail.com>
Date: Fri, 26 Nov 2021 20:31:45 +0800
From: Lai Jiangshan <jiangshanlai@...il.com>
To: linux-kernel@...r.kernel.org
Cc: x86@...nel.org, Lai Jiangshan <laijs@...ux.alibaba.com>,
Paolo Bonzini <pbonzini@...hat.com>,
Sean Christopherson <seanjc@...gle.com>,
Vitaly Kuznetsov <vkuznets@...hat.com>,
Wanpeng Li <wanpengli@...cent.com>,
Jim Mattson <jmattson@...gle.com>,
Joerg Roedel <joro@...tes.org>,
Thomas Gleixner <tglx@...utronix.de>,
Ingo Molnar <mingo@...hat.com>, Borislav Petkov <bp@...en8.de>,
Dave Hansen <dave.hansen@...ux.intel.com>,
"H. Peter Anvin" <hpa@...or.com>,
Andy Lutomirski <luto@...nel.org>,
Peter Zijlstra <peterz@...radead.org>, kvm@...r.kernel.org
Subject: [RFC PATCH] x86/kvm: Allow kvm_read_and_reset_apf_flags() to be instrumented
From: Lai Jiangshan <laijs@...ux.alibaba.com>
Both VMX and SVM made mistakes of calling kvm_read_and_reset_apf_flags()
in instrumentable code:
vmx_vcpu_enter_exit()
ASYNC PF induced VMEXIT
save cr2
leave noinstr section
-- kernel #PF can happen here due to instrumentation
handle_exception_nmi_irqoff
kvm_read_and_reset_apf_flags()
If kernel #PF happens before handle_exception_nmi_irqoff() after leaving
noinstr section due to instrumentation, kernel #PF handler will consume
the incorrect apf flags and panic.
The problem might be fixed by moving kvm_read_and_reset_apf_flags()
into vmx_vcpu_enter_exit() to consume apf flags earlier before leaving
noinstr section like the way it handles CR2.
But linux kernel only resigters ASYNC PF for userspace and guest, so
ASYNC PF can't hit when it is in kernel, so apf flags can be changed to
be consumed only when the #PF is from guest or userspace.
It is natually that kvm_read_and_reset_apf_flags() in vmx/svm is for
page fault from guest. So this patch changes kvm_handle_async_pf()
to be called only for page fault from userspace and renames it.
Signed-off-by: Lai Jiangshan <laijs@...ux.alibaba.com>
---
arch/x86/include/asm/kvm_para.h | 8 +++----
arch/x86/kernel/kvm.c | 10 +--------
arch/x86/mm/fault.c | 39 ++++++++++++---------------------
3 files changed, 19 insertions(+), 38 deletions(-)
diff --git a/arch/x86/include/asm/kvm_para.h b/arch/x86/include/asm/kvm_para.h
index 56935ebb1dfe..3452e705dfda 100644
--- a/arch/x86/include/asm/kvm_para.h
+++ b/arch/x86/include/asm/kvm_para.h
@@ -104,14 +104,14 @@ unsigned int kvm_arch_para_hints(void);
void kvm_async_pf_task_wait_schedule(u32 token);
void kvm_async_pf_task_wake(u32 token);
u32 kvm_read_and_reset_apf_flags(void);
-bool __kvm_handle_async_pf(struct pt_regs *regs, u32 token);
+bool __kvm_handle_async_user_pf(struct pt_regs *regs, u32 token);
DECLARE_STATIC_KEY_FALSE(kvm_async_pf_enabled);
-static __always_inline bool kvm_handle_async_pf(struct pt_regs *regs, u32 token)
+static inline bool kvm_handle_async_user_pf(struct pt_regs *regs, u32 token)
{
if (static_branch_unlikely(&kvm_async_pf_enabled))
- return __kvm_handle_async_pf(regs, token);
+ return __kvm_handle_async_user_pf(regs, token);
else
return false;
}
@@ -148,7 +148,7 @@ static inline u32 kvm_read_and_reset_apf_flags(void)
return 0;
}
-static __always_inline bool kvm_handle_async_pf(struct pt_regs *regs, u32 token)
+static inline bool kvm_handle_async_user_pf(struct pt_regs *regs, u32 token)
{
return false;
}
diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
index 59abbdad7729..285eeddf98f2 100644
--- a/arch/x86/kernel/kvm.c
+++ b/arch/x86/kernel/kvm.c
@@ -240,17 +240,13 @@ noinstr u32 kvm_read_and_reset_apf_flags(void)
}
EXPORT_SYMBOL_GPL(kvm_read_and_reset_apf_flags);
-noinstr bool __kvm_handle_async_pf(struct pt_regs *regs, u32 token)
+bool __kvm_handle_async_user_pf(struct pt_regs *regs, u32 token)
{
u32 flags = kvm_read_and_reset_apf_flags();
- irqentry_state_t state;
if (!flags)
return false;
- state = irqentry_enter(regs);
- instrumentation_begin();
-
/*
* If the host managed to inject an async #PF into an interrupt
* disabled region, then die hard as this is not going to end well
@@ -260,16 +256,12 @@ noinstr bool __kvm_handle_async_pf(struct pt_regs *regs, u32 token)
panic("Host injected async #PF in interrupt disabled region\n");
if (flags & KVM_PV_REASON_PAGE_NOT_PRESENT) {
- if (unlikely(!(user_mode(regs))))
- panic("Host injected async #PF in kernel mode\n");
/* Page is swapped out by the host. */
kvm_async_pf_task_wait_schedule(token);
} else {
WARN_ONCE(1, "Unexpected async PF flags: %x\n", flags);
}
- instrumentation_end();
- irqentry_exit(regs, state);
return true;
}
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index 4bfed53e210e..dadef2afa185 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -1501,30 +1501,6 @@ DEFINE_IDTENTRY_RAW_ERRORCODE(exc_page_fault)
prefetchw(¤t->mm->mmap_lock);
- /*
- * KVM uses #PF vector to deliver 'page not present' events to guests
- * (asynchronous page fault mechanism). The event happens when a
- * userspace task is trying to access some valid (from guest's point of
- * view) memory which is not currently mapped by the host (e.g. the
- * memory is swapped out). Note, the corresponding "page ready" event
- * which is injected when the memory becomes available, is delivered via
- * an interrupt mechanism and not a #PF exception
- * (see arch/x86/kernel/kvm.c: sysvec_kvm_asyncpf_interrupt()).
- *
- * We are relying on the interrupted context being sane (valid RSP,
- * relevant locks not held, etc.), which is fine as long as the
- * interrupted context had IF=1. We are also relying on the KVM
- * async pf type field and CR2 being read consistently instead of
- * getting values from real and async page faults mixed up.
- *
- * Fingers crossed.
- *
- * The async #PF handling code takes care of idtentry handling
- * itself.
- */
- if (kvm_handle_async_pf(regs, (u32)address))
- return;
-
/*
* Entry handling for valid #PF from kernel mode is slightly
* different: RCU is already watching and rcu_irq_enter() must not
@@ -1538,7 +1514,20 @@ DEFINE_IDTENTRY_RAW_ERRORCODE(exc_page_fault)
state = irqentry_enter(regs);
instrumentation_begin();
- handle_page_fault(regs, error_code, address);
+
+ /*
+ * KVM uses #PF vector to deliver 'page not present' events to guests
+ * (asynchronous page fault mechanism). The event happens when a
+ * userspace task is trying to access some valid (from guest's point of
+ * view) memory which is not currently mapped by the host (e.g. the
+ * memory is swapped out). Note, the corresponding "page ready" event
+ * which is injected when the memory becomes available, is delivered via
+ * an interrupt mechanism and not a #PF exception
+ * (see arch/x86/kernel/kvm.c: sysvec_kvm_asyncpf_interrupt()).
+ */
+ if (!user_mode(regs) || !kvm_handle_async_user_pf(regs, (u32)address))
+ handle_page_fault(regs, error_code, address);
+
instrumentation_end();
irqentry_exit(regs, state);
--
2.19.1.6.gb485710b
Powered by blists - more mailing lists