lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [thread-next>] [day] [month] [year] [list]
Date:   Fri, 26 Nov 2021 20:31:45 +0800
From:   Lai Jiangshan <jiangshanlai@...il.com>
To:     linux-kernel@...r.kernel.org
Cc:     x86@...nel.org, Lai Jiangshan <laijs@...ux.alibaba.com>,
        Paolo Bonzini <pbonzini@...hat.com>,
        Sean Christopherson <seanjc@...gle.com>,
        Vitaly Kuznetsov <vkuznets@...hat.com>,
        Wanpeng Li <wanpengli@...cent.com>,
        Jim Mattson <jmattson@...gle.com>,
        Joerg Roedel <joro@...tes.org>,
        Thomas Gleixner <tglx@...utronix.de>,
        Ingo Molnar <mingo@...hat.com>, Borislav Petkov <bp@...en8.de>,
        Dave Hansen <dave.hansen@...ux.intel.com>,
        "H. Peter Anvin" <hpa@...or.com>,
        Andy Lutomirski <luto@...nel.org>,
        Peter Zijlstra <peterz@...radead.org>, kvm@...r.kernel.org
Subject: [RFC PATCH] x86/kvm: Allow kvm_read_and_reset_apf_flags() to be instrumented

From: Lai Jiangshan <laijs@...ux.alibaba.com>

Both VMX and SVM made mistakes of calling kvm_read_and_reset_apf_flags()
in instrumentable code:
	vmx_vcpu_enter_exit()
		ASYNC PF induced VMEXIT
		save cr2
	leave noinstr section
	-- kernel #PF can happen here due to instrumentation
	handle_exception_nmi_irqoff
		kvm_read_and_reset_apf_flags()

If kernel #PF happens before handle_exception_nmi_irqoff() after leaving
noinstr section due to instrumentation, kernel #PF handler will consume
the incorrect apf flags and panic.

The problem might be fixed by moving kvm_read_and_reset_apf_flags()
into vmx_vcpu_enter_exit() to consume apf flags earlier before leaving
noinstr section like the way it handles CR2.

But linux kernel only resigters ASYNC PF for userspace and guest, so
ASYNC PF can't hit when it is in kernel, so apf flags can be changed to
be consumed only when the #PF is from guest or userspace.

It is natually that kvm_read_and_reset_apf_flags() in vmx/svm is for
page fault from guest.  So this patch changes kvm_handle_async_pf()
to be called only for page fault from userspace and renames it.

Signed-off-by: Lai Jiangshan <laijs@...ux.alibaba.com>
---
 arch/x86/include/asm/kvm_para.h |  8 +++----
 arch/x86/kernel/kvm.c           | 10 +--------
 arch/x86/mm/fault.c             | 39 ++++++++++++---------------------
 3 files changed, 19 insertions(+), 38 deletions(-)

diff --git a/arch/x86/include/asm/kvm_para.h b/arch/x86/include/asm/kvm_para.h
index 56935ebb1dfe..3452e705dfda 100644
--- a/arch/x86/include/asm/kvm_para.h
+++ b/arch/x86/include/asm/kvm_para.h
@@ -104,14 +104,14 @@ unsigned int kvm_arch_para_hints(void);
 void kvm_async_pf_task_wait_schedule(u32 token);
 void kvm_async_pf_task_wake(u32 token);
 u32 kvm_read_and_reset_apf_flags(void);
-bool __kvm_handle_async_pf(struct pt_regs *regs, u32 token);
+bool __kvm_handle_async_user_pf(struct pt_regs *regs, u32 token);
 
 DECLARE_STATIC_KEY_FALSE(kvm_async_pf_enabled);
 
-static __always_inline bool kvm_handle_async_pf(struct pt_regs *regs, u32 token)
+static inline bool kvm_handle_async_user_pf(struct pt_regs *regs, u32 token)
 {
 	if (static_branch_unlikely(&kvm_async_pf_enabled))
-		return __kvm_handle_async_pf(regs, token);
+		return __kvm_handle_async_user_pf(regs, token);
 	else
 		return false;
 }
@@ -148,7 +148,7 @@ static inline u32 kvm_read_and_reset_apf_flags(void)
 	return 0;
 }
 
-static __always_inline bool kvm_handle_async_pf(struct pt_regs *regs, u32 token)
+static inline bool kvm_handle_async_user_pf(struct pt_regs *regs, u32 token)
 {
 	return false;
 }
diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
index 59abbdad7729..285eeddf98f2 100644
--- a/arch/x86/kernel/kvm.c
+++ b/arch/x86/kernel/kvm.c
@@ -240,17 +240,13 @@ noinstr u32 kvm_read_and_reset_apf_flags(void)
 }
 EXPORT_SYMBOL_GPL(kvm_read_and_reset_apf_flags);
 
-noinstr bool __kvm_handle_async_pf(struct pt_regs *regs, u32 token)
+bool __kvm_handle_async_user_pf(struct pt_regs *regs, u32 token)
 {
 	u32 flags = kvm_read_and_reset_apf_flags();
-	irqentry_state_t state;
 
 	if (!flags)
 		return false;
 
-	state = irqentry_enter(regs);
-	instrumentation_begin();
-
 	/*
 	 * If the host managed to inject an async #PF into an interrupt
 	 * disabled region, then die hard as this is not going to end well
@@ -260,16 +256,12 @@ noinstr bool __kvm_handle_async_pf(struct pt_regs *regs, u32 token)
 		panic("Host injected async #PF in interrupt disabled region\n");
 
 	if (flags & KVM_PV_REASON_PAGE_NOT_PRESENT) {
-		if (unlikely(!(user_mode(regs))))
-			panic("Host injected async #PF in kernel mode\n");
 		/* Page is swapped out by the host. */
 		kvm_async_pf_task_wait_schedule(token);
 	} else {
 		WARN_ONCE(1, "Unexpected async PF flags: %x\n", flags);
 	}
 
-	instrumentation_end();
-	irqentry_exit(regs, state);
 	return true;
 }
 
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index 4bfed53e210e..dadef2afa185 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -1501,30 +1501,6 @@ DEFINE_IDTENTRY_RAW_ERRORCODE(exc_page_fault)
 
 	prefetchw(&current->mm->mmap_lock);
 
-	/*
-	 * KVM uses #PF vector to deliver 'page not present' events to guests
-	 * (asynchronous page fault mechanism). The event happens when a
-	 * userspace task is trying to access some valid (from guest's point of
-	 * view) memory which is not currently mapped by the host (e.g. the
-	 * memory is swapped out). Note, the corresponding "page ready" event
-	 * which is injected when the memory becomes available, is delivered via
-	 * an interrupt mechanism and not a #PF exception
-	 * (see arch/x86/kernel/kvm.c: sysvec_kvm_asyncpf_interrupt()).
-	 *
-	 * We are relying on the interrupted context being sane (valid RSP,
-	 * relevant locks not held, etc.), which is fine as long as the
-	 * interrupted context had IF=1.  We are also relying on the KVM
-	 * async pf type field and CR2 being read consistently instead of
-	 * getting values from real and async page faults mixed up.
-	 *
-	 * Fingers crossed.
-	 *
-	 * The async #PF handling code takes care of idtentry handling
-	 * itself.
-	 */
-	if (kvm_handle_async_pf(regs, (u32)address))
-		return;
-
 	/*
 	 * Entry handling for valid #PF from kernel mode is slightly
 	 * different: RCU is already watching and rcu_irq_enter() must not
@@ -1538,7 +1514,20 @@ DEFINE_IDTENTRY_RAW_ERRORCODE(exc_page_fault)
 	state = irqentry_enter(regs);
 
 	instrumentation_begin();
-	handle_page_fault(regs, error_code, address);
+
+	/*
+	 * KVM uses #PF vector to deliver 'page not present' events to guests
+	 * (asynchronous page fault mechanism). The event happens when a
+	 * userspace task is trying to access some valid (from guest's point of
+	 * view) memory which is not currently mapped by the host (e.g. the
+	 * memory is swapped out). Note, the corresponding "page ready" event
+	 * which is injected when the memory becomes available, is delivered via
+	 * an interrupt mechanism and not a #PF exception
+	 * (see arch/x86/kernel/kvm.c: sysvec_kvm_asyncpf_interrupt()).
+	 */
+	if (!user_mode(regs) || !kvm_handle_async_user_pf(regs, (u32)address))
+		handle_page_fault(regs, error_code, address);
+
 	instrumentation_end();
 
 	irqentry_exit(regs, state);
-- 
2.19.1.6.gb485710b

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ