linux-kernel - [PATCH v6 09/12] Inject asynchronous page fault into a PV guest if page is swapped out.

lists.openwall.net		lists / announce owl-users owl-dev john-users john-dev passwdqc-users yescrypt popa3d-users / oss-security kernel-hardening musl sabotage tlsify passwords / crypt-dev xvendor / Bugtraq Full-Disclosure linux-kernel linux-netdev linux-ext4 linux-hardening linux-cve-announce PHC
Open Source and information security mailing list archives
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <1286207794-16120-10-git-send-email-gleb@redhat.com>
Date:	Mon,  4 Oct 2010 17:56:31 +0200
From:	Gleb Natapov <gleb@...hat.com>
To:	kvm@...r.kernel.org
Cc:	linux-mm@...ck.org, linux-kernel@...r.kernel.org, avi@...hat.com,
	mingo@...e.hu, a.p.zijlstra@...llo.nl, tglx@...utronix.de,
	hpa@...or.com, riel@...hat.com, cl@...ux-foundation.org,
	mtosatti@...hat.com
Subject: [PATCH v6 09/12] Inject asynchronous page fault into a PV guest if page is swapped out.

Send async page fault to a PV guest if it accesses swapped out memory.
Guest will choose another task to run upon receiving the fault.

Allow async page fault injection only when guest is in user mode since
otherwise guest may be in non-sleepable context and will not be able
to reschedule.

Vcpu will be halted if guest will fault on the same page again or if
vcpu executes kernel code.

Signed-off-by: Gleb Natapov <gleb@...hat.com>
---
 arch/x86/include/asm/kvm_host.h |    3 ++
 arch/x86/kvm/mmu.c              |    1 +
 arch/x86/kvm/x86.c              |   49 ++++++++++++++++++++++++++++++++------
 include/trace/events/kvm.h      |   17 ++++++++----
 virt/kvm/async_pf.c             |    3 +-
 5 files changed, 58 insertions(+), 15 deletions(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index de31551..2f6fc87 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -419,6 +419,7 @@ struct kvm_vcpu_arch {
 		gfn_t gfns[roundup_pow_of_two(ASYNC_PF_PER_VCPU)];
 		struct gfn_to_hva_cache data;
 		u64 msr_val;
+		u32 id;
 	} apf;
 };
 
@@ -594,6 +595,7 @@ struct kvm_x86_ops {
 };
 
 struct kvm_arch_async_pf {
+	u32 token;
 	gfn_t gfn;
 };
 
@@ -842,6 +844,7 @@ void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
 				 struct kvm_async_pf *work);
 void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu,
 			       struct kvm_async_pf *work);
+bool kvm_arch_can_inject_async_page_present(struct kvm_vcpu *vcpu);
 extern bool kvm_find_async_pf_gfn(struct kvm_vcpu *vcpu, gfn_t gfn);
 
 #endif /* _ASM_X86_KVM_HOST_H */
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index d85fda8..de53cab 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -2580,6 +2580,7 @@ static int nonpaging_page_fault(struct kvm_vcpu *vcpu, gva_t gva,
 int kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu, gva_t gva, gfn_t gfn)
 {
 	struct kvm_arch_async_pf arch;
+	arch.token = (vcpu->arch.apf.id++ << 12) | vcpu->vcpu_id;
 	arch.gfn = gfn;
 
 	return kvm_setup_async_pf(vcpu, gva, gfn, &arch);
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 3e123ab..0e69d37 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -6225,25 +6225,58 @@ static void kvm_del_async_pf_gfn(struct kvm_vcpu *vcpu, gfn_t gfn)
 	}
 }
 
+static int apf_put_user(struct kvm_vcpu *vcpu, u32 val)
+{
+
+	return kvm_write_guest_cached(vcpu->kvm, &vcpu->arch.apf.data, &val,
+				      sizeof(val));
+}
+
 void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
 				     struct kvm_async_pf *work)
 {
-	vcpu->arch.mp_state = KVM_MP_STATE_HALTED;
-
-	if (work == kvm_double_apf)
+	if (work == kvm_double_apf) {
 		trace_kvm_async_pf_doublefault(kvm_rip_read(vcpu));
-	else {
-		trace_kvm_async_pf_not_present(work->gva);
-
+		vcpu->arch.mp_state = KVM_MP_STATE_HALTED;
+	} else {
+		trace_kvm_async_pf_not_present(work->arch.token, work->gva);
 		kvm_add_async_pf_gfn(vcpu, work->arch.gfn);
+
+		if (!(vcpu->arch.apf.msr_val & KVM_ASYNC_PF_ENABLED) ||
+		    kvm_x86_ops->get_cpl(vcpu) == 0)
+			vcpu->arch.mp_state = KVM_MP_STATE_HALTED;
+		else if (!apf_put_user(vcpu, KVM_PV_REASON_PAGE_NOT_PRESENT)) {
+			vcpu->arch.fault.error_code = 0;
+			vcpu->arch.fault.address = work->arch.token;
+			kvm_inject_page_fault(vcpu);
+		}
 	}
 }
 
 void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
 				 struct kvm_async_pf *work)
 {
-	trace_kvm_async_pf_ready(work->gva);
-	kvm_del_async_pf_gfn(vcpu, work->arch.gfn);
+	trace_kvm_async_pf_ready(work->arch.token, work->gva);
+	if (is_error_page(work->page))
+		work->arch.token = ~0; /* broadcast wakeup */
+	else
+		kvm_del_async_pf_gfn(vcpu, work->arch.gfn);
+
+	if ((vcpu->arch.apf.msr_val & KVM_ASYNC_PF_ENABLED) &&
+	    !apf_put_user(vcpu, KVM_PV_REASON_PAGE_READY)) {
+		vcpu->arch.fault.error_code = 0;
+		vcpu->arch.fault.address = work->arch.token;
+		kvm_inject_page_fault(vcpu);
+	}
+}
+
+bool kvm_arch_can_inject_async_page_present(struct kvm_vcpu *vcpu)
+{
+	if (!(vcpu->arch.apf.msr_val & KVM_ASYNC_PF_ENABLED))
+		return true;
+	else
+		return !kvm_event_needs_reinjection(vcpu) &&
+			kvm_x86_ops->interrupt_allowed(vcpu);
 }
 
 EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_exit);
diff --git a/include/trace/events/kvm.h b/include/trace/events/kvm.h
index bcc69b2..dd44aa3 100644
--- a/include/trace/events/kvm.h
+++ b/include/trace/events/kvm.h
@@ -204,34 +204,39 @@ TRACE_EVENT(
 
 TRACE_EVENT(
 	kvm_async_pf_not_present,
-	TP_PROTO(u64 gva),
-	TP_ARGS(gva),
+	TP_PROTO(u64 token, u64 gva),
+	TP_ARGS(token, gva),
 
 	TP_STRUCT__entry(
+		__field(__u64, token)
 		__field(__u64, gva)
 		),
 
 	TP_fast_assign(
+		__entry->token = token;
 		__entry->gva = gva;
 		),
 
-	TP_printk("gva %#llx not present", __entry->gva)
+	TP_printk("token %#llx gva %#llx not present", __entry->token,
+		  __entry->gva)
 );
 
 TRACE_EVENT(
 	kvm_async_pf_ready,
-	TP_PROTO(u64 gva),
-	TP_ARGS(gva),
+	TP_PROTO(u64 token, u64 gva),
+	TP_ARGS(token, gva),
 
 	TP_STRUCT__entry(
+		__field(__u64, token)
 		__field(__u64, gva)
 		),
 
 	TP_fast_assign(
+		__entry->token = token;
 		__entry->gva = gva;
 		),
 
-	TP_printk("gva %#llx ready", __entry->gva)
+	TP_printk("token %#llx gva %#llx ready", __entry->token, __entry->gva)
 );
 
 TRACE_EVENT(
diff --git a/virt/kvm/async_pf.c b/virt/kvm/async_pf.c
index 44f4005..d1cd495 100644
--- a/virt/kvm/async_pf.c
+++ b/virt/kvm/async_pf.c
@@ -138,7 +138,8 @@ void kvm_check_async_pf_completion(struct kvm_vcpu *vcpu)
 		}
 	}
 
-	if (list_empty_careful(&vcpu->async_pf.done))
+	if (list_empty_careful(&vcpu->async_pf.done) ||
+	    !kvm_arch_can_inject_async_page_present(vcpu))
 		return;
 
 	spin_lock(&vcpu->async_pf.lock);
-- 
1.7.1

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/