[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <4E4A113F.5090404@cn.fujitsu.com>
Date: Tue, 16 Aug 2011 14:42:07 +0800
From: Xiao Guangrong <xiaoguangrong@...fujitsu.com>
To: Avi Kivity <avi@...hat.com>
CC: Marcelo Tosatti <mtosatti@...hat.com>,
LKML <linux-kernel@...r.kernel.org>, KVM <kvm@...r.kernel.org>
Subject: [PATCH 03/11] KVM: x86: retry non-page-table writing instruction
If the emulation is caused by #PF and it is non-page_table writing instruction,
it means the VM-EXIT is caused by shadow page protected, we can zap the shadow
page and retry this instruction directly
The idea is from Avi
Signed-off-by: Xiao Guangrong <xiaoguangrong@...fujitsu.com>
---
arch/x86/include/asm/kvm_emulate.h | 1 +
arch/x86/include/asm/kvm_host.h | 5 +++
arch/x86/kvm/emulate.c | 5 +++
arch/x86/kvm/mmu.c | 22 +++++++++++---
arch/x86/kvm/x86.c | 53 ++++++++++++++++++++++++++++++++++++
5 files changed, 81 insertions(+), 5 deletions(-)
diff --git a/arch/x86/include/asm/kvm_emulate.h b/arch/x86/include/asm/kvm_emulate.h
index 6040d11..fa87b63 100644
--- a/arch/x86/include/asm/kvm_emulate.h
+++ b/arch/x86/include/asm/kvm_emulate.h
@@ -362,6 +362,7 @@ enum x86_intercept {
#endif
int x86_decode_insn(struct x86_emulate_ctxt *ctxt, void *insn, int insn_len);
+bool page_table_writing_insn(struct x86_emulate_ctxt *ctxt);
#define EMULATION_FAILED -1
#define EMULATION_OK 0
#define EMULATION_RESTART 1
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 6ab4241..27a25df 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -443,6 +443,9 @@ struct kvm_vcpu_arch {
cpumask_var_t wbinvd_dirty_mask;
+ unsigned long last_retry_eip;
+ unsigned long last_retry_addr;
+
struct {
bool halted;
gfn_t gfns[roundup_pow_of_two(ASYNC_PF_PER_VCPU)];
@@ -689,6 +692,7 @@ enum emulation_result {
#define EMULTYPE_NO_DECODE (1 << 0)
#define EMULTYPE_TRAP_UD (1 << 1)
#define EMULTYPE_SKIP (1 << 2)
+#define EMULTYPE_RETRY (1 << 3)
int x86_emulate_instruction(struct kvm_vcpu *vcpu, unsigned long cr2,
int emulation_type, void *insn, int insn_len);
@@ -753,6 +757,7 @@ void kvm_mmu_flush_tlb(struct kvm_vcpu *vcpu);
void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
const u8 *new, int bytes,
bool guest_initiated);
+int kvm_mmu_unprotect_page(struct kvm *kvm, gfn_t gfn);
int kvm_mmu_unprotect_page_virt(struct kvm_vcpu *vcpu, gva_t gva);
void __kvm_mmu_free_some_pages(struct kvm_vcpu *vcpu);
int kvm_mmu_load(struct kvm_vcpu *vcpu);
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index e24c269..c62424e 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -3691,6 +3691,11 @@ done:
return (rc != X86EMUL_CONTINUE) ? EMULATION_FAILED : EMULATION_OK;
}
+bool page_table_writing_insn(struct x86_emulate_ctxt *ctxt)
+{
+ return ctxt->d & PageTable;
+}
+
static bool string_insn_completed(struct x86_emulate_ctxt *ctxt)
{
/* The second termination condition only applies for REPE
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index b01afee..26aae11 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -1997,7 +1997,7 @@ void kvm_mmu_change_mmu_pages(struct kvm *kvm, unsigned int goal_nr_mmu_pages)
kvm->arch.n_max_mmu_pages = goal_nr_mmu_pages;
}
-static int kvm_mmu_unprotect_page(struct kvm *kvm, gfn_t gfn)
+int kvm_mmu_unprotect_page(struct kvm *kvm, gfn_t gfn)
{
struct kvm_mmu_page *sp;
struct hlist_node *node;
@@ -2007,6 +2007,7 @@ static int kvm_mmu_unprotect_page(struct kvm *kvm, gfn_t gfn)
pgprintk("%s: looking for gfn %llx\n", __func__, gfn);
r = 0;
+ spin_lock(&kvm->mmu_lock);
for_each_gfn_indirect_valid_sp(kvm, sp, gfn, node) {
pgprintk("%s: gfn %llx role %x\n", __func__, gfn,
sp->role.word);
@@ -2014,8 +2015,10 @@ static int kvm_mmu_unprotect_page(struct kvm *kvm, gfn_t gfn)
kvm_mmu_prepare_zap_page(kvm, sp, &invalid_list);
}
kvm_mmu_commit_zap_page(kvm, &invalid_list);
+ spin_unlock(&kvm->mmu_lock);
return r;
}
+EXPORT_SYMBOL_GPL(kvm_mmu_unprotect_page);
static void mmu_unshadow(struct kvm *kvm, gfn_t gfn)
{
@@ -3697,9 +3700,7 @@ int kvm_mmu_unprotect_page_virt(struct kvm_vcpu *vcpu, gva_t gva)
gpa = kvm_mmu_gva_to_gpa_read(vcpu, gva, NULL);
- spin_lock(&vcpu->kvm->mmu_lock);
r = kvm_mmu_unprotect_page(vcpu->kvm, gpa >> PAGE_SHIFT);
- spin_unlock(&vcpu->kvm->mmu_lock);
return r;
}
EXPORT_SYMBOL_GPL(kvm_mmu_unprotect_page_virt);
@@ -3720,10 +3721,18 @@ void __kvm_mmu_free_some_pages(struct kvm_vcpu *vcpu)
kvm_mmu_commit_zap_page(vcpu->kvm, &invalid_list);
}
+static bool is_mmio_page_fault(struct kvm_vcpu *vcpu, gva_t addr)
+{
+ if (vcpu->arch.mmu.direct_map || mmu_is_nested(vcpu))
+ return vcpu_match_mmio_gpa(vcpu, addr);
+
+ return vcpu_match_mmio_gva(vcpu, addr);
+}
+
int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t cr2, u32 error_code,
void *insn, int insn_len)
{
- int r;
+ int r, emulation_type = EMULTYPE_RETRY;
enum emulation_result er;
r = vcpu->arch.mmu.page_fault(vcpu, cr2, error_code, false);
@@ -3735,7 +3744,10 @@ int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t cr2, u32 error_code,
goto out;
}
- er = x86_emulate_instruction(vcpu, cr2, 0, insn, insn_len);
+ if (is_mmio_page_fault(vcpu, cr2))
+ emulation_type = 0;
+
+ er = x86_emulate_instruction(vcpu, cr2, emulation_type, insn, insn_len);
switch (er) {
case EMULATE_DONE:
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 6b37f18..db83fbe 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -4814,6 +4814,56 @@ static bool reexecute_instruction(struct kvm_vcpu *vcpu, gva_t gva)
return false;
}
+static bool retry_instruction(struct x86_emulate_ctxt *ctxt,
+ unsigned long cr2, int emulation_type)
+{
+ struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
+ unsigned long last_retry_eip, last_retry_addr, gpa = cr2;
+
+ last_retry_eip = vcpu->arch.last_retry_eip;
+ last_retry_addr = vcpu->arch.last_retry_addr;
+
+ /*
+ * If the emulation is caused by #PF and it is non-page_table
+ * writing instruction, it means the VM-EXIT is caused by shadow
+ * page protected, we can zap the shadow page and retry this
+ * instruction directly.
+ *
+ * Note: if the guest uses a non-page-table modifying instruction
+ * on the PDE that points to the instruction, then we will unmap
+ * the instruction and go to an infinite loop. So, we cache the
+ * last retried eip and the last fault address, if we meet the eip
+ * and the address again, we can break out of the potential infinite
+ * loop.
+ */
+ vcpu->arch.last_retry_eip = vcpu->arch.last_retry_addr = 0;
+
+ if (!(emulation_type & EMULTYPE_RETRY))
+ return false;
+
+ if (page_table_writing_insn(ctxt))
+ return false;
+
+ if (ctxt->eip == last_retry_eip && last_retry_addr == cr2)
+ return false;
+
+ vcpu->arch.last_retry_eip = ctxt->eip;
+ vcpu->arch.last_retry_addr = cr2;
+
+ if (!vcpu->arch.mmu.direct_map && !mmu_is_nested(vcpu))
+ gpa = kvm_mmu_gva_to_gpa_write(vcpu, cr2, NULL);
+
+ kvm_mmu_unprotect_page(vcpu->kvm, gpa >> PAGE_SHIFT);
+
+ /*
+ * The shadow pages have been zapped, then we call the page
+ * fault path to change the mapping to writable.
+ */
+ vcpu->arch.mmu.page_fault(vcpu, cr2, PFERR_WRITE_MASK, true);
+
+ return true;
+}
+
int x86_emulate_instruction(struct kvm_vcpu *vcpu,
unsigned long cr2,
int emulation_type,
@@ -4855,6 +4905,9 @@ int x86_emulate_instruction(struct kvm_vcpu *vcpu,
return EMULATE_DONE;
}
+ if (retry_instruction(ctxt, cr2, emulation_type))
+ return EMULATE_DONE;
+
/* this is needed for vmware backdoor interface to work since it
changes registers values during IO operation */
if (vcpu->arch.emulate_regs_need_sync_from_vcpu) {
--
1.7.5.4
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/
Powered by blists - more mailing lists