Message-Id: <20210820155918.7518-40-brijesh.singh@amd.com>
Date: Fri, 20 Aug 2021 10:59:12 -0500
From: Brijesh Singh <brijesh.singh@....com>
To: x86@...nel.org, linux-kernel@...r.kernel.org, kvm@...r.kernel.org,
linux-coco@...ts.linux.dev, linux-mm@...ck.org,
linux-crypto@...r.kernel.org
Cc: Thomas Gleixner <tglx@...utronix.de>,
Ingo Molnar <mingo@...hat.com>, Joerg Roedel <jroedel@...e.de>,
Tom Lendacky <thomas.lendacky@....com>,
"H. Peter Anvin" <hpa@...or.com>, Ard Biesheuvel <ardb@...nel.org>,
Paolo Bonzini <pbonzini@...hat.com>,
Sean Christopherson <seanjc@...gle.com>,
Vitaly Kuznetsov <vkuznets@...hat.com>,
Wanpeng Li <wanpengli@...cent.com>,
Jim Mattson <jmattson@...gle.com>,
Andy Lutomirski <luto@...nel.org>,
Dave Hansen <dave.hansen@...ux.intel.com>,
Sergio Lopez <slp@...hat.com>, Peter Gonda <pgonda@...gle.com>,
Peter Zijlstra <peterz@...radead.org>,
Srinivas Pandruvada <srinivas.pandruvada@...ux.intel.com>,
David Rientjes <rientjes@...gle.com>,
Dov Murik <dovmurik@...ux.ibm.com>,
Tobin Feldman-Fitzthum <tobin@....com>,
Borislav Petkov <bp@...en8.de>,
Michael Roth <michael.roth@....com>,
Vlastimil Babka <vbabka@...e.cz>,
"Kirill A . Shutemov" <kirill@...temov.name>,
Andi Kleen <ak@...ux.intel.com>, tony.luck@...el.com,
marcorr@...gle.com, sathyanarayanan.kuppuswamy@...ux.intel.com,
Brijesh Singh <brijesh.singh@....com>
Subject: [PATCH Part2 v5 39/45] KVM: SVM: Introduce ops for the post gfn map and unmap
When SEV-SNP is enabled in the guest VM, the guest memory pages can
be either private or shared. A write from the hypervisor goes through
the RMP checks. If the hardware sees that the hypervisor is attempting
to write to a guest private page, then it triggers an RMP violation #PF.

To avoid the RMP violation, add post_{map,unmap}_gfn() ops that can be
used to verify that it is safe to map a given guest page. Use SRCU to
protect against page state changes while a page is mapped.
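
The intended usage pattern is roughly the sketch below (illustrative
only; hv_access_guest_gfn() is a hypothetical caller, while
snp_lookup_rmpentry(), to_kvm_svm() and the psc_srcu struct are the
ones used/introduced by this series):

  /*
   * Sketch of the map/unmap flow added by this patch: the SRCU
   * read-side section covers the lifetime of the host mapping, so a
   * page state change (which calls synchronize_srcu_expedited()) must
   * wait for all existing mappings to be dropped before the page can
   * become private.
   */
  static int hv_access_guest_gfn(struct kvm *kvm, gfn_t gfn, kvm_pfn_t pfn)
  {
  	struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
  	int token, level;

  	/* post_map_gfn(): enter SRCU, check the page is still shared */
  	token = srcu_read_lock(&sev->psc_srcu);
  	if (snp_lookup_rmpentry(pfn, &level) == 1) {
  		/* Page is private in the RMP; a hypervisor write would #PF */
  		srcu_read_unlock(&sev->psc_srcu, token);
  		return -EBUSY;
  	}

  	/* ... access the mapped page here ... */

  	/* post_unmap_gfn(): done with the mapping, exit SRCU */
  	srcu_read_unlock(&sev->psc_srcu, token);
  	return 0;
  }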
Signed-off-by: Brijesh Singh <brijesh.singh@....com>
---
arch/x86/include/asm/kvm-x86-ops.h | 2 +
arch/x86/include/asm/kvm_host.h | 4 ++
arch/x86/kvm/svm/sev.c | 69 +++++++++++++++++++++-----
arch/x86/kvm/svm/svm.c | 4 ++
arch/x86/kvm/svm/svm.h | 8 +++
arch/x86/kvm/x86.c | 78 +++++++++++++++++++++++++++---
6 files changed, 146 insertions(+), 19 deletions(-)
diff --git a/arch/x86/include/asm/kvm-x86-ops.h b/arch/x86/include/asm/kvm-x86-ops.h
index 371756c7f8f4..c09bd40e0160 100644
--- a/arch/x86/include/asm/kvm-x86-ops.h
+++ b/arch/x86/include/asm/kvm-x86-ops.h
@@ -124,6 +124,8 @@ KVM_X86_OP(msr_filter_changed)
KVM_X86_OP_NULL(complete_emulated_msr)
KVM_X86_OP(alloc_apic_backing_page)
KVM_X86_OP_NULL(rmp_page_level_adjust)
+KVM_X86_OP(post_map_gfn)
+KVM_X86_OP(post_unmap_gfn)
#undef KVM_X86_OP
#undef KVM_X86_OP_NULL
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index a6e764458f3e..5ac1ff097e8c 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1463,7 +1463,11 @@ struct kvm_x86_ops {
void (*vcpu_deliver_sipi_vector)(struct kvm_vcpu *vcpu, u8 vector);
void *(*alloc_apic_backing_page)(struct kvm_vcpu *vcpu);
+
void (*rmp_page_level_adjust)(struct kvm *kvm, kvm_pfn_t pfn, int *level);
+
+ int (*post_map_gfn)(struct kvm *kvm, gfn_t gfn, kvm_pfn_t pfn, int *token);
+ void (*post_unmap_gfn)(struct kvm *kvm, gfn_t gfn, kvm_pfn_t pfn, int token);
};
struct kvm_x86_nested_ops {
diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c
index 0de85ed63e9b..65b578463271 100644
--- a/arch/x86/kvm/svm/sev.c
+++ b/arch/x86/kvm/svm/sev.c
@@ -336,6 +336,7 @@ static int sev_guest_init(struct kvm *kvm, struct kvm_sev_cmd *argp)
if (ret)
goto e_free;
+ init_srcu_struct(&sev->psc_srcu);
ret = sev_snp_init(&argp->error);
} else {
ret = sev_platform_init(&argp->error);
@@ -2293,6 +2294,7 @@ void sev_vm_destroy(struct kvm *kvm)
WARN_ONCE(1, "Failed to free SNP guest context, leaking asid!\n");
return;
}
+ cleanup_srcu_struct(&sev->psc_srcu);
} else {
sev_unbind_asid(kvm, sev->handle);
}
@@ -2494,23 +2496,32 @@ void sev_free_vcpu(struct kvm_vcpu *vcpu)
kfree(svm->ghcb_sa);
}
-static inline int svm_map_ghcb(struct vcpu_svm *svm, struct kvm_host_map *map)
+static inline int svm_map_ghcb(struct vcpu_svm *svm, struct kvm_host_map *map, int *token)
{
struct vmcb_control_area *control = &svm->vmcb->control;
u64 gfn = gpa_to_gfn(control->ghcb_gpa);
+ struct kvm_vcpu *vcpu = &svm->vcpu;
- if (kvm_vcpu_map(&svm->vcpu, gfn, map)) {
+ if (kvm_vcpu_map(vcpu, gfn, map)) {
/* Unable to map GHCB from guest */
pr_err("error mapping GHCB GFN [%#llx] from guest\n", gfn);
return -EFAULT;
}
+ if (sev_post_map_gfn(vcpu->kvm, map->gfn, map->pfn, token)) {
+ kvm_vcpu_unmap(vcpu, map, false);
+ return -EBUSY;
+ }
+
return 0;
}
-static inline void svm_unmap_ghcb(struct vcpu_svm *svm, struct kvm_host_map *map)
+static inline void svm_unmap_ghcb(struct vcpu_svm *svm, struct kvm_host_map *map, int token)
{
- kvm_vcpu_unmap(&svm->vcpu, map, true);
+ struct kvm_vcpu *vcpu = &svm->vcpu;
+
+ kvm_vcpu_unmap(vcpu, map, true);
+ sev_post_unmap_gfn(vcpu->kvm, map->gfn, map->pfn, token);
}
static void dump_ghcb(struct vcpu_svm *svm)
@@ -2518,8 +2529,9 @@ static void dump_ghcb(struct vcpu_svm *svm)
struct kvm_host_map map;
unsigned int nbits;
struct ghcb *ghcb;
+ int token;
- if (svm_map_ghcb(svm, &map))
+ if (svm_map_ghcb(svm, &map, &token))
return;
ghcb = map.hva;
@@ -2544,7 +2556,7 @@ static void dump_ghcb(struct vcpu_svm *svm)
pr_err("%-20s%*pb\n", "valid_bitmap", nbits, ghcb->save.valid_bitmap);
e_unmap:
- svm_unmap_ghcb(svm, &map);
+ svm_unmap_ghcb(svm, &map, token);
}
static bool sev_es_sync_to_ghcb(struct vcpu_svm *svm)
@@ -2552,8 +2564,9 @@ static bool sev_es_sync_to_ghcb(struct vcpu_svm *svm)
struct kvm_vcpu *vcpu = &svm->vcpu;
struct kvm_host_map map;
struct ghcb *ghcb;
+ int token;
- if (svm_map_ghcb(svm, &map))
+ if (svm_map_ghcb(svm, &map, &token))
return false;
ghcb = map.hva;
@@ -2579,7 +2592,7 @@ static bool sev_es_sync_to_ghcb(struct vcpu_svm *svm)
trace_kvm_vmgexit_exit(svm->vcpu.vcpu_id, ghcb);
- svm_unmap_ghcb(svm, &map);
+ svm_unmap_ghcb(svm, &map, token);
return true;
}
@@ -2636,8 +2649,9 @@ static int sev_es_validate_vmgexit(struct vcpu_svm *svm, u64 *exit_code)
struct kvm_vcpu *vcpu = &svm->vcpu;
struct kvm_host_map map;
struct ghcb *ghcb;
+ int token;
- if (svm_map_ghcb(svm, &map))
+ if (svm_map_ghcb(svm, &map, &token))
return -EFAULT;
ghcb = map.hva;
@@ -2739,7 +2753,7 @@ static int sev_es_validate_vmgexit(struct vcpu_svm *svm, u64 *exit_code)
sev_es_sync_from_ghcb(svm, ghcb);
- svm_unmap_ghcb(svm, &map);
+ svm_unmap_ghcb(svm, &map, token);
return 0;
vmgexit_err:
@@ -2760,7 +2774,7 @@ static int sev_es_validate_vmgexit(struct vcpu_svm *svm, u64 *exit_code)
vcpu->run->internal.data[0] = *exit_code;
vcpu->run->internal.data[1] = vcpu->arch.last_vmentry_cpu;
- svm_unmap_ghcb(svm, &map);
+ svm_unmap_ghcb(svm, &map, token);
return -EINVAL;
}
@@ -3036,6 +3050,9 @@ static int __snp_handle_page_state_change(struct kvm_vcpu *vcpu, enum psc_op op,
return PSC_UNDEF_ERR;
}
+ /* Wait for all the existing mapped gfn to unmap */
+ synchronize_srcu_expedited(&sev->psc_srcu);
+
write_lock(&kvm->mmu_lock);
rc = kvm_mmu_get_tdp_walk(vcpu, gpa, &pfn, &npt_level);
@@ -3604,3 +3621,33 @@ void sev_rmp_page_level_adjust(struct kvm *kvm, kvm_pfn_t pfn, int *level)
/* Adjust the level to keep the NPT and RMP in sync */
*level = min_t(size_t, *level, rmp_level);
}
+
+int sev_post_map_gfn(struct kvm *kvm, gfn_t gfn, kvm_pfn_t pfn, int *token)
+{
+ struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
+ int level;
+
+ if (!sev_snp_guest(kvm))
+ return 0;
+
+ *token = srcu_read_lock(&sev->psc_srcu);
+
+ /* If pfn is not added as private then fail */
+ if (snp_lookup_rmpentry(pfn, &level) == 1) {
+ srcu_read_unlock(&sev->psc_srcu, *token);
+ pr_err_ratelimited("failed to map private gfn 0x%llx pfn 0x%llx\n", gfn, pfn);
+ return -EBUSY;
+ }
+
+ return 0;
+}
+
+void sev_post_unmap_gfn(struct kvm *kvm, gfn_t gfn, kvm_pfn_t pfn, int token)
+{
+ struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
+
+ if (!sev_snp_guest(kvm))
+ return;
+
+ srcu_read_unlock(&sev->psc_srcu, token);
+}
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index 5f73f21a37a1..3784d389247b 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -4679,7 +4679,11 @@ static struct kvm_x86_ops svm_x86_ops __initdata = {
.vcpu_deliver_sipi_vector = svm_vcpu_deliver_sipi_vector,
.alloc_apic_backing_page = svm_alloc_apic_backing_page,
+
.rmp_page_level_adjust = sev_rmp_page_level_adjust,
+
+ .post_map_gfn = sev_post_map_gfn,
+ .post_unmap_gfn = sev_post_unmap_gfn,
};
static struct kvm_x86_init_ops svm_init_ops __initdata = {
diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h
index d10f7166b39d..ff91184f9b4a 100644
--- a/arch/x86/kvm/svm/svm.h
+++ b/arch/x86/kvm/svm/svm.h
@@ -76,16 +76,22 @@ struct kvm_sev_info {
bool active; /* SEV enabled guest */
bool es_active; /* SEV-ES enabled guest */
bool snp_active; /* SEV-SNP enabled guest */
+
unsigned int asid; /* ASID used for this guest */
unsigned int handle; /* SEV firmware handle */
int fd; /* SEV device fd */
+
unsigned long pages_locked; /* Number of pages locked */
struct list_head regions_list; /* List of registered regions */
+
u64 ap_jump_table; /* SEV-ES AP Jump Table address */
+
struct kvm *enc_context_owner; /* Owner of copied encryption context */
struct misc_cg *misc_cg; /* For misc cgroup accounting */
+
u64 snp_init_flags;
void *snp_context; /* SNP guest context page */
+ struct srcu_struct psc_srcu;
};
struct kvm_svm {
@@ -618,6 +624,8 @@ void sev_es_prepare_guest_switch(struct vcpu_svm *svm, unsigned int cpu);
void sev_es_unmap_ghcb(struct vcpu_svm *svm);
struct page *snp_safe_alloc_page(struct kvm_vcpu *vcpu);
void sev_rmp_page_level_adjust(struct kvm *kvm, kvm_pfn_t pfn, int *level);
+int sev_post_map_gfn(struct kvm *kvm, gfn_t gfn, kvm_pfn_t pfn, int *token);
+void sev_post_unmap_gfn(struct kvm *kvm, gfn_t gfn, kvm_pfn_t pfn, int token);
/* vmenter.S */
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index afcdc75a99f2..bf4389ffc88f 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -3095,6 +3095,65 @@ static inline bool kvm_pv_async_pf_enabled(struct kvm_vcpu *vcpu)
return (vcpu->arch.apf.msr_en_val & mask) == mask;
}
+static int kvm_map_gfn_protected(struct kvm_vcpu *vcpu, gfn_t gfn, struct kvm_host_map *map,
+ struct gfn_to_pfn_cache *cache, bool atomic, int *token)
+{
+ int ret;
+
+ ret = kvm_map_gfn(vcpu, gfn, map, cache, atomic);
+ if (ret)
+ return ret;
+
+ if (kvm_x86_ops.post_map_gfn) {
+ ret = static_call(kvm_x86_post_map_gfn)(vcpu->kvm, map->gfn, map->pfn, token);
+ if (ret)
+ kvm_unmap_gfn(vcpu, map, cache, false, atomic);
+ }
+
+ return ret;
+}
+
+static int kvm_unmap_gfn_protected(struct kvm_vcpu *vcpu, struct kvm_host_map *map,
+ struct gfn_to_pfn_cache *cache, bool dirty,
+ bool atomic, int token)
+{
+ int ret;
+
+ ret = kvm_unmap_gfn(vcpu, map, cache, dirty, atomic);
+
+ if (kvm_x86_ops.post_unmap_gfn)
+ static_call(kvm_x86_post_unmap_gfn)(vcpu->kvm, map->gfn, map->pfn, token);
+
+ return ret;
+}
+
+static int kvm_vcpu_map_protected(struct kvm_vcpu *vcpu, gpa_t gpa, struct kvm_host_map *map,
+ int *token)
+{
+ int ret;
+
+ ret = kvm_vcpu_map(vcpu, gpa, map);
+ if (ret)
+ return ret;
+
+ if (kvm_x86_ops.post_map_gfn) {
+ ret = static_call(kvm_x86_post_map_gfn)(vcpu->kvm, map->gfn, map->pfn, token);
+ if (ret)
+ kvm_vcpu_unmap(vcpu, map, false);
+ }
+
+ return ret;
+}
+
+static void kvm_vcpu_unmap_protected(struct kvm_vcpu *vcpu, struct kvm_host_map *map,
+ bool dirty, int token)
+{
+ kvm_vcpu_unmap(vcpu, map, dirty);
+
+ if (kvm_x86_ops.post_unmap_gfn)
+ static_call(kvm_x86_post_unmap_gfn)(vcpu->kvm, map->gfn, map->pfn, token);
+}
+
static int kvm_pv_enable_async_pf(struct kvm_vcpu *vcpu, u64 data)
{
gpa_t gpa = data & ~0x3f;
@@ -3185,6 +3244,7 @@ static void record_steal_time(struct kvm_vcpu *vcpu)
{
struct kvm_host_map map;
struct kvm_steal_time *st;
+ int token;
if (kvm_xen_msr_enabled(vcpu->kvm)) {
kvm_xen_runstate_set_running(vcpu);
@@ -3195,8 +3255,8 @@ static void record_steal_time(struct kvm_vcpu *vcpu)
return;
/* -EAGAIN is returned in atomic context so we can just return. */
- if (kvm_map_gfn(vcpu, vcpu->arch.st.msr_val >> PAGE_SHIFT,
- &map, &vcpu->arch.st.cache, false))
+ if (kvm_map_gfn_protected(vcpu, vcpu->arch.st.msr_val >> PAGE_SHIFT,
+ &map, &vcpu->arch.st.cache, false, &token))
return;
st = map.hva +
@@ -3234,7 +3294,7 @@ static void record_steal_time(struct kvm_vcpu *vcpu)
st->version += 1;
- kvm_unmap_gfn(vcpu, &map, &vcpu->arch.st.cache, true, false);
+ kvm_unmap_gfn_protected(vcpu, &map, &vcpu->arch.st.cache, true, false, token);
}
int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
@@ -4271,6 +4331,7 @@ static void kvm_steal_time_set_preempted(struct kvm_vcpu *vcpu)
{
struct kvm_host_map map;
struct kvm_steal_time *st;
+ int token;
if (!(vcpu->arch.st.msr_val & KVM_MSR_ENABLED))
return;
@@ -4278,8 +4339,8 @@ static void kvm_steal_time_set_preempted(struct kvm_vcpu *vcpu)
if (vcpu->arch.st.preempted)
return;
- if (kvm_map_gfn(vcpu, vcpu->arch.st.msr_val >> PAGE_SHIFT, &map,
- &vcpu->arch.st.cache, true))
+ if (kvm_map_gfn_protected(vcpu, vcpu->arch.st.msr_val >> PAGE_SHIFT,
+ &map, &vcpu->arch.st.cache, true, &token))
return;
st = map.hva +
@@ -4287,7 +4348,7 @@ static void kvm_steal_time_set_preempted(struct kvm_vcpu *vcpu)
st->preempted = vcpu->arch.st.preempted = KVM_VCPU_PREEMPTED;
- kvm_unmap_gfn(vcpu, &map, &vcpu->arch.st.cache, true, true);
+ kvm_unmap_gfn_protected(vcpu, &map, &vcpu->arch.st.cache, true, true, token);
}
void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
@@ -6816,6 +6877,7 @@ static int emulator_cmpxchg_emulated(struct x86_emulate_ctxt *ctxt,
gpa_t gpa;
char *kaddr;
bool exchanged;
+ int token;
/* guests cmpxchg8b have to be emulated atomically */
if (bytes > 8 || (bytes & (bytes - 1)))
@@ -6839,7 +6901,7 @@ static int emulator_cmpxchg_emulated(struct x86_emulate_ctxt *ctxt,
if (((gpa + bytes - 1) & page_line_mask) != (gpa & page_line_mask))
goto emul_write;
- if (kvm_vcpu_map(vcpu, gpa_to_gfn(gpa), &map))
+ if (kvm_vcpu_map_protected(vcpu, gpa_to_gfn(gpa), &map, &token))
goto emul_write;
kaddr = map.hva + offset_in_page(gpa);
@@ -6861,7 +6923,7 @@ static int emulator_cmpxchg_emulated(struct x86_emulate_ctxt *ctxt,
BUG();
}
- kvm_vcpu_unmap(vcpu, &map, true);
+ kvm_vcpu_unmap_protected(vcpu, &map, true, token);
if (!exchanged)
return X86EMUL_CMPXCHG_FAILED;
--
2.17.1