[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20240501085210.2213060-11-michael.roth@amd.com>
Date: Wed, 1 May 2024 03:52:00 -0500
From: Michael Roth <michael.roth@....com>
To: <kvm@...r.kernel.org>
CC: <linux-coco@...ts.linux.dev>, <linux-mm@...ck.org>,
<linux-crypto@...r.kernel.org>, <x86@...nel.org>,
<linux-kernel@...r.kernel.org>, <tglx@...utronix.de>, <mingo@...hat.com>,
<jroedel@...e.de>, <thomas.lendacky@....com>, <hpa@...or.com>,
<ardb@...nel.org>, <pbonzini@...hat.com>, <seanjc@...gle.com>,
<vkuznets@...hat.com>, <jmattson@...gle.com>, <luto@...nel.org>,
<dave.hansen@...ux.intel.com>, <slp@...hat.com>, <pgonda@...gle.com>,
<peterz@...radead.org>, <srinivas.pandruvada@...ux.intel.com>,
<rientjes@...gle.com>, <dovmurik@...ux.ibm.com>, <tobin@....com>,
<bp@...en8.de>, <vbabka@...e.cz>, <kirill@...temov.name>,
<ak@...ux.intel.com>, <tony.luck@...el.com>,
<sathyanarayanan.kuppuswamy@...ux.intel.com>, <alpergun@...gle.com>,
<jarkko@...nel.org>, <ashish.kalra@....com>, <nikunj.dadhania@....com>,
<pankaj.gupta@....com>, <liam.merwick@...cle.com>, Brijesh Singh
<brijesh.singh@....com>
Subject: [PATCH v15 10/20] KVM: SEV: Add support to handle Page State Change VMGEXIT
SEV-SNP VMs can ask the hypervisor to change the page state in the RMP
table to be private or shared using the Page State Change NAE event
as defined in the GHCB specification version 2.
Forward these requests to userspace as KVM_EXIT_VMGEXITs, similar to how
it is done for requests that don't use a GHCB page.
As with the MSR-based page-state changes, use the existing
KVM_HC_MAP_GPA_RANGE hypercall format to deliver these requests to
userspace via KVM_EXIT_HYPERCALL.
Signed-off-by: Michael Roth <michael.roth@....com>
Co-developed-by: Brijesh Singh <brijesh.singh@....com>
Signed-off-by: Brijesh Singh <brijesh.singh@....com>
Signed-off-by: Ashish Kalra <ashish.kalra@....com>
---
arch/x86/include/asm/sev-common.h | 11 ++
arch/x86/kvm/svm/sev.c | 180 ++++++++++++++++++++++++++++++
arch/x86/kvm/svm/svm.h | 5 +
3 files changed, 196 insertions(+)
diff --git a/arch/x86/include/asm/sev-common.h b/arch/x86/include/asm/sev-common.h
index 6d68db812de1..8647cc05e2f4 100644
--- a/arch/x86/include/asm/sev-common.h
+++ b/arch/x86/include/asm/sev-common.h
@@ -129,8 +129,19 @@ enum psc_op {
* The VMGEXIT_PSC_MAX_ENTRY determines the size of the PSC structure, which
* is a local stack variable in set_pages_state(). Do not increase this value
* without evaluating the impact to stack usage.
+ *
+ * Use VMGEXIT_PSC_MAX_COUNT in cases where the actual GHCB-defined max value
+ * is needed, such as when processing GHCB requests on the hypervisor side.
*/
#define VMGEXIT_PSC_MAX_ENTRY 64
+#define VMGEXIT_PSC_MAX_COUNT 253
+
+#define VMGEXIT_PSC_ERROR_GENERIC (0x100UL << 32)
+#define VMGEXIT_PSC_ERROR_INVALID_HDR ((1UL << 32) | 1)
+#define VMGEXIT_PSC_ERROR_INVALID_ENTRY ((1UL << 32) | 2)
+
+#define VMGEXIT_PSC_OP_PRIVATE 1
+#define VMGEXIT_PSC_OP_SHARED 2
struct psc_hdr {
u16 cur_entry;
diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c
index 720775c9d0b8..70b8f4cd1b03 100644
--- a/arch/x86/kvm/svm/sev.c
+++ b/arch/x86/kvm/svm/sev.c
@@ -3274,6 +3274,10 @@ static int sev_es_validate_vmgexit(struct vcpu_svm *svm)
case SVM_VMGEXIT_HV_FEATURES:
case SVM_VMGEXIT_TERM_REQUEST:
break;
+ case SVM_VMGEXIT_PSC:
+ if (!sev_snp_guest(vcpu->kvm) || !kvm_ghcb_sw_scratch_is_valid(svm))
+ goto vmgexit_err;
+ break;
default:
reason = GHCB_ERR_INVALID_EVENT;
goto vmgexit_err;
@@ -3503,6 +3507,175 @@ static int snp_begin_psc_msr(struct vcpu_svm *svm, u64 ghcb_msr)
return 0; /* forward request to userspace */
}
+struct psc_buffer {
+ struct psc_hdr hdr;
+ struct psc_entry entries[];
+} __packed;
+
+static int snp_begin_psc(struct vcpu_svm *svm, struct psc_buffer *psc);
+
+static int snp_complete_psc(struct kvm_vcpu *vcpu)
+{
+ struct vcpu_svm *svm = to_svm(vcpu);
+ struct psc_buffer *psc = svm->sev_es.ghcb_sa;
+ struct psc_entry *entries = psc->entries;
+ struct psc_hdr *hdr = &psc->hdr;
+ __u64 psc_ret;
+ __u16 idx;
+
+ if (vcpu->run->hypercall.ret) {
+ psc_ret = VMGEXIT_PSC_ERROR_GENERIC;
+ goto out_resume;
+ }
+
+ /*
+ * Everything in-flight has been processed successfully. Update the
+ * corresponding entries in the guest's PSC buffer.
+ */
+ for (idx = svm->sev_es.psc_idx; svm->sev_es.psc_inflight;
+ svm->sev_es.psc_inflight--, idx++) {
+ struct psc_entry *entry = &entries[idx];
+
+ entry->cur_page = svm->sev_es.psc_2m ? 512 : 1;
+ }
+
+ hdr->cur_entry = idx;
+
+ /* Handle the next range (if any). */
+ return snp_begin_psc(svm, psc);
+
+out_resume:
+ svm->sev_es.psc_idx = 0;
+ svm->sev_es.psc_inflight = 0;
+ svm->sev_es.psc_2m = false;
+ ghcb_set_sw_exit_info_2(svm->sev_es.ghcb, psc_ret);
+
+ return 1; /* resume guest */
+}
+
+static int snp_begin_psc(struct vcpu_svm *svm, struct psc_buffer *psc)
+{
+ struct psc_entry *entries = psc->entries;
+ struct kvm_vcpu *vcpu = &svm->vcpu;
+ struct psc_hdr *hdr = &psc->hdr;
+ struct psc_entry entry_start;
+ u16 idx, idx_start, idx_end;
+ __u64 psc_ret, gpa;
+ int npages;
+
+ /* There should be no other PSCs in-flight at this point. */
+ if (WARN_ON_ONCE(svm->sev_es.psc_inflight)) {
+ psc_ret = VMGEXIT_PSC_ERROR_GENERIC;
+ goto out_resume;
+ }
+
+ if (!(vcpu->kvm->arch.hypercall_exit_enabled & (1 << KVM_HC_MAP_GPA_RANGE))) {
+ psc_ret = VMGEXIT_PSC_ERROR_GENERIC;
+ goto out_resume;
+ }
+
+ /*
+ * The PSC descriptor buffer can be modified by a misbehaved guest after
+ * validation, so take care to only use validated copies of values used
+ * for things like array indexing.
+ */
+ idx_start = hdr->cur_entry;
+ idx_end = hdr->end_entry;
+
+ if (idx_end >= VMGEXIT_PSC_MAX_COUNT) {
+ psc_ret = VMGEXIT_PSC_ERROR_INVALID_HDR;
+ goto out_resume;
+ }
+
+ /* Nothing more to process. */
+ if (idx_start > idx_end) {
+ psc_ret = 0;
+ goto out_resume;
+ }
+
+ /* Find the start of the next range which needs processing. */
+ for (idx = idx_start; idx <= idx_end; idx++, hdr->cur_entry++) {
+ __u16 cur_page;
+ gfn_t gfn;
+ bool huge;
+
+ entry_start = entries[idx];
+
+ /* Only private/shared conversions are currently supported. */
+ if (entry_start.operation != VMGEXIT_PSC_OP_PRIVATE &&
+ entry_start.operation != VMGEXIT_PSC_OP_SHARED)
+ continue;
+
+ gfn = entry_start.gfn;
+ cur_page = entry_start.cur_page;
+ huge = entry_start.pagesize;
+
+ if ((huge && (cur_page > 512 || !IS_ALIGNED(gfn, 512))) ||
+ (!huge && cur_page > 1)) {
+ psc_ret = VMGEXIT_PSC_ERROR_INVALID_ENTRY;
+ goto out_resume;
+ }
+
+ /* All sub-pages already processed. */
+ if ((huge && cur_page == 512) || (!huge && cur_page == 1))
+ continue;
+
+ /*
+ * If this is a partially-completed 2M range, force 4K handling
+ * for the remaining pages since they're effectively split at
+ * this point. Subsequent code should ensure this doesn't get
+ * combined with adjacent PSC entries where 2M handling is still
+ * possible.
+ */
+ svm->sev_es.psc_2m = cur_page ? false : huge;
+ svm->sev_es.psc_idx = idx;
+ svm->sev_es.psc_inflight = 1;
+
+ gpa = gfn_to_gpa(gfn + cur_page);
+ npages = huge ? 512 - cur_page : 1;
+ break;
+ }
+
+ /*
+ * Find all subsequent PSC entries that contain adjacent GPA
+ * ranges/operations and can be combined into a single
+ * KVM_HC_MAP_GPA_RANGE exit.
+ */
+ for (idx = svm->sev_es.psc_idx + 1; idx <= idx_end; idx++) {
+ struct psc_entry entry = entries[idx];
+
+ if (entry.operation != entry_start.operation ||
+ entry.gfn != entry_start.gfn + npages ||
+ !!entry.pagesize != svm->sev_es.psc_2m)
+ break;
+
+ svm->sev_es.psc_inflight++;
+ npages += entry_start.pagesize ? 512 : 1;
+ }
+
+ vcpu->run->exit_reason = KVM_EXIT_HYPERCALL;
+ vcpu->run->hypercall.nr = KVM_HC_MAP_GPA_RANGE;
+ vcpu->run->hypercall.args[0] = gpa;
+ vcpu->run->hypercall.args[1] = npages;
+ vcpu->run->hypercall.args[2] = entry_start.operation == VMGEXIT_PSC_OP_PRIVATE
+ ? KVM_MAP_GPA_RANGE_ENCRYPTED
+ : KVM_MAP_GPA_RANGE_DECRYPTED;
+ vcpu->run->hypercall.args[2] |= entry_start.pagesize
+ ? KVM_MAP_GPA_RANGE_PAGE_SZ_2M
+ : KVM_MAP_GPA_RANGE_PAGE_SZ_4K;
+ vcpu->arch.complete_userspace_io = snp_complete_psc;
+
+ return 0; /* forward request to userspace */
+
+out_resume:
+ svm->sev_es.psc_idx = 0;
+ svm->sev_es.psc_inflight = 0;
+ svm->sev_es.psc_2m = false;
+ ghcb_set_sw_exit_info_2(svm->sev_es.ghcb, psc_ret);
+
+ return 1; /* resume guest */
+}
+
static int sev_handle_vmgexit_msr_protocol(struct vcpu_svm *svm)
{
struct vmcb_control_area *control = &svm->vmcb->control;
@@ -3761,6 +3934,13 @@ int sev_handle_vmgexit(struct kvm_vcpu *vcpu)
vcpu->run->system_event.ndata = 1;
vcpu->run->system_event.data[0] = control->ghcb_gpa;
break;
+ case SVM_VMGEXIT_PSC:
+ ret = setup_vmgexit_scratch(svm, true, control->exit_info_2);
+ if (ret)
+ break;
+
+ ret = snp_begin_psc(svm, svm->sev_es.ghcb_sa);
+ break;
case SVM_VMGEXIT_UNSUPPORTED_EVENT:
vcpu_unimpl(vcpu,
"vmgexit: unsupported event - exit_info_1=%#llx, exit_info_2=%#llx\n",
diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h
index bbfbeed4c676..438cad6c9421 100644
--- a/arch/x86/kvm/svm/svm.h
+++ b/arch/x86/kvm/svm/svm.h
@@ -210,6 +210,11 @@ struct vcpu_sev_es_state {
bool ghcb_sa_sync;
bool ghcb_sa_free;
+ /* SNP Page-State-Change buffer entries currently being processed */
+ u16 psc_idx;
+ u16 psc_inflight;
+ bool psc_2m;
+
u64 ghcb_registered_gpa;
};
--
2.25.1
Powered by blists - more mailing lists