Message-ID: <20240520230200.1161826-1-michael.roth@amd.com>
Date: Mon, 20 May 2024 18:02:00 -0500
From: Michael Roth <michael.roth@....com>
To: <pbonzini@...hat.com>
CC: <kvm@...r.kernel.org>, <linux-kernel@...r.kernel.org>,
	<ashish.kalra@....com>, <thomas.lendacky@....com>, <seanjc@...gle.com>,
	<rick.p.edgecombe@...el.com>
Subject: [PATCH v2] KVM: SEV: Fix guest memory leak when handling guest requests

Before forwarding guest requests to firmware, KVM takes a reference on
the 2 pages the guest uses for its request/response buffers. Make sure
to release these references when cleaning up after the request is
completed.

Also modify the logic to return an error to userspace immediately
(rather than reporting failure to the guest) if there is an error
returning the guest pages to their expected state after the firmware
command completes. Continue to propagate firmware errors to the guest
as per the GHCB spec, however.
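
For reference, the resulting flow in snp_handle_guest_req() looks roughly
like the condensed sketch below (error paths abbreviated; the req_pfn
reference is taken via gfn_to_pfn() in context not visible in the hunks):

  req_pfn  = gfn_to_pfn(kvm, gpa_to_gfn(req_gpa));   /* takes a page reference */
  resp_pfn = gfn_to_pfn(kvm, gpa_to_gfn(resp_gpa));  /* takes a page reference */

  rmp_make_private(resp_pfn, 0, PG_LEVEL_4K, 0, true);  /* response page -> firmware-owned */
  ret = sev_issue_cmd(kvm, SEV_CMD_SNP_GUEST_REQUEST, &data, &fw_err);

  if (snp_page_reclaim(resp_pfn) ||                      /* restore response page, or... */
      host_rmp_make_shared(resp_pfn, PG_LEVEL_4K))
          return -EIO;                                   /* ...fail to userspace */

  /* Firmware errors go back to the guest via SW_EXITINFO2. */
  ghcb_set_sw_exit_info_2(svm->sev_es.ghcb,
                          SNP_GUEST_ERR(ret ? SNP_GUEST_VMM_ERR_GENERIC : 0, fw_err));

  kvm_release_pfn_dirty(resp_pfn);                       /* drop response page reference */
  kvm_release_pfn_clean(req_pfn);                        /* drop request page reference */
  return 1;                                              /* resume guest */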

Suggested-by: Sean Christopherson <seanjc@...gle.com> #for error-handling
Signed-off-by: Michael Roth <michael.roth@....com>
---
v2:
 - Fail to userspace if reclaim fails rather than trying to inform the
   guest of the error (Sean)
 - Remove the setup/cleanup helpers so that the cleanup logic is easier
   to follow (Sean)
 - Full original patch with this and other pending fix squashed in:
   https://github.com/mdroth/linux/commit/b4f51e38da22a2b163c546cb2a3aefd04446b3c7

 arch/x86/kvm/svm/sev.c | 105 ++++++++++++++++++-----------------------
 1 file changed, 47 insertions(+), 58 deletions(-)

diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c
index 252bf7564f4b..446f9811cdaf 100644
--- a/arch/x86/kvm/svm/sev.c
+++ b/arch/x86/kvm/svm/sev.c
@@ -3919,11 +3919,16 @@ static int sev_snp_ap_creation(struct vcpu_svm *svm)
 	return ret;
 }
 
-static int snp_setup_guest_buf(struct kvm *kvm, struct sev_data_snp_guest_request *data,
-			       gpa_t req_gpa, gpa_t resp_gpa)
+static int snp_handle_guest_req(struct vcpu_svm *svm, gpa_t req_gpa, gpa_t resp_gpa)
 {
-	struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
+	struct sev_data_snp_guest_request data = {0};
+	struct kvm *kvm = svm->vcpu.kvm;
 	kvm_pfn_t req_pfn, resp_pfn;
+	sev_ret_code fw_err = 0;
+	int ret;
+
+	if (!sev_snp_guest(kvm))
+		return -EINVAL;
 
 	if (!PAGE_ALIGNED(req_gpa) || !PAGE_ALIGNED(resp_gpa))
 		return -EINVAL;
@@ -3933,64 +3938,49 @@ static int snp_setup_guest_buf(struct kvm *kvm, struct sev_data_snp_guest_reques
 		return -EINVAL;
 
 	resp_pfn = gfn_to_pfn(kvm, gpa_to_gfn(resp_gpa));
-	if (is_error_noslot_pfn(resp_pfn))
-		return -EINVAL;
-
-	if (rmp_make_private(resp_pfn, 0, PG_LEVEL_4K, 0, true))
-		return -EINVAL;
-
-	data->gctx_paddr = __psp_pa(sev->snp_context);
-	data->req_paddr = __sme_set(req_pfn << PAGE_SHIFT);
-	data->res_paddr = __sme_set(resp_pfn << PAGE_SHIFT);
-
-	return 0;
-}
-
-static int snp_cleanup_guest_buf(struct sev_data_snp_guest_request *data)
-{
-	u64 pfn = __sme_clr(data->res_paddr) >> PAGE_SHIFT;
-
-	if (snp_page_reclaim(pfn) || rmp_make_shared(pfn, PG_LEVEL_4K))
-		return -EINVAL;
-
-	return 0;
-}
-
-static int __snp_handle_guest_req(struct kvm *kvm, gpa_t req_gpa, gpa_t resp_gpa,
-				  sev_ret_code *fw_err)
-{
-	struct sev_data_snp_guest_request data = {0};
-	int ret;
-
-	if (!sev_snp_guest(kvm))
-		return -EINVAL;
-
-	ret = snp_setup_guest_buf(kvm, &data, req_gpa, resp_gpa);
-	if (ret)
-		return ret;
+	if (is_error_noslot_pfn(resp_pfn)) {
+		ret = -EINVAL;
+		goto release_req;
+	}
 
-	ret = sev_issue_cmd(kvm, SEV_CMD_SNP_GUEST_REQUEST, &data, fw_err);
-	if (ret)
-		return ret;
+	if (rmp_make_private(resp_pfn, 0, PG_LEVEL_4K, 0, true)) {
+		ret = -EINVAL;
+		kvm_release_pfn_clean(resp_pfn);
+		goto release_req;
+	}
 
-	ret = snp_cleanup_guest_buf(&data);
-	if (ret)
-		return ret;
+	data.gctx_paddr = __psp_pa(to_kvm_sev_info(kvm)->snp_context);
+	data.req_paddr = __sme_set(req_pfn << PAGE_SHIFT);
+	data.res_paddr = __sme_set(resp_pfn << PAGE_SHIFT);
 
-	return 0;
-}
+	ret = sev_issue_cmd(kvm, SEV_CMD_SNP_GUEST_REQUEST, &data, &fw_err);
 
-static void snp_handle_guest_req(struct vcpu_svm *svm, gpa_t req_gpa, gpa_t resp_gpa)
-{
-	struct kvm_vcpu *vcpu = &svm->vcpu;
-	struct kvm *kvm = vcpu->kvm;
-	sev_ret_code fw_err = 0;
-	int vmm_ret = 0;
-
-	if (__snp_handle_guest_req(kvm, req_gpa, resp_gpa, &fw_err))
-		vmm_ret = SNP_GUEST_VMM_ERR_GENERIC;
+	/*
+	 * If the pages can't be placed back in the expected state then it is
+	 * more reliable to always report the error to userspace than to try to
+	 * let the guest deal with it somehow. Either way, the guest would
+	 * likely terminate itself soon after a guest request failure anyway.
+	 */
+	if (snp_page_reclaim(resp_pfn) ||
+	    host_rmp_make_shared(resp_pfn, PG_LEVEL_4K)) {
+		ret = -EIO;
+		goto release_req;
+	}
 
-	ghcb_set_sw_exit_info_2(svm->sev_es.ghcb, SNP_GUEST_ERR(vmm_ret, fw_err));
+	/*
+	 * Unlike with reclaim failures, firmware failures should be
+	 * communicated back to the guest via SW_EXITINFO2 rather than be
+	 * treated as immediately fatal.
+	 */
+	ghcb_set_sw_exit_info_2(svm->sev_es.ghcb,
+				SNP_GUEST_ERR(ret ? SNP_GUEST_VMM_ERR_GENERIC : 0,
+					      fw_err));
+	ret = 1; /* resume guest */
+	kvm_release_pfn_dirty(resp_pfn);
+
+release_req:
+	kvm_release_pfn_clean(req_pfn);
+	return ret;
 }
 
 static int sev_handle_vmgexit_msr_protocol(struct vcpu_svm *svm)
@@ -4268,8 +4258,7 @@ int sev_handle_vmgexit(struct kvm_vcpu *vcpu)
 		ret = 1;
 		break;
 	case SVM_VMGEXIT_GUEST_REQUEST:
-		snp_handle_guest_req(svm, control->exit_info_1, control->exit_info_2);
-		ret = 1;
+		ret = snp_handle_guest_req(svm, control->exit_info_1, control->exit_info_2);
 		break;
 	case SVM_VMGEXIT_UNSUPPORTED_EVENT:
 		vcpu_unimpl(vcpu,
-- 
2.25.1

