[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <96221e03-adfd-cc59-ce45-7933220999a7@amd.com>
Date: Fri, 7 Mar 2025 11:50:54 -0600
From: Tom Lendacky <thomas.lendacky@....com>
To: Alexey Kardashevskiy <aik@....com>, x86@...nel.org
Cc: linux-kernel@...r.kernel.org, Thomas Gleixner <tglx@...utronix.de>,
Ingo Molnar <mingo@...hat.com>, Borislav Petkov <bp@...en8.de>,
Dave Hansen <dave.hansen@...ux.intel.com>, "H. Peter Anvin" <hpa@...or.com>,
Nikunj A Dadhania <nikunj@....com>, Ard Biesheuvel <ardb@...nel.org>,
Pavan Kumar Paluri <papaluri@....com>, Ashish Kalra <ashish.kalra@....com>,
Paolo Bonzini <pbonzini@...hat.com>, Michael Roth <michael.roth@....com>,
Kevin Loughlin <kevinloughlin@...gle.com>,
Kuppuswamy Sathyanarayanan <sathyanarayanan.kuppuswamy@...ux.intel.com>,
Brijesh Singh <brijesh.singh@....com>, Liam Merwick
<liam.merwick@...cle.com>, stable@...r.kernel.org
Subject: Re: [PATCH 2/2] virt: sev-guest: Move SNP Guest Request data pages
handling under snp_cmd_mutex
On 3/6/25 19:37, Alexey Kardashevskiy wrote:
> Compared to the SNP Guest Request, the "Extended" version adds data pages
> for receiving certificates. If not enough pages are provided, the HV can
> report to the VM how many are needed so the VM can reallocate and repeat.
>
> Commit ae596615d93d ("virt: sev-guest: Reduce the scope of SNP command
> mutex") moved handling of the allocated/desired pages number out of scope
> of said mutex and created a possibility for a race (multiple instances
> trying to trigger Extended request in a VM) as there is just one instance
> of snp_msg_desc per /dev/sev-guest and no locking other than snp_cmd_mutex.
>
> Fix the issue by moving the data blob/size and the GHCB input struct
> (snp_req_data) into snp_guest_req which is allocated on stack now
> and accessed by the GHCB caller under that mutex.
>
> Stop allocating SEV_FW_BLOB_MAX_SIZE in snp_msg_alloc() as only one of
> four callers needs it. Free the received blob in get_ext_report() right
> after it is copied to the userspace. Possible future users of
> snp_send_guest_request() are likely to have different ideas about
> the buffer size anyways.
>
> Fixes: ae596615d93d ("virt: sev-guest: Reduce the scope of SNP command mutex")
> Cc: stable@...r.kernel.org
> Cc: Nikunj A Dadhania <nikunj@....com>
> Signed-off-by: Alexey Kardashevskiy <aik@....com>
> ---
> arch/x86/include/asm/sev.h | 6 ++--
> arch/x86/coco/sev/core.c | 23 +++++--------
> drivers/virt/coco/sev-guest/sev-guest.c | 34 ++++++++++++++++----
> 3 files changed, 39 insertions(+), 24 deletions(-)
>
> diff --git a/arch/x86/include/asm/sev.h b/arch/x86/include/asm/sev.h
> index 1581246491b5..ba7999f66abe 100644
> --- a/arch/x86/include/asm/sev.h
> +++ b/arch/x86/include/asm/sev.h
> @@ -203,6 +203,9 @@ struct snp_guest_req {
> unsigned int vmpck_id;
> u8 msg_version;
> u8 msg_type;
> +
> + struct snp_req_data input;
> + void *certs_data;
> };
>
> /*
> @@ -263,9 +266,6 @@ struct snp_msg_desc {
> struct snp_guest_msg secret_request, secret_response;
>
> struct snp_secrets_page *secrets;
> - struct snp_req_data input;
> -
> - void *certs_data;
>
> struct aesgcm_ctx *ctx;
>
> diff --git a/arch/x86/coco/sev/core.c b/arch/x86/coco/sev/core.c
> index 82492efc5d94..d02eea5e3d50 100644
> --- a/arch/x86/coco/sev/core.c
> +++ b/arch/x86/coco/sev/core.c
> @@ -2853,19 +2853,8 @@ struct snp_msg_desc *snp_msg_alloc(void)
> if (!mdesc->response)
> goto e_free_request;
>
> - mdesc->certs_data = alloc_shared_pages(SEV_FW_BLOB_MAX_SIZE);
> - if (!mdesc->certs_data)
> - goto e_free_response;
> -
> - /* initial the input address for guest request */
> - mdesc->input.req_gpa = __pa(mdesc->request);
> - mdesc->input.resp_gpa = __pa(mdesc->response);
> - mdesc->input.data_gpa = __pa(mdesc->certs_data);
> -
> return mdesc;
>
> -e_free_response:
> - free_shared_pages(mdesc->response, sizeof(struct snp_guest_msg));
> e_free_request:
> free_shared_pages(mdesc->request, sizeof(struct snp_guest_msg));
> e_unmap:
> @@ -2885,7 +2874,6 @@ void snp_msg_free(struct snp_msg_desc *mdesc)
> kfree(mdesc->ctx);
> free_shared_pages(mdesc->response, sizeof(struct snp_guest_msg));
> free_shared_pages(mdesc->request, sizeof(struct snp_guest_msg));
> - free_shared_pages(mdesc->certs_data, SEV_FW_BLOB_MAX_SIZE);
> iounmap((__force void __iomem *)mdesc->secrets);
>
> memset(mdesc, 0, sizeof(*mdesc));
> @@ -3054,7 +3042,7 @@ static int __handle_guest_request(struct snp_msg_desc *mdesc, struct snp_guest_r
> * sequence number must be incremented or the VMPCK must be deleted to
> * prevent reuse of the IV.
> */
> - rc = snp_issue_guest_request(req, &mdesc->input, rio);
> + rc = snp_issue_guest_request(req, &req->input, rio);
> switch (rc) {
> case -ENOSPC:
> /*
> @@ -3064,7 +3052,7 @@ static int __handle_guest_request(struct snp_msg_desc *mdesc, struct snp_guest_r
> * order to increment the sequence number and thus avoid
> * IV reuse.
> */
> - override_npages = mdesc->input.data_npages;
> + override_npages = req->input.data_npages;
> req->exit_code = SVM_VMGEXIT_GUEST_REQUEST;
>
> /*
> @@ -3120,7 +3108,7 @@ static int __handle_guest_request(struct snp_msg_desc *mdesc, struct snp_guest_r
> }
>
> if (override_npages)
> - mdesc->input.data_npages = override_npages;
> + req->input.data_npages = override_npages;
>
> return rc;
> }
> @@ -3158,6 +3146,11 @@ int snp_send_guest_request(struct snp_msg_desc *mdesc, struct snp_guest_req *req
> */
> memcpy(mdesc->request, &mdesc->secret_request, sizeof(mdesc->secret_request));
>
> + /* initial the input address for guest request */
> + req->input.req_gpa = __pa(mdesc->request);
> + req->input.resp_gpa = __pa(mdesc->response);
> + req->input.data_gpa = req->certs_data ? __pa(req->certs_data) : 0;
> +
> rc = __handle_guest_request(mdesc, req, rio);
> if (rc) {
> if (rc == -EIO &&
> diff --git a/drivers/virt/coco/sev-guest/sev-guest.c b/drivers/virt/coco/sev-guest/sev-guest.c
> index 4699fdc9ed44..cf3fb61f4d5b 100644
> --- a/drivers/virt/coco/sev-guest/sev-guest.c
> +++ b/drivers/virt/coco/sev-guest/sev-guest.c
> @@ -177,6 +177,7 @@ static int get_ext_report(struct snp_guest_dev *snp_dev, struct snp_guest_reques
> struct snp_guest_req req = {};
> int ret, npages = 0, resp_len;
> sockptr_t certs_address;
> + struct page *page;
>
> if (sockptr_is_null(io->req_data) || sockptr_is_null(io->resp_data))
> return -EINVAL;
> @@ -210,8 +211,20 @@ static int get_ext_report(struct snp_guest_dev *snp_dev, struct snp_guest_reques
> * the host. If host does not supply any certs in it, then copy
> * zeros to indicate that certificate data was not provided.
> */
> - memset(mdesc->certs_data, 0, report_req->certs_len);
> npages = report_req->certs_len >> PAGE_SHIFT;
> + page = alloc_pages(GFP_KERNEL_ACCOUNT | __GFP_ZERO,
> + get_order(report_req->certs_len));
Not sure if it is worth using alloc_pages_exact() (and free_pages_exact())
here instead, since you only end up performing set_memory_decrypted()
against npages vs the actual number allocated. It's not an issue, just
looks a bit odd to my eye.
> + if (!page)
> + return -ENOMEM;
> +
> + req.certs_data = page_address(page);
> + ret = set_memory_decrypted((unsigned long)req.certs_data, npages);
> + if (ret) {
> + pr_err("failed to mark page shared, ret=%d\n", ret);
> + __free_pages(page, get_order(report_req->certs_len));
You can't be sure at what stage the failure occurred, so you need to leak
the pages, just like below where you call set_memory_encrypted().
And similar to below, maybe do a WARN_ONCE() instead of pr_err()?
> + return -EFAULT;
> + }
> +
> cmd:
> /*
> * The intermediate response buffer is used while decrypting the
> @@ -220,10 +233,12 @@ static int get_ext_report(struct snp_guest_dev *snp_dev, struct snp_guest_reques
> */
> resp_len = sizeof(report_resp->data) + mdesc->ctx->authsize;
> report_resp = kzalloc(resp_len, GFP_KERNEL_ACCOUNT);
> - if (!report_resp)
> - return -ENOMEM;
> + if (!report_resp) {
> + ret = -ENOMEM;
> + goto e_free_data;
> + }
>
> - mdesc->input.data_npages = npages;
> + req.input.data_npages = npages;
>
> req.msg_version = arg->msg_version;
> req.msg_type = SNP_MSG_REPORT_REQ;
> @@ -238,7 +253,7 @@ static int get_ext_report(struct snp_guest_dev *snp_dev, struct snp_guest_reques
>
> /* If certs length is invalid then copy the returned length */
> if (arg->vmm_error == SNP_GUEST_VMM_ERR_INVALID_LEN) {
> - report_req->certs_len = mdesc->input.data_npages << PAGE_SHIFT;
> + report_req->certs_len = req.input.data_npages << PAGE_SHIFT;
>
> if (copy_to_sockptr(io->req_data, report_req, sizeof(*report_req)))
> ret = -EFAULT;
> @@ -247,7 +262,7 @@ static int get_ext_report(struct snp_guest_dev *snp_dev, struct snp_guest_reques
> if (ret)
> goto e_free;
>
> - if (npages && copy_to_sockptr(certs_address, mdesc->certs_data, report_req->certs_len)) {
> + if (npages && copy_to_sockptr(certs_address, req.certs_data, report_req->certs_len)) {
> ret = -EFAULT;
> goto e_free;
> }
> @@ -257,6 +272,13 @@ static int get_ext_report(struct snp_guest_dev *snp_dev, struct snp_guest_reques
>
> e_free:
> kfree(report_resp);
> +e_free_data:
> + if (npages) {
> + if (set_memory_encrypted((unsigned long)req.certs_data, npages))
> + WARN_ONCE(ret, "failed to restore encryption mask (leak it)\n");
> + else
> + __free_pages(page, get_order(report_req->certs_len));
Can't report_req->certs_len have been updated with a new value at this
point (from the "if (arg->vmm_error == SNP_GUEST_VMM_ERR_INVALID_LEN)")
check and you'll attempt to possibly free more than you allocated?
This would be covered if you stick with npages and use alloc_pages_exact()
and free_pages_exact() using npages.
Thanks,
Tom
> + }
> return ret;
> }
>
Powered by blists - more mailing lists