lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite for Android: free password hash cracker in your pocket
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Date:   Fri, 9 Dec 2022 15:01:36 -0600
From:   Tom Lendacky <thomas.lendacky@....com>
To:     Rijo Thomas <Rijo-john.Thomas@....com>,
        John Allen <john.allen@....com>,
        Herbert Xu <herbert@...dor.apana.org.au>,
        "David S . Miller" <davem@...emloft.net>,
        Sumit Semwal <sumit.semwal@...aro.org>,
        Christian König <christian.koenig@....com>,
        linux-kernel@...r.kernel.org, linux-crypto@...r.kernel.org,
        linux-media@...r.kernel.org, dri-devel@...ts.freedesktop.org,
        linaro-mm-sig@...ts.linaro.org
Cc:     Mythri PK <Mythri.Pandeshwarakrishna@....com>,
        Jeshwanth <JESHWANTHKUMAR.NK@....com>,
        Devaraj Rangasamy <Devaraj.Rangasamy@....com>,
        stable@...r.kernel.org, Jens Wiklander <jens.wiklander@...aro.org>
Subject: Re: [PATCH 1/1] crypto: ccp - Allocate TEE ring and cmd buffer using
 DMA APIs

On 12/6/22 06:30, Rijo Thomas wrote:
> For AMD Secure Processor (ASP) to map and access TEE ring buffer, the
> ring buffer address sent by host to ASP must be a real physical
> address and the pages must be physically contiguous.
> 
> In a virtualized environment though, when the driver is running in a
> guest VM, the pages allocated by __get_free_pages() may not be
> contiguous in the host (or machine) physical address space. Guests
> will see a guest (or pseudo) physical address and not the actual host
> (or machine) physical address. The TEE running on ASP cannot decipher
> pseudo physical addresses. It needs host or machine physical address.
> 
> To resolve this problem, use DMA APIs for allocating buffers that must
> be shared with TEE. This will ensure that the pages are contiguous in
> host (or machine) address space. If the DMA handle is an IOVA,
> translate it into a physical address before sending it to ASP.
> 
> This patch also exports two APIs (one for buffer allocation and
> another to free the buffer). This API can be used by AMD-TEE driver to
> share buffers with TEE.
> 
> Fixes: 33960acccfbd ("crypto: ccp - add TEE support for Raven Ridge")
> Cc: Tom Lendacky <thomas.lendacky@....com>
> Cc: stable@...r.kernel.org
> Signed-off-by: Rijo Thomas <Rijo-john.Thomas@....com>
> Co-developed-by: Jeshwanth <JESHWANTHKUMAR.NK@....com>
> Signed-off-by: Jeshwanth <JESHWANTHKUMAR.NK@....com>
> Reviewed-by: Devaraj Rangasamy <Devaraj.Rangasamy@....com>
> ---
>   drivers/crypto/ccp/psp-dev.c |   6 +-
>   drivers/crypto/ccp/tee-dev.c | 116 ++++++++++++++++++++++-------------
>   drivers/crypto/ccp/tee-dev.h |   9 +--
>   include/linux/psp-tee.h      |  47 ++++++++++++++
>   4 files changed, 127 insertions(+), 51 deletions(-)
> 
> diff --git a/drivers/crypto/ccp/psp-dev.c b/drivers/crypto/ccp/psp-dev.c
> index c9c741ac8442..2b86158d7435 100644
> --- a/drivers/crypto/ccp/psp-dev.c
> +++ b/drivers/crypto/ccp/psp-dev.c
> @@ -161,13 +161,13 @@ int psp_dev_init(struct sp_device *sp)
>   		goto e_err;
>   	}
>   
> +	if (sp->set_psp_master_device)
> +		sp->set_psp_master_device(sp);
> +

This worries me a bit...  if psp_init() fails, it may still be referenced 
as the master device. What's the reason for moving it?

>   	ret = psp_init(psp);
>   	if (ret)
>   		goto e_irq;
>   
> -	if (sp->set_psp_master_device)
> -		sp->set_psp_master_device(sp);
> -
>   	/* Enable interrupt */
>   	iowrite32(-1, psp->io_regs + psp->vdata->inten_reg);
>   
> diff --git a/drivers/crypto/ccp/tee-dev.c b/drivers/crypto/ccp/tee-dev.c
> index 5c9d47f3be37..1631d9851e54 100644
> --- a/drivers/crypto/ccp/tee-dev.c
> +++ b/drivers/crypto/ccp/tee-dev.c
> @@ -12,8 +12,9 @@
>   #include <linux/mutex.h>
>   #include <linux/delay.h>
>   #include <linux/slab.h>
> +#include <linux/dma-direct.h>
> +#include <linux/iommu.h>
>   #include <linux/gfp.h>
> -#include <linux/psp-sev.h>
>   #include <linux/psp-tee.h>
>   
>   #include "psp-dev.h"
> @@ -21,25 +22,64 @@
>   
>   static bool psp_dead;
>   
> +struct dma_buffer *psp_tee_alloc_dmabuf(unsigned long size, gfp_t gfp)

It looks like both calls to this use the same gfp_t values, you can 
probably eliminate from the call and just specify them in here.

> +{
> +	struct psp_device *psp = psp_get_master_device();
> +	struct dma_buffer *dma_buf;
> +	struct iommu_domain *dom;
> +
> +	if (!psp || !size)
> +		return NULL;
> +
> +	dma_buf = kzalloc(sizeof(*dma_buf), GFP_KERNEL);
> +	if (!dma_buf)
> +		return NULL;
> +
> +	dma_buf->vaddr = dma_alloc_coherent(psp->dev, size, &dma_buf->dma, gfp);
> +	if (!dma_buf->vaddr || !dma_buf->dma) {

I don't think you can have one of these be NULL without both being NULL, 
but I guess it doesn't hurt to check.

> +		kfree(dma_buf);
> +		return NULL;
> +	}
> +
> +	dma_buf->size = size;
> + > +	dom = iommu_get_domain_for_dev(psp->dev);
> +	if (dom)

You need a nice comment above this all explaining that. I guess you're 
using the presence of a domain to determine whether you're running on 
bare-metal vs within a hypervisor? I'm not sure what will happen if the 
guest ever gets an emulated IOMMU...

> +		dma_buf->paddr = iommu_iova_to_phys(dom, dma_buf->dma);

If you're just looking to get the physical address, why not just to an 
__pa(dma_buf->vaddr)?

Also, paddr might not be the best name, since it isn't always a physical 
address, but I can't really think of something right now.

Thanks,
Tom

> +	else
> +		dma_buf->paddr = dma_buf->dma;
> +
> +	return dma_buf;
> +}
> +EXPORT_SYMBOL(psp_tee_alloc_dmabuf);
> +
> +void psp_tee_free_dmabuf(struct dma_buffer *dma_buf)
> +{
> +	struct psp_device *psp = psp_get_master_device();
> +
> +	if (!psp || !dma_buf)
> +		return;
> +
> +	dma_free_coherent(psp->dev, dma_buf->size,
> +			  dma_buf->vaddr, dma_buf->dma);
> +
> +	kfree(dma_buf);
> +}
> +EXPORT_SYMBOL(psp_tee_free_dmabuf);
> +
>   static int tee_alloc_ring(struct psp_tee_device *tee, int ring_size)
>   {
>   	struct ring_buf_manager *rb_mgr = &tee->rb_mgr;
> -	void *start_addr;
>   
>   	if (!ring_size)
>   		return -EINVAL;
>   
> -	/* We need actual physical address instead of DMA address, since
> -	 * Trusted OS running on AMD Secure Processor will map this region
> -	 */
> -	start_addr = (void *)__get_free_pages(GFP_KERNEL, get_order(ring_size));
> -	if (!start_addr)
> +	rb_mgr->ring_buf = psp_tee_alloc_dmabuf(ring_size,
> +						GFP_KERNEL | __GFP_ZERO);
> +	if (!rb_mgr->ring_buf) {
> +		dev_err(tee->dev, "ring allocation failed\n");
>   		return -ENOMEM;
> -
> -	memset(start_addr, 0x0, ring_size);
> -	rb_mgr->ring_start = start_addr;
> -	rb_mgr->ring_size = ring_size;
> -	rb_mgr->ring_pa = __psp_pa(start_addr);
> +	}
>   	mutex_init(&rb_mgr->mutex);
>   
>   	return 0;
> @@ -49,15 +89,8 @@ static void tee_free_ring(struct psp_tee_device *tee)
>   {
>   	struct ring_buf_manager *rb_mgr = &tee->rb_mgr;
>   
> -	if (!rb_mgr->ring_start)
> -		return;
> +	psp_tee_free_dmabuf(rb_mgr->ring_buf);
>   
> -	free_pages((unsigned long)rb_mgr->ring_start,
> -		   get_order(rb_mgr->ring_size));
> -
> -	rb_mgr->ring_start = NULL;
> -	rb_mgr->ring_size = 0;
> -	rb_mgr->ring_pa = 0;
>   	mutex_destroy(&rb_mgr->mutex);
>   }
>   
> @@ -81,35 +114,36 @@ static int tee_wait_cmd_poll(struct psp_tee_device *tee, unsigned int timeout,
>   	return -ETIMEDOUT;
>   }
>   
> -static
> -struct tee_init_ring_cmd *tee_alloc_cmd_buffer(struct psp_tee_device *tee)
> +struct dma_buffer *tee_alloc_cmd_buffer(struct psp_tee_device *tee)
>   {
>   	struct tee_init_ring_cmd *cmd;
> +	struct dma_buffer *cmd_buffer;
>   
> -	cmd = kzalloc(sizeof(*cmd), GFP_KERNEL);
> -	if (!cmd)
> +	cmd_buffer = psp_tee_alloc_dmabuf(sizeof(*cmd),
> +					  GFP_KERNEL | __GFP_ZERO);
> +	if (!cmd_buffer)
>   		return NULL;
>   
> -	cmd->hi_addr = upper_32_bits(tee->rb_mgr.ring_pa);
> -	cmd->low_addr = lower_32_bits(tee->rb_mgr.ring_pa);
> -	cmd->size = tee->rb_mgr.ring_size;
> +	cmd = (struct tee_init_ring_cmd *)cmd_buffer->vaddr;
> +	cmd->hi_addr = upper_32_bits(tee->rb_mgr.ring_buf->paddr);
> +	cmd->low_addr = lower_32_bits(tee->rb_mgr.ring_buf->paddr);
> +	cmd->size = tee->rb_mgr.ring_buf->size;
>   
>   	dev_dbg(tee->dev, "tee: ring address: high = 0x%x low = 0x%x size = %u\n",
>   		cmd->hi_addr, cmd->low_addr, cmd->size);
>   
> -	return cmd;
> +	return cmd_buffer;
>   }
>   
> -static inline void tee_free_cmd_buffer(struct tee_init_ring_cmd *cmd)
> +static inline void tee_free_cmd_buffer(struct dma_buffer *cmd_buffer)
>   {
> -	kfree(cmd);
> +	psp_tee_free_dmabuf(cmd_buffer);
>   }
>   
>   static int tee_init_ring(struct psp_tee_device *tee)
>   {
>   	int ring_size = MAX_RING_BUFFER_ENTRIES * sizeof(struct tee_ring_cmd);
> -	struct tee_init_ring_cmd *cmd;
> -	phys_addr_t cmd_buffer;
> +	struct dma_buffer *cmd_buffer;
>   	unsigned int reg;
>   	int ret;
>   
> @@ -123,21 +157,19 @@ static int tee_init_ring(struct psp_tee_device *tee)
>   
>   	tee->rb_mgr.wptr = 0;
>   
> -	cmd = tee_alloc_cmd_buffer(tee);
> -	if (!cmd) {
> +	cmd_buffer = tee_alloc_cmd_buffer(tee);
> +	if (!cmd_buffer) {
>   		tee_free_ring(tee);
>   		return -ENOMEM;
>   	}
>   
> -	cmd_buffer = __psp_pa((void *)cmd);
> -
>   	/* Send command buffer details to Trusted OS by writing to
>   	 * CPU-PSP message registers
>   	 */
>   
> -	iowrite32(lower_32_bits(cmd_buffer),
> +	iowrite32(lower_32_bits(cmd_buffer->paddr),
>   		  tee->io_regs + tee->vdata->cmdbuff_addr_lo_reg);
> -	iowrite32(upper_32_bits(cmd_buffer),
> +	iowrite32(upper_32_bits(cmd_buffer->paddr),
>   		  tee->io_regs + tee->vdata->cmdbuff_addr_hi_reg);
>   	iowrite32(TEE_RING_INIT_CMD,
>   		  tee->io_regs + tee->vdata->cmdresp_reg);
> @@ -157,7 +189,7 @@ static int tee_init_ring(struct psp_tee_device *tee)
>   	}
>   
>   free_buf:
> -	tee_free_cmd_buffer(cmd);
> +	tee_free_cmd_buffer(cmd_buffer);
>   
>   	return ret;
>   }
> @@ -167,7 +199,7 @@ static void tee_destroy_ring(struct psp_tee_device *tee)
>   	unsigned int reg;
>   	int ret;
>   
> -	if (!tee->rb_mgr.ring_start)
> +	if (!tee->rb_mgr.ring_buf->vaddr)
>   		return;
>   
>   	if (psp_dead)
> @@ -256,7 +288,7 @@ static int tee_submit_cmd(struct psp_tee_device *tee, enum tee_cmd_id cmd_id,
>   	do {
>   		/* Get pointer to ring buffer command entry */
>   		cmd = (struct tee_ring_cmd *)
> -			(tee->rb_mgr.ring_start + tee->rb_mgr.wptr);
> +			(tee->rb_mgr.ring_buf->vaddr + tee->rb_mgr.wptr);
>   
>   		rptr = ioread32(tee->io_regs + tee->vdata->ring_rptr_reg);
>   
> @@ -305,7 +337,7 @@ static int tee_submit_cmd(struct psp_tee_device *tee, enum tee_cmd_id cmd_id,
>   
>   	/* Update local copy of write pointer */
>   	tee->rb_mgr.wptr += sizeof(struct tee_ring_cmd);
> -	if (tee->rb_mgr.wptr >= tee->rb_mgr.ring_size)
> +	if (tee->rb_mgr.wptr >= tee->rb_mgr.ring_buf->size)
>   		tee->rb_mgr.wptr = 0;
>   
>   	/* Trigger interrupt to Trusted OS */
> diff --git a/drivers/crypto/ccp/tee-dev.h b/drivers/crypto/ccp/tee-dev.h
> index 49d26158b71e..9238487ee8bf 100644
> --- a/drivers/crypto/ccp/tee-dev.h
> +++ b/drivers/crypto/ccp/tee-dev.h
> @@ -16,6 +16,7 @@
>   
>   #include <linux/device.h>
>   #include <linux/mutex.h>
> +#include <linux/psp-tee.h>
>   
>   #define TEE_DEFAULT_TIMEOUT		10
>   #define MAX_BUFFER_SIZE			988
> @@ -48,17 +49,13 @@ struct tee_init_ring_cmd {
>   
>   /**
>    * struct ring_buf_manager - Helper structure to manage ring buffer.
> - * @ring_start:  starting address of ring buffer
> - * @ring_size:   size of ring buffer in bytes
> - * @ring_pa:     physical address of ring buffer
>    * @wptr:        index to the last written entry in ring buffer
> + * @ring_buf:    ring buffer allocated using DMA api
>    */
>   struct ring_buf_manager {
>   	struct mutex mutex;	/* synchronizes access to ring buffer */
> -	void *ring_start;
> -	u32 ring_size;
> -	phys_addr_t ring_pa;
>   	u32 wptr;
> +	struct dma_buffer *ring_buf;
>   };
>   
>   struct psp_tee_device {
> diff --git a/include/linux/psp-tee.h b/include/linux/psp-tee.h
> index cb0c95d6d76b..c0fa922f24d4 100644
> --- a/include/linux/psp-tee.h
> +++ b/include/linux/psp-tee.h
> @@ -13,6 +13,7 @@
>   
>   #include <linux/types.h>
>   #include <linux/errno.h>
> +#include <linux/dma-mapping.h>
>   
>   /* This file defines the Trusted Execution Environment (TEE) interface commands
>    * and the API exported by AMD Secure Processor driver to communicate with
> @@ -40,6 +41,20 @@ enum tee_cmd_id {
>   	TEE_CMD_ID_UNMAP_SHARED_MEM,
>   };
>   
> +/**
> + * struct dma_buffer - Structure for a DMA buffer.
> + * @dma:    DMA buffer address
> + * @paddr:  Physical address of DMA buffer
> + * @vaddr:  CPU virtual address of DMA buffer
> + * @size:   Size of DMA buffer in bytes
> + */
> +struct dma_buffer {
> +	dma_addr_t dma;
> +	phys_addr_t paddr;
> +	void *vaddr;
> +	unsigned long size;
> +};
> +
>   #ifdef CONFIG_CRYPTO_DEV_SP_PSP
>   /**
>    * psp_tee_process_cmd() - Process command in Trusted Execution Environment
> @@ -75,6 +90,28 @@ int psp_tee_process_cmd(enum tee_cmd_id cmd_id, void *buf, size_t len,
>    */
>   int psp_check_tee_status(void);
>   
> +/**
> + * psp_tee_alloc_dmabuf() - Allocates memory of requested size and flags using
> + * dma_alloc_coherent() API.
> + *
> + * This function can be used to allocate a shared memory region between the
> + * host and PSP TEE.
> + *
> + * Returns:
> + * non-NULL   a valid pointer to struct dma_buffer
> + * NULL       on failure
> + */
> +struct dma_buffer *psp_tee_alloc_dmabuf(unsigned long size, gfp_t gfp);
> +
> +/**
> + * psp_tee_free_dmabuf() - Deallocates memory using dma_free_coherent() API.
> + *
> + * This function can be used to release shared memory region between host
> + * and PSP TEE.
> + *
> + */
> +void psp_tee_free_dmabuf(struct dma_buffer *dma_buffer);
> +
>   #else /* !CONFIG_CRYPTO_DEV_SP_PSP */
>   
>   static inline int psp_tee_process_cmd(enum tee_cmd_id cmd_id, void *buf,
> @@ -87,5 +124,15 @@ static inline int psp_check_tee_status(void)
>   {
>   	return -ENODEV;
>   }
> +
> +static inline
> +struct dma_buffer *psp_tee_alloc_dmabuf(unsigned long size, gfp_t gfp)
> +{
> +	return NULL;
> +}
> +
> +static inline void psp_tee_free_dmabuf(struct dma_buffer *dma_buffer)
> +{
> +}
>   #endif /* CONFIG_CRYPTO_DEV_SP_PSP */
>   #endif /* __PSP_TEE_H_ */

Powered by blists - more mailing lists