lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [day] [month] [year] [list]
Message-ID: <ef12e2ff-5410-1f4e-e549-d99e37214257@linaro.org>
Date:   Mon, 12 Jun 2023 13:55:26 +0100
From:   Srinivas Kandagatla <srinivas.kandagatla@...aro.org>
To:     Ekansh Gupta <quic_ekangupt@...cinc.com>,
        linux-arm-msm@...r.kernel.org
Cc:     ekangupt@....qualcomm.com, gregkh@...uxfoundation.org,
        linux-kernel@...r.kernel.org, fastrpc.upstream@....qualcomm.com,
        quic_anane@...cinc.com
Subject: Re: [PATCH v1 1/2] misc: fastrpc: Add cached buffer support



On 09/06/2023 11:39, Ekansh Gupta wrote:
> FastRPC driver uses DMA API for region allocation for any buf_alloc
> request and the DMA region is unmap and freed once the usage is
> complete.
> 
> Cached buffer support enables caching of certain types of buffers
> instead of freeing which help in reducing the overhead of DMA API
> call for every buffer allocation request.

Can you quanitfy this argument with some measurments?

what kind of savings/overhead are we seeing with this patch vs without?

--srini
> 
> A list of cached buffer is maintained which will get reused when
> needed and this buffer list will get freed during device release.
> 
> Co-developed-by: Anandu Krishnan E <quic_anane@...cinc.com>
> Signed-off-by: Anandu Krishnan E <quic_anane@...cinc.com>
> Signed-off-by: Ekansh Gupta <quic_ekangupt@...cinc.com>
> ---
>   drivers/misc/fastrpc.c      | 133 +++++++++++++++++++++++++++++++++++++-------
>   include/uapi/misc/fastrpc.h |   8 +++
>   2 files changed, 122 insertions(+), 19 deletions(-)
> 
> diff --git a/drivers/misc/fastrpc.c b/drivers/misc/fastrpc.c
> index 30d4d04..a961a66 100644
> --- a/drivers/misc/fastrpc.c
> +++ b/drivers/misc/fastrpc.c
> @@ -41,6 +41,10 @@
>   #define INIT_FILE_NAMELEN_MAX (128)
>   #define FASTRPC_DEVICE_NAME	"fastrpc"
>   
> +/* Maximum buffers cached in cached buffer list */
> +#define FASTRPC_MAX_CACHED_BUFS (32)
> +#define FASTRPC_MAX_CACHE_BUF_SIZE (8*1024*1024)
> +
>   /* Add memory to static PD pool, protection thru XPU */
>   #define ADSP_MMAP_HEAP_ADDR  4
>   /* MAP static DMA buffer on DSP User PD */
> @@ -195,6 +199,7 @@ struct fastrpc_buf {
>   	struct dma_buf *dmabuf;
>   	struct device *dev;
>   	void *virt;
> +	u32 type;
>   	u64 phys;
>   	u64 size;
>   	/* Lock for dma buf attachments */
> @@ -293,6 +298,7 @@ struct fastrpc_user {
>   	struct list_head maps;
>   	struct list_head pending;
>   	struct list_head mmaps;
> +	struct list_head cached_bufs;
>   
>   	struct fastrpc_channel_ctx *cctx;
>   	struct fastrpc_session_ctx *sctx;
> @@ -300,6 +306,8 @@ struct fastrpc_user {
>   
>   	int tgid;
>   	int pd;
> +	/* total cached buffers */
> +	u32 num_cached_buf;
>   	bool is_secure_dev;
>   	/* Lock for lists */
>   	spinlock_t lock;
> @@ -391,17 +399,95 @@ static int fastrpc_map_lookup(struct fastrpc_user *fl, int fd,
>   	return ret;
>   }
>   
> -static void fastrpc_buf_free(struct fastrpc_buf *buf)
> +static void __fastrpc_buf_free(struct fastrpc_buf *buf)
>   {
>   	dma_free_coherent(buf->dev, buf->size, buf->virt,
>   			  FASTRPC_PHYS(buf->phys));
>   	kfree(buf);
>   }
>   
> +static void fastrpc_cached_buf_list_add(struct fastrpc_buf *buf)
> +{
> +	struct fastrpc_user *fl = buf->fl;
> +
> +	if (buf->size < FASTRPC_MAX_CACHE_BUF_SIZE) {
> +		spin_lock(&fl->lock);
> +		if (fl->num_cached_buf > FASTRPC_MAX_CACHED_BUFS) {
> +			spin_unlock(&fl->lock);
> +			goto skip_buf_cache;
> +		}
> +
> +		list_add_tail(&buf->node, &fl->cached_bufs);
> +		fl->num_cached_buf++;
> +		buf->type = -1;
> +		spin_unlock(&fl->lock);
> +	}
> +	return;
> +
> +skip_buf_cache:
> +	__fastrpc_buf_free(buf);
> +}
> +
> +static void fastrpc_buf_free(struct fastrpc_buf *buf, bool cache)
> +{
> +	if (cache)
> +		fastrpc_cached_buf_list_add(buf);
> +	else
> +		__fastrpc_buf_free(buf);
> +}
> +
> +static inline bool fastrpc_get_cached_buf(struct fastrpc_user *fl,
> +		size_t size, int buf_type, struct fastrpc_buf **obuf)
> +{
> +	bool found = false;
> +	struct fastrpc_buf *buf, *n, *cbuf = NULL;
> +
> +	if (buf_type == USER_BUF)
> +		return found;
> +
> +	/* find the smallest buffer that fits in the cache */
> +	spin_lock(&fl->lock);
> +	list_for_each_entry_safe(buf, n, &fl->cached_bufs, node) {
> +		if (buf->size >= size && (!cbuf || cbuf->size > buf->size))
> +			cbuf = buf;
> +	}
> +	if (cbuf) {
> +		list_del_init(&cbuf->node);
> +		fl->num_cached_buf--;
> +	}
> +	spin_unlock(&fl->lock);
> +	if (cbuf) {
> +		cbuf->type = buf_type;
> +		*obuf = cbuf;
> +		found = true;
> +	}
> +
> +	return found;
> +}
> +
> +static void fastrpc_cached_buf_list_free(struct fastrpc_user *fl)
> +{
> +	struct fastrpc_buf *buf, *n, *free;
> +
> +	do {
> +		free = NULL;
> +		spin_lock(&fl->lock);
> +		list_for_each_entry_safe(buf, n, &fl->cached_bufs, node) {
> +			list_del(&buf->node);
> +			fl->num_cached_buf--;
> +			free = buf;
> +			break;
> +		}
> +		spin_unlock(&fl->lock);
> +		if (free)
> +			fastrpc_buf_free(free, false);
> +	} while (free);
> +}
> +
>   static int __fastrpc_buf_alloc(struct fastrpc_user *fl, struct device *dev,
>   			     u64 size, struct fastrpc_buf **obuf)
>   {
> -	struct fastrpc_buf *buf;
> +	struct fastrpc_buf *buf = NULL;
>   
>   	buf = kzalloc(sizeof(*buf), GFP_KERNEL);
>   	if (!buf)
> @@ -432,16 +518,23 @@ static int __fastrpc_buf_alloc(struct fastrpc_user *fl, struct device *dev,
>   }
>   
>   static int fastrpc_buf_alloc(struct fastrpc_user *fl, struct device *dev,
> -			     u64 size, struct fastrpc_buf **obuf)
> +			     u64 size, u32 buf_type, struct fastrpc_buf **obuf)
>   {
>   	int ret;
>   	struct fastrpc_buf *buf;
>   
> +	if (fastrpc_get_cached_buf(fl, size, buf_type, obuf))
> +		return 0;
>   	ret = __fastrpc_buf_alloc(fl, dev, size, obuf);
> -	if (ret)
> -		return ret;
> +	if (ret == -ENOMEM) {
> +		fastrpc_cached_buf_list_free(fl);
> +		ret = __fastrpc_buf_alloc(fl, dev, size, obuf);
> +		if (ret)
> +			return ret;
> +	}
>   
>   	buf = *obuf;
> +	buf->type = buf_type;
>   
>   	if (fl->sctx && fl->sctx->sid)
>   		buf->phys += ((u64)fl->sctx->sid << 32);
> @@ -490,7 +583,7 @@ static void fastrpc_context_free(struct kref *ref)
>   		fastrpc_map_put(ctx->maps[i]);
>   
>   	if (ctx->buf)
> -		fastrpc_buf_free(ctx->buf);
> +		fastrpc_buf_free(ctx->buf, true);
>   
>   	spin_lock_irqsave(&cctx->lock, flags);
>   	idr_remove(&cctx->ctx_idr, ctx->ctxid >> 4);
> @@ -674,7 +767,7 @@ static void fastrpc_release(struct dma_buf *dmabuf)
>   {
>   	struct fastrpc_buf *buffer = dmabuf->priv;
>   
> -	fastrpc_buf_free(buffer);
> +	fastrpc_buf_free(buffer, false);
>   }
>   
>   static int fastrpc_dma_buf_attach(struct dma_buf *dmabuf,
> @@ -951,7 +1044,7 @@ static int fastrpc_get_args(u32 kernel, struct fastrpc_invoke_ctx *ctx)
>   
>   	ctx->msg_sz = pkt_size;
>   
> -	err = fastrpc_buf_alloc(ctx->fl, dev, pkt_size, &ctx->buf);
> +	err = fastrpc_buf_alloc(ctx->fl, dev, pkt_size, METADATA_BUF, &ctx->buf);
>   	if (err)
>   		return err;
>   
> @@ -1334,7 +1427,7 @@ static int fastrpc_init_create_static_process(struct fastrpc_user *fl,
>   				fl->cctx->remote_heap->phys, fl->cctx->remote_heap->size, err);
>   	}
>   err_map:
> -	fastrpc_buf_free(fl->cctx->remote_heap);
> +	fastrpc_buf_free(fl->cctx->remote_heap, false);
>   err_name:
>   	kfree(name);
>   err:
> @@ -1403,7 +1496,7 @@ static int fastrpc_init_create_process(struct fastrpc_user *fl,
>   	memlen = ALIGN(max(INIT_FILELEN_MAX, (int)init.filelen * 4),
>   		       1024 * 1024);
>   	err = fastrpc_buf_alloc(fl, fl->sctx->dev, memlen,
> -				&imem);
> +				INITMEM_BUF, &imem);
>   	if (err)
>   		goto err_alloc;
>   
> @@ -1450,7 +1543,7 @@ static int fastrpc_init_create_process(struct fastrpc_user *fl,
>   
>   err_invoke:
>   	fl->init_mem = NULL;
> -	fastrpc_buf_free(imem);
> +	fastrpc_buf_free(imem, false);
>   err_alloc:
>   	fastrpc_map_put(map);
>   err:
> @@ -1521,7 +1614,7 @@ static int fastrpc_device_release(struct inode *inode, struct file *file)
>   	spin_unlock_irqrestore(&cctx->lock, flags);
>   
>   	if (fl->init_mem)
> -		fastrpc_buf_free(fl->init_mem);
> +		fastrpc_buf_free(fl->init_mem, false);
>   
>   	list_for_each_entry_safe(ctx, n, &fl->pending, node) {
>   		list_del(&ctx->node);
> @@ -1533,9 +1626,10 @@ static int fastrpc_device_release(struct inode *inode, struct file *file)
>   
>   	list_for_each_entry_safe(buf, b, &fl->mmaps, node) {
>   		list_del(&buf->node);
> -		fastrpc_buf_free(buf);
> +		fastrpc_buf_free(buf, false);
>   	}
>   
> +	fastrpc_cached_buf_list_free(fl);
>   	fastrpc_session_free(cctx, fl->sctx);
>   	fastrpc_channel_ctx_put(cctx);
>   
> @@ -1570,6 +1664,7 @@ static int fastrpc_device_open(struct inode *inode, struct file *filp)
>   	INIT_LIST_HEAD(&fl->maps);
>   	INIT_LIST_HEAD(&fl->mmaps);
>   	INIT_LIST_HEAD(&fl->user);
> +	INIT_LIST_HEAD(&fl->cached_bufs);
>   	fl->tgid = current->tgid;
>   	fl->cctx = cctx;
>   	fl->is_secure_dev = fdevice->secure;
> @@ -1600,7 +1695,7 @@ static int fastrpc_dmabuf_alloc(struct fastrpc_user *fl, char __user *argp)
>   	if (copy_from_user(&bp, argp, sizeof(bp)))
>   		return -EFAULT;
>   
> -	err = fastrpc_buf_alloc(fl, fl->sctx->dev, bp.size, &buf);
> +	err = fastrpc_buf_alloc(fl, fl->sctx->dev, bp.size, USER_BUF, &buf);
>   	if (err)
>   		return err;
>   	exp_info.ops = &fastrpc_dma_buf_ops;
> @@ -1610,7 +1705,7 @@ static int fastrpc_dmabuf_alloc(struct fastrpc_user *fl, char __user *argp)
>   	buf->dmabuf = dma_buf_export(&exp_info);
>   	if (IS_ERR(buf->dmabuf)) {
>   		err = PTR_ERR(buf->dmabuf);
> -		fastrpc_buf_free(buf);
> +		fastrpc_buf_free(buf, false);
>   		return err;
>   	}
>   
> @@ -1805,7 +1900,7 @@ static int fastrpc_req_munmap_impl(struct fastrpc_user *fl, struct fastrpc_buf *
>   		spin_lock(&fl->lock);
>   		list_del(&buf->node);
>   		spin_unlock(&fl->lock);
> -		fastrpc_buf_free(buf);
> +		fastrpc_buf_free(buf, false);
>   	} else {
>   		dev_err(dev, "unmmap\tpt 0x%09lx ERROR\n", buf->raddr);
>   	}
> @@ -1866,7 +1961,7 @@ static int fastrpc_req_mmap(struct fastrpc_user *fl, char __user *argp)
>   		return -EINVAL;
>   	}
>   
> -	err = fastrpc_buf_alloc(fl, fl->sctx->dev, req.size, &buf);
> +	err = fastrpc_buf_alloc(fl, fl->sctx->dev, req.size, USER_BUF, &buf);
>   	if (err) {
>   		dev_err(dev, "failed to allocate buffer\n");
>   		return err;
> @@ -1935,7 +2030,7 @@ static int fastrpc_req_mmap(struct fastrpc_user *fl, char __user *argp)
>   err_assign:
>   	fastrpc_req_munmap_impl(fl, buf);
>   err_invoke:
> -	fastrpc_buf_free(buf);
> +	fastrpc_buf_free(buf, false);
>   
>   	return err;
>   }
> @@ -2380,7 +2475,7 @@ static void fastrpc_rpmsg_remove(struct rpmsg_device *rpdev)
>   		list_del(&buf->node);
>   
>   	if (cctx->remote_heap)
> -		fastrpc_buf_free(cctx->remote_heap);
> +		fastrpc_buf_free(cctx->remote_heap, false);
>   
>   	of_platform_depopulate(&rpdev->dev);
>   
> diff --git a/include/uapi/misc/fastrpc.h b/include/uapi/misc/fastrpc.h
> index f33d914..91c7c4f 100644
> --- a/include/uapi/misc/fastrpc.h
> +++ b/include/uapi/misc/fastrpc.h
> @@ -64,6 +64,14 @@ enum fastrpc_proc_attr {
>   	FASTRPC_MODE_PRIVILEGED		= (1 << 6),
>   };
>   
> + /* Types of fastrpc DMA bufs sent to DSP */
> + enum fastrpc_buf_type {
> +	METADATA_BUF,
> +	COPYDATA_BUF,
> +	INITMEM_BUF,
> +	USER_BUF,
> +};
> +
>   /* Fastrpc attribute for memory protection of buffers */
>   #define FASTRPC_ATTR_SECUREMAP	(1)
>   

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ