lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <49295da9-82d4-45a0-a2a4-fdaa6600c70d@quicinc.com>
Date: Wed, 29 Jan 2025 11:12:16 +0530
From: Ekansh Gupta <quic_ekangupt@...cinc.com>
To: Dmitry Baryshkov <dmitry.baryshkov@...aro.org>
CC: <srinivas.kandagatla@...aro.org>, <linux-arm-msm@...r.kernel.org>,
        <gregkh@...uxfoundation.org>, <quic_bkumar@...cinc.com>,
        <linux-kernel@...r.kernel.org>, <quic_chennak@...cinc.com>,
        <dri-devel@...ts.freedesktop.org>, <arnd@...db.de>
Subject: Re: [PATCH v2 4/5] misc: fastrpc: Add polling mode support for
 fastRPC driver




On 1/29/2025 4:59 AM, Dmitry Baryshkov wrote:
> On Mon, Jan 27, 2025 at 10:12:38AM +0530, Ekansh Gupta wrote:
>> For any remote call to DSP, after sending an invocation message,
>> fastRPC driver waits for glink response and during this time the
>> CPU can go into low power modes. Adding a polling mode support
>> with which fastRPC driver will poll continuously on a memory
>> after sending a message to remote subsystem which will eliminate
>> CPU wakeup and scheduling latencies and reduce fastRPC overhead.
>> With this change, DSP always sends a glink response which will
>> get ignored if polling mode didn't time out.
> Is there a chance to implement actual async I/O protocol with the help
> of the poll() call instead of hiding the polling / wait inside the
> invoke2?

This design is based on the implementation on DSP firmware as of today:
Call flow: https://github.com/quic-ekangupt/fastrpc/blob/invokev2/Docs/invoke_v2.md#5-polling-mode

Can you please give some reference to the async I/O protocol that you've
suggested? I can check if it can be implemented here.

--ekansh

>
>> Signed-off-by: Ekansh Gupta <quic_ekangupt@...cinc.com>
>> ---
>>  drivers/misc/fastrpc.c      | 122 +++++++++++++++++++++++++++++++++---
>>  include/uapi/misc/fastrpc.h |   3 +-
>>  2 files changed, 114 insertions(+), 11 deletions(-)
>>
>> diff --git a/drivers/misc/fastrpc.c b/drivers/misc/fastrpc.c
>> index cfacee0dded5..257a741af115 100644
>> --- a/drivers/misc/fastrpc.c
>> +++ b/drivers/misc/fastrpc.c
>> @@ -19,6 +19,7 @@
>>  #include <linux/rpmsg.h>
>>  #include <linux/scatterlist.h>
>>  #include <linux/slab.h>
>> +#include <linux/delay.h>
>>  #include <linux/firmware/qcom/qcom_scm.h>
>>  #include <uapi/misc/fastrpc.h>
>>  #include <linux/of_reserved_mem.h>
>> @@ -38,6 +39,7 @@
>>  #define FASTRPC_CTX_MAX (256)
>>  #define FASTRPC_INIT_HANDLE	1
>>  #define FASTRPC_DSP_UTILITIES_HANDLE	2
>> +#define FASTRPC_MAX_STATIC_HANDLE (20)
>>  #define FASTRPC_CTXID_MASK (0xFF0)
>>  #define INIT_FILELEN_MAX (2 * 1024 * 1024)
>>  #define INIT_FILE_NAMELEN_MAX (128)
>> @@ -106,6 +108,19 @@
>>  
>>  #define miscdev_to_fdevice(d) container_of(d, struct fastrpc_device, miscdev)
>>  
>> +/* Poll response number from remote processor for call completion */
>> +#define FASTRPC_POLL_RESPONSE (0xdecaf)
>> +/* timeout in us for polling until memory barrier */
>> +#define FASTRPC_POLL_TIME_MEM_UPDATE (500)
>> +
>> +/* Response types supported for RPC calls */
>> +enum fastrpc_response_flags {
>> +	/* normal job completion glink response */
>> +	NORMAL_RESPONSE = 0,
>> +	/* process updates poll memory instead of glink response */
>> +	POLL_MODE = 1,
>> +};
>> +
>>  static const char *domains[FASTRPC_DEV_MAX] = { "adsp", "mdsp",
>>  						"sdsp", "cdsp", "cdsp1" };
>>  struct fastrpc_phy_page {
>> @@ -238,9 +253,16 @@ struct fastrpc_invoke_ctx {
>>  	u32 sc;
>>  	u64 *fdlist;
>>  	u32 *crclist;
>> +	u32 *poll;
>>  	void __user *crc;
>>  	u64 ctxid;
>>  	u64 msg_sz;
>> +	/* Threads poll for specified timeout and fall back to glink wait */
>> +	u64 poll_timeout;
>> +	/* work done status flag */
>> +	bool is_work_done;
>> +	/* response flags from remote processor */
>> +	enum fastrpc_response_flags rsp_flags;
>>  	struct kref refcount;
>>  	struct list_head node; /* list of ctxs */
>>  	struct completion work;
>> @@ -258,6 +280,7 @@ struct fastrpc_invoke_ctx {
>>  struct fastrpc_ctx_args {
>>  	struct fastrpc_invoke_args *args;
>>  	void __user *crc;
>> +	u64 poll_timeout;
>>  };
>>  
>>  struct fastrpc_session_ctx {
>> @@ -619,11 +642,14 @@ static struct fastrpc_invoke_ctx *fastrpc_context_alloc(
>>  	fastrpc_channel_ctx_get(cctx);
>>  
>>  	ctx->crc = cargs->crc;
>> +	ctx->poll_timeout = cargs->poll_timeout;
>>  	ctx->sc = sc;
>>  	ctx->retval = -1;
>>  	ctx->pid = current->pid;
>>  	ctx->client_id = user->client_id;
>>  	ctx->cctx = cctx;
>> +	ctx->rsp_flags = NORMAL_RESPONSE;
>> +	ctx->is_work_done = false;
>>  	init_completion(&ctx->work);
>>  	INIT_WORK(&ctx->put_work, fastrpc_context_put_wq);
>>  
>> @@ -882,7 +908,8 @@ static int fastrpc_get_meta_size(struct fastrpc_invoke_ctx *ctx)
>>  		sizeof(struct fastrpc_invoke_buf) +
>>  		sizeof(struct fastrpc_phy_page)) * ctx->nscalars +
>>  		sizeof(u64) * FASTRPC_MAX_FDLIST +
>> -		sizeof(u32) * FASTRPC_MAX_CRCLIST;
>> +		sizeof(u32) * FASTRPC_MAX_CRCLIST +
>> +		sizeof(u32);
>>  
>>  	return size;
>>  }
>> @@ -975,6 +1002,8 @@ static int fastrpc_get_args(u32 kernel, struct fastrpc_invoke_ctx *ctx)
>>  	pages = fastrpc_phy_page_start(list, ctx->nscalars);
>>  	ctx->fdlist = (u64 *)(pages + ctx->nscalars);
>>  	ctx->crclist = (u32 *)(ctx->fdlist + FASTRPC_MAX_FDLIST);
>> +	ctx->poll = (u32 *)(ctx->crclist + FASTRPC_MAX_CRCLIST);
>> +
>>  	args = (uintptr_t)ctx->buf->virt + metalen;
>>  	rlen = pkt_size - metalen;
>>  	ctx->rpra = rpra;
>> @@ -1145,6 +1174,72 @@ static int fastrpc_invoke_send(struct fastrpc_session_ctx *sctx,
>>  
>>  }
>>  
>> +static int poll_for_remote_response(struct fastrpc_invoke_ctx *ctx, u64 timeout)
>> +{
>> +	int err = -EIO, i, j;
>> +
>> +	/* poll on memory for DSP response. Return failure on timeout */
>> +	for (i = 0, j = 0; i < timeout; i++, j++) {
>> +		if (*ctx->poll == FASTRPC_POLL_RESPONSE) {
>> +			err = 0;
>> +			ctx->is_work_done = true;
>> +			ctx->retval = 0;
>> +			break;
>> +		}
>> +		if (j == FASTRPC_POLL_TIME_MEM_UPDATE) {
>> +			/* make sure that all poll memory writes by DSP are seen by CPU */
>> +			dma_rmb();
>> +			j = 0;
>> +		}
>> +		udelay(1);
>> +	}
>> +	return err;
>> +}
>> +
>> +static inline int fastrpc_wait_for_response(struct fastrpc_invoke_ctx *ctx,
>> +						u32 kernel)
>> +{
>> +	int err = 0;
>> +
>> +	if (kernel) {
>> +		if (!wait_for_completion_timeout(&ctx->work, 10 * HZ))
>> +			err = -ETIMEDOUT;
>> +	} else {
>> +		err = wait_for_completion_interruptible(&ctx->work);
>> +	}
>> +
>> +	return err;
>> +}
>> +
>> +static int fastrpc_wait_for_completion(struct fastrpc_invoke_ctx *ctx,
>> +					u32 kernel)
>> +{
>> +	int err;
>> +
>> +	do {
>> +		switch (ctx->rsp_flags) {
>> +		case NORMAL_RESPONSE:
>> +			err = fastrpc_wait_for_response(ctx, kernel);
>> +			if (err || ctx->is_work_done)
>> +				return err;
>> +			break;
>> +		case POLL_MODE:
>> +			err = poll_for_remote_response(ctx, ctx->poll_timeout);
>> +			/* If polling timed out, move to normal response mode */
>> +			if (err)
>> +				ctx->rsp_flags = NORMAL_RESPONSE;
>> +			break;
>> +		default:
>> +			err = -EBADR;
>> +			dev_dbg(ctx->fl->sctx->dev,
>> +				"unsupported response type:0x%x\n", ctx->rsp_flags);
>> +			break;
>> +		}
>> +	} while (!ctx->is_work_done);
>> +
>> +	return err;
>> +}
>> +
>>  static int fastrpc_internal_invoke(struct fastrpc_user *fl,  u32 kernel,
>>  				   u32 handle, u32 sc,
>>  				   struct fastrpc_ctx_args *cargs)
>> @@ -1180,16 +1275,20 @@ static int fastrpc_internal_invoke(struct fastrpc_user *fl,  u32 kernel,
>>  	if (err)
>>  		goto bail;
>>  
>> -	if (kernel) {
>> -		if (!wait_for_completion_timeout(&ctx->work, 10 * HZ))
>> -			err = -ETIMEDOUT;
>> -	} else {
>> -		err = wait_for_completion_interruptible(&ctx->work);
>> -	}
>> +	if (ctx->poll_timeout != 0 && handle > FASTRPC_MAX_STATIC_HANDLE
>> +	    && fl->pd == USER_PD)
>> +		ctx->rsp_flags = POLL_MODE;
>>  
>> +	err = fastrpc_wait_for_completion(ctx, kernel);
>>  	if (err)
>>  		goto bail;
>>  
>> +	if (!ctx->is_work_done) {
>> +		err = -ETIMEDOUT;
>> +		dev_dbg(fl->sctx->dev, "Invalid workdone state for handle 0x%x, sc 0x%x\n",
>> +			handle, sc);
>> +		goto bail;
>> +	}
>>  	/* make sure that all memory writes by DSP are seen by CPU */
>>  	dma_rmb();
>>  	/* populate all the output buffers with results */
>> @@ -1769,7 +1868,7 @@ static int fastrpc_invokev2(struct fastrpc_user *fl, char __user *argp)
>>  		return -EFAULT;
>>  
>>  	/* Check if all reserved fields are zero */
>> -	for (i = 0; i < 16; i++) {
>> +	for (i = 0; i < 14; i++) {
>>  		if (inv2.reserved[i] != 0)
>>  			return -EINVAL;
>>  	}
>> @@ -1779,6 +1878,7 @@ static int fastrpc_invokev2(struct fastrpc_user *fl, char __user *argp)
>>  		return -ENOMEM;
>>  
>>  	cargs->crc = (void __user *)(uintptr_t)inv2.crc;
>> +	cargs->poll_timeout = inv2.poll_timeout;
>>  
>>  	err = fastrpc_remote_invoke(fl, &inv2.inv, cargs);
>>  	kfree(cargs);
>> @@ -2581,12 +2681,14 @@ static int fastrpc_rpmsg_callback(struct rpmsg_device *rpdev, void *data,
>>  	ctx = idr_find(&cctx->ctx_idr, ctxid);
>>  	spin_unlock_irqrestore(&cctx->lock, flags);
>>  
>> +	/* Ignore this failure as context returned will be NULL for polling mode */
>>  	if (!ctx) {
>> -		dev_err(&rpdev->dev, "No context ID matches response\n");
>> -		return -ENOENT;
>> +		dev_dbg(&rpdev->dev, "No context ID matches response\n");
>> +		return 0;
>>  	}
>>  
>>  	ctx->retval = rsp->retval;
>> +	ctx->is_work_done = true;
>>  	complete(&ctx->work);
>>  
>>  	/*
>> diff --git a/include/uapi/misc/fastrpc.h b/include/uapi/misc/fastrpc.h
>> index 406b80555d41..1920c537bbbf 100644
>> --- a/include/uapi/misc/fastrpc.h
>> +++ b/include/uapi/misc/fastrpc.h
>> @@ -84,7 +84,8 @@ struct fastrpc_invoke {
>>  struct fastrpc_invoke_v2 {
>>  	struct fastrpc_invoke inv;
>>  	__u64 crc;
>> -	__u32 reserved[16];
>> +	__u64 poll_timeout;
>> +	__u32 reserved[14];
>>  };
>>  
>>  struct fastrpc_init_create {
>> -- 
>> 2.34.1
>>


Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ