Message-ID: <a2495939-0ad0-4b7c-8c77-618e79a1615c@linaro.org>
Date:   Fri, 24 Nov 2023 13:27:45 +0000
From:   Srinivas Kandagatla <srinivas.kandagatla@...aro.org>
To:     Ekansh Gupta <quic_ekangupt@...cinc.com>,
        linux-arm-msm@...r.kernel.org
Cc:     gregkh@...uxfoundation.org, linux-kernel@...r.kernel.org
Subject: Re: [PATCH v3 1/5] misc: fastrpc: Add early wakeup support for
 fastRPC driver

Thanks Ekansh for the patch,

On 21/11/2023 11:24, Ekansh Gupta wrote:
> CPU wakeup and context switch latency account for a significant part
> of the FastRPC overhead for remote calls. With this change, the DSP
> sends an early completion signal to the CPU, and the FastRPC driver
> detects the early signal on the given context and starts polling on
> memory for the actual completion. Several response flags are added to
> support a DSP user early hint of the approximate completion time, an
> early response from the DSP user so the CPU wakes up and polls on
> memory for the actual completion, and a complete signal, sent by the
> DSP user in case of a timeout after the early response.
> 
> Signed-off-by: Ekansh Gupta <quic_ekangupt@...cinc.com>
> ---
> Changes in v3:
>    - Rebase the patch to latest kernel version
> 
>   drivers/misc/fastrpc.c | 265 +++++++++++++++++++++++++++++++++++++++--
>   1 file changed, 252 insertions(+), 13 deletions(-)
> 
> diff --git a/drivers/misc/fastrpc.c b/drivers/misc/fastrpc.c
> index 8e77beb3a693..6b6ac3e3328d 100644
> --- a/drivers/misc/fastrpc.c
> +++ b/drivers/misc/fastrpc.c
> @@ -106,6 +106,19 @@
>   #define USER_PD		(1)
>   #define SENSORS_PD	(2)
>   
> +#define FASTRPC_RSP_VERSION2 2
> +/* Early wake up poll completion number received from remoteproc */
> +#define FASTRPC_EARLY_WAKEUP_POLL (0xabbccdde)
> +/* timeout in us for polling until memory barrier */
> +#define FASTRPC_POLL_TIME_MEM_UPDATE (500)
> +/* timeout in us for busy polling after early response from remoteproc */
> +#define FASTRPC_POLL_TIME (4000)
> +/* timeout in us for polling completion signal after user early hint */
> +#define FASTRPC_USER_EARLY_HINT_TIMEOUT (500)
> +/* CPU feature information to DSP */
> +#define FASTRPC_CPUINFO_DEFAULT (0)
> +#define FASTRPC_CPUINFO_EARLY_WAKEUP (1)
> +
>   #define miscdev_to_fdevice(d) container_of(d, struct fastrpc_device, miscdev)
>   
>   #define PERF_END ((void)0)
> @@ -129,6 +142,15 @@
>   			(uint64_t *)(perf_ptr + offset)\
>   				: (uint64_t *)NULL) : (uint64_t *)NULL)
>   

Can you add some documentation for these flags?
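Something like the kernel-doc below on the enum would already help (just a
sketch; the descriptions are my guess from the commit message, please
correct/extend):

/**
 * enum fastrpc_response_flags - response types sent by the DSP
 * @NORMAL_RESPONSE:   job completed, return value is final
 * @EARLY_RESPONSE:    early wakeup; poll on memory for actual completion
 * @USER_EARLY_SIGNAL: DSP user hint of approximate completion time
 * @COMPLETE_SIGNAL:   completion sent after an early response timed out
 * @STATUS_RESPONSE:   ?
 * @POLL_MODE:         ?
 */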

> +enum fastrpc_response_flags {
> +	NORMAL_RESPONSE = 0,
> +	EARLY_RESPONSE = 1,
> +	USER_EARLY_SIGNAL = 2,
> +	COMPLETE_SIGNAL = 3,
> +	STATUS_RESPONSE = 4,
> +	POLL_MODE = 5,
> +};
> +
>   static const char *domains[FASTRPC_DEV_MAX] = { "adsp", "mdsp",
>   						"sdsp", "cdsp"};
>   struct fastrpc_phy_page {
> @@ -206,6 +228,14 @@ struct fastrpc_invoke_rsp {
>   	int retval;		/* invoke return value */
>   };
>   
> +struct fastrpc_invoke_rspv2 {
> +	u64 ctx;		/* invoke caller context */
> +	int retval;		/* invoke return value */
> +	u32 flags;		/* early response flags */
> +	u32 early_wake_time;	/* user hint in us */
> +	u32 version;		/* version number */
> +};
> +
>   struct fastrpc_buf_overlap {
>   	u64 start;
>   	u64 end;
> @@ -272,11 +302,17 @@ struct fastrpc_invoke_ctx {
>   	int pid;
>   	int tgid;
>   	u32 sc;
> +	/* user hint of completion time in us */
> +	u32 early_wake_time;
>   	u32 *crc;
>   	u64 *perf_kernel;
>   	u64 *perf_dsp;
>   	u64 ctxid;
>   	u64 msg_sz;
> +	/* work done status flag */
> +	bool is_work_done;
> +	/* response flags from remote processor */
> +	enum fastrpc_response_flags rsp_flags;
>   	struct kref refcount;
>   	struct list_head node; /* list of ctxs */
>   	struct completion work;
> @@ -321,7 +357,9 @@ struct fastrpc_channel_ctx {
>   	struct list_head invoke_interrupted_mmaps;
>   	bool secure;
>   	bool unsigned_support;
> +	bool cpuinfo_status;
>   	u64 dma_mask;
> +	u64 cpuinfo_todsp;
>   };
>   
>   struct fastrpc_device {
> @@ -352,13 +390,21 @@ struct fastrpc_user {
>   	struct mutex mutex;
>   };
>   
> +struct fastrpc_ctrl_latency {
> +	u32 enable;	/* latency control enable */
> +	u32 latency;	/* latency request in us */
> +};
> +

What is this struct? I am not seeing it used anywhere in this patch.


>   struct fastrpc_ctrl_smmu {
>   	u32 sharedcb;	/* Set to SMMU share context bank */
>   };
> 
>   struct fastrpc_internal_control {
>   	u32 req;
> -	struct fastrpc_ctrl_smmu smmu;
> +	union {
> +		struct fastrpc_ctrl_latency lp;
> +		struct fastrpc_ctrl_smmu smmu;
> +	};
>   };
>   
>   static inline int64_t getnstimediff(struct timespec64 *start)
> @@ -692,6 +738,8 @@ static struct fastrpc_invoke_ctx *fastrpc_context_alloc(
>   	ctx->pid = current->pid;
>   	ctx->tgid = user->tgid;
>   	ctx->cctx = cctx;
> +	ctx->rsp_flags = NORMAL_RESPONSE;
> +	ctx->is_work_done = false;
>   	init_completion(&ctx->work);
>   	INIT_WORK(&ctx->put_work, fastrpc_context_put_wq);
>   
> @@ -1302,6 +1350,115 @@ static int fastrpc_invoke_send(struct fastrpc_session_ctx *sctx,
>   
>   }
>   
> +static int poll_for_remote_response(struct fastrpc_invoke_ctx *ctx, u32 timeout)
> +{
> +	int err = -EIO, ii = 0, jj = 0;

The variable naming is really odd.
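Something along these lines would be easier to follow (just a suggestion,
untested):

	u32 elapsed_us, since_last_rmb_us;

	for (elapsed_us = 0, since_last_rmb_us = 0; elapsed_us < timeout;
	     elapsed_us++, since_last_rmb_us++) {
		...
	}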

> +	u32 sc = ctx->sc;
> +	struct fastrpc_invoke_buf *list;
> +	struct fastrpc_phy_page *pages;
> +	u64 *fdlist = NULL;
> +	u32 *crclist = NULL, *poll = NULL;
> +	unsigned int inbufs, outbufs, handles;
> +
> +	/* calculate poll memory location */
> +	inbufs = REMOTE_SCALARS_INBUFS(sc);
> +	outbufs = REMOTE_SCALARS_OUTBUFS(sc);
> +	handles = REMOTE_SCALARS_INHANDLES(sc) + REMOTE_SCALARS_OUTHANDLES(sc);
> +	list = fastrpc_invoke_buf_start(ctx->rpra, ctx->nscalars);
> +	pages = fastrpc_phy_page_start(list, ctx->nscalars);
> +	fdlist = (u64 *)(pages + inbufs + outbufs + handles);
> +	crclist = (u32 *)(fdlist + FASTRPC_MAX_FDLIST);
> +	poll = (u32 *)(crclist + FASTRPC_MAX_CRCLIST);
> +
> +	/* poll on memory for DSP response. Return failure on timeout */
> +	for (ii = 0, jj = 0; ii < timeout; ii++, jj++) {
> +		if (*poll == FASTRPC_EARLY_WAKEUP_POLL) {
> +			/* Remote processor sent early response */
> +			err = 0;
> +			break;
> +		}
> +		if (jj == FASTRPC_POLL_TIME_MEM_UPDATE) {
> +			/* Wait for DSP to finish updating poll memory */
> +			rmb();

Could you add a comment on why we need this memory barrier here?
Can we use dma_rmb()?
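Something along these lines, assuming the poll word lives in the shared
(DMA-coherent) buffer that only the DSP writes:

		if (jj == FASTRPC_POLL_TIME_MEM_UPDATE) {
			/*
			 * The poll word is updated by the DSP through the
			 * shared buffer; make sure we observe its latest
			 * value before polling for another interval.
			 */
			dma_rmb();
			jj = 0;
		}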


> +			jj = 0;
> +		}
> +		udelay(1);
> +	}
> +	return err;
> +}
> +
> +static inline int fastrpc_wait_for_response(struct fastrpc_invoke_ctx *ctx,
> +						u32 kernel)
> +{
> +	int interrupted = 0;
> +
> +	if (kernel)
> +		wait_for_completion(&ctx->work);
> +	else
> +		interrupted = wait_for_completion_interruptible(&ctx->work);
> +
> +	return interrupted;
> +}
> +
> +static void fastrpc_wait_for_completion(struct fastrpc_invoke_ctx *ctx,
> +			int *ptr_interrupted, u32 kernel)
> +{
> +	int err = 0, jj = 0;
Unnecessary initializations.
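i.e. just:

	int err, jj;

both are assigned before they are read.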

> +	bool wait_resp = false;
> +	u32 wTimeout = FASTRPC_USER_EARLY_HINT_TIMEOUT;
> +	u32 wakeTime = ctx->early_wake_time;
> +
> +	do {
> +		switch (ctx->rsp_flags) {
> +		/* try polling on completion with timeout */
> +		case USER_EARLY_SIGNAL:
> +			/* try wait if completion time is less than timeout */
> +			/* disable preempt to avoid context switch latency */
Please use the proper multi-line comment style here.
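i.e.:

			/*
			 * Try to wait if the hinted completion time is less
			 * than the timeout; disable preemption to avoid
			 * context switch latency.
			 */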

> +			preempt_disable();
> +			jj = 0;
> +			wait_resp = false;
> +			for (; wakeTime < wTimeout && jj < wTimeout; jj++) {
> +				wait_resp = try_wait_for_completion(&ctx->work);
> +				if (wait_resp)
> +					break;
> +				udelay(1);
> +			}
> +			preempt_enable();
> +			if (!wait_resp) {
> +				*ptr_interrupted = fastrpc_wait_for_response(ctx, kernel);
> +				if (*ptr_interrupted || ctx->is_work_done)
> +					return;
> +			}
> +			break;
> +		/* busy poll on memory for actual job done */
> +		case EARLY_RESPONSE:
> +			err = poll_for_remote_response(ctx, FASTRPC_POLL_TIME);
> +			/* Mark job done if poll on memory successful */
> +			/* Wait for completion if poll on memory timeout */
> +			if (!err) {
> +				ctx->is_work_done = true;
> +				return;
> +			}
> +			if (!ctx->is_work_done) {
> +				*ptr_interrupted = fastrpc_wait_for_response(ctx, kernel);
> +				if (*ptr_interrupted || ctx->is_work_done)
> +					return;
> +			}
> +			break;
> +		case COMPLETE_SIGNAL:
> +		case NORMAL_RESPONSE:
> +			*ptr_interrupted = fastrpc_wait_for_response(ctx, kernel);
> +			if (*ptr_interrupted || ctx->is_work_done)
> +				return;
> +			break;
> +		default:
> +			*ptr_interrupted = -EBADR;
> +			dev_err(ctx->fl->sctx->dev, "unsupported response type:0x%x\n", ctx->rsp_flags);
> +			break;
> +		}
> +	} while (!ctx->is_work_done);
> +}
> +
>   static void fastrpc_update_invoke_count(u32 handle, u64 *perf_counter,
>   					struct timespec64 *invoket)
>   {
> @@ -1325,7 +1482,7 @@ static int fastrpc_internal_invoke(struct fastrpc_user *fl,  u32 kernel,
>   	struct fastrpc_invoke *inv = &invoke->inv;
>   	u32 handle, sc;
>   	u64 *perf_counter = NULL;
> -	int err = 0, perferr = 0;
> +	int err = 0, perferr = 0, interrupted = 0;
>   	struct timespec64 invoket = {0};
>   
>   	if (fl->profile)
> @@ -1374,15 +1531,18 @@ static int fastrpc_internal_invoke(struct fastrpc_user *fl,  u32 kernel,
>   	PERF_END);
>   
>   wait:
> -	if (kernel) {
> -		if (!wait_for_completion_timeout(&ctx->work, 10 * HZ))
> -			err = -ETIMEDOUT;
> -	} else {
> -		err = wait_for_completion_interruptible(&ctx->work);
> +	fastrpc_wait_for_completion(ctx, &interrupted, kernel);
> +	if (interrupted != 0) {
> +		err = interrupted;
> +		goto bail;
>   	}
>   
> -	if (err)
> +	if (!ctx->is_work_done) {
> +		err = -ETIMEDOUT;
> +		dev_err(fl->sctx->dev, "Error: Invalid workdone state for handle 0x%x, sc 0x%x\n",
> +			handle, sc);
>   		goto bail;
> +	}
>   
>   	/* make sure that all memory writes by DSP are seen by CPU */
>   	dma_rmb();
> @@ -2056,6 +2216,36 @@ static int fastrpc_get_info_from_kernel(struct fastrpc_ioctl_capability *cap,
>   	return 0;
>   }
>   
> +static int fastrpc_send_cpuinfo_to_dsp(struct fastrpc_user *fl)
> +{
> +	int err = 0;
> +	u64 cpuinfo = 0;
> +	struct fastrpc_invoke_args args[1];
> +	struct fastrpc_enhanced_invoke ioctl;
> +
> +	if (!fl)
> +		return -EBADF;
> +
> +	cpuinfo = fl->cctx->cpuinfo_todsp;
> +	/* return success if already updated to remote processor */
> +	if (fl->cctx->cpuinfo_status)
> +		return 0;
> +
> +	args[0].ptr = (u64)(uintptr_t)&cpuinfo;
> +	args[0].length = sizeof(cpuinfo);
> +	args[0].fd = -1;
> +
> +	ioctl.inv.handle = FASTRPC_DSP_UTILITIES_HANDLE;
> +	ioctl.inv.sc = FASTRPC_SCALARS(1, 1, 0);
> +	ioctl.inv.args = (__u64)args;
> +
> +	err = fastrpc_internal_invoke(fl, true, &ioctl);
> +	if (!err)
> +		fl->cctx->cpuinfo_status = true;
> +
> +	return err;
> +}
> +
>   static int fastrpc_get_dsp_info(struct fastrpc_user *fl, char __user *argp)
>   {
>   	struct fastrpc_ioctl_capability cap = {0};
> @@ -2407,6 +2597,8 @@ static long fastrpc_device_ioctl(struct file *file, unsigned int cmd,
>   		break;
>   	case FASTRPC_IOCTL_INIT_ATTACH:
>   		err = fastrpc_init_attach(fl, ROOT_PD);
> +		if (!err)
> +			fastrpc_send_cpuinfo_to_dsp(fl);
>   		break;
>   	case FASTRPC_IOCTL_INIT_ATTACH_SNS:
>   		err = fastrpc_init_attach(fl, SENSORS_PD);
> @@ -2627,6 +2819,7 @@ static int fastrpc_rpmsg_probe(struct rpmsg_device *rpdev)
>   		err = fastrpc_device_register(rdev, data, secure_dsp, domains[domain_id]);
>   		if (err)
>   			goto fdev_error;
> +		data->cpuinfo_todsp = FASTRPC_CPUINFO_DEFAULT;
>   		break;
>   	case CDSP_DOMAIN_ID:
>   		data->unsigned_support = true;
> @@ -2638,6 +2831,7 @@ static int fastrpc_rpmsg_probe(struct rpmsg_device *rpdev)
>   		err = fastrpc_device_register(rdev, data, false, domains[domain_id]);
>   		if (err)
>   			goto fdev_error;
> +		data->cpuinfo_todsp = FASTRPC_CPUINFO_EARLY_WAKEUP;
>   		break;
>   	default:
>   		err = -EINVAL;
> @@ -2680,10 +2874,12 @@ static void fastrpc_notify_users(struct fastrpc_user *user)
>   	spin_lock(&user->lock);
>   	list_for_each_entry(ctx, &user->pending, node) {
>   		ctx->retval = -EPIPE;
> +		ctx->is_work_done = true;
>   		complete(&ctx->work);
>   	}
>   	list_for_each_entry(ctx, &user->interrupted, node) {
>   		ctx->retval = -EPIPE;
> +		ctx->is_work_done = true;
>   		complete(&ctx->work);
>   	}
>   	spin_unlock(&user->lock);
> @@ -2720,31 +2916,74 @@ static void fastrpc_rpmsg_remove(struct rpmsg_device *rpdev)
>   	fastrpc_channel_ctx_put(cctx);
>   }
>   
> +static void fastrpc_notify_user_ctx(struct fastrpc_invoke_ctx *ctx, int retval,
> +		u32 rsp_flags, u32 early_wake_time)
> +{
> +	ctx->retval = retval;
> +	ctx->rsp_flags = (enum fastrpc_response_flags)rsp_flags;
> +	switch (rsp_flags) {
> +	case NORMAL_RESPONSE:
> +	case COMPLETE_SIGNAL:
> +		/* normal and complete response with return value */
> +		ctx->is_work_done = true;
> +		complete(&ctx->work);
> +		break;
> +	case USER_EARLY_SIGNAL:
> +		/* user hint of approximate time of completion */
> +		ctx->early_wake_time = early_wake_time;
> +		break;
> +	case EARLY_RESPONSE:
> +		/* rpc framework early response with return value */
> +		complete(&ctx->work);
> +		break;
> +	default:
> +		break;
> +	}
> +}
> +
>   static int fastrpc_rpmsg_callback(struct rpmsg_device *rpdev, void *data,
>   				  int len, void *priv, u32 addr)
>   {
>   	struct fastrpc_channel_ctx *cctx = dev_get_drvdata(&rpdev->dev);
>   	struct fastrpc_invoke_rsp *rsp = data;
> +	struct fastrpc_invoke_rspv2 *rspv2 = NULL;
>   	struct fastrpc_invoke_ctx *ctx;
>   	unsigned long flags;
>   	unsigned long ctxid;
> +	u32 rsp_flags = 0;
> +	u32 early_wake_time = 0;
>   
>   	if (len < sizeof(*rsp))
>   		return -EINVAL;
>   
> +	if (len >= sizeof(*rspv2)) {
> +		rspv2 = data;
> +		if (rspv2) {
> +			early_wake_time = rspv2->early_wake_time;
> +			rsp_flags = rspv2->flags;
> +		}
> +	}
> +
>   	ctxid = ((rsp->ctx & FASTRPC_CTXID_MASK) >> 4);
>   
>   	spin_lock_irqsave(&cctx->lock, flags);
>   	ctx = idr_find(&cctx->ctx_idr, ctxid);
> -	spin_unlock_irqrestore(&cctx->lock, flags);
>   
>   	if (!ctx) {
> -		dev_err(&rpdev->dev, "No context ID matches response\n");
> -		return -ENOENT;
> +		dev_info(&cctx->rpdev->dev, "Warning: No context ID matches response\n");
> +		spin_unlock_irqrestore(&cctx->lock, flags);
> +		return 0;
>   	}
>   
> -	ctx->retval = rsp->retval;
> -	complete(&ctx->work);
> +	if (rspv2) {
> +		if (rspv2->version != FASTRPC_RSP_VERSION2) {
> +			dev_err(&cctx->rpdev->dev, "Incorrect response version %d\n", rspv2->version);
> +			spin_unlock_irqrestore(&cctx->lock, flags);
> +			return -EINVAL;
> +		}
> +	}
> +	fastrpc_notify_user_ctx(ctx, rsp->retval, rsp_flags, early_wake_time);
> +	spin_unlock_irqrestore(&cctx->lock, flags);
>   
>   	/*
>   	 * The DMA buffer associated with the context cannot be freed in
