lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <3b26b0fc-e2fd-4f9a-a745-43f40c513812@linaro.org>
Date:   Fri, 24 Nov 2023 13:26:46 +0000
From:   Srinivas Kandagatla <srinivas.kandagatla@...aro.org>
To:     Ekansh Gupta <quic_ekangupt@...cinc.com>,
        linux-arm-msm@...r.kernel.org
Cc:     gregkh@...uxfoundation.org, linux-kernel@...r.kernel.org
Subject: Re: [PATCH v7 3/5] misc: fastrpc: Capture kernel and DSP performance
 counters



On 21/11/2023 09:48, Ekansh Gupta wrote:
> Add support to capture kernel performance counters for different
> kernel level operations. These counters collect the information
> for a remote call, and the information is copied to a buffer
> shared by the user.
> 
> Collection of DSP performance counters is also added as part of
> this change. DSP updates the performance information in the
> metadata which is then copied to a buffer passed by the user.
> 
> Signed-off-by: Ekansh Gupta <quic_ekangupt@...cinc.com>
> ---
> Changes in v2:
>    - Fixed compile time warnings
> Changes in v3:
>    - Squashed commits to get proper patch series
> Changes in v7:
>    - Rebase the patch to latest kernel version
> 
>   drivers/misc/fastrpc.c      | 141 ++++++++++++++++++++++++++++++++++--
>   include/uapi/misc/fastrpc.h |  14 ++++
>   2 files changed, 147 insertions(+), 8 deletions(-)
> 
> diff --git a/drivers/misc/fastrpc.c b/drivers/misc/fastrpc.c
> index 55f126c779cb..cbcac0b3d09b 100644
> --- a/drivers/misc/fastrpc.c
> +++ b/drivers/misc/fastrpc.c
> @@ -19,6 +19,7 @@
>   #include <linux/rpmsg.h>
>   #include <linux/scatterlist.h>
>   #include <linux/slab.h>
> +#include <linux/delay.h>
>   #include <linux/firmware/qcom/qcom_scm.h>
>   #include <uapi/misc/fastrpc.h>
>   #include <linux/of_reserved_mem.h>
> @@ -33,6 +34,8 @@
>   #define FASTRPC_ALIGN		128
>   #define FASTRPC_MAX_FDLIST	16
>   #define FASTRPC_MAX_CRCLIST	64
> +#define FASTRPC_KERNEL_PERF_LIST (PERF_KEY_MAX)
> +#define FASTRPC_DSP_PERF_LIST 12
>   #define FASTRPC_PHYS(p)	((p) & 0xffffffff)
>   #define FASTRPC_CTX_MAX (256)
>   #define FASTRPC_INIT_HANDLE	1
> @@ -105,6 +108,27 @@
>   
>   #define miscdev_to_fdevice(d) container_of(d, struct fastrpc_device, miscdev)
>   
> +#define PERF_END ((void)0)
> +
> +#define PERF(enb, cnt, ff) \
> +	{\
> +		struct timespec64 startT = {0};\
> +		uint64_t *counter = cnt;\
> +		if (enb && counter) {\
> +			ktime_get_real_ts64(&startT);\
> +		} \
> +		ff ;\
> +		if (enb && counter) {\
> +			*counter += getnstimediff(&startT);\
> +		} \
> +	}
> +
> +#define GET_COUNTER(perf_ptr, offset)  \
> +	(perf_ptr != NULL ?\
> +		(((offset >= 0) && (offset < PERF_KEY_MAX)) ?\
> +			(uint64_t *)(perf_ptr + offset)\
> +				: (uint64_t *)NULL) : (uint64_t *)NULL)
> +
>   static const char *domains[FASTRPC_DEV_MAX] = { "adsp", "mdsp",
>   						"sdsp", "cdsp"};
>   struct fastrpc_phy_page {
> @@ -228,6 +252,19 @@ struct fastrpc_map {
>   	struct kref refcount;
>   };
>   
> +struct fastrpc_perf {
> +	u64 count;
> +	u64 flush;
> +	u64 map;
> +	u64 copy;
> +	u64 link;
> +	u64 getargs;
> +	u64 putargs;
> +	u64 invargs;
> +	u64 invoke;
> +	u64 tid;
> +};
> +
>   struct fastrpc_invoke_ctx {
>   	int nscalars;
>   	int nbufs;
> @@ -236,6 +273,8 @@ struct fastrpc_invoke_ctx {
>   	int tgid;
>   	u32 sc;
>   	u32 *crc;
> +	u64 *perf_kernel;
> +	u64 *perf_dsp;
>   	u64 ctxid;
>   	u64 msg_sz;
>   	struct kref refcount;
> @@ -250,6 +289,7 @@ struct fastrpc_invoke_ctx {
>   	struct fastrpc_invoke_args *args;
>   	struct fastrpc_buf_overlap *olaps;
>   	struct fastrpc_channel_ctx *cctx;
> +	struct fastrpc_perf *perf;
>   };
>   
>   struct fastrpc_session_ctx {
> @@ -299,6 +339,7 @@ struct fastrpc_user {
>   	struct fastrpc_session_ctx *sctx;
>   	struct fastrpc_buf *init_mem;
>   
> +	u32 profile;
>   	int tgid;
>   	int pd;
>   	bool is_secure_dev;
> @@ -308,6 +349,17 @@ struct fastrpc_user {
>   	struct mutex mutex;
>   };
>   
> +static inline int64_t getnstimediff(struct timespec64 *start)
> +{
> +	int64_t ns;
> +	struct timespec64 ts, b;
> +
> +	ktime_get_real_ts64(&ts);
> +	b = timespec64_sub(ts, *start);
> +	ns = timespec64_to_ns(&b);
> +	return ns;
> +}
> +
>   static void fastrpc_free_map(struct kref *ref)
>   {
>   	struct fastrpc_map *map;
> @@ -493,6 +545,9 @@ static void fastrpc_context_free(struct kref *ref)
>   	if (ctx->buf)
>   		fastrpc_buf_free(ctx->buf);
>   
> +	if (ctx->fl->profile)
> +		kfree(ctx->perf);
> +
>   	spin_lock_irqsave(&cctx->lock, flags);
>   	idr_remove(&cctx->ctx_idr, ctx->ctxid >> 4);
>   	spin_unlock_irqrestore(&cctx->lock, flags);
> @@ -612,6 +667,14 @@ static struct fastrpc_invoke_ctx *fastrpc_context_alloc(
>   	fastrpc_channel_ctx_get(cctx);
>   
>   	ctx->crc = (u32 *)(uintptr_t)invoke->crc;
> +	ctx->perf_dsp = (u64 *)(uintptr_t)invoke->perf_dsp;
> +	ctx->perf_kernel = (u64 *)(uintptr_t)invoke->perf_kernel;
> +	if (ctx->fl->profile) {
> +		ctx->perf = kzalloc(sizeof(*(ctx->perf)), GFP_KERNEL);
> +		if (!ctx->perf)
> +			return ERR_PTR(-ENOMEM);
> +		ctx->perf->tid = ctx->fl->tgid;
> +	}
>   	ctx->sc = sc;
>   	ctx->retval = -1;
>   	ctx->pid = current->pid;
> @@ -875,7 +938,8 @@ static int fastrpc_get_meta_size(struct fastrpc_invoke_ctx *ctx)
>   		sizeof(struct fastrpc_invoke_buf) +
>   		sizeof(struct fastrpc_phy_page)) * ctx->nscalars +
>   		sizeof(u64) * FASTRPC_MAX_FDLIST +
> -		sizeof(u32) * FASTRPC_MAX_CRCLIST;
> +		sizeof(u32) * FASTRPC_MAX_CRCLIST +
> +		sizeof(u32) + sizeof(u64) * FASTRPC_DSP_PERF_LIST;
>   
>   	return size;
>   }
> @@ -942,16 +1006,22 @@ static int fastrpc_get_args(u32 kernel, struct fastrpc_invoke_ctx *ctx)
>   	int inbufs, i, oix, err = 0;
>   	u64 len, rlen, pkt_size;
>   	u64 pg_start, pg_end;
> +	u64 *perf_counter = NULL;
>   	uintptr_t args;
>   	int metalen;
>   
> +	if (ctx->fl->profile)
> +		perf_counter = (u64 *)ctx->perf + PERF_COUNT;
> +
>   	inbufs = REMOTE_SCALARS_INBUFS(ctx->sc);
>   	metalen = fastrpc_get_meta_size(ctx);
>   	pkt_size = fastrpc_get_payload_size(ctx, metalen);
>   
> +	PERF(ctx->fl->profile, GET_COUNTER(perf_counter, PERF_MAP),
>   	err = fastrpc_create_maps(ctx);
>   	if (err)
>   		return err;
> +	PERF_END);
>   
>   	ctx->msg_sz = pkt_size;
>   
> @@ -984,6 +1054,7 @@ static int fastrpc_get_args(u32 kernel, struct fastrpc_invoke_ctx *ctx)
>   		if (ctx->maps[i]) {
>   			struct vm_area_struct *vma = NULL;
>   
> +			PERF(ctx->fl->profile, GET_COUNTER(perf_counter, PERF_MAP),
>   			rpra[i].buf.pv = (u64) ctx->args[i].ptr;
>   			pages[i].addr = ctx->maps[i]->phys;
>   
> @@ -998,9 +1069,9 @@ static int fastrpc_get_args(u32 kernel, struct fastrpc_invoke_ctx *ctx)
>   			pg_end = ((ctx->args[i].ptr + len - 1) & PAGE_MASK) >>
>   				  PAGE_SHIFT;
>   			pages[i].size = (pg_end - pg_start + 1) * PAGE_SIZE;
> -
> +			PERF_END);
>   		} else {
> -
> +			PERF(ctx->fl->profile, GET_COUNTER(perf_counter, PERF_COPY),
>   			if (ctx->olaps[oix].offset == 0) {
>   				rlen -= ALIGN(args, FASTRPC_ALIGN) - args;
>   				args = ALIGN(args, FASTRPC_ALIGN);
> @@ -1022,12 +1093,14 @@ static int fastrpc_get_args(u32 kernel, struct fastrpc_invoke_ctx *ctx)
>   			pages[i].size = (pg_end - pg_start + 1) * PAGE_SIZE;
>   			args = args + mlen;
>   			rlen -= mlen;
> +			PERF_END);
>   		}
>   
>   		if (i < inbufs && !ctx->maps[i]) {
>   			void *dst = (void *)(uintptr_t)rpra[i].buf.pv;
>   			void *src = (void *)(uintptr_t)ctx->args[i].ptr;
>   
> +			PERF(ctx->fl->profile, GET_COUNTER(perf_counter, PERF_COPY),
>   			if (!kernel) {
>   				if (copy_from_user(dst, (void __user *)src,
>   						   len)) {
> @@ -1037,6 +1110,7 @@ static int fastrpc_get_args(u32 kernel, struct fastrpc_invoke_ctx *ctx)
>   			} else {
>   				memcpy(dst, src, len);
>   			}
> +			PERF_END);
>   		}
>   	}
>   
> @@ -1067,9 +1141,9 @@ static int fastrpc_put_args(struct fastrpc_invoke_ctx *ctx,
>   	struct fastrpc_map *mmap = NULL;
>   	struct fastrpc_invoke_buf *list;
>   	struct fastrpc_phy_page *pages;
> -	u64 *fdlist;
> -	u32 *crclist;
> -	int i, inbufs, outbufs, handles;
> +	u64 *fdlist, *perf_dsp_list;
> +	u32 *crclist, *poll;
> +	int i, inbufs, outbufs, handles, perferr;
>   
>   	inbufs = REMOTE_SCALARS_INBUFS(ctx->sc);
>   	outbufs = REMOTE_SCALARS_OUTBUFS(ctx->sc);
> @@ -1078,6 +1152,8 @@ static int fastrpc_put_args(struct fastrpc_invoke_ctx *ctx,
>   	pages = fastrpc_phy_page_start(list, ctx->nscalars);
>   	fdlist = (u64 *)(pages + inbufs + outbufs + handles);
>   	crclist = (u32 *)(fdlist + FASTRPC_MAX_FDLIST);
> +	poll = (u32 *)(crclist + FASTRPC_MAX_CRCLIST);
> +	perf_dsp_list = (u64 *)(poll + 1);
>   
>   	for (i = inbufs; i < ctx->nbufs; ++i) {
>   		if (!ctx->maps[i]) {
> @@ -1103,8 +1179,16 @@ static int fastrpc_put_args(struct fastrpc_invoke_ctx *ctx,
>   	}
>   
>   	if (ctx->crc && crclist && rpra) {
> -		if (copy_to_user((void __user *)ctx->crc, crclist, FASTRPC_MAX_CRCLIST * sizeof(u32)))
> +		if (copy_to_user((void __user *)ctx->crc, crclist,
> +					FASTRPC_MAX_CRCLIST * sizeof(u32))) {
>   			return -EFAULT;
> +		}
> +	}
> +	if (ctx->perf_dsp && perf_dsp_list) {
> +		perferr = copy_to_user((void __user *)ctx->perf_dsp,
> +				perf_dsp_list, FASTRPC_DSP_PERF_LIST * sizeof(u64));
> +		if (perferr)
> +			dev_info(fl->sctx->dev, "Warning: failed to copy perf data %d\n", perferr);
>   	}
>   	return 0;
>   }
> @@ -1141,6 +1225,21 @@ static int fastrpc_invoke_send(struct fastrpc_session_ctx *sctx,
>   
>   }
>   
> +static void fastrpc_update_invoke_count(u32 handle, u64 *perf_counter,
> +					struct timespec64 *invoket)
> +{
> +	u64 *invcount, *count;
> +
> +	invcount = GET_COUNTER(perf_counter, PERF_INVOKE);
> +	if (invcount)
> +		*invcount += getnstimediff(invoket);
> +
> +	count = GET_COUNTER(perf_counter, PERF_COUNT);
> +	if (count)
> +		*count += 1;
> +}
> +
> +
>   static int fastrpc_internal_invoke(struct fastrpc_user *fl,  u32 kernel,
>   				   struct fastrpc_enhanced_invoke *invoke)
>   {
> @@ -1148,7 +1247,12 @@ static int fastrpc_internal_invoke(struct fastrpc_user *fl,  u32 kernel,
>   	struct fastrpc_buf *buf, *b;
>   	struct fastrpc_invoke *inv = &invoke->inv;
>   	u32 handle, sc;
> -	int err = 0;
> +	u64 *perf_counter = NULL;
> +	int err = 0, perferr = 0;
> +	struct timespec64 invoket = {0};
> +
> +	if (fl->profile)
> +		ktime_get_real_ts64(&invoket);
>   
>   	if (!fl->sctx)
>   		return -EINVAL;
> @@ -1167,16 +1271,22 @@ static int fastrpc_internal_invoke(struct fastrpc_user *fl,  u32 kernel,
>   	if (IS_ERR(ctx))
>   		return PTR_ERR(ctx);
>   
> +	if (fl->profile)
> +		perf_counter = (u64 *)ctx->perf + PERF_COUNT;
> +	PERF(fl->profile, GET_COUNTER(perf_counter, PERF_GETARGS),
>   	err = fastrpc_get_args(kernel, ctx);
>   	if (err)
>   		goto bail;
> +	PERF_END);
>   
>   	/* make sure that all CPU memory writes are seen by DSP */
>   	dma_wmb();
> +	PERF(fl->profile, GET_COUNTER(perf_counter, PERF_LINK),
>   	/* Send invoke buffer to remote dsp */
>   	err = fastrpc_invoke_send(fl->sctx, ctx, kernel, handle);
>   	if (err)
>   		goto bail;
> +	PERF_END);
>   
>   	if (kernel) {
>   		if (!wait_for_completion_timeout(&ctx->work, 10 * HZ))
> @@ -1190,10 +1300,12 @@ static int fastrpc_internal_invoke(struct fastrpc_user *fl,  u32 kernel,
>   
>   	/* make sure that all memory writes by DSP are seen by CPU */
>   	dma_rmb();
> +	PERF(fl->profile, GET_COUNTER(perf_counter, PERF_PUTARGS),
>   	/* populate all the output buffers with results */
>   	err = fastrpc_put_args(ctx, kernel);
>   	if (err)
>   		goto bail;
> +	PERF_END);
>   
>   	/* Check the response from remote dsp */
>   	err = ctx->retval;
> @@ -1214,6 +1326,15 @@ static int fastrpc_internal_invoke(struct fastrpc_user *fl,  u32 kernel,
>   			list_del(&buf->node);
>   			list_add_tail(&buf->node, &fl->cctx->invoke_interrupted_mmaps);
>   		}
> +	} else if (ctx) {
> +		if (fl->profile && !err)
> +			fastrpc_update_invoke_count(handle, perf_counter, &invoket);
> +		if (fl->profile && ctx->perf && ctx->perf_kernel) {
> +			perferr = copy_to_user((void __user *)ctx->perf_kernel,
> +						ctx->perf, FASTRPC_KERNEL_PERF_LIST * sizeof(u64));
> +			if (perferr)
> +				dev_info(fl->sctx->dev, "Warning: failed to copy perf data %d\n", perferr);
> +		}
>   	}
>   
>   	if (err)
> @@ -1712,6 +1833,7 @@ static int fastrpc_multimode_invoke(struct fastrpc_user *fl, char __user *argp)
>   	struct fastrpc_invoke_args *args = NULL;
>   	struct fastrpc_ioctl_multimode_invoke invoke;
>   	u32 nscalars;
> +	u64 *perf_kernel;
>   	int err, i;
>   
>   	if (copy_from_user(&invoke, argp, sizeof(invoke)))
> @@ -1746,6 +1868,9 @@ static int fastrpc_multimode_invoke(struct fastrpc_user *fl, char __user *argp)
>   				return -EFAULT;
>   			}
>   		}
> +		perf_kernel = (u64 *)(uintptr_t)einv.perf_kernel;
> +		if (perf_kernel)
> +			fl->profile = true;
>   		einv.inv.args = (__u64)args;
>   		err = fastrpc_internal_invoke(fl, false, &einv);
>   		kfree(args);
> diff --git a/include/uapi/misc/fastrpc.h b/include/uapi/misc/fastrpc.h
> index 45c15be1de58..074675ee646f 100644
> --- a/include/uapi/misc/fastrpc.h
> +++ b/include/uapi/misc/fastrpc.h
> @@ -166,4 +166,18 @@ struct fastrpc_ioctl_capability {
>   	__u32 reserved[4];
>   };
>   
> +enum fastrpc_perfkeys {
> +	PERF_COUNT = 0,
> +	PERF_RESERVED1 = 1,

Why are there reserved entries in the middle of the range? If you already
know what they are for, please give them proper names.

> +	PERF_MAP = 2,
> +	PERF_COPY = 3,
> +	PERF_LINK = 4,
> +	PERF_GETARGS = 5,
> +	PERF_PUTARGS = 6,
> +	PERF_RESERVED2 = 7,
> +	PERF_INVOKE = 8,
> +	PERF_RESERVED3 = 9,
> +	PERF_KEY_MAX = 10,
> +};
> +
>   #endif /* __QCOM_FASTRPC_H__ */

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ