[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <3b26b0fc-e2fd-4f9a-a745-43f40c513812@linaro.org>
Date: Fri, 24 Nov 2023 13:26:46 +0000
From: Srinivas Kandagatla <srinivas.kandagatla@...aro.org>
To: Ekansh Gupta <quic_ekangupt@...cinc.com>,
linux-arm-msm@...r.kernel.org
Cc: gregkh@...uxfoundation.org, linux-kernel@...r.kernel.org
Subject: Re: [PATCH v7 3/5] misc: fastrpc: Capture kernel and DSP performance
counters
On 21/11/2023 09:48, Ekansh Gupta wrote:
> Add support to capture kernel performance counters for different
> kernel level operations. These counters collect the information
> for remote calls and copy the information to a buffer shared
> by the user.
>
> Collection of DSP performance counters is also added as part of
> this change. DSP updates the performance information in the
> metadata which is then copied to a buffer passed by the user.
>
> Signed-off-by: Ekansh Gupta <quic_ekangupt@...cinc.com>
> ---
> Changes in v2:
> - Fixed compile time warnings
> Changes in v3:
> - Squashed commits to get proper patch series
> Changes in v7:
> - Rebase the patch to latest kernel version
>
> drivers/misc/fastrpc.c | 141 ++++++++++++++++++++++++++++++++++--
> include/uapi/misc/fastrpc.h | 14 ++++
> 2 files changed, 147 insertions(+), 8 deletions(-)
>
> diff --git a/drivers/misc/fastrpc.c b/drivers/misc/fastrpc.c
> index 55f126c779cb..cbcac0b3d09b 100644
> --- a/drivers/misc/fastrpc.c
> +++ b/drivers/misc/fastrpc.c
> @@ -19,6 +19,7 @@
> #include <linux/rpmsg.h>
> #include <linux/scatterlist.h>
> #include <linux/slab.h>
> +#include <linux/delay.h>
> #include <linux/firmware/qcom/qcom_scm.h>
> #include <uapi/misc/fastrpc.h>
> #include <linux/of_reserved_mem.h>
> @@ -33,6 +34,8 @@
> #define FASTRPC_ALIGN 128
> #define FASTRPC_MAX_FDLIST 16
> #define FASTRPC_MAX_CRCLIST 64
> +#define FASTRPC_KERNEL_PERF_LIST (PERF_KEY_MAX)
> +#define FASTRPC_DSP_PERF_LIST 12
> #define FASTRPC_PHYS(p) ((p) & 0xffffffff)
> #define FASTRPC_CTX_MAX (256)
> #define FASTRPC_INIT_HANDLE 1
> @@ -105,6 +108,27 @@
>
> #define miscdev_to_fdevice(d) container_of(d, struct fastrpc_device, miscdev)
>
> +#define PERF_END ((void)0)
> +
> +#define PERF(enb, cnt, ff) \
> + {\
> + struct timespec64 startT = {0};\
> + uint64_t *counter = cnt;\
> + if (enb && counter) {\
> + ktime_get_real_ts64(&startT);\
> + } \
> + ff ;\
> + if (enb && counter) {\
> + *counter += getnstimediff(&startT);\
> + } \
> + }
> +
> +#define GET_COUNTER(perf_ptr, offset) \
> + (perf_ptr != NULL ?\
> + (((offset >= 0) && (offset < PERF_KEY_MAX)) ?\
> + (uint64_t *)(perf_ptr + offset)\
> + : (uint64_t *)NULL) : (uint64_t *)NULL)
> +
> static const char *domains[FASTRPC_DEV_MAX] = { "adsp", "mdsp",
> "sdsp", "cdsp"};
> struct fastrpc_phy_page {
> @@ -228,6 +252,19 @@ struct fastrpc_map {
> struct kref refcount;
> };
>
> +struct fastrpc_perf {
> + u64 count;
> + u64 flush;
> + u64 map;
> + u64 copy;
> + u64 link;
> + u64 getargs;
> + u64 putargs;
> + u64 invargs;
> + u64 invoke;
> + u64 tid;
> +};
> +
> struct fastrpc_invoke_ctx {
> int nscalars;
> int nbufs;
> @@ -236,6 +273,8 @@ struct fastrpc_invoke_ctx {
> int tgid;
> u32 sc;
> u32 *crc;
> + u64 *perf_kernel;
> + u64 *perf_dsp;
> u64 ctxid;
> u64 msg_sz;
> struct kref refcount;
> @@ -250,6 +289,7 @@ struct fastrpc_invoke_ctx {
> struct fastrpc_invoke_args *args;
> struct fastrpc_buf_overlap *olaps;
> struct fastrpc_channel_ctx *cctx;
> + struct fastrpc_perf *perf;
> };
>
> struct fastrpc_session_ctx {
> @@ -299,6 +339,7 @@ struct fastrpc_user {
> struct fastrpc_session_ctx *sctx;
> struct fastrpc_buf *init_mem;
>
> + u32 profile;
> int tgid;
> int pd;
> bool is_secure_dev;
> @@ -308,6 +349,17 @@ struct fastrpc_user {
> struct mutex mutex;
> };
>
> +static inline int64_t getnstimediff(struct timespec64 *start)
> +{
> + int64_t ns;
> + struct timespec64 ts, b;
> +
> + ktime_get_real_ts64(&ts);
> + b = timespec64_sub(ts, *start);
> + ns = timespec64_to_ns(&b);
> + return ns;
> +}
> +
> static void fastrpc_free_map(struct kref *ref)
> {
> struct fastrpc_map *map;
> @@ -493,6 +545,9 @@ static void fastrpc_context_free(struct kref *ref)
> if (ctx->buf)
> fastrpc_buf_free(ctx->buf);
>
> + if (ctx->fl->profile)
> + kfree(ctx->perf);
> +
> spin_lock_irqsave(&cctx->lock, flags);
> idr_remove(&cctx->ctx_idr, ctx->ctxid >> 4);
> spin_unlock_irqrestore(&cctx->lock, flags);
> @@ -612,6 +667,14 @@ static struct fastrpc_invoke_ctx *fastrpc_context_alloc(
> fastrpc_channel_ctx_get(cctx);
>
> ctx->crc = (u32 *)(uintptr_t)invoke->crc;
> + ctx->perf_dsp = (u64 *)(uintptr_t)invoke->perf_dsp;
> + ctx->perf_kernel = (u64 *)(uintptr_t)invoke->perf_kernel;
> + if (ctx->fl->profile) {
> + ctx->perf = kzalloc(sizeof(*(ctx->perf)), GFP_KERNEL);
> + if (!ctx->perf)
> + return ERR_PTR(-ENOMEM);
> + ctx->perf->tid = ctx->fl->tgid;
> + }
> ctx->sc = sc;
> ctx->retval = -1;
> ctx->pid = current->pid;
> @@ -875,7 +938,8 @@ static int fastrpc_get_meta_size(struct fastrpc_invoke_ctx *ctx)
> sizeof(struct fastrpc_invoke_buf) +
> sizeof(struct fastrpc_phy_page)) * ctx->nscalars +
> sizeof(u64) * FASTRPC_MAX_FDLIST +
> - sizeof(u32) * FASTRPC_MAX_CRCLIST;
> + sizeof(u32) * FASTRPC_MAX_CRCLIST +
> + sizeof(u32) + sizeof(u64) * FASTRPC_DSP_PERF_LIST;
>
> return size;
> }
> @@ -942,16 +1006,22 @@ static int fastrpc_get_args(u32 kernel, struct fastrpc_invoke_ctx *ctx)
> int inbufs, i, oix, err = 0;
> u64 len, rlen, pkt_size;
> u64 pg_start, pg_end;
> + u64 *perf_counter = NULL;
> uintptr_t args;
> int metalen;
>
> + if (ctx->fl->profile)
> + perf_counter = (u64 *)ctx->perf + PERF_COUNT;
> +
> inbufs = REMOTE_SCALARS_INBUFS(ctx->sc);
> metalen = fastrpc_get_meta_size(ctx);
> pkt_size = fastrpc_get_payload_size(ctx, metalen);
>
> + PERF(ctx->fl->profile, GET_COUNTER(perf_counter, PERF_MAP),
> err = fastrpc_create_maps(ctx);
> if (err)
> return err;
> + PERF_END);
>
> ctx->msg_sz = pkt_size;
>
> @@ -984,6 +1054,7 @@ static int fastrpc_get_args(u32 kernel, struct fastrpc_invoke_ctx *ctx)
> if (ctx->maps[i]) {
> struct vm_area_struct *vma = NULL;
>
> + PERF(ctx->fl->profile, GET_COUNTER(perf_counter, PERF_MAP),
> rpra[i].buf.pv = (u64) ctx->args[i].ptr;
> pages[i].addr = ctx->maps[i]->phys;
>
> @@ -998,9 +1069,9 @@ static int fastrpc_get_args(u32 kernel, struct fastrpc_invoke_ctx *ctx)
> pg_end = ((ctx->args[i].ptr + len - 1) & PAGE_MASK) >>
> PAGE_SHIFT;
> pages[i].size = (pg_end - pg_start + 1) * PAGE_SIZE;
> -
> + PERF_END);
> } else {
> -
> + PERF(ctx->fl->profile, GET_COUNTER(perf_counter, PERF_COPY),
> if (ctx->olaps[oix].offset == 0) {
> rlen -= ALIGN(args, FASTRPC_ALIGN) - args;
> args = ALIGN(args, FASTRPC_ALIGN);
> @@ -1022,12 +1093,14 @@ static int fastrpc_get_args(u32 kernel, struct fastrpc_invoke_ctx *ctx)
> pages[i].size = (pg_end - pg_start + 1) * PAGE_SIZE;
> args = args + mlen;
> rlen -= mlen;
> + PERF_END);
> }
>
> if (i < inbufs && !ctx->maps[i]) {
> void *dst = (void *)(uintptr_t)rpra[i].buf.pv;
> void *src = (void *)(uintptr_t)ctx->args[i].ptr;
>
> + PERF(ctx->fl->profile, GET_COUNTER(perf_counter, PERF_COPY),
> if (!kernel) {
> if (copy_from_user(dst, (void __user *)src,
> len)) {
> @@ -1037,6 +1110,7 @@ static int fastrpc_get_args(u32 kernel, struct fastrpc_invoke_ctx *ctx)
> } else {
> memcpy(dst, src, len);
> }
> + PERF_END);
> }
> }
>
> @@ -1067,9 +1141,9 @@ static int fastrpc_put_args(struct fastrpc_invoke_ctx *ctx,
> struct fastrpc_map *mmap = NULL;
> struct fastrpc_invoke_buf *list;
> struct fastrpc_phy_page *pages;
> - u64 *fdlist;
> - u32 *crclist;
> - int i, inbufs, outbufs, handles;
> + u64 *fdlist, *perf_dsp_list;
> + u32 *crclist, *poll;
> + int i, inbufs, outbufs, handles, perferr;
>
> inbufs = REMOTE_SCALARS_INBUFS(ctx->sc);
> outbufs = REMOTE_SCALARS_OUTBUFS(ctx->sc);
> @@ -1078,6 +1152,8 @@ static int fastrpc_put_args(struct fastrpc_invoke_ctx *ctx,
> pages = fastrpc_phy_page_start(list, ctx->nscalars);
> fdlist = (u64 *)(pages + inbufs + outbufs + handles);
> crclist = (u32 *)(fdlist + FASTRPC_MAX_FDLIST);
> + poll = (u32 *)(crclist + FASTRPC_MAX_CRCLIST);
> + perf_dsp_list = (u64 *)(poll + 1);
>
> for (i = inbufs; i < ctx->nbufs; ++i) {
> if (!ctx->maps[i]) {
> @@ -1103,8 +1179,16 @@ static int fastrpc_put_args(struct fastrpc_invoke_ctx *ctx,
> }
>
> if (ctx->crc && crclist && rpra) {
> - if (copy_to_user((void __user *)ctx->crc, crclist, FASTRPC_MAX_CRCLIST * sizeof(u32)))
> + if (copy_to_user((void __user *)ctx->crc, crclist,
> + FASTRPC_MAX_CRCLIST * sizeof(u32))) {
> return -EFAULT;
> + }
> + }
> + if (ctx->perf_dsp && perf_dsp_list) {
> + perferr = copy_to_user((void __user *)ctx->perf_dsp,
> + perf_dsp_list, FASTRPC_DSP_PERF_LIST * sizeof(u64));
> + if (perferr)
> + dev_info(fl->sctx->dev, "Warning: failed to copy perf data %d\n", perferr);
> }
> return 0;
> }
> @@ -1141,6 +1225,21 @@ static int fastrpc_invoke_send(struct fastrpc_session_ctx *sctx,
>
> }
>
> +static void fastrpc_update_invoke_count(u32 handle, u64 *perf_counter,
> + struct timespec64 *invoket)
> +{
> + u64 *invcount, *count;
> +
> + invcount = GET_COUNTER(perf_counter, PERF_INVOKE);
> + if (invcount)
> + *invcount += getnstimediff(invoket);
> +
> + count = GET_COUNTER(perf_counter, PERF_COUNT);
> + if (count)
> + *count += 1;
> +}
> +
> +
> static int fastrpc_internal_invoke(struct fastrpc_user *fl, u32 kernel,
> struct fastrpc_enhanced_invoke *invoke)
> {
> @@ -1148,7 +1247,12 @@ static int fastrpc_internal_invoke(struct fastrpc_user *fl, u32 kernel,
> struct fastrpc_buf *buf, *b;
> struct fastrpc_invoke *inv = &invoke->inv;
> u32 handle, sc;
> - int err = 0;
> + u64 *perf_counter = NULL;
> + int err = 0, perferr = 0;
> + struct timespec64 invoket = {0};
> +
> + if (fl->profile)
> + ktime_get_real_ts64(&invoket);
>
> if (!fl->sctx)
> return -EINVAL;
> @@ -1167,16 +1271,22 @@ static int fastrpc_internal_invoke(struct fastrpc_user *fl, u32 kernel,
> if (IS_ERR(ctx))
> return PTR_ERR(ctx);
>
> + if (fl->profile)
> + perf_counter = (u64 *)ctx->perf + PERF_COUNT;
> + PERF(fl->profile, GET_COUNTER(perf_counter, PERF_GETARGS),
> err = fastrpc_get_args(kernel, ctx);
> if (err)
> goto bail;
> + PERF_END);
>
> /* make sure that all CPU memory writes are seen by DSP */
> dma_wmb();
> + PERF(fl->profile, GET_COUNTER(perf_counter, PERF_LINK),
> /* Send invoke buffer to remote dsp */
> err = fastrpc_invoke_send(fl->sctx, ctx, kernel, handle);
> if (err)
> goto bail;
> + PERF_END);
>
> if (kernel) {
> if (!wait_for_completion_timeout(&ctx->work, 10 * HZ))
> @@ -1190,10 +1300,12 @@ static int fastrpc_internal_invoke(struct fastrpc_user *fl, u32 kernel,
>
> /* make sure that all memory writes by DSP are seen by CPU */
> dma_rmb();
> + PERF(fl->profile, GET_COUNTER(perf_counter, PERF_PUTARGS),
> /* populate all the output buffers with results */
> err = fastrpc_put_args(ctx, kernel);
> if (err)
> goto bail;
> + PERF_END);
>
> /* Check the response from remote dsp */
> err = ctx->retval;
> @@ -1214,6 +1326,15 @@ static int fastrpc_internal_invoke(struct fastrpc_user *fl, u32 kernel,
> list_del(&buf->node);
> list_add_tail(&buf->node, &fl->cctx->invoke_interrupted_mmaps);
> }
> + } else if (ctx) {
> + if (fl->profile && !err)
> + fastrpc_update_invoke_count(handle, perf_counter, &invoket);
> + if (fl->profile && ctx->perf && ctx->perf_kernel) {
> + perferr = copy_to_user((void __user *)ctx->perf_kernel,
> + ctx->perf, FASTRPC_KERNEL_PERF_LIST * sizeof(u64));
> + if (perferr)
> + dev_info(fl->sctx->dev, "Warning: failed to copy perf data %d\n", perferr);
> + }
> }
>
> if (err)
> @@ -1712,6 +1833,7 @@ static int fastrpc_multimode_invoke(struct fastrpc_user *fl, char __user *argp)
> struct fastrpc_invoke_args *args = NULL;
> struct fastrpc_ioctl_multimode_invoke invoke;
> u32 nscalars;
> + u64 *perf_kernel;
> int err, i;
>
> if (copy_from_user(&invoke, argp, sizeof(invoke)))
> @@ -1746,6 +1868,9 @@ static int fastrpc_multimode_invoke(struct fastrpc_user *fl, char __user *argp)
> return -EFAULT;
> }
> }
> + perf_kernel = (u64 *)(uintptr_t)einv.perf_kernel;
> + if (perf_kernel)
> + fl->profile = true;
> einv.inv.args = (__u64)args;
> err = fastrpc_internal_invoke(fl, false, &einv);
> kfree(args);
> diff --git a/include/uapi/misc/fastrpc.h b/include/uapi/misc/fastrpc.h
> index 45c15be1de58..074675ee646f 100644
> --- a/include/uapi/misc/fastrpc.h
> +++ b/include/uapi/misc/fastrpc.h
> @@ -166,4 +166,18 @@ struct fastrpc_ioctl_capability {
> __u32 reserved[4];
> };
>
> +enum fastrpc_perfkeys {
> + PERF_COUNT = 0,
> + PERF_RESERVED1 = 1,
Why are there reserved entries in the middle of the range? If you already
know what they are for, please add the proper names for them.
> + PERF_MAP = 2,
> + PERF_COPY = 3,
> + PERF_LINK = 4,
> + PERF_GETARGS = 5,
> + PERF_PUTARGS = 6,
> + PERF_RESERVED2 = 7,
> + PERF_INVOKE = 8,
> + PERF_RESERVED3 = 9,
> + PERF_KEY_MAX = 10,
> +};
> +
> #endif /* __QCOM_FASTRPC_H__ */
Powered by blists - more mailing lists