[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20241007084518.3649876-3-quic_ekangupt@quicinc.com>
Date: Mon, 7 Oct 2024 14:15:16 +0530
From: Ekansh Gupta <quic_ekangupt@...cinc.com>
To: <srinivas.kandagatla@...aro.org>, <linux-arm-msm@...r.kernel.org>
CC: <gregkh@...uxfoundation.org>, <quic_bkumar@...cinc.com>,
<linux-kernel@...r.kernel.org>, <quic_chennak@...cinc.com>,
<dri-devel@...ts.freedesktop.org>, <arnd@...db.de>
Subject: [PATCH v1 2/4] misc: fastrpc: Capture kernel and DSP performance counters
Add support to capture kernel performance counters for different
kernel level operations. These counters collects the information
for remote call and copies the information to a buffer shared
by user.
Collection of DSP performance counters is also added as part of
this change. DSP updates the performance information in the
metadata which is then copied to a buffer passed by the users.
Signed-off-by: Ekansh Gupta <quic_ekangupt@...cinc.com>
---
drivers/misc/fastrpc.c | 143 ++++++++++++++++++++++++++++++++++--
include/uapi/misc/fastrpc.h | 4 +-
2 files changed, 139 insertions(+), 8 deletions(-)
diff --git a/drivers/misc/fastrpc.c b/drivers/misc/fastrpc.c
index 8e817a763d1d..54a562fc94fb 100644
--- a/drivers/misc/fastrpc.c
+++ b/drivers/misc/fastrpc.c
@@ -19,6 +19,7 @@
#include <linux/rpmsg.h>
#include <linux/scatterlist.h>
#include <linux/slab.h>
+#include <linux/delay.h>
#include <linux/firmware/qcom/qcom_scm.h>
#include <uapi/misc/fastrpc.h>
#include <linux/of_reserved_mem.h>
@@ -38,6 +39,7 @@
#define FASTRPC_CTX_MAX (256)
#define FASTRPC_INIT_HANDLE 1
#define FASTRPC_DSP_UTILITIES_HANDLE 2
+#define FASTRPC_MAX_STATIC_HANDLE (20)
#define FASTRPC_CTXID_MASK (0xFF0)
#define INIT_FILELEN_MAX (2 * 1024 * 1024)
#define INIT_FILE_NAMELEN_MAX (128)
@@ -106,6 +108,9 @@
#define miscdev_to_fdevice(d) container_of(d, struct fastrpc_device, miscdev)
+#define FASTRPC_KERNEL_PERF_LIST (PERF_KEY_MAX)
+#define FASTRPC_DSP_PERF_LIST 12
+
static const char *domains[FASTRPC_DEV_MAX] = { "adsp", "mdsp",
"sdsp", "cdsp", "cdsp1" };
struct fastrpc_phy_page {
@@ -229,6 +234,18 @@ struct fastrpc_map {
struct kref refcount;
};
+struct fastrpc_perf {
+ u64 count;
+ u64 flush;
+ u64 map;
+ u64 copy;
+ u64 link;
+ u64 getargs;
+ u64 putargs;
+ u64 invargs;
+ u64 invoke;
+};
+
struct fastrpc_invoke_ctx {
int nscalars;
int nbufs;
@@ -237,6 +254,8 @@ struct fastrpc_invoke_ctx {
int tgid;
u32 sc;
u32 *crc;
+ u64 *perf_kernel;
+ u64 *perf_dsp;
u64 ctxid;
u64 msg_sz;
struct kref refcount;
@@ -251,6 +270,7 @@ struct fastrpc_invoke_ctx {
struct fastrpc_invoke_args *args;
struct fastrpc_buf_overlap *olaps;
struct fastrpc_channel_ctx *cctx;
+ struct fastrpc_perf *perf;
};
struct fastrpc_session_ctx {
@@ -308,6 +328,51 @@ struct fastrpc_user {
struct mutex mutex;
};
+enum fastrpc_perfkeys {
+ PERF_COUNT = 0,
+ PERF_FLUSH = 1,
+ PERF_MAP = 2,
+ PERF_COPY = 3,
+ PERF_LINK = 4,
+ PERF_GETARGS = 5,
+ PERF_PUTARGS = 6,
+ PERF_INVARGS = 7,
+ PERF_INVOKE = 8,
+ PERF_KEY_MAX = 9,
+};
+
+#define PERF_END ((void)0)
+
+#define PERF(enb, cnt, ff) \
+ {\
+ struct timespec64 startT = {0};\
+ u64 *counter = cnt;\
+ if (enb && counter) {\
+ ktime_get_boottime_ts64(&startT);\
+ } \
+ ff ;\
+ if (enb && counter) {\
+ *counter += getnstimediff(&startT);\
+ } \
+ }
+
+#define GET_COUNTER(perf_ptr, offset) \
+ (perf_ptr != NULL ?\
+ (((offset >= 0) && (offset < PERF_KEY_MAX)) ?\
+ (u64 *)(perf_ptr + offset)\
+ : (u64 *)NULL) : (u64 *)NULL)
+
+static inline s64 getnstimediff(struct timespec64 *start)
+{
+ s64 ns;
+ struct timespec64 ts, b;
+
+ ktime_get_boottime_ts64(&ts);
+ b = timespec64_sub(ts, *start);
+ ns = timespec64_to_ns(&b);
+ return ns;
+}
+
static void fastrpc_free_map(struct kref *ref)
{
struct fastrpc_map *map;
@@ -497,6 +562,7 @@ static void fastrpc_context_free(struct kref *ref)
idr_remove(&cctx->ctx_idr, ctx->ctxid >> 4);
spin_unlock_irqrestore(&cctx->lock, flags);
+ kfree(ctx->perf);
kfree(ctx->maps);
kfree(ctx->olaps);
kfree(ctx);
@@ -614,6 +680,13 @@ static struct fastrpc_invoke_ctx *fastrpc_context_alloc(
fastrpc_channel_ctx_get(cctx);
ctx->crc = (u32 *)(uintptr_t)inv2->crc;
+ ctx->perf_dsp = (u64 *)(uintptr_t)inv2->perf_dsp;
+ ctx->perf_kernel = (u64 *)(uintptr_t)inv2->perf_kernel;
+ if (ctx->perf_kernel) {
+ ctx->perf = kzalloc(sizeof(*(ctx->perf)), GFP_KERNEL);
+ if (!ctx->perf)
+ return ERR_PTR(-ENOMEM);
+ }
ctx->sc = sc;
ctx->retval = -1;
ctx->pid = current->pid;
@@ -877,7 +950,8 @@ static int fastrpc_get_meta_size(struct fastrpc_invoke_ctx *ctx)
sizeof(struct fastrpc_invoke_buf) +
sizeof(struct fastrpc_phy_page)) * ctx->nscalars +
sizeof(u64) * FASTRPC_MAX_FDLIST +
- sizeof(u32) * FASTRPC_MAX_CRCLIST;
+ sizeof(u32) * FASTRPC_MAX_CRCLIST +
+ sizeof(u32) + sizeof(u64) * FASTRPC_DSP_PERF_LIST;
return size;
}
@@ -951,7 +1025,9 @@ static int fastrpc_get_args(u32 kernel, struct fastrpc_invoke_ctx *ctx)
metalen = fastrpc_get_meta_size(ctx);
pkt_size = fastrpc_get_payload_size(ctx, metalen);
+ PERF(ctx->perf_kernel, GET_COUNTER((u64 *)ctx->perf, PERF_MAP),
err = fastrpc_create_maps(ctx);
+ PERF_END);
if (err)
return err;
@@ -987,6 +1063,7 @@ static int fastrpc_get_args(u32 kernel, struct fastrpc_invoke_ctx *ctx)
continue;
if (ctx->maps[i]) {
+ PERF(ctx->perf_kernel, GET_COUNTER((u64 *)ctx->perf, PERF_MAP),
struct vm_area_struct *vma = NULL;
rpra[i].buf.pv = (u64) ctx->args[i].ptr;
@@ -1003,9 +1080,9 @@ static int fastrpc_get_args(u32 kernel, struct fastrpc_invoke_ctx *ctx)
pg_end = ((ctx->args[i].ptr + len - 1) & PAGE_MASK) >>
PAGE_SHIFT;
pages[i].size = (pg_end - pg_start + 1) * PAGE_SIZE;
-
+ PERF_END);
} else {
-
+ PERF(ctx->perf_kernel, GET_COUNTER((u64 *)ctx->perf, PERF_COPY),
if (ctx->olaps[oix].offset == 0) {
rlen -= ALIGN(args, FASTRPC_ALIGN) - args;
args = ALIGN(args, FASTRPC_ALIGN);
@@ -1027,12 +1104,14 @@ static int fastrpc_get_args(u32 kernel, struct fastrpc_invoke_ctx *ctx)
pages[i].size = (pg_end - pg_start + 1) * PAGE_SIZE;
args = args + mlen;
rlen -= mlen;
+ PERF_END);
}
if (i < inbufs && !ctx->maps[i]) {
void *dst = (void *)(uintptr_t)rpra[i].buf.pv;
void *src = (void *)(uintptr_t)ctx->args[i].ptr;
+ PERF(ctx->perf_kernel, GET_COUNTER((u64 *)ctx->perf, PERF_COPY),
if (!kernel) {
if (copy_from_user(dst, (void __user *)src,
len)) {
@@ -1042,6 +1121,7 @@ static int fastrpc_get_args(u32 kernel, struct fastrpc_invoke_ctx *ctx)
} else {
memcpy(dst, src, len);
}
+ PERF_END);
}
}
@@ -1072,9 +1152,9 @@ static int fastrpc_put_args(struct fastrpc_invoke_ctx *ctx,
struct fastrpc_map *mmap = NULL;
struct fastrpc_invoke_buf *list;
struct fastrpc_phy_page *pages;
- u64 *fdlist;
- u32 *crclist;
- int i, inbufs, outbufs, handles;
+ u64 *fdlist, *perf_dsp_list;
+ u32 *crclist, *poll;
+ int i, inbufs, outbufs, handles, perferr;
inbufs = REMOTE_SCALARS_INBUFS(ctx->sc);
outbufs = REMOTE_SCALARS_OUTBUFS(ctx->sc);
@@ -1083,6 +1163,8 @@ static int fastrpc_put_args(struct fastrpc_invoke_ctx *ctx,
pages = fastrpc_phy_page_start(list, ctx->nscalars);
fdlist = (uint64_t *)(pages + inbufs + outbufs + handles);
crclist = (u32 *)(fdlist + FASTRPC_MAX_FDLIST);
+ poll = (u32 *)(crclist + FASTRPC_MAX_CRCLIST);
+ perf_dsp_list = (u64 *)(poll + 1);
for (i = inbufs; i < ctx->nbufs; ++i) {
if (!ctx->maps[i]) {
@@ -1113,6 +1195,14 @@ static int fastrpc_put_args(struct fastrpc_invoke_ctx *ctx,
return -EFAULT;
}
+ if (ctx->perf_dsp && perf_dsp_list) {
+ perferr = copy_to_user((void __user *)ctx->perf_dsp,
+ perf_dsp_list,
+ FASTRPC_DSP_PERF_LIST * sizeof(u64));
+ if (perferr)
+ dev_dbg(fl->sctx->dev, "Warning: failed to copy perf data %d\n", perferr);
+ }
+
return 0;
}
@@ -1148,13 +1238,35 @@ static int fastrpc_invoke_send(struct fastrpc_session_ctx *sctx,
}
+static void fastrpc_update_invoke_count(u32 handle, u64 *perf_counter,
+ struct timespec64 *invoket)
+{
+ /* update invoke count for dynamic handles */
+ u64 *invcount, *count;
+
+ if (handle <= FASTRPC_MAX_STATIC_HANDLE)
+ return;
+
+ invcount = GET_COUNTER(perf_counter, PERF_INVOKE);
+ if (invcount)
+ *invcount += getnstimediff(invoket);
+
+ count = GET_COUNTER(perf_counter, PERF_COUNT);
+ if (count)
+ *count++;
+}
+
static int fastrpc_internal_invoke(struct fastrpc_user *fl, u32 kernel, struct fastrpc_invoke_v2 *inv2)
{
struct fastrpc_invoke_ctx *ctx = NULL;
struct fastrpc_buf *buf, *b;
struct fastrpc_invoke inv;
u32 handle, sc;
- int err = 0;
+ int err = 0, perferr = 0;
+ struct timespec64 invoket = {0};
+
+ if (inv2->perf_kernel)
+ ktime_get_boottime_ts64(&invoket);
if (!fl->sctx)
return -EINVAL;
@@ -1174,14 +1286,18 @@ static int fastrpc_internal_invoke(struct fastrpc_user *fl, u32 kernel, struct
if (IS_ERR(ctx))
return PTR_ERR(ctx);
+ PERF(ctx->perf_kernel, GET_COUNTER((u64 *)ctx->perf, PERF_GETARGS),
err = fastrpc_get_args(kernel, ctx);
+ PERF_END);
if (err)
goto bail;
/* make sure that all CPU memory writes are seen by DSP */
dma_wmb();
+ PERF(ctx->perf_kernel, GET_COUNTER((u64 *)ctx->perf, PERF_LINK),
/* Send invoke buffer to remote dsp */
err = fastrpc_invoke_send(fl->sctx, ctx, kernel, handle);
+ PERF_END);
if (err)
goto bail;
@@ -1197,8 +1313,10 @@ static int fastrpc_internal_invoke(struct fastrpc_user *fl, u32 kernel, struct
/* make sure that all memory writes by DSP are seen by CPU */
dma_rmb();
+ PERF(ctx->perf_kernel, GET_COUNTER((u64 *)ctx->perf, PERF_PUTARGS),
/* populate all the output buffers with results */
err = fastrpc_put_args(ctx, kernel);
+ PERF_END);
if (err)
goto bail;
@@ -1209,6 +1327,17 @@ static int fastrpc_internal_invoke(struct fastrpc_user *fl, u32 kernel, struct
bail:
if (err != -ERESTARTSYS && err != -ETIMEDOUT) {
+ if (ctx) {
+ if (ctx->perf_kernel && !err)
+ fastrpc_update_invoke_count(handle, (u64 *)ctx->perf, &invoket);
+ if (ctx->perf_kernel && ctx->perf && ctx->perf_kernel) {
+ perferr = copy_to_user((void __user *)ctx->perf_kernel,
+ ctx->perf,
+ FASTRPC_KERNEL_PERF_LIST * sizeof(u64));
+ if (perferr)
+ dev_dbg(fl->sctx->dev, "Warning: failed to copy perf data %d\n", perferr);
+ }
+ }
/* We are done with this compute context */
spin_lock(&fl->lock);
list_del(&ctx->node);
diff --git a/include/uapi/misc/fastrpc.h b/include/uapi/misc/fastrpc.h
index 406b80555d41..1edc7c04b171 100644
--- a/include/uapi/misc/fastrpc.h
+++ b/include/uapi/misc/fastrpc.h
@@ -84,7 +84,9 @@ struct fastrpc_invoke {
struct fastrpc_invoke_v2 {
struct fastrpc_invoke inv;
__u64 crc;
- __u32 reserved[16];
+ __u64 perf_kernel;
+ __u64 perf_dsp;
+ __u32 reserved[12];
};
struct fastrpc_init_create {
--
2.34.1
Powered by blists - more mailing lists