[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <20220917072356.2255620-11-jiho.chu@samsung.com>
Date: Sat, 17 Sep 2022 16:23:53 +0900
From: Jiho Chu <jiho.chu@...sung.com>
To: gregkh@...uxfoundation.org, arnd@...db.de, ogabbay@...nel.org,
krzysztof.kozlowski@...aro.org, broonie@...nel.org
Cc: linux-kernel@...r.kernel.org, yelini.jeong@...sung.com,
myungjoo.ham@...sung.com, jiho.chu@...sung.com
Subject: [PATCH v2 10/13] trinity: Add profile module
This patch is for profile module.
The samsung NPU provides internal statistics data,
and it includes memory read/write counts, consumed clock
cycle for each operation. This statistics can be read by
ioctl control command.
Signed-off-by: Jiho Chu <jiho.chu@...sung.com>
Signed-off-by: Yelin Jeong <yelini.jeong@...sung.com>
Signed-off-by: Dongju Chae <dongju.chae@...sung.com>
Signed-off-by: MyungJoo Ham <myungjoo.ham@...sung.com>
---
drivers/misc/trinity/trinity_vision2_drv.c | 326 ++++++++++++++++++
.../misc/trinity/trinity_vision2_profile.h | 324 +++++++++++++++++
2 files changed, 650 insertions(+)
create mode 100644 drivers/misc/trinity/trinity_vision2_profile.h
diff --git a/drivers/misc/trinity/trinity_vision2_drv.c b/drivers/misc/trinity/trinity_vision2_drv.c
index 3dd89920cdf5..111623322895 100644
--- a/drivers/misc/trinity/trinity_vision2_drv.c
+++ b/drivers/misc/trinity/trinity_vision2_drv.c
@@ -18,6 +18,7 @@
#include "trinity_common.h"
#include "trinity_sched.h"
+#include "trinity_vision2_profile.h"
#include "trinity_vision2_regs.h"
#define TRIV2_DRV_GET_PDATA(drv) ((struct triv2_pdata *)(drv->pdata))
@@ -146,6 +147,11 @@ struct triv2_pdata {
/* back buffer for context switching */
struct trinity_dma back_buf;
+
+ /* profiling */
+ struct trinity_dma prof_buf;
+ struct mutex prof_lock;
+ DECLARE_HASHTABLE(prof_htable, TRIV2_PROFILE_HASH_BITS);
};
static void triv2_idu_setup(struct trinity_driver *drv);
@@ -156,6 +162,150 @@ static void triv2_handle_cmd_done(struct trinity_driver *drv,
struct triv2_cmd *cmd, bool timeout);
static void triv2_setup_buffers(struct trinity_driver *drv);
+static const char *const triv2_op_names[] =
+ TRIV2_FOREACH_OPNAME(TRIV2_GENERATE_OPNAME);
+
+static struct triv2_profile *
+triv2_find_profile(const struct trinity_driver *drv, int req_id)
+{
+ struct triv2_pdata *pdata = TRIV2_DRV_GET_PDATA(drv);
+ unsigned long key = TRIV2_PROFILE_HASH_KEY(req_id);
+ struct triv2_profile *profile = NULL;
+
+ hash_for_each_possible(pdata->prof_htable, profile, hlist, key) {
+ if (profile->req_id == req_id)
+ break;
+ }
+
+ return profile;
+}
+
+static void triv2_fini_profile(struct device *dev, struct trinity_dma *prof_buf)
+{
+ if (!prof_buf->addr)
+ return;
+
+ trinity_dma_free(dev, prof_buf);
+ memset(prof_buf, '\x00', sizeof(*prof_buf));
+}
+
+static void triv2_init_profile(struct trinity_driver *drv,
+ unsigned long profile_size)
+{
+ struct device *dev = drv_to_dev_ptr(drv);
+ struct trinity_dma *prof_buf = TRIV2_DRV_GET_PROF_BUF(drv);
+
+ if (profile_size > 0) {
+ /* allocate profile buffer and enable it */
+ struct iommu_domain *domain;
+ phys_addr_t paddr;
+ int status;
+
+ triv2_fini_profile(dev, prof_buf);
+
+ status = trinity_dma_alloc(dev, profile_size, prof_buf);
+ if (status < 0) {
+ dev_err(dev,
+ "Couldn't allocate memory for profiling buffer: %d",
+ status);
+ return;
+ }
+
+ domain = iommu_get_domain_for_dev(drv_to_dev_ptr(drv));
+ paddr = trinity_get_paddr(domain, prof_buf->dma_handle);
+ iowrite32(TRIV2_IDU_ADDR(paddr),
+ trinity_get_iomem_addr(drv->mmreg_vaddr[0],
+ OFFSET_NPU_PROF_ADDR));
+ iowrite32(prof_buf->size,
+ trinity_get_iomem_addr(drv->mmreg_vaddr[0],
+ OFFSET_NPU_PROF_SIZE));
+ } else {
+ /* disable profiling */
+ triv2_fini_profile(dev, prof_buf);
+
+ iowrite32(0, trinity_get_iomem_addr(drv->mmreg_vaddr[0],
+ OFFSET_NPU_PROF_ADDR));
+ iowrite32(0, trinity_get_iomem_addr(drv->mmreg_vaddr[0],
+ OFFSET_NPU_PROF_SIZE));
+ }
+}
+
+static void triv2_assign_opnames(struct triv2_cmd_profile *cmd)
+{
+ struct triv2_op_profile *ops = cmd->profile_ops;
+ uint32_t i;
+
+ for (i = 0; i < cmd->total_ops; i++)
+ snprintf(ops[i].op_name, TRIV2_MAX_OPNAME, "%s",
+ triv2_op_names[ops[i].opcode]);
+}
+
+static int32_t triv2_check_profile(struct trinity_driver *drv,
+ struct trinity_req *req)
+{
+ struct triv2_pdata *pdata = TRIV2_DRV_GET_PDATA(drv);
+ struct triv2_req *t_req = TRIV2_GET_REQ(req);
+ struct trinity_dma *profile_buf;
+ struct triv2_cmd_profile *profile_cmd;
+ struct triv2_cmd_profile *profile_cmd_new;
+ struct triv2_profile *profile;
+
+ uint32_t offset = t_req->profile_offset;
+ uint32_t total_ops, total_size;
+
+ profile_buf = TRIV2_DRV_GET_PROF_BUF(drv);
+ if (!profile_buf->addr)
+ return 0;
+
+ if (profile_buf->size <= offset) {
+ dev_err(drv_to_dev_ptr(drv),
+ "Invalid profile offset detected: 0x%x", offset);
+ return -EINVAL;
+ }
+
+ profile_cmd = (struct triv2_cmd_profile *)((char *)profile_buf->addr +
+ offset);
+ profile_cmd->total_cycles = t_req->total_cycles;
+
+ total_ops = profile_cmd->total_ops;
+ total_size = sizeof(struct triv2_cmd_profile) +
+ total_ops * sizeof(struct triv2_op_profile);
+
+ profile_cmd_new = vzalloc(total_size);
+ if (!profile_cmd_new)
+ return -ENOMEM;
+
+ mutex_lock(&pdata->prof_lock);
+
+ profile = req->stat->profile;
+ if (profile) {
+ WARN_ON(!profile->data);
+ vfree(profile->data);
+ profile->data = profile_cmd_new;
+ } else {
+ int req_id = req->input.config.req_id;
+ unsigned long key = TRIV2_PROFILE_HASH_KEY(req_id);
+
+ profile = vzalloc(sizeof(struct triv2_profile));
+ if (!profile) {
+ vfree(profile_cmd_new);
+ mutex_unlock(&pdata->prof_lock);
+ return -ENOMEM;
+ }
+ profile->req_id = req_id;
+ profile->data = profile_cmd_new;
+
+ hash_add(pdata->prof_htable, &profile->hlist, key);
+
+ req->stat->profile = profile;
+ }
+ memcpy(profile_cmd_new, profile_cmd, total_size);
+ triv2_assign_opnames(profile_cmd_new);
+
+ mutex_unlock(&pdata->prof_lock);
+ return 0;
+}
+
/**
* triv2_get_state() - Get state (TRINITY_STATE_READY/TRINITY_STATE_PAUSE) of the device.
* @returns (enum triv2_state) TRINITY_STATE_READY (i.e., 1) or TRINITY_STATE_PAUSE (i.e., 0 )
@@ -447,6 +597,157 @@ static void triv2_stop_reqs(struct work_struct *work)
triv2_cancel_reqs(drv);
}
+/**
+ * triv2_get_profile_meta() - get profile metadata for the target req
+ */
+static int32_t triv2_get_profile_meta(const struct trinity_driver *drv,
+ struct trinity_ioctl_profile_meta *meta)
+{
+ struct triv2_pdata *pdata = TRIV2_DRV_GET_PDATA(drv);
+ struct triv2_profile *profile;
+ struct triv2_cmd_profile *profile_data;
+ int ret = 0;
+
+ mutex_lock(&pdata->prof_lock);
+
+ profile = triv2_find_profile(drv, meta->req_id);
+ if (!profile) {
+ ret = -ENOENT;
+ goto out;
+ }
+
+ profile_data = profile->data;
+ WARN_ON(!profile_data);
+
+ meta->total_cycles = profile_data->total_cycles;
+ meta->total_ops = profile_data->total_ops;
+ meta->profile_size =
+ profile_data->total_ops * sizeof(struct triv2_op_profile);
+ /* unsupported for now */
+ meta->input_footprint = -1;
+ meta->output_footprint = -1;
+
+out:
+ mutex_unlock(&pdata->prof_lock);
+
+ return ret;
+}
+
+/**
+ * triv2_get_profile_buff() - get profile buffer for the target req
+ */
+static int32_t triv2_get_profile_buff(const struct trinity_driver *drv,
+ struct trinity_ioctl_profile_buff *buff)
+{
+ struct triv2_pdata *pdata = TRIV2_DRV_GET_PDATA(drv);
+ struct triv2_profile *profile;
+ struct triv2_cmd_profile *profile_data;
+ uint32_t total_size;
+ int ret = 0;
+
+ mutex_lock(&pdata->prof_lock);
+
+ profile = triv2_find_profile(drv, buff->req_id);
+ if (!profile) {
+ ret = -ENOENT;
+ goto out;
+ }
+
+ profile_data = profile->data;
+ WARN_ON(!profile_data);
+
+ profile_data = profile->data;
+ total_size = profile_data->total_ops * sizeof(struct triv2_op_profile);
+
+ if (buff->profile_pos + buff->profile_size > total_size) {
+ dev_err(drv_to_dev_ptr(drv),
+ "Profile data out-of-range! pos(%u) size(%u) > total_size(%u)",
+ buff->profile_pos, buff->profile_size, total_size);
+ ret = -ERANGE;
+ goto out;
+ }
+
+ /* consider partial memory copies */
+ if (copy_to_user((char __user *)buff->profile_buf,
+ (char *)profile_data->profile_ops + buff->profile_pos,
+ buff->profile_size))
+ ret = -EACCES;
+
+out:
+ mutex_unlock(&pdata->prof_lock);
+
+ return ret;
+}
+
+static ssize_t triv2_get_profile(const struct trinity_driver *drv, char *buf, int req_id)
+{
+ struct triv2_pdata *pdata = TRIV2_DRV_GET_PDATA(drv);
+ struct triv2_profile *profile;
+ struct triv2_cmd_profile *profile_data;
+ uint32_t i;
+ ssize_t len = 0;
+
+ mutex_lock(&pdata->prof_lock);
+
+ profile = triv2_find_profile(drv, req_id);
+ if (!profile) {
+ len += snprintf(buf, PAGE_SIZE, "Unable to find the profile data (req_id %d)",
+ req_id);
+ goto out;
+ }
+
+ profile_data = profile->data;
+ WARN_ON(!profile_data);
+
+ len += snprintf(buf, PAGE_SIZE, "Total cycles: %lld", profile_data->total_cycles);
+ len += snprintf(buf, PAGE_SIZE, "Total ops: %u", profile_data->total_ops);
+
+ for (i = 0; i < profile_data->total_ops; i++) {
+ struct triv2_op_profile *op = &profile_data->profile_ops[i];
+
+ len += snprintf(buf, PAGE_SIZE, "[%u] opcode: %u name:%s", i, op->opcode,
+ op->op_name);
+ len += snprintf(buf, PAGE_SIZE, "\tcycles: %lld", op->cycles);
+ len += snprintf(buf, PAGE_SIZE, "\tprog_seq: %lld", op->prog_seq);
+ len += snprintf(buf, PAGE_SIZE, "\texec_seq: %lld", op->exec_seq);
+ if (op->dram_read > 0)
+ len += snprintf(buf, PAGE_SIZE, "\tdram_read: %lld", op->dram_read);
+ if (op->dram_write > 0)
+ len += snprintf(buf, PAGE_SIZE, "\tdram_write: %lld", op->dram_write);
+ if (op->sram_read > 0)
+ len += snprintf(buf, PAGE_SIZE, "\tsram_read: %lld", op->sram_read);
+ if (op->sram_write > 0)
+ len += snprintf(buf, PAGE_SIZE, "\tsram_write: %lld", op->sram_write);
+ }
+out:
+ mutex_unlock(&pdata->prof_lock);
+ return len;
+}
+
+/**
+ * triv2_destroy_profile() - destroy profile data
+ */
+static void triv2_destroy_profile(const struct trinity_driver *drv, void *data)
+{
+ struct triv2_pdata *pdata = TRIV2_DRV_GET_PDATA(drv);
+ struct triv2_profile *profile = data;
+ struct triv2_cmd_profile *profile_data;
+
+ if (!profile)
+ return;
+
+ mutex_lock(&pdata->prof_lock);
+
+ profile_data = profile->data;
+ WARN_ON(!profile_data);
+ vfree(profile_data);
+
+ hash_del(&profile->hlist);
+ vfree(profile);
+
+ mutex_unlock(&pdata->prof_lock);
+}
+
static void triv2_handle_irq_cmds(struct trinity_driver *drv)
{
struct triv2_cmd_info *info;
@@ -1021,11 +1322,13 @@ static void triv2_setup_buffers(struct trinity_driver *drv)
struct iommu_domain *domain;
struct trinity_dma *cmd_buf;
struct trinity_dma *back_buf;
+ struct trinity_dma *prof_buf;
phys_addr_t paddr;
domain = iommu_get_domain_for_dev(dev);
cmd_buf = TRIV2_DRV_GET_CMD_BUF(drv);
back_buf = TRIV2_DRV_GET_BACK_BUF(drv);
+ prof_buf = TRIV2_DRV_GET_PROF_BUF(drv);
/* command */
paddr = trinity_get_paddr(domain, cmd_buf->dma_handle);
@@ -1038,6 +1341,22 @@ static void triv2_setup_buffers(struct trinity_driver *drv)
OFFSET_NPU_BACK_ADDR));
iowrite32(back_buf->size, trinity_get_iomem_addr(drv->mmreg_vaddr[0],
OFFSET_NPU_BACK_SIZE));
+
+ /* profile */
+ if (prof_buf->size > 0) {
+ paddr = trinity_get_paddr(domain, prof_buf->dma_handle);
+ iowrite32(TRIV2_IDU_ADDR(paddr),
+ trinity_get_iomem_addr(drv->mmreg_vaddr[0],
+ OFFSET_NPU_PROF_ADDR));
+ iowrite32(prof_buf->size,
+ trinity_get_iomem_addr(drv->mmreg_vaddr[0],
+ OFFSET_NPU_PROF_SIZE));
+ } else {
+ iowrite32(0, trinity_get_iomem_addr(drv->mmreg_vaddr[0],
+ OFFSET_NPU_PROF_ADDR));
+ iowrite32(0, trinity_get_iomem_addr(drv->mmreg_vaddr[0],
+ OFFSET_NPU_PROF_SIZE));
+ }
}
static int32_t triv2_init_pdata(struct trinity_driver *drv)
@@ -1203,6 +1522,13 @@ static struct trinity_desc triv2_desc = {
.dealloc_req = triv2_dealloc_req,
.prepare_req = triv2_prepare_req,
.invoke_req = triv2_invoke_req,
+ /* profile */
+ .init_profile = triv2_init_profile,
+ .check_profile = triv2_check_profile,
+ .get_profile_meta = triv2_get_profile_meta,
+ .get_profile_buff = triv2_get_profile_buff,
+ .get_profile = triv2_get_profile,
+ .destroy_profile = triv2_destroy_profile,
/* etc. */
.handle_timeout = triv2_handle_timeout,
.stop_reqs = triv2_stop_reqs,
diff --git a/drivers/misc/trinity/trinity_vision2_profile.h b/drivers/misc/trinity/trinity_vision2_profile.h
new file mode 100644
index 000000000000..7e5b169eca6b
--- /dev/null
+++ b/drivers/misc/trinity/trinity_vision2_profile.h
@@ -0,0 +1,324 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/**
+ * Profile header for TRIV2 devices
+ *
+ * Copyright (C) 2021-2022 Samsung Electronics
+ * Copyright (C) 2021 Dongju Chae <dongju.chae@...sung.com>
+ * Copyright (C) 2022 MyungJoo Ham <myungjoo.ham@...sung.com>
+ * Copyright (C) 2022 Yelin Jeong <yelini.jeong@...sung.com>
+ * Copyright (C) 2022 Jiho Chu <jiho.chu@...sung.com>
+ */
+
+#ifndef __DRIVERS_MISC_TRINITY_VISION2_PROFILE_H__
+#define __DRIVERS_MISC_TRINITY_VISION2_PROFILE_H__
+
+#include <linux/types.h>
+
+#define TRIV2_MAX_OPNAME (128)
+#define TRIV2_MAX_PROFILE_SIZE (256)
+
+/**
+ * struct triv2_op_profile - A profile data per operation
+ *
+ * @op_name: The physical DMA address of this DMA buffer.
+ * @cycles: total number of cycles
+ * @dram_read: a count for dram read
+ * @dram_write: a count for dram write
+ * @sram_read: a count for sram read
+ * @sram_write: a count for sram write
+ * @start_cycles: a count for starting cycles
+ * @end_cycles: a cont for ending cycles
+ * @opcode: operation code
+ * @prog_seq: program sequence number
+ * @exec_seq: execution sequence number
+ * @reserved: reserved
+ */
+struct triv2_op_profile {
+ union {
+ struct {
+ char op_name[TRIV2_MAX_OPNAME];
+
+ int64_t cycles;
+
+ int64_t dram_read;
+ int64_t dram_write;
+
+ int64_t sram_read;
+ int64_t sram_write;
+
+ int64_t start_cycles;
+ int64_t end_cycles;
+
+ uint32_t opcode;
+ int64_t prog_seq;
+ int64_t exec_seq;
+ } __packed;
+ uint8_t reserved[TRIV2_MAX_PROFILE_SIZE];
+ };
+};
+
+/**
+ * struct triv2_cmd_profile - A profile data per command
+ *
+ * @total_cycles: total number of cycles for a command
+ * @total_ops: total operations of command
+ * @profile_ops: list of profile data for operations
+ */
+struct triv2_cmd_profile {
+ int64_t total_cycles;
+ uint32_t total_ops;
+ /* zero-length array */
+ struct triv2_op_profile profile_ops[];
+} __packed;
+
+/**
+ * struct triv2_profile - A profile data
+ *
+ * @req_id: total number of cycles for a command
+ * @hlist: list of profile data
+ * @data: command profile data
+ */
+struct triv2_profile {
+ int req_id;
+ struct hlist_node hlist;
+ struct triv2_cmd_profile *data;
+};
+
+enum {
+ NOP = 0x00,
+ HALT = 0x01,
+ ADMA_IN = 0x02,
+ ADMA_OUT = 0x03,
+ RESCALE_I8 = 0x04,
+ RESCALE_I16 = 0x05,
+ CONVERT_I16_I8 = 0x06,
+ CONVERT_I8_I16 = 0x07,
+ RELUN_I8 = 0x08,
+ RELUN_I16 = 0x09,
+ PRELU_I8 = 0x0A,
+ PRELU_I16 = 0x0B,
+ ADD_I8 = 0x0C,
+ ADD_I16 = 0x0D,
+ REDUCE_MEAN_I8 = 0x0E,
+ REDUCE_MEAN_I16 = 0x0F,
+ MAX_POOL_I8 = 0x10,
+ MAX_POOL_I16 = 0x11,
+ AVG_POOL_I8 = 0x12,
+ AVG_POOL_I16 = 0x13,
+ CONV_I8 = 0x14,
+ CONV_I16 = 0x15,
+ CONVE_I8 = 0x16,
+ CONVE_I16 = 0x17,
+ TCONV_I8 = 0x18,
+ TCONV_I16 = 0x19,
+ MUL_I8 = 0x1A,
+ MUL_I16 = 0x1B,
+ DCONV_I8 = 0x1C,
+ DCONV_I16 = 0x1D,
+ DCONVE_I8 = 0x1E,
+ DCONVE_I16 = 0x1F,
+ CONV_I8_P = 0x20,
+ CONV_I16_P = 0x21,
+ PDMA_IN = 0x40,
+ PDMA_OUT = 0x41,
+ ARGMAX_I8 = 0x42,
+ ARGMAX_I16 = 0x43,
+ RESHAPE_I8 = 0x44,
+ RESHAPE_I16 = 0x45,
+ TRANSPOSE_I8 = 0x46,
+ TRANSPOSE_I16 = 0x47,
+ CONCAT_I8 = 0x48,
+ CONCAT_I16 = 0x49,
+ PAD_I8 = 0x4A,
+ PAD_I16 = 0x4B,
+ STRIDED_SLICE_I8 = 0x4C,
+ STRIDED_SLICE_I16 = 0x4D,
+ CONVERT_FORMAT_I8 = 0x4E,
+ CONVERT_FORMAT_I16 = 0x4F,
+ SIGMOID_I8 = 0x50,
+ SIGMOID_I16 = 0x51,
+ TANH_I8 = 0x52,
+ TANH_I16 = 0x53,
+ ELU_I8 = 0x54,
+ ELU_I16 = 0x55,
+ FLOOR_I8 = 0x56,
+ FLOOR_I16 = 0x57,
+ RSQRT_I8 = 0x58,
+ RSQRT_I16 = 0x59,
+ SQRT_I8 = 0x5A,
+ SQRT_I16 = 0x5B,
+ SOFTMAX_I8 = 0x5C,
+ SOFTMAX_I16 = 0x5D,
+ DIVIDE_I8 = 0x60,
+ DIVIDE_I16 = 0x61,
+ FLOORDIV_I8 = 0x62,
+ FLOORDIV_I16 = 0x63,
+ LOGICAL_OR_I8 = 0x64,
+ LOGICAL_OR_I16 = 0x65,
+ GREATER_I8 = 0x66,
+ GREATER_I16 = 0x67,
+ GREATER_EQUAL_I8 = 0x68,
+ GREATER_EQUAL_I16 = 0x69,
+ POW_I8 = 0x6A,
+ POW_I16 = 0x6B,
+ EXP_I8 = 0x6C,
+ EXP_I16 = 0x6D,
+ NOT_EQUAL_I8 = 0x6E,
+ NOT_EQUAL_I16 = 0x6F,
+ BATCH_TO_SPACE_I8 = 0x70,
+ BATCH_TO_SPACE_I16 = 0x71,
+ SPACE_TO_BATCH_I8 = 0x72,
+ SPACE_TO_BATCH_I16 = 0x73,
+ DEPTH_TO_SPACE_I8 = 0x74,
+ DEPTH_TO_SPACE_I16 = 0x75,
+ SPACE_TO_DEPTH_I8 = 0x76,
+ SPACE_TO_DEPTH_I16 = 0x77,
+ YUV_TO_RGB_I8 = 0x7A,
+ YUV_TO_RGB_I16 = 0x7B,
+ RESIZE_BILINEAR_I8 = 0x7C,
+ RESIZE_BILINEAR_I16 = 0x7D,
+ RESIZE_NEAREST_NEIGHBOR_I8 = 0x7E,
+ RESIZE_NEAREST_NEIGHBOR_I16 = 0x7F,
+ LOCAL_RESPONSE_NORM_I8 = 0x80,
+ LOCAL_RESPONSE_NORM_I16 = 0x81,
+ INSTANCE_NORM_I8 = 0x82,
+ INSTANCE_NORM_I16 = 0x83,
+ REDUCED_SUM_SSUM_I8 = 0x84,
+ REDUCED_SUM_SSUM_I16 = 0x85,
+ REDUCED_SUM_SSUM_ACC_I8 = 0x86,
+ REDUCED_SUM_SSUM_ACC_I16 = 0x87,
+ REDUCED_SUM_2SUM_I8 = 0x88,
+ REDUCED_SUM_2SUM_I16 = 0x89,
+ REDUCED_MEAN_DEV_WSUM_I8 = 0x8A,
+ REDUCED_MEAN_DEV_WSUM_I16 = 0x8B,
+ REDUCED_MEAN_DEV_I8 = 0x8C,
+ REDUCED_MEAN_DEV_I16 = 0x8D,
+ RESCALE_CW_I8 = 0x8E,
+ RESCALE_CW_I16 = 0x8F,
+ REDUCED_MEAN_SCALE_WSUM_I8 = 0x90,
+ REDUCED_MEAN_SCALE_WSUM_I16 = 0x91,
+ RESCALE_CHANNELWISE_I8 = 0x92,
+ RESCALE_CHANNELWISE_I16 = 0x93,
+};
+
+/** generate opnames */
+#define TRIV2_GENERATE_OPNAME(OPNAME) \
+ [OPNAME] = #OPNAME,
+
+#define TRIV2_FOREACH_OPNAME(GEN) {\
+ GEN(NOP) \
+ GEN(HALT) \
+ GEN(ADMA_IN) \
+ GEN(ADMA_OUT) \
+ GEN(RESCALE_I8) \
+ GEN(RESCALE_I16) \
+ GEN(CONVERT_I16_I8) \
+ GEN(CONVERT_I8_I16) \
+ GEN(RELUN_I8) \
+ GEN(RELUN_I16) \
+ GEN(PRELU_I8) \
+ GEN(PRELU_I16) \
+ GEN(ADD_I8) \
+ GEN(ADD_I16) \
+ GEN(REDUCE_MEAN_I8) \
+ GEN(REDUCE_MEAN_I16) \
+ GEN(MAX_POOL_I8) \
+ GEN(MAX_POOL_I16) \
+ GEN(AVG_POOL_I8) \
+ GEN(AVG_POOL_I16) \
+ GEN(CONV_I8) \
+ GEN(CONV_I16) \
+ GEN(CONVE_I8) \
+ GEN(CONVE_I16) \
+ GEN(TCONV_I8) \
+ GEN(TCONV_I16) \
+ GEN(MUL_I8) \
+ GEN(MUL_I16) \
+ GEN(DCONV_I8) \
+ GEN(DCONV_I16) \
+ GEN(DCONVE_I8) \
+ GEN(DCONVE_I16) \
+ GEN(CONV_I8_P) \
+ GEN(CONV_I16_P) \
+ GEN(PDMA_IN) \
+ GEN(PDMA_OUT) \
+ GEN(ARGMAX_I8) \
+ GEN(ARGMAX_I16) \
+ GEN(RESHAPE_I8) \
+ GEN(RESHAPE_I16) \
+ GEN(TRANSPOSE_I8) \
+ GEN(TRANSPOSE_I16) \
+ GEN(CONCAT_I8) \
+ GEN(CONCAT_I16) \
+ GEN(PAD_I8) \
+ GEN(PAD_I16) \
+ GEN(STRIDED_SLICE_I8) \
+ GEN(STRIDED_SLICE_I16) \
+ GEN(CONVERT_FORMAT_I8) \
+ GEN(CONVERT_FORMAT_I16) \
+ GEN(SIGMOID_I8) \
+ GEN(SIGMOID_I16) \
+ GEN(TANH_I8) \
+ GEN(TANH_I16) \
+ GEN(ELU_I8) \
+ GEN(ELU_I16) \
+ GEN(FLOOR_I8) \
+ GEN(FLOOR_I16) \
+ GEN(RSQRT_I8) \
+ GEN(RSQRT_I16) \
+ GEN(SQRT_I8) \
+ GEN(SQRT_I16) \
+ GEN(SOFTMAX_I8) \
+ GEN(SOFTMAX_I16) \
+ GEN(DIVIDE_I8) \
+ GEN(DIVIDE_I16) \
+ GEN(FLOORDIV_I8) \
+ GEN(FLOORDIV_I16) \
+ GEN(LOGICAL_OR_I8) \
+ GEN(LOGICAL_OR_I16) \
+ GEN(GREATER_I8) \
+ GEN(GREATER_I16) \
+ GEN(GREATER_EQUAL_I8) \
+ GEN(GREATER_EQUAL_I16) \
+ GEN(POW_I8) \
+ GEN(POW_I16) \
+ GEN(EXP_I8) \
+ GEN(EXP_I16) \
+ GEN(NOT_EQUAL_I8) \
+ GEN(NOT_EQUAL_I16) \
+ GEN(BATCH_TO_SPACE_I8) \
+ GEN(BATCH_TO_SPACE_I16) \
+ GEN(SPACE_TO_BATCH_I8) \
+ GEN(SPACE_TO_BATCH_I16) \
+ GEN(DEPTH_TO_SPACE_I8) \
+ GEN(DEPTH_TO_SPACE_I16) \
+ GEN(SPACE_TO_DEPTH_I8) \
+ GEN(SPACE_TO_DEPTH_I16) \
+ GEN(YUV_TO_RGB_I8) \
+ GEN(YUV_TO_RGB_I16) \
+ GEN(RESIZE_BILINEAR_I8) \
+ GEN(RESIZE_BILINEAR_I16) \
+ GEN(RESIZE_NEAREST_NEIGHBOR_I8) \
+ GEN(RESIZE_NEAREST_NEIGHBOR_I16) \
+ GEN(LOCAL_RESPONSE_NORM_I8) \
+ GEN(LOCAL_RESPONSE_NORM_I16) \
+ GEN(INSTANCE_NORM_I8) \
+ GEN(INSTANCE_NORM_I16) \
+ GEN(REDUCED_SUM_SSUM_I8) \
+ GEN(REDUCED_SUM_SSUM_I16) \
+ GEN(REDUCED_SUM_SSUM_ACC_I8) \
+ GEN(REDUCED_SUM_SSUM_ACC_I16) \
+ GEN(REDUCED_SUM_2SUM_I8) \
+ GEN(REDUCED_SUM_2SUM_I16) \
+ GEN(REDUCED_MEAN_DEV_WSUM_I8) \
+ GEN(REDUCED_MEAN_DEV_WSUM_I16) \
+ GEN(REDUCED_MEAN_DEV_I8) \
+ GEN(REDUCED_MEAN_DEV_I16) \
+ GEN(RESCALE_CW_I8) \
+ GEN(RESCALE_CW_I16) \
+ GEN(REDUCED_MEAN_SCALE_WSUM_I8) \
+ GEN(REDUCED_MEAN_SCALE_WSUM_I16) \
+ GEN(RESCALE_CHANNELWISE_I8) \
+ GEN(RESCALE_CHANNELWISE_I16) \
+}
+#endif /* __DRIVERS_MISC_TRINITY_VISION2_PROFILE_H__ */
--
2.25.1
Powered by blists - more mailing lists