[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <20220725065308.2457024-8-jiho.chu@samsung.com>
Date: Mon, 25 Jul 2022 15:53:06 +0900
From: Jiho Chu <jiho.chu@...sung.com>
To: gregkh@...uxfoundation.org, arnd@...db.de,
linux-kernel@...r.kernel.org
Cc: yelini.jeong@...sung.com, myungjoo.ham@...sung.com,
Jiho Chu <jiho.chu@...sung.com>
Subject: [PATCH 7/9] trinity: Add profile module
This patch is for profile module.
The samsung NPU provides internal statistics data,
and it includes memory read/write counts, consumed clock
cycle for each operation. This statistics can be read by
ioctl control command.
Signed-off-by: Jiho Chu <jiho.chu@...sung.com>
Signed-off-by: Yelin Jeong <yelini.jeong@...sung.com>
Signed-off-by: Dongju Chae <dongju.chae@...sung.com>
Signed-off-by: MyungJoo Ham <myungjoo.ham@...sung.com>
---
drivers/misc/trinity/trinity_vision2_drv.c | 467 +++++++++++++++++-
.../misc/trinity/trinity_vision2_profile.h | 324 ++++++++++++
2 files changed, 771 insertions(+), 20 deletions(-)
create mode 100644 drivers/misc/trinity/trinity_vision2_profile.h
diff --git a/drivers/misc/trinity/trinity_vision2_drv.c b/drivers/misc/trinity/trinity_vision2_drv.c
index ddc1739afdd8..539eadeca09d 100644
--- a/drivers/misc/trinity/trinity_vision2_drv.c
+++ b/drivers/misc/trinity/trinity_vision2_drv.c
@@ -177,31 +177,154 @@ static int triv2_idu_load(struct trinity_driver *drv, const char *dirpath,
static LIST_HEAD(triv2_driver_list);
static struct hlist_bl_head triv2_model_node_hlist[TRIV2_MODEL_HASH_SIZE];
+static const char * const triv2_op_names[] = TRIV2_FOREACH_OPNAME(TRIV2_GENERATE_OPNAME);
static struct triv2_profile *
triv2_find_profile(const struct trinity_driver *drv, int req_id)
{
- /* find profile */
+ struct triv2_pdata *pdata = TRIV2_DRV_GET_PDATA(drv);
+ unsigned long key = TRIV2_PROFILE_HASH_KEY(req_id);
+ struct triv2_profile *profile = NULL;
+
+ hash_for_each_possible(pdata->prof_htable, profile, hlist, key) {
+ if (profile->req_id == req_id)
+ break;
+ }
- return NULL;
+ return profile;
}
static void triv2_fini_profile(struct trinity_resv_mem *prof_buf)
{
- /* finish profile */
+ if (!prof_buf->vaddr)
+ return;
+
+ trinity_free_from_resv_mem(prof_buf, false);
+ memset(prof_buf, '\x00', sizeof(*prof_buf));
}
static void triv2_init_profile(struct trinity_driver *drv,
unsigned long profile_size)
{
- /* init profile */
+ struct device *dev = drv_to_dev_ptr(drv);
+ struct trinity_resv_mem *prof_buf = TRIV2_DRV_GET_PROF_BUF(drv);
+
+ if (profile_size > 0) {
+ /* allocate profile buffer and enable it */
+ struct iommu_domain *domain;
+ phys_addr_t paddr;
+ int status;
+
+ triv2_fini_profile(prof_buf);
+
+ profile_size = PAGE_ALIGN(profile_size);
+ status = trinity_alloc_from_resv_mem(profile_size, prof_buf,
+ false);
+ if (status < 0) {
+ dev_err(dev,
+ "Couldn't allocate memory for profiling buffer: %d",
+ status);
+ return;
+ }
+
+ domain = iommu_get_domain_for_dev(drv_to_dev_ptr(drv));
+ paddr = trinity_get_paddr(domain, prof_buf->daddr);
+ iowrite32(TRIV2_IDU_ADDR(paddr),
+ trinity_get_iomem_addr(drv->mmreg_vaddr[0],
+ OFFSET_NPU_PROF_ADDR));
+ iowrite32(prof_buf->size,
+ trinity_get_iomem_addr(drv->mmreg_vaddr[0],
+ OFFSET_NPU_PROF_SIZE));
+
+ if (drv->verbose)
+ dev_info(dev, "Profiling enabled (%ld bytes)",
+ profile_size);
+ } else {
+ /* disable profiling */
+ triv2_fini_profile(prof_buf);
+
+ iowrite32(0, trinity_get_iomem_addr(drv->mmreg_vaddr[0],
+ OFFSET_NPU_PROF_ADDR));
+ iowrite32(0, trinity_get_iomem_addr(drv->mmreg_vaddr[0],
+ OFFSET_NPU_PROF_SIZE));
+ if (drv->verbose)
+ dev_info(dev, "Profiling disabled");
+ }
+}
+
+static void triv2_assign_opnames(struct triv2_cmd_profile *cmd)
+{
+ struct triv2_op_profile *ops = cmd->profile_ops;
+ uint32_t i;
+
+ for (i = 0; i < cmd->total_ops; i++)
+ snprintf(ops[i].op_name, TRIV2_MAX_OPNAME, "%s",
+ triv2_op_names[ops[i].opcode]);
}
static int32_t triv2_check_profile(struct trinity_driver *drv,
struct trinity_req *req)
{
- /* check profile */
+ struct triv2_pdata *pdata = TRIV2_DRV_GET_PDATA(drv);
+ struct triv2_req *t_req = TRIV2_GET_REQ(req);
+ struct trinity_resv_mem *profile_buf;
+ struct triv2_cmd_profile *profile_cmd;
+ struct triv2_cmd_profile *profile_cmd_new;
+ struct triv2_profile *profile;
+
+ uint32_t offset = t_req->profile_offset;
+ uint32_t total_ops, total_size;
+
+ profile_buf = TRIV2_DRV_GET_PROF_BUF(drv);
+ if (!profile_buf->vaddr)
+ return 0;
+
+ if (profile_buf->size <= offset) {
+ dev_err(drv_to_dev_ptr(drv),
+ "Invalid profile offset detected: 0x%x", offset);
+ return -EINVAL;
+ }
+
+ profile_cmd = (struct triv2_cmd_profile *)((char *)profile_buf->vaddr +
+ offset);
+ profile_cmd->total_cycles = t_req->total_cycles;
+ total_ops = profile_cmd->total_ops;
+ total_size = sizeof(struct triv2_cmd_profile) +
+ total_ops * sizeof(struct triv2_op_profile);
+
+ profile_cmd_new = vzalloc(total_size);
+ if (!profile_cmd_new)
+ return -ENOMEM;
+
+ mutex_lock(&pdata->prof_lock);
+
+ profile = req->stat->profile;
+ if (profile) {
+ WARN_ON(!profile->data);
+ vfree(profile->data);
+ profile->data = profile_cmd_new;
+ } else {
+ int req_id = req->input.config.req_id;
+ unsigned long key = TRIV2_PROFILE_HASH_KEY(req_id);
+
+ profile = vzalloc(sizeof(struct triv2_profile));
+ if (!profile) {
+ vfree(profile_cmd_new);
+ mutex_unlock(&pdata->prof_lock);
+ return -ENOMEM;
+ }
+ profile->req_id = req_id;
+ profile->data = profile_cmd_new;
+
+ hash_add(pdata->prof_htable, &profile->hlist, key);
+
+ req->stat->profile = profile;
+ }
+ memcpy(profile_cmd_new, profile_cmd, total_size);
+ triv2_assign_opnames(profile_cmd_new);
+
+ mutex_unlock(&pdata->prof_lock);
return 0;
}
@@ -400,6 +523,47 @@ static void triv2_reset(struct trinity_driver *drv)
mutex_unlock(&pdata->drv->lock);
}
+enum triv2_idu_stage {
+ IDU_STAGE_UNKNOWN = 0,
+ IDU_STAGE_WAITING,
+ IDU_STAGE_GET_CMD,
+ IDU_STAGE_RUN_CMD,
+ IDU_STAGE_SWAP_OUT,
+ IDU_STAGE_SWAP_IN,
+ IDU_STAGE_SEND_IRQ,
+};
+
+/**
+ * triv2_run_trigger() - trigger memory-mapped register for inference running
+ */
+static void triv2_run_trigger(const struct trinity_driver *drv, int slot)
+{
+ struct triv2_cmd_info *cmd_info = TRIV2_DRV_GET_CMD_INFO(drv);
+ struct triv2_req *t_req = cmd_info->reqs[slot];
+
+ if (!t_req) {
+ dev_err(drv_to_dev_ptr(drv),
+ "Unable to find the corresponding req");
+ return;
+ }
+
+ if (triv2_sync_segt_entries(drv, t_req) < 0)
+ dev_err(drv_to_dev_ptr(drv),
+ "Unable to sync the segment table");
+
+ /* sync the current bitmap */
+ iowrite32(*cmd_info->bitmap,
+ trinity_get_iomem_addr(drv->mmreg_vaddr[0],
+ OFFSET_NPU_CMD_REQ));
+
+ t_req->req.stat->scheduled = ktime_get();
+ t_req->req.stat->completed = 0;
+ t_req->req.scheduled = true;
+
+ /* trigger the event (we do not assume that IDU always accepts this event) */
+ triv2_wakeup_cp(drv);
+}
+
static void triv2_clear_cmd(struct trinity_driver *drv, struct triv2_req *req,
struct triv2_cmd *cmd)
{
@@ -458,6 +622,128 @@ static void triv2_handle_cmd_done(struct trinity_driver *drv,
complete_all(&req->complete);
}
+/**
+ * triv2_prepare_cmd() - Prepare command info. for the target req before invoking
+ */
+static int32_t triv2_prepare_cmd(struct trinity_driver *drv,
+ struct trinity_req *req, void *sched_data)
+{
+ struct triv2_cmd_info *cmd_info;
+ struct triv2_cmd cmd = { 0 };
+ struct triv2_req *t;
+
+ const struct trinity_model *model = req->model;
+ const struct trinity_input *input = &req->input;
+
+ int32_t slot;
+ struct iommu_domain *domain;
+ phys_addr_t paddr;
+ unsigned long flags;
+
+ /** Note that the program base is not behind iommu */
+ domain = iommu_get_domain_for_dev(drv_to_dev_ptr(drv));
+
+ paddr = trinity_get_paddr(domain, model->import_info.dma_addr);
+ cmd.prog_addr = TRIV2_IDU_ADDR(paddr);
+ cmd.prog_addr += model->config.program_offset_addr;
+ cmd.prog_size = model->config.program_size;
+
+ paddr = trinity_get_paddr(domain, input->import_info.dma_addr);
+ cmd.segt_addr = TRIV2_IDU_ADDR(paddr);
+ cmd.num_visa = model->config.num_visa_insts;
+
+ cmd.priority = input->config.priority;
+ cmd.input_mode = input->config.input_mode;
+ cmd.output_mode = input->config.output_mode;
+
+ /** Find a empty cmd slot in bitmap (need a spin lock) */
+ cmd_info = TRIV2_DRV_GET_CMD_INFO(drv);
+ t = TRIV2_GET_REQ(req);
+
+ spin_lock_irqsave(&cmd_info->lock, flags);
+
+ slot = find_first_zero_bit(cmd_info->bitmap, TRIV2_MAX_CMDSLOTS);
+ if (slot < TRIV2_MAX_CMDSLOTS) {
+ set_bit(slot, cmd_info->bitmap);
+ cmd_info->reqs[slot] = t;
+ t->cmd_slot = slot;
+ }
+
+ spin_unlock_irqrestore(&cmd_info->lock, flags);
+
+ /** Will be retried (rely on platform device's scheduling) */
+ if (slot >= TRIV2_MAX_CMDSLOTS)
+ return -EBUSY;
+
+ cmd.slot = slot;
+ cmd.status = STATUS_CMD_READY;
+
+ memcpy_toio(cmd_info->buf.vaddr + slot * sizeof(struct triv2_cmd), &cmd,
+ sizeof(struct triv2_cmd));
+
+ return slot;
+}
+
+/**
+ * triv2_invoke_req() - Invoke a req on the device. Note that all configurations
+ * required by running should be done before invocation of this function.
+ */
+static int32_t triv2_invoke_req(struct trinity_driver *drv,
+ struct trinity_req *req, void *sched_data)
+{
+ enum trinity_output_mode mode;
+ int32_t slot;
+
+ mode = req->input.config.output_mode;
+ slot = triv2_prepare_cmd(drv, req, sched_data);
+ if (slot < 0)
+ return slot;
+
+ if (mode == TRINITY_OUTPUT_HW || mode == TRINITY_OUTPUT_CPU_POLL ||
+ mode == TRINITY_OUTPUT_CPU_INTR) {
+ triv2_run_trigger(drv, slot);
+ } else {
+ dev_err(drv_to_dev_ptr(drv), "Invalid output mode: %d\n", mode);
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static struct trinity_req *triv2_alloc_req(struct trinity_driver *drv)
+{
+ struct triv2_req *t_req;
+
+ t_req = kzalloc(sizeof(struct triv2_req), GFP_KERNEL);
+ if (!t_req)
+ return NULL;
+
+ t_req->cmd_slot = -1;
+
+ return &(t_req->req);
+}
+
+static void triv2_dealloc_req(struct trinity_driver *drv,
+ struct trinity_req *req)
+{
+ struct triv2_req *t_req = TRIV2_GET_REQ(req);
+
+ if (t_req->seg_import) {
+ struct trinity_hwmem_import *import;
+ uint32_t i;
+
+ for (i = 0; i < req->input.config.num_segments; i++) {
+ import = &(t_req->seg_import[i]);
+ if (import->addr)
+ trinity_hwmem_import_dmabuf_end(import);
+ }
+ kfree(t_req->seg_import);
+ }
+
+ kfree(t_req->kernel);
+ kfree(t_req);
+}
+
static void triv2_handle_timeout(struct trinity_driver *drv,
struct trinity_req *req)
{
@@ -494,6 +780,156 @@ static void triv2_stop_reqs(struct work_struct *work)
triv2_cancel_reqs(drv);
}
+/**
+ * triv2_get_profile_meta() - get profile metadata for the target req
+ */
+static int32_t triv2_get_profile_meta(const struct trinity_driver *drv,
+ struct trinity_ioctl_profile_meta *meta)
+{
+ struct triv2_pdata *pdata = TRIV2_DRV_GET_PDATA(drv);
+ struct triv2_profile *profile;
+ struct triv2_cmd_profile *profile_data;
+ int ret = 0;
+
+ mutex_lock(&pdata->prof_lock);
+
+ profile = triv2_find_profile(drv, meta->req_id);
+ if (!profile) {
+ ret = -ENOENT;
+ goto out;
+ }
+
+ profile_data = profile->data;
+ WARN_ON(!profile_data);
+
+ meta->total_cycles = profile_data->total_cycles;
+ meta->total_ops = profile_data->total_ops;
+ meta->profile_size =
+ profile_data->total_ops * sizeof(struct triv2_op_profile);
+ /* unsupported for now */
+ meta->input_footprint = -1;
+ meta->output_footprint = -1;
+
+out:
+ mutex_unlock(&pdata->prof_lock);
+
+ return ret;
+}
+
+/**
+ * triv2_get_profile_buff() - get profile buffer for the target req
+ */
+static int32_t triv2_get_profile_buff(const struct trinity_driver *drv,
+ struct trinity_ioctl_profile_buff *buff)
+{
+ struct triv2_pdata *pdata = TRIV2_DRV_GET_PDATA(drv);
+ struct triv2_profile *profile;
+ struct triv2_cmd_profile *profile_data;
+ uint32_t total_size;
+ int ret = 0;
+
+ mutex_lock(&pdata->prof_lock);
+
+ profile = triv2_find_profile(drv, buff->req_id);
+ if (!profile) {
+ ret = -ENOENT;
+ goto out;
+ }
+
+ profile_data = profile->data;
+ WARN_ON(!profile_data);
+
+ profile_data = profile->data;
+ total_size = profile_data->total_ops * sizeof(struct triv2_op_profile);
+
+ if (buff->profile_pos + buff->profile_size > total_size) {
+ dev_err(drv_to_dev_ptr(drv),
+ "Profile data out-of-range! pos(%u) size(%u) > total_size(%u)",
+ buff->profile_pos, buff->profile_size, total_size);
+ ret = -ERANGE;
+ goto out;
+ }
+
+ /* consider partial memory copies */
+ if (copy_to_user((char __user *)buff->profile_buf,
+ (char *)profile_data->profile_ops + buff->profile_pos,
+ buff->profile_size))
+ ret = -EACCES;
+
+out:
+ mutex_unlock(&pdata->prof_lock);
+
+ return ret;
+}
+
+static void triv2_show_profile(const struct trinity_driver *drv, int req_id)
+{
+ struct device *dev = drv_to_dev_ptr(drv);
+ struct triv2_pdata *pdata = TRIV2_DRV_GET_PDATA(drv);
+ struct triv2_profile *profile;
+ struct triv2_cmd_profile *profile_data;
+ uint32_t i;
+
+ mutex_lock(&pdata->prof_lock);
+
+ profile = triv2_find_profile(drv, req_id);
+ if (!profile) {
+ dev_warn(dev, "Unable to find the profile data (req_id %d)",
+ req_id);
+ goto out;
+ }
+
+ profile_data = profile->data;
+ WARN_ON(!profile_data);
+
+ dev_info(dev, "Total cycles: %lld", profile_data->total_cycles);
+ dev_info(dev, "Total ops: %u", profile_data->total_ops);
+
+ for (i = 0; i < profile_data->total_ops; i++) {
+ struct triv2_op_profile *op = &profile_data->profile_ops[i];
+
+ dev_info(dev, "[%u] opcode: %u name:%s", i, op->opcode,
+ op->op_name);
+ dev_info(dev, "\tcycles: %lld", op->cycles);
+ dev_info(dev, "\tprog_seq: %lld", op->prog_seq);
+ dev_info(dev, "\texec_seq: %lld", op->exec_seq);
+ if (op->dram_read > 0)
+ dev_info(dev, "\tdram_read: %lld", op->dram_read);
+ if (op->dram_write > 0)
+ dev_info(dev, "\tdram_write: %lld", op->dram_write);
+ if (op->sram_read > 0)
+ dev_info(dev, "\tsram_read: %lld", op->sram_read);
+ if (op->sram_write > 0)
+ dev_info(dev, "\tsram_write: %lld", op->sram_write);
+ }
+out:
+ mutex_unlock(&pdata->prof_lock);
+}
+
+/**
+ * triv2_destroy_profile() - destroy profile data
+ */
+static void triv2_destroy_profile(const struct trinity_driver *drv, void *data)
+{
+ struct triv2_pdata *pdata = TRIV2_DRV_GET_PDATA(drv);
+ struct triv2_profile *profile = data;
+ struct triv2_cmd_profile *profile_data;
+
+ if (!profile)
+ return;
+
+ mutex_lock(&pdata->prof_lock);
+
+ profile_data = profile->data;
+ WARN_ON(!profile_data);
+ vfree(profile_data);
+
+ hash_del(&profile->hlist);
+ vfree(profile);
+
+ mutex_unlock(&pdata->prof_lock);
+}
+
static void triv2_handle_irq_cmds(struct trinity_driver *drv)
{
struct triv2_cmd_info *info;
@@ -667,18 +1103,6 @@ static int32_t triv2_prepare_req(struct trinity_driver *drv,
return ret;
}
-/**
- * triv2_invoke_req() - Invoke a req on the device. Note that all configurations
- * required by running should be done before invocation of this function.
- */
-static int32_t triv2_invoke_req(struct trinity_driver *drv,
- struct trinity_req *req, void *sched_data)
-{
- /* invoke request */
-
- return 0;
-}
-
static long triv2_ioctl(struct file *f, unsigned int cmd, unsigned long arg)
{
struct trinity_driver *drv = f->private_data;
@@ -740,13 +1164,16 @@ static void triv2_setup_dsp(struct trinity_driver *drv, phys_addr_t paddr)
static void triv2_init_common(void)
{
- static bool done;
+ static bool need_init = true;
+ int i;
- if (done)
+ if (!need_init)
return;
/* init hlists */
- done = true;
+ for (i = 0; i < TRIV2_MODEL_HASH_SIZE; ++i)
+ INIT_HLIST_BL_HEAD(&triv2_model_node_hlist[i]);
+ need_init = false;
}
static int triv2_idu_alloc(struct device *dev, struct trinity_resv_mem *mem)
diff --git a/drivers/misc/trinity/trinity_vision2_profile.h b/drivers/misc/trinity/trinity_vision2_profile.h
new file mode 100644
index 000000000000..90b42cf56c54
--- /dev/null
+++ b/drivers/misc/trinity/trinity_vision2_profile.h
@@ -0,0 +1,324 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/**
+ * Profile header for TRIV2 devices
+ *
+ * Copyright (C) 2021-2022 Samsung Electronics
+ * Copyright (C) 2021 Dongju Chae <dongju.chae@...sung.com>
+ * Copyright (C) 2022 MyungJoo Ham <myungjoo.ham@...sung.com>
+ * Copyright (C) 2022 Yelin Jeong <yelini.jeong@...sung.com>
+ * Copyright (C) 2022 Jiho Chu <jiho.chu@...sung.com>
+ */
+
+#ifndef __TRINITY_VISION2_PROFILE_H__
+#define __TRINITY_VISION2_PROFILE_H__
+
+#include <linux/types.h>
+
+#define TRIV2_MAX_OPNAME (128)
+#define TRIV2_MAX_PROFILE_SIZE (256)
+
+/**
+ * struct triv2_op_profile - A profile data per operation
+ *
+ * @op_name: The physical DMA address of this DMA buffer.
+ * @cycles: total number of cycles
+ * @dram_read: a count for dram read
+ * @dram_write: a count for dram write
+ * @sram_read: a count for sram read
+ * @sram_write: a count for sram write
+ * @start_cycles: a count for starting cycles
+ * @end_cycles: a cont for ending cycles
+ * @opcode: operation code
+ * @prog_seq: program sequence number
+ * @exec_seq: execution sequence number
+ * @reserved: reserved
+ */
+struct triv2_op_profile {
+ union {
+ struct {
+ char op_name[TRIV2_MAX_OPNAME];
+
+ int64_t cycles;
+
+ int64_t dram_read;
+ int64_t dram_write;
+
+ int64_t sram_read;
+ int64_t sram_write;
+
+ int64_t start_cycles;
+ int64_t end_cycles;
+
+ uint32_t opcode;
+ int64_t prog_seq;
+ int64_t exec_seq;
+ } __packed;
+ uint8_t reserved[TRIV2_MAX_PROFILE_SIZE];
+ };
+};
+
+/**
+ * struct triv2_cmd_profile - A profile data per command
+ *
+ * @total_cycles: total number of cycles for a command
+ * @total_ops: total operations of command
+ * @profile_ops: list of profile data for operations
+ */
+struct triv2_cmd_profile {
+ int64_t total_cycles;
+ uint32_t total_ops;
+ /* zero-length array */
+ struct triv2_op_profile profile_ops[];
+} __packed;
+
+/**
+ * struct triv2_profile - A profile data
+ *
+ * @req_id: total number of cycles for a command
+ * @hlist: list of profile data
+ * @data: command profile data
+ */
+struct triv2_profile {
+ int req_id;
+ struct hlist_node hlist;
+ struct triv2_cmd_profile *data;
+};
+
+enum {
+ NOP = 0x00,
+ HALT = 0x01,
+ ADMA_IN = 0x02,
+ ADMA_OUT = 0x03,
+ RESCALE_I8 = 0x04,
+ RESCALE_I16 = 0x05,
+ CONVERT_I16_I8 = 0x06,
+ CONVERT_I8_I16 = 0x07,
+ RELUN_I8 = 0x08,
+ RELUN_I16 = 0x09,
+ PRELU_I8 = 0x0A,
+ PRELU_I16 = 0x0B,
+ ADD_I8 = 0x0C,
+ ADD_I16 = 0x0D,
+ REDUCE_MEAN_I8 = 0x0E,
+ REDUCE_MEAN_I16 = 0x0F,
+ MAX_POOL_I8 = 0x10,
+ MAX_POOL_I16 = 0x11,
+ AVG_POOL_I8 = 0x12,
+ AVG_POOL_I16 = 0x13,
+ CONV_I8 = 0x14,
+ CONV_I16 = 0x15,
+ CONVE_I8 = 0x16,
+ CONVE_I16 = 0x17,
+ TCONV_I8 = 0x18,
+ TCONV_I16 = 0x19,
+ MUL_I8 = 0x1A,
+ MUL_I16 = 0x1B,
+ DCONV_I8 = 0x1C,
+ DCONV_I16 = 0x1D,
+ DCONVE_I8 = 0x1E,
+ DCONVE_I16 = 0x1F,
+ CONV_I8_P = 0x20,
+ CONV_I16_P = 0x21,
+ PDMA_IN = 0x40,
+ PDMA_OUT = 0x41,
+ ARGMAX_I8 = 0x42,
+ ARGMAX_I16 = 0x43,
+ RESHAPE_I8 = 0x44,
+ RESHAPE_I16 = 0x45,
+ TRANSPOSE_I8 = 0x46,
+ TRANSPOSE_I16 = 0x47,
+ CONCAT_I8 = 0x48,
+ CONCAT_I16 = 0x49,
+ PAD_I8 = 0x4A,
+ PAD_I16 = 0x4B,
+ STRIDED_SLICE_I8 = 0x4C,
+ STRIDED_SLICE_I16 = 0x4D,
+ CONVERT_FORMAT_I8 = 0x4E,
+ CONVERT_FORMAT_I16 = 0x4F,
+ SIGMOID_I8 = 0x50,
+ SIGMOID_I16 = 0x51,
+ TANH_I8 = 0x52,
+ TANH_I16 = 0x53,
+ ELU_I8 = 0x54,
+ ELU_I16 = 0x55,
+ FLOOR_I8 = 0x56,
+ FLOOR_I16 = 0x57,
+ RSQRT_I8 = 0x58,
+ RSQRT_I16 = 0x59,
+ SQRT_I8 = 0x5A,
+ SQRT_I16 = 0x5B,
+ SOFTMAX_I8 = 0x5C,
+ SOFTMAX_I16 = 0x5D,
+ DIVIDE_I8 = 0x60,
+ DIVIDE_I16 = 0x61,
+ FLOORDIV_I8 = 0x62,
+ FLOORDIV_I16 = 0x63,
+ LOGICAL_OR_I8 = 0x64,
+ LOGICAL_OR_I16 = 0x65,
+ GREATER_I8 = 0x66,
+ GREATER_I16 = 0x67,
+ GREATER_EQUAL_I8 = 0x68,
+ GREATER_EQUAL_I16 = 0x69,
+ POW_I8 = 0x6A,
+ POW_I16 = 0x6B,
+ EXP_I8 = 0x6C,
+ EXP_I16 = 0x6D,
+ NOT_EQUAL_I8 = 0x6E,
+ NOT_EQUAL_I16 = 0x6F,
+ BATCH_TO_SPACE_I8 = 0x70,
+ BATCH_TO_SPACE_I16 = 0x71,
+ SPACE_TO_BATCH_I8 = 0x72,
+ SPACE_TO_BATCH_I16 = 0x73,
+ DEPTH_TO_SPACE_I8 = 0x74,
+ DEPTH_TO_SPACE_I16 = 0x75,
+ SPACE_TO_DEPTH_I8 = 0x76,
+ SPACE_TO_DEPTH_I16 = 0x77,
+ YUV_TO_RGB_I8 = 0x7A,
+ YUV_TO_RGB_I16 = 0x7B,
+ RESIZE_BILINEAR_I8 = 0x7C,
+ RESIZE_BILINEAR_I16 = 0x7D,
+ RESIZE_NEAREST_NEIGHBOR_I8 = 0x7E,
+ RESIZE_NEAREST_NEIGHBOR_I16 = 0x7F,
+ LOCAL_RESPONSE_NORM_I8 = 0x80,
+ LOCAL_RESPONSE_NORM_I16 = 0x81,
+ INSTANCE_NORM_I8 = 0x82,
+ INSTANCE_NORM_I16 = 0x83,
+ REDUCED_SUM_SSUM_I8 = 0x84,
+ REDUCED_SUM_SSUM_I16 = 0x85,
+ REDUCED_SUM_SSUM_ACC_I8 = 0x86,
+ REDUCED_SUM_SSUM_ACC_I16 = 0x87,
+ REDUCED_SUM_2SUM_I8 = 0x88,
+ REDUCED_SUM_2SUM_I16 = 0x89,
+ REDUCED_MEAN_DEV_WSUM_I8 = 0x8A,
+ REDUCED_MEAN_DEV_WSUM_I16 = 0x8B,
+ REDUCED_MEAN_DEV_I8 = 0x8C,
+ REDUCED_MEAN_DEV_I16 = 0x8D,
+ RESCALE_CW_I8 = 0x8E,
+ RESCALE_CW_I16 = 0x8F,
+ REDUCED_MEAN_SCALE_WSUM_I8 = 0x90,
+ REDUCED_MEAN_SCALE_WSUM_I16 = 0x91,
+ RESCALE_CHANNELWISE_I8 = 0x92,
+ RESCALE_CHANNELWISE_I16 = 0x93,
+};
+
+/** generate opnames */
+#define TRIV2_GENERATE_OPNAME(OPNAME) \
+ [OPNAME] = #OPNAME,
+
+#define TRIV2_FOREACH_OPNAME(GEN) {\
+ GEN(NOP) \
+ GEN(HALT) \
+ GEN(ADMA_IN) \
+ GEN(ADMA_OUT) \
+ GEN(RESCALE_I8) \
+ GEN(RESCALE_I16) \
+ GEN(CONVERT_I16_I8) \
+ GEN(CONVERT_I8_I16) \
+ GEN(RELUN_I8) \
+ GEN(RELUN_I16) \
+ GEN(PRELU_I8) \
+ GEN(PRELU_I16) \
+ GEN(ADD_I8) \
+ GEN(ADD_I16) \
+ GEN(REDUCE_MEAN_I8) \
+ GEN(REDUCE_MEAN_I16) \
+ GEN(MAX_POOL_I8) \
+ GEN(MAX_POOL_I16) \
+ GEN(AVG_POOL_I8) \
+ GEN(AVG_POOL_I16) \
+ GEN(CONV_I8) \
+ GEN(CONV_I16) \
+ GEN(CONVE_I8) \
+ GEN(CONVE_I16) \
+ GEN(TCONV_I8) \
+ GEN(TCONV_I16) \
+ GEN(MUL_I8) \
+ GEN(MUL_I16) \
+ GEN(DCONV_I8) \
+ GEN(DCONV_I16) \
+ GEN(DCONVE_I8) \
+ GEN(DCONVE_I16) \
+ GEN(CONV_I8_P) \
+ GEN(CONV_I16_P) \
+ GEN(PDMA_IN) \
+ GEN(PDMA_OUT) \
+ GEN(ARGMAX_I8) \
+ GEN(ARGMAX_I16) \
+ GEN(RESHAPE_I8) \
+ GEN(RESHAPE_I16) \
+ GEN(TRANSPOSE_I8) \
+ GEN(TRANSPOSE_I16) \
+ GEN(CONCAT_I8) \
+ GEN(CONCAT_I16) \
+ GEN(PAD_I8) \
+ GEN(PAD_I16) \
+ GEN(STRIDED_SLICE_I8) \
+ GEN(STRIDED_SLICE_I16) \
+ GEN(CONVERT_FORMAT_I8) \
+ GEN(CONVERT_FORMAT_I16) \
+ GEN(SIGMOID_I8) \
+ GEN(SIGMOID_I16) \
+ GEN(TANH_I8) \
+ GEN(TANH_I16) \
+ GEN(ELU_I8) \
+ GEN(ELU_I16) \
+ GEN(FLOOR_I8) \
+ GEN(FLOOR_I16) \
+ GEN(RSQRT_I8) \
+ GEN(RSQRT_I16) \
+ GEN(SQRT_I8) \
+ GEN(SQRT_I16) \
+ GEN(SOFTMAX_I8) \
+ GEN(SOFTMAX_I16) \
+ GEN(DIVIDE_I8) \
+ GEN(DIVIDE_I16) \
+ GEN(FLOORDIV_I8) \
+ GEN(FLOORDIV_I16) \
+ GEN(LOGICAL_OR_I8) \
+ GEN(LOGICAL_OR_I16) \
+ GEN(GREATER_I8) \
+ GEN(GREATER_I16) \
+ GEN(GREATER_EQUAL_I8) \
+ GEN(GREATER_EQUAL_I16) \
+ GEN(POW_I8) \
+ GEN(POW_I16) \
+ GEN(EXP_I8) \
+ GEN(EXP_I16) \
+ GEN(NOT_EQUAL_I8) \
+ GEN(NOT_EQUAL_I16) \
+ GEN(BATCH_TO_SPACE_I8) \
+ GEN(BATCH_TO_SPACE_I16) \
+ GEN(SPACE_TO_BATCH_I8) \
+ GEN(SPACE_TO_BATCH_I16) \
+ GEN(DEPTH_TO_SPACE_I8) \
+ GEN(DEPTH_TO_SPACE_I16) \
+ GEN(SPACE_TO_DEPTH_I8) \
+ GEN(SPACE_TO_DEPTH_I16) \
+ GEN(YUV_TO_RGB_I8) \
+ GEN(YUV_TO_RGB_I16) \
+ GEN(RESIZE_BILINEAR_I8) \
+ GEN(RESIZE_BILINEAR_I16) \
+ GEN(RESIZE_NEAREST_NEIGHBOR_I8) \
+ GEN(RESIZE_NEAREST_NEIGHBOR_I16) \
+ GEN(LOCAL_RESPONSE_NORM_I8) \
+ GEN(LOCAL_RESPONSE_NORM_I16) \
+ GEN(INSTANCE_NORM_I8) \
+ GEN(INSTANCE_NORM_I16) \
+ GEN(REDUCED_SUM_SSUM_I8) \
+ GEN(REDUCED_SUM_SSUM_I16) \
+ GEN(REDUCED_SUM_SSUM_ACC_I8) \
+ GEN(REDUCED_SUM_SSUM_ACC_I16) \
+ GEN(REDUCED_SUM_2SUM_I8) \
+ GEN(REDUCED_SUM_2SUM_I16) \
+ GEN(REDUCED_MEAN_DEV_WSUM_I8) \
+ GEN(REDUCED_MEAN_DEV_WSUM_I16) \
+ GEN(REDUCED_MEAN_DEV_I8) \
+ GEN(REDUCED_MEAN_DEV_I16) \
+ GEN(RESCALE_CW_I8) \
+ GEN(RESCALE_CW_I16) \
+ GEN(REDUCED_MEAN_SCALE_WSUM_I8) \
+ GEN(REDUCED_MEAN_SCALE_WSUM_I16) \
+ GEN(RESCALE_CHANNELWISE_I8) \
+ GEN(RESCALE_CHANNELWISE_I16) \
+}
+#endif
--
2.25.1
Powered by blists - more mailing lists