lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [thread-next>] [day] [month] [year] [list]
Message-ID: <20190701083604.24528-1-ttayar@habana.ai>
Date:   Mon, 1 Jul 2019 08:36:16 +0000
From:   Tomer Tayar <ttayar@...ana.ai>
To:     "oded.gabbay@...il.com" <oded.gabbay@...il.com>
CC:     "linux-kernel@...r.kernel.org" <linux-kernel@...r.kernel.org>
Subject: [PATCH 1/2] habanalabs: Add debugfs node for engines status

Command submissions sent to the device are composed of command buffers
which are targeted to different device engines, like DMA and compute
entities. When a command submission gets stuck, knowing which engine it
is stuck in is crucial for debugging.
This patch adds a debugfs node that exports this information by
displaying the various engine registers that make up their idle/busy
status.

The information retrieval is based on the is_device_idle ASIC function.
The printout in this function of the first detected busy engine is
removed because it becomes redundant given the more detailed
information provided by the new debugfs node.

The patch also updates the device idle check:
- Also read the DMA core status register, because it is possible that
  QMAN has finished its work but the DMA itself is still running.
- Remove the MME shadow status check, as the MME ARCH status register
  includes the status of all MME shadows.

Signed-off-by: Tomer Tayar <ttayar@...ana.ai>
---
 drivers/misc/habanalabs/debugfs.c             |  12 +
 drivers/misc/habanalabs/goya/goya.c           | 127 ++++--
 drivers/misc/habanalabs/habanalabs.h          |   8 +-
 drivers/misc/habanalabs/habanalabs_ioctl.c    |   2 +-
 .../include/goya/asic_reg/dma_ch_0_masks.h    | 418 ++++++++++++++++++
 .../include/goya/asic_reg/goya_regs.h         |   1 +
 6 files changed, 517 insertions(+), 51 deletions(-)
 create mode 100644 drivers/misc/habanalabs/include/goya/asic_reg/dma_ch_0_masks.h

diff --git a/drivers/misc/habanalabs/debugfs.c b/drivers/misc/habanalabs/debugfs.c
index 17974919b760..6a5dfb14eca1 100644
--- a/drivers/misc/habanalabs/debugfs.c
+++ b/drivers/misc/habanalabs/debugfs.c
@@ -500,6 +500,17 @@ static ssize_t mmu_write(struct file *file, const char __user *buf,
 	return -EINVAL;
 }
 
+static int engines_show(struct seq_file *s, void *data)
+{
+	struct hl_debugfs_entry *entry = s->private;
+	struct hl_dbg_device_entry *dev_entry = entry->dev_entry;
+	struct hl_device *hdev = dev_entry->hdev;
+
+	hdev->asic_funcs->is_device_idle(hdev, s);
+
+	return 0;
+}
+
 static bool hl_is_device_va(struct hl_device *hdev, u64 addr)
 {
 	struct asic_fixed_properties *prop = &hdev->asic_prop;
@@ -893,6 +904,7 @@ static const struct hl_info_list hl_debugfs_list[] = {
 	{"userptr", userptr_show, NULL},
 	{"vm", vm_show, NULL},
 	{"mmu", mmu_show, mmu_write},
+	{"engines", engines_show, NULL}
 };
 
 static int hl_debugfs_open(struct inode *inode, struct file *file)
diff --git a/drivers/misc/habanalabs/goya/goya.c b/drivers/misc/habanalabs/goya/goya.c
index ce127a6f606f..41e97531f300 100644
--- a/drivers/misc/habanalabs/goya/goya.c
+++ b/drivers/misc/habanalabs/goya/goya.c
@@ -15,6 +15,7 @@
 #include <linux/hwmon.h>
 #include <linux/io-64-nonatomic-lo-hi.h>
 #include <linux/iommu.h>
+#include <linux/seq_file.h>
 
 /*
  * GOYA security scheme:
@@ -90,6 +91,30 @@
 #define GOYA_CB_POOL_CB_CNT		512
 #define GOYA_CB_POOL_CB_SIZE		0x20000		/* 128KB */
 
+#define IS_QM_IDLE(engine, qm_glbl_sts0) \
+	(((qm_glbl_sts0) & engine##_QM_IDLE_MASK) == engine##_QM_IDLE_MASK)
+#define IS_DMA_QM_IDLE(qm_glbl_sts0)	IS_QM_IDLE(DMA, qm_glbl_sts0)
+#define IS_TPC_QM_IDLE(qm_glbl_sts0)	IS_QM_IDLE(TPC, qm_glbl_sts0)
+#define IS_MME_QM_IDLE(qm_glbl_sts0)	IS_QM_IDLE(MME, qm_glbl_sts0)
+
+#define IS_CMDQ_IDLE(engine, cmdq_glbl_sts0) \
+	(((cmdq_glbl_sts0) & engine##_CMDQ_IDLE_MASK) == \
+			engine##_CMDQ_IDLE_MASK)
+#define IS_TPC_CMDQ_IDLE(cmdq_glbl_sts0) \
+	IS_CMDQ_IDLE(TPC, cmdq_glbl_sts0)
+#define IS_MME_CMDQ_IDLE(cmdq_glbl_sts0) \
+	IS_CMDQ_IDLE(MME, cmdq_glbl_sts0)
+
+#define IS_DMA_IDLE(dma_core_sts0) \
+	(!((dma_core_sts0) & DMA_CH_0_STS0_DMA_BUSY_MASK))
+
+#define IS_TPC_IDLE(tpc_cfg_sts) \
+	(((tpc_cfg_sts) & TPC_CFG_IDLE_MASK) == TPC_CFG_IDLE_MASK)
+
+#define IS_MME_IDLE(mme_arch_sts) \
+	(((mme_arch_sts) & MME_ARCH_IDLE_MASK) == MME_ARCH_IDLE_MASK)
+
+
 static const char goya_irq_name[GOYA_MSIX_ENTRIES][GOYA_MAX_STRING_LEN] = {
 		"goya cq 0", "goya cq 1", "goya cq 2", "goya cq 3",
 		"goya cq 4", "goya cpu eq"
@@ -2796,7 +2821,6 @@ static int goya_send_job_on_qman0(struct hl_device *hdev, struct hl_cs_job *job)
 	dma_addr_t fence_dma_addr;
 	struct hl_cb *cb;
 	u32 tmp, timeout;
-	char buf[16] = {};
 	int rc;
 
 	if (hdev->pldm)
@@ -2804,10 +2828,9 @@ static int goya_send_job_on_qman0(struct hl_device *hdev, struct hl_cs_job *job)
 	else
 		timeout = HL_DEVICE_TIMEOUT_USEC;
 
-	if (!hdev->asic_funcs->is_device_idle(hdev, buf, sizeof(buf))) {
+	if (!hdev->asic_funcs->is_device_idle(hdev, NULL)) {
 		dev_err_ratelimited(hdev->dev,
-			"Can't send KMD job on QMAN0 because %s is busy\n",
-			buf);
+			"Can't send KMD job on QMAN0 because the device is not idle\n");
 		return -EBUSY;
 	}
 
@@ -4891,57 +4914,75 @@ int goya_armcp_info_get(struct hl_device *hdev)
 	return 0;
 }
 
-static bool goya_is_device_idle(struct hl_device *hdev, char *buf, size_t size)
+static bool goya_is_device_idle(struct hl_device *hdev, struct seq_file *s)
 {
-	u64 offset, dma_qm_reg, tpc_qm_reg, tpc_cmdq_reg, tpc_cfg_reg;
+	const char *fmt = "%-5d%-9s%#-14x%#-16x%#x\n";
+	const char *dma_fmt = "%-5d%-9s%#-14x%#x\n";
+	u32 qm_glbl_sts0, cmdq_glbl_sts0, dma_core_sts0, tpc_cfg_sts,
+		mme_arch_sts;
+	bool is_idle = true, is_eng_idle;
+	u64 offset;
 	int i;
 
+	if (s)
+		seq_puts(s, "\nDMA  is_idle  QM_GLBL_STS0  DMA_CORE_STS0\n"
+				"---  -------  ------------  -------------\n");
+
 	offset = mmDMA_QM_1_GLBL_STS0 - mmDMA_QM_0_GLBL_STS0;
 
 	for (i = 0 ; i < DMA_MAX_NUM ; i++) {
-		dma_qm_reg = mmDMA_QM_0_GLBL_STS0 + i * offset;
+		qm_glbl_sts0 = RREG32(mmDMA_QM_0_GLBL_STS0 + i * offset);
+		dma_core_sts0 = RREG32(mmDMA_CH_0_STS0 + i * offset);
+		is_eng_idle = IS_DMA_QM_IDLE(qm_glbl_sts0) &&
+				IS_DMA_IDLE(dma_core_sts0);
+		is_idle &= is_eng_idle;
 
-		if ((RREG32(dma_qm_reg) & DMA_QM_IDLE_MASK) !=
-				DMA_QM_IDLE_MASK)
-			return HL_ENG_BUSY(buf, size, "DMA%d_QM", i);
+		if (s)
+			seq_printf(s, dma_fmt, i, is_eng_idle ? "Y" : "N",
+					qm_glbl_sts0, dma_core_sts0);
 	}
 
+	if (s)
+		seq_puts(s,
+			"\nTPC  is_idle  QM_GLBL_STS0  CMDQ_GLBL_STS0  CFG_STATUS\n"
+			"---  -------  ------------  --------------  ----------\n");
+
 	offset = mmTPC1_QM_GLBL_STS0 - mmTPC0_QM_GLBL_STS0;
 
 	for (i = 0 ; i < TPC_MAX_NUM ; i++) {
-		tpc_qm_reg = mmTPC0_QM_GLBL_STS0 + i * offset;
-		tpc_cmdq_reg = mmTPC0_CMDQ_GLBL_STS0 + i * offset;
-		tpc_cfg_reg = mmTPC0_CFG_STATUS + i * offset;
-
-		if ((RREG32(tpc_qm_reg) & TPC_QM_IDLE_MASK) !=
-				TPC_QM_IDLE_MASK)
-			return HL_ENG_BUSY(buf, size, "TPC%d_QM", i);
-
-		if ((RREG32(tpc_cmdq_reg) & TPC_CMDQ_IDLE_MASK) !=
-				TPC_CMDQ_IDLE_MASK)
-			return HL_ENG_BUSY(buf, size, "TPC%d_CMDQ", i);
-
-		if ((RREG32(tpc_cfg_reg) & TPC_CFG_IDLE_MASK) !=
-				TPC_CFG_IDLE_MASK)
-			return HL_ENG_BUSY(buf, size, "TPC%d_CFG", i);
-	}
-
-	if ((RREG32(mmMME_QM_GLBL_STS0) & MME_QM_IDLE_MASK) !=
-			MME_QM_IDLE_MASK)
-		return HL_ENG_BUSY(buf, size, "MME_QM");
-
-	if ((RREG32(mmMME_CMDQ_GLBL_STS0) & MME_CMDQ_IDLE_MASK) !=
-			MME_CMDQ_IDLE_MASK)
-		return HL_ENG_BUSY(buf, size, "MME_CMDQ");
-
-	if ((RREG32(mmMME_ARCH_STATUS) & MME_ARCH_IDLE_MASK) !=
-			MME_ARCH_IDLE_MASK)
-		return HL_ENG_BUSY(buf, size, "MME_ARCH");
-
-	if (RREG32(mmMME_SHADOW_0_STATUS) & MME_SHADOW_IDLE_MASK)
-		return HL_ENG_BUSY(buf, size, "MME");
-
-	return true;
+		qm_glbl_sts0 = RREG32(mmTPC0_QM_GLBL_STS0 + i * offset);
+		cmdq_glbl_sts0 = RREG32(mmTPC0_CMDQ_GLBL_STS0 + i * offset);
+		tpc_cfg_sts = RREG32(mmTPC0_CFG_STATUS + i * offset);
+		is_eng_idle = IS_TPC_QM_IDLE(qm_glbl_sts0) &&
+				IS_TPC_CMDQ_IDLE(cmdq_glbl_sts0) &&
+				IS_TPC_IDLE(tpc_cfg_sts);
+		is_idle &= is_eng_idle;
+
+		if (s)
+			seq_printf(s, fmt, i, is_eng_idle ? "Y" : "N",
+				qm_glbl_sts0, cmdq_glbl_sts0, tpc_cfg_sts);
+	}
+
+	if (s)
+		seq_puts(s,
+			"\nMME  is_idle  QM_GLBL_STS0  CMDQ_GLBL_STS0  ARCH_STATUS\n"
+			"---  -------  ------------  --------------  -----------\n");
+
+	qm_glbl_sts0 = RREG32(mmMME_QM_GLBL_STS0);
+	cmdq_glbl_sts0 = RREG32(mmMME_CMDQ_GLBL_STS0);
+	mme_arch_sts = RREG32(mmMME_ARCH_STATUS);
+	is_eng_idle = IS_MME_QM_IDLE(qm_glbl_sts0) &&
+			IS_MME_CMDQ_IDLE(cmdq_glbl_sts0) &&
+			IS_MME_IDLE(mme_arch_sts);
+	is_idle &= is_eng_idle;
+
+	if (s) {
+		seq_printf(s, fmt, 0, is_eng_idle ? "Y" : "N", qm_glbl_sts0,
+				cmdq_glbl_sts0, mme_arch_sts);
+		seq_puts(s, "\n");
+	}
+
+	return is_idle;
 }
 
 static void goya_hw_queues_lock(struct hl_device *hdev)
diff --git a/drivers/misc/habanalabs/habanalabs.h b/drivers/misc/habanalabs/habanalabs.h
index 5e4a631b3d88..2c9ea61099b4 100644
--- a/drivers/misc/habanalabs/habanalabs.h
+++ b/drivers/misc/habanalabs/habanalabs.h
@@ -557,7 +557,7 @@ struct hl_asic_funcs {
 			u32 asid, u64 va, u64 size);
 	int (*send_heartbeat)(struct hl_device *hdev);
 	int (*debug_coresight)(struct hl_device *hdev, void *data);
-	bool (*is_device_idle)(struct hl_device *hdev, char *buf, size_t size);
+	bool (*is_device_idle)(struct hl_device *hdev, struct seq_file *s);
 	int (*soft_reset_late_init)(struct hl_device *hdev);
 	void (*hw_queues_lock)(struct hl_device *hdev);
 	void (*hw_queues_unlock)(struct hl_device *hdev);
@@ -1112,12 +1112,6 @@ void hl_wreg(struct hl_device *hdev, u32 reg, u32 val);
 	(cond) ? 0 : -ETIMEDOUT; \
 })
 
-#define HL_ENG_BUSY(buf, size, fmt, ...) ({ \
-		if (buf) \
-			snprintf(buf, size, fmt, ##__VA_ARGS__); \
-		false; \
-	})
-
 struct hwmon_chip_info;
 
 /**
diff --git a/drivers/misc/habanalabs/habanalabs_ioctl.c b/drivers/misc/habanalabs/habanalabs_ioctl.c
index c641c7eb6f7c..b04585af27ad 100644
--- a/drivers/misc/habanalabs/habanalabs_ioctl.c
+++ b/drivers/misc/habanalabs/habanalabs_ioctl.c
@@ -119,7 +119,7 @@ static int hw_idle(struct hl_device *hdev, struct hl_info_args *args)
 	if ((!max_size) || (!out))
 		return -EINVAL;
 
-	hw_idle.is_idle = hdev->asic_funcs->is_device_idle(hdev, NULL, 0);
+	hw_idle.is_idle = hdev->asic_funcs->is_device_idle(hdev, NULL);
 
 	return copy_to_user(out, &hw_idle,
 		min((size_t) max_size, sizeof(hw_idle))) ? -EFAULT : 0;
diff --git a/drivers/misc/habanalabs/include/goya/asic_reg/dma_ch_0_masks.h b/drivers/misc/habanalabs/include/goya/asic_reg/dma_ch_0_masks.h
new file mode 100644
index 000000000000..028143408401
--- /dev/null
+++ b/drivers/misc/habanalabs/include/goya/asic_reg/dma_ch_0_masks.h
@@ -0,0 +1,418 @@
+/* SPDX-License-Identifier: GPL-2.0
+ *
+ * Copyright 2016-2018 HabanaLabs, Ltd.
+ * All Rights Reserved.
+ *
+ */
+
+/************************************
+ ** This is an auto-generated file **
+ **       DO NOT EDIT BELOW        **
+ ************************************/
+
+#ifndef ASIC_REG_DMA_CH_0_MASKS_H_
+#define ASIC_REG_DMA_CH_0_MASKS_H_
+
+/*
+ *****************************************
+ *   DMA_CH_0 (Prototype: DMA_CH)
+ *****************************************
+ */
+
+/* DMA_CH_0_CFG0 */
+#define DMA_CH_0_CFG0_RD_MAX_OUTSTAND_SHIFT                          0
+#define DMA_CH_0_CFG0_RD_MAX_OUTSTAND_MASK                           0x3FF
+#define DMA_CH_0_CFG0_WR_MAX_OUTSTAND_SHIFT                          16
+#define DMA_CH_0_CFG0_WR_MAX_OUTSTAND_MASK                           0xFFF0000
+
+/* DMA_CH_0_CFG1 */
+#define DMA_CH_0_CFG1_RD_BUF_MAX_SIZE_SHIFT                          0
+#define DMA_CH_0_CFG1_RD_BUF_MAX_SIZE_MASK                           0x3FF
+
+/* DMA_CH_0_ERRMSG_ADDR_LO */
+#define DMA_CH_0_ERRMSG_ADDR_LO_VAL_SHIFT                            0
+#define DMA_CH_0_ERRMSG_ADDR_LO_VAL_MASK                             0xFFFFFFFF
+
+/* DMA_CH_0_ERRMSG_ADDR_HI */
+#define DMA_CH_0_ERRMSG_ADDR_HI_VAL_SHIFT                            0
+#define DMA_CH_0_ERRMSG_ADDR_HI_VAL_MASK                             0xFFFFFFFF
+
+/* DMA_CH_0_ERRMSG_WDATA */
+#define DMA_CH_0_ERRMSG_WDATA_VAL_SHIFT                              0
+#define DMA_CH_0_ERRMSG_WDATA_VAL_MASK                               0xFFFFFFFF
+
+/* DMA_CH_0_RD_COMP_ADDR_LO */
+#define DMA_CH_0_RD_COMP_ADDR_LO_VAL_SHIFT                           0
+#define DMA_CH_0_RD_COMP_ADDR_LO_VAL_MASK                            0xFFFFFFFF
+
+/* DMA_CH_0_RD_COMP_ADDR_HI */
+#define DMA_CH_0_RD_COMP_ADDR_HI_VAL_SHIFT                           0
+#define DMA_CH_0_RD_COMP_ADDR_HI_VAL_MASK                            0xFFFFFFFF
+
+/* DMA_CH_0_RD_COMP_WDATA */
+#define DMA_CH_0_RD_COMP_WDATA_VAL_SHIFT                             0
+#define DMA_CH_0_RD_COMP_WDATA_VAL_MASK                              0xFFFFFFFF
+
+/* DMA_CH_0_WR_COMP_ADDR_LO */
+#define DMA_CH_0_WR_COMP_ADDR_LO_VAL_SHIFT                           0
+#define DMA_CH_0_WR_COMP_ADDR_LO_VAL_MASK                            0xFFFFFFFF
+
+/* DMA_CH_0_WR_COMP_ADDR_HI */
+#define DMA_CH_0_WR_COMP_ADDR_HI_VAL_SHIFT                           0
+#define DMA_CH_0_WR_COMP_ADDR_HI_VAL_MASK                            0xFFFFFFFF
+
+/* DMA_CH_0_WR_COMP_WDATA */
+#define DMA_CH_0_WR_COMP_WDATA_VAL_SHIFT                             0
+#define DMA_CH_0_WR_COMP_WDATA_VAL_MASK                              0xFFFFFFFF
+
+/* DMA_CH_0_LDMA_SRC_ADDR_LO */
+#define DMA_CH_0_LDMA_SRC_ADDR_LO_VAL_SHIFT                          0
+#define DMA_CH_0_LDMA_SRC_ADDR_LO_VAL_MASK                           0xFFFFFFFF
+
+/* DMA_CH_0_LDMA_SRC_ADDR_HI */
+#define DMA_CH_0_LDMA_SRC_ADDR_HI_VAL_SHIFT                          0
+#define DMA_CH_0_LDMA_SRC_ADDR_HI_VAL_MASK                           0xFFFFFFFF
+
+/* DMA_CH_0_LDMA_DST_ADDR_LO */
+#define DMA_CH_0_LDMA_DST_ADDR_LO_VAL_SHIFT                          0
+#define DMA_CH_0_LDMA_DST_ADDR_LO_VAL_MASK                           0xFFFFFFFF
+
+/* DMA_CH_0_LDMA_DST_ADDR_HI */
+#define DMA_CH_0_LDMA_DST_ADDR_HI_VAL_SHIFT                          0
+#define DMA_CH_0_LDMA_DST_ADDR_HI_VAL_MASK                           0xFFFFFFFF
+
+/* DMA_CH_0_LDMA_TSIZE */
+#define DMA_CH_0_LDMA_TSIZE_VAL_SHIFT                                0
+#define DMA_CH_0_LDMA_TSIZE_VAL_MASK                                 0xFFFFFFFF
+
+/* DMA_CH_0_COMIT_TRANSFER */
+#define DMA_CH_0_COMIT_TRANSFER_PCI_UPS_WKORDR_SHIFT                 0
+#define DMA_CH_0_COMIT_TRANSFER_PCI_UPS_WKORDR_MASK                  0x1
+#define DMA_CH_0_COMIT_TRANSFER_RD_COMP_EN_SHIFT                     1
+#define DMA_CH_0_COMIT_TRANSFER_RD_COMP_EN_MASK                      0x2
+#define DMA_CH_0_COMIT_TRANSFER_WR_COMP_EN_SHIFT                     2
+#define DMA_CH_0_COMIT_TRANSFER_WR_COMP_EN_MASK                      0x4
+#define DMA_CH_0_COMIT_TRANSFER_NOSNOOP_SHIFT                        3
+#define DMA_CH_0_COMIT_TRANSFER_NOSNOOP_MASK                         0x8
+#define DMA_CH_0_COMIT_TRANSFER_SRC_ADDR_INC_DIS_SHIFT               4
+#define DMA_CH_0_COMIT_TRANSFER_SRC_ADDR_INC_DIS_MASK                0x10
+#define DMA_CH_0_COMIT_TRANSFER_DST_ADDR_INC_DIS_SHIFT               5
+#define DMA_CH_0_COMIT_TRANSFER_DST_ADDR_INC_DIS_MASK                0x20
+#define DMA_CH_0_COMIT_TRANSFER_MEM_SET_SHIFT                        6
+#define DMA_CH_0_COMIT_TRANSFER_MEM_SET_MASK                         0x40
+#define DMA_CH_0_COMIT_TRANSFER_MOD_TENSOR_SHIFT                     15
+#define DMA_CH_0_COMIT_TRANSFER_MOD_TENSOR_MASK                      0x8000
+#define DMA_CH_0_COMIT_TRANSFER_CTL_SHIFT                            16
+#define DMA_CH_0_COMIT_TRANSFER_CTL_MASK                             0xFFFF0000
+
+/* DMA_CH_0_STS0 */
+#define DMA_CH_0_STS0_DMA_BUSY_SHIFT                                 0
+#define DMA_CH_0_STS0_DMA_BUSY_MASK                                  0x1
+#define DMA_CH_0_STS0_RD_STS_CTX_FULL_SHIFT                          1
+#define DMA_CH_0_STS0_RD_STS_CTX_FULL_MASK                           0x2
+#define DMA_CH_0_STS0_WR_STS_CTX_FULL_SHIFT                          2
+#define DMA_CH_0_STS0_WR_STS_CTX_FULL_MASK                           0x4
+
+/* DMA_CH_0_STS1 */
+#define DMA_CH_0_STS1_RD_STS_CTX_CNT_SHIFT                           0
+#define DMA_CH_0_STS1_RD_STS_CTX_CNT_MASK                            0xFFFFFFFF
+
+/* DMA_CH_0_STS2 */
+#define DMA_CH_0_STS2_WR_STS_CTX_CNT_SHIFT                           0
+#define DMA_CH_0_STS2_WR_STS_CTX_CNT_MASK                            0xFFFFFFFF
+
+/* DMA_CH_0_STS3 */
+#define DMA_CH_0_STS3_RD_STS_TRN_CNT_SHIFT                           0
+#define DMA_CH_0_STS3_RD_STS_TRN_CNT_MASK                            0xFFFFFFFF
+
+/* DMA_CH_0_STS4 */
+#define DMA_CH_0_STS4_WR_STS_TRN_CNT_SHIFT                           0
+#define DMA_CH_0_STS4_WR_STS_TRN_CNT_MASK                            0xFFFFFFFF
+
+/* DMA_CH_0_SRC_ADDR_LO_STS */
+#define DMA_CH_0_SRC_ADDR_LO_STS_VAL_SHIFT                           0
+#define DMA_CH_0_SRC_ADDR_LO_STS_VAL_MASK                            0xFFFFFFFF
+
+/* DMA_CH_0_SRC_ADDR_HI_STS */
+#define DMA_CH_0_SRC_ADDR_HI_STS_VAL_SHIFT                           0
+#define DMA_CH_0_SRC_ADDR_HI_STS_VAL_MASK                            0xFFFFFFFF
+
+/* DMA_CH_0_SRC_TSIZE_STS */
+#define DMA_CH_0_SRC_TSIZE_STS_VAL_SHIFT                             0
+#define DMA_CH_0_SRC_TSIZE_STS_VAL_MASK                              0xFFFFFFFF
+
+/* DMA_CH_0_DST_ADDR_LO_STS */
+#define DMA_CH_0_DST_ADDR_LO_STS_VAL_SHIFT                           0
+#define DMA_CH_0_DST_ADDR_LO_STS_VAL_MASK                            0xFFFFFFFF
+
+/* DMA_CH_0_DST_ADDR_HI_STS */
+#define DMA_CH_0_DST_ADDR_HI_STS_VAL_SHIFT                           0
+#define DMA_CH_0_DST_ADDR_HI_STS_VAL_MASK                            0xFFFFFFFF
+
+/* DMA_CH_0_DST_TSIZE_STS */
+#define DMA_CH_0_DST_TSIZE_STS_VAL_SHIFT                             0
+#define DMA_CH_0_DST_TSIZE_STS_VAL_MASK                              0xFFFFFFFF
+
+/* DMA_CH_0_RD_RATE_LIM_EN */
+#define DMA_CH_0_RD_RATE_LIM_EN_VAL_SHIFT                            0
+#define DMA_CH_0_RD_RATE_LIM_EN_VAL_MASK                             0x1
+
+/* DMA_CH_0_RD_RATE_LIM_RST_TOKEN */
+#define DMA_CH_0_RD_RATE_LIM_RST_TOKEN_VAL_SHIFT                     0
+#define DMA_CH_0_RD_RATE_LIM_RST_TOKEN_VAL_MASK                      0xFFFF
+
+/* DMA_CH_0_RD_RATE_LIM_SAT */
+#define DMA_CH_0_RD_RATE_LIM_SAT_VAL_SHIFT                           0
+#define DMA_CH_0_RD_RATE_LIM_SAT_VAL_MASK                            0xFFFF
+
+/* DMA_CH_0_RD_RATE_LIM_TOUT */
+#define DMA_CH_0_RD_RATE_LIM_TOUT_VAL_SHIFT                          0
+#define DMA_CH_0_RD_RATE_LIM_TOUT_VAL_MASK                           0x7FFFFFFF
+
+/* DMA_CH_0_WR_RATE_LIM_EN */
+#define DMA_CH_0_WR_RATE_LIM_EN_VAL_SHIFT                            0
+#define DMA_CH_0_WR_RATE_LIM_EN_VAL_MASK                             0x1
+
+/* DMA_CH_0_WR_RATE_LIM_RST_TOKEN */
+#define DMA_CH_0_WR_RATE_LIM_RST_TOKEN_VAL_SHIFT                     0
+#define DMA_CH_0_WR_RATE_LIM_RST_TOKEN_VAL_MASK                      0xFFFF
+
+/* DMA_CH_0_WR_RATE_LIM_SAT */
+#define DMA_CH_0_WR_RATE_LIM_SAT_VAL_SHIFT                           0
+#define DMA_CH_0_WR_RATE_LIM_SAT_VAL_MASK                            0xFFFF
+
+/* DMA_CH_0_WR_RATE_LIM_TOUT */
+#define DMA_CH_0_WR_RATE_LIM_TOUT_VAL_SHIFT                          0
+#define DMA_CH_0_WR_RATE_LIM_TOUT_VAL_MASK                           0x7FFFFFFF
+
+/* DMA_CH_0_CFG2 */
+#define DMA_CH_0_CFG2_FORCE_WORD_SHIFT                               0
+#define DMA_CH_0_CFG2_FORCE_WORD_MASK                                0x1
+
+/* DMA_CH_0_TDMA_CTL */
+#define DMA_CH_0_TDMA_CTL_DTYPE_SHIFT                                0
+#define DMA_CH_0_TDMA_CTL_DTYPE_MASK                                 0x7
+
+/* DMA_CH_0_TDMA_SRC_BASE_ADDR_LO */
+#define DMA_CH_0_TDMA_SRC_BASE_ADDR_LO_VAL_SHIFT                     0
+#define DMA_CH_0_TDMA_SRC_BASE_ADDR_LO_VAL_MASK                      0xFFFFFFFF
+
+/* DMA_CH_0_TDMA_SRC_BASE_ADDR_HI */
+#define DMA_CH_0_TDMA_SRC_BASE_ADDR_HI_VAL_SHIFT                     0
+#define DMA_CH_0_TDMA_SRC_BASE_ADDR_HI_VAL_MASK                      0xFFFFFFFF
+
+/* DMA_CH_0_TDMA_SRC_ROI_BASE_0 */
+#define DMA_CH_0_TDMA_SRC_ROI_BASE_0_VAL_SHIFT                       0
+#define DMA_CH_0_TDMA_SRC_ROI_BASE_0_VAL_MASK                        0xFFFFFFFF
+
+/* DMA_CH_0_TDMA_SRC_ROI_SIZE_0 */
+#define DMA_CH_0_TDMA_SRC_ROI_SIZE_0_VAL_SHIFT                       0
+#define DMA_CH_0_TDMA_SRC_ROI_SIZE_0_VAL_MASK                        0xFFFFFFFF
+
+/* DMA_CH_0_TDMA_SRC_VALID_ELEMENTS_0 */
+#define DMA_CH_0_TDMA_SRC_VALID_ELEMENTS_0_VAL_SHIFT                 0
+#define DMA_CH_0_TDMA_SRC_VALID_ELEMENTS_0_VAL_MASK                  0xFFFFFFFF
+
+/* DMA_CH_0_TDMA_SRC_START_OFFSET_0 */
+#define DMA_CH_0_TDMA_SRC_START_OFFSET_0_VAL_SHIFT                   0
+#define DMA_CH_0_TDMA_SRC_START_OFFSET_0_VAL_MASK                    0xFFFFFFFF
+
+/* DMA_CH_0_TDMA_SRC_STRIDE_0 */
+#define DMA_CH_0_TDMA_SRC_STRIDE_0_VAL_SHIFT                         0
+#define DMA_CH_0_TDMA_SRC_STRIDE_0_VAL_MASK                          0xFFFFFFFF
+
+/* DMA_CH_0_TDMA_SRC_ROI_BASE_1 */
+#define DMA_CH_0_TDMA_SRC_ROI_BASE_1_VAL_SHIFT                       0
+#define DMA_CH_0_TDMA_SRC_ROI_BASE_1_VAL_MASK                        0xFFFFFFFF
+
+/* DMA_CH_0_TDMA_SRC_ROI_SIZE_1 */
+#define DMA_CH_0_TDMA_SRC_ROI_SIZE_1_VAL_SHIFT                       0
+#define DMA_CH_0_TDMA_SRC_ROI_SIZE_1_VAL_MASK                        0xFFFFFFFF
+
+/* DMA_CH_0_TDMA_SRC_VALID_ELEMENTS_1 */
+#define DMA_CH_0_TDMA_SRC_VALID_ELEMENTS_1_VAL_SHIFT                 0
+#define DMA_CH_0_TDMA_SRC_VALID_ELEMENTS_1_VAL_MASK                  0xFFFFFFFF
+
+/* DMA_CH_0_TDMA_SRC_START_OFFSET_1 */
+#define DMA_CH_0_TDMA_SRC_START_OFFSET_1_VAL_SHIFT                   0
+#define DMA_CH_0_TDMA_SRC_START_OFFSET_1_VAL_MASK                    0xFFFFFFFF
+
+/* DMA_CH_0_TDMA_SRC_STRIDE_1 */
+#define DMA_CH_0_TDMA_SRC_STRIDE_1_VAL_SHIFT                         0
+#define DMA_CH_0_TDMA_SRC_STRIDE_1_VAL_MASK                          0xFFFFFFFF
+
+/* DMA_CH_0_TDMA_SRC_ROI_BASE_2 */
+#define DMA_CH_0_TDMA_SRC_ROI_BASE_2_VAL_SHIFT                       0
+#define DMA_CH_0_TDMA_SRC_ROI_BASE_2_VAL_MASK                        0xFFFFFFFF
+
+/* DMA_CH_0_TDMA_SRC_ROI_SIZE_2 */
+#define DMA_CH_0_TDMA_SRC_ROI_SIZE_2_VAL_SHIFT                       0
+#define DMA_CH_0_TDMA_SRC_ROI_SIZE_2_VAL_MASK                        0xFFFFFFFF
+
+/* DMA_CH_0_TDMA_SRC_VALID_ELEMENTS_2 */
+#define DMA_CH_0_TDMA_SRC_VALID_ELEMENTS_2_VAL_SHIFT                 0
+#define DMA_CH_0_TDMA_SRC_VALID_ELEMENTS_2_VAL_MASK                  0xFFFFFFFF
+
+/* DMA_CH_0_TDMA_SRC_START_OFFSET_2 */
+#define DMA_CH_0_TDMA_SRC_START_OFFSET_2_VAL_SHIFT                   0
+#define DMA_CH_0_TDMA_SRC_START_OFFSET_2_VAL_MASK                    0xFFFFFFFF
+
+/* DMA_CH_0_TDMA_SRC_STRIDE_2 */
+#define DMA_CH_0_TDMA_SRC_STRIDE_2_VAL_SHIFT                         0
+#define DMA_CH_0_TDMA_SRC_STRIDE_2_VAL_MASK                          0xFFFFFFFF
+
+/* DMA_CH_0_TDMA_SRC_ROI_BASE_3 */
+#define DMA_CH_0_TDMA_SRC_ROI_BASE_3_VAL_SHIFT                       0
+#define DMA_CH_0_TDMA_SRC_ROI_BASE_3_VAL_MASK                        0xFFFFFFFF
+
+/* DMA_CH_0_TDMA_SRC_ROI_SIZE_3 */
+#define DMA_CH_0_TDMA_SRC_ROI_SIZE_3_VAL_SHIFT                       0
+#define DMA_CH_0_TDMA_SRC_ROI_SIZE_3_VAL_MASK                        0xFFFFFFFF
+
+/* DMA_CH_0_TDMA_SRC_VALID_ELEMENTS_3 */
+#define DMA_CH_0_TDMA_SRC_VALID_ELEMENTS_3_VAL_SHIFT                 0
+#define DMA_CH_0_TDMA_SRC_VALID_ELEMENTS_3_VAL_MASK                  0xFFFFFFFF
+
+/* DMA_CH_0_TDMA_SRC_START_OFFSET_3 */
+#define DMA_CH_0_TDMA_SRC_START_OFFSET_3_VAL_SHIFT                   0
+#define DMA_CH_0_TDMA_SRC_START_OFFSET_3_VAL_MASK                    0xFFFFFFFF
+
+/* DMA_CH_0_TDMA_SRC_STRIDE_3 */
+#define DMA_CH_0_TDMA_SRC_STRIDE_3_VAL_SHIFT                         0
+#define DMA_CH_0_TDMA_SRC_STRIDE_3_VAL_MASK                          0xFFFFFFFF
+
+/* DMA_CH_0_TDMA_SRC_ROI_BASE_4 */
+#define DMA_CH_0_TDMA_SRC_ROI_BASE_4_VAL_SHIFT                       0
+#define DMA_CH_0_TDMA_SRC_ROI_BASE_4_VAL_MASK                        0xFFFFFFFF
+
+/* DMA_CH_0_TDMA_SRC_ROI_SIZE_4 */
+#define DMA_CH_0_TDMA_SRC_ROI_SIZE_4_VAL_SHIFT                       0
+#define DMA_CH_0_TDMA_SRC_ROI_SIZE_4_VAL_MASK                        0xFFFFFFFF
+
+/* DMA_CH_0_TDMA_SRC_VALID_ELEMENTS_4 */
+#define DMA_CH_0_TDMA_SRC_VALID_ELEMENTS_4_VAL_SHIFT                 0
+#define DMA_CH_0_TDMA_SRC_VALID_ELEMENTS_4_VAL_MASK                  0xFFFFFFFF
+
+/* DMA_CH_0_TDMA_SRC_START_OFFSET_4 */
+#define DMA_CH_0_TDMA_SRC_START_OFFSET_4_VAL_SHIFT                   0
+#define DMA_CH_0_TDMA_SRC_START_OFFSET_4_VAL_MASK                    0xFFFFFFFF
+
+/* DMA_CH_0_TDMA_SRC_STRIDE_4 */
+#define DMA_CH_0_TDMA_SRC_STRIDE_4_VAL_SHIFT                         0
+#define DMA_CH_0_TDMA_SRC_STRIDE_4_VAL_MASK                          0xFFFFFFFF
+
+/* DMA_CH_0_TDMA_DST_BASE_ADDR_LO */
+#define DMA_CH_0_TDMA_DST_BASE_ADDR_LO_VAL_SHIFT                     0
+#define DMA_CH_0_TDMA_DST_BASE_ADDR_LO_VAL_MASK                      0xFFFFFFFF
+
+/* DMA_CH_0_TDMA_DST_BASE_ADDR_HI */
+#define DMA_CH_0_TDMA_DST_BASE_ADDR_HI_VAL_SHIFT                     0
+#define DMA_CH_0_TDMA_DST_BASE_ADDR_HI_VAL_MASK                      0xFFFFFFFF
+
+/* DMA_CH_0_TDMA_DST_ROI_BASE_0 */
+#define DMA_CH_0_TDMA_DST_ROI_BASE_0_VAL_SHIFT                       0
+#define DMA_CH_0_TDMA_DST_ROI_BASE_0_VAL_MASK                        0xFFFFFFFF
+
+/* DMA_CH_0_TDMA_DST_ROI_SIZE_0 */
+#define DMA_CH_0_TDMA_DST_ROI_SIZE_0_VAL_SHIFT                       0
+#define DMA_CH_0_TDMA_DST_ROI_SIZE_0_VAL_MASK                        0xFFFFFFFF
+
+/* DMA_CH_0_TDMA_DST_VALID_ELEMENTS_0 */
+#define DMA_CH_0_TDMA_DST_VALID_ELEMENTS_0_VAL_SHIFT                 0
+#define DMA_CH_0_TDMA_DST_VALID_ELEMENTS_0_VAL_MASK                  0xFFFFFFFF
+
+/* DMA_CH_0_TDMA_DST_START_OFFSET_0 */
+#define DMA_CH_0_TDMA_DST_START_OFFSET_0_VAL_SHIFT                   0
+#define DMA_CH_0_TDMA_DST_START_OFFSET_0_VAL_MASK                    0xFFFFFFFF
+
+/* DMA_CH_0_TDMA_DST_STRIDE_0 */
+#define DMA_CH_0_TDMA_DST_STRIDE_0_VAL_SHIFT                         0
+#define DMA_CH_0_TDMA_DST_STRIDE_0_VAL_MASK                          0xFFFFFFFF
+
+/* DMA_CH_0_TDMA_DST_ROI_BASE_1 */
+#define DMA_CH_0_TDMA_DST_ROI_BASE_1_VAL_SHIFT                       0
+#define DMA_CH_0_TDMA_DST_ROI_BASE_1_VAL_MASK                        0xFFFFFFFF
+
+/* DMA_CH_0_TDMA_DST_ROI_SIZE_1 */
+#define DMA_CH_0_TDMA_DST_ROI_SIZE_1_VAL_SHIFT                       0
+#define DMA_CH_0_TDMA_DST_ROI_SIZE_1_VAL_MASK                        0xFFFFFFFF
+
+/* DMA_CH_0_TDMA_DST_VALID_ELEMENTS_1 */
+#define DMA_CH_0_TDMA_DST_VALID_ELEMENTS_1_VAL_SHIFT                 0
+#define DMA_CH_0_TDMA_DST_VALID_ELEMENTS_1_VAL_MASK                  0xFFFFFFFF
+
+/* DMA_CH_0_TDMA_DST_START_OFFSET_1 */
+#define DMA_CH_0_TDMA_DST_START_OFFSET_1_VAL_SHIFT                   0
+#define DMA_CH_0_TDMA_DST_START_OFFSET_1_VAL_MASK                    0xFFFFFFFF
+
+/* DMA_CH_0_TDMA_DST_STRIDE_1 */
+#define DMA_CH_0_TDMA_DST_STRIDE_1_VAL_SHIFT                         0
+#define DMA_CH_0_TDMA_DST_STRIDE_1_VAL_MASK                          0xFFFFFFFF
+
+/* DMA_CH_0_TDMA_DST_ROI_BASE_2 */
+#define DMA_CH_0_TDMA_DST_ROI_BASE_2_VAL_SHIFT                       0
+#define DMA_CH_0_TDMA_DST_ROI_BASE_2_VAL_MASK                        0xFFFFFFFF
+
+/* DMA_CH_0_TDMA_DST_ROI_SIZE_2 */
+#define DMA_CH_0_TDMA_DST_ROI_SIZE_2_VAL_SHIFT                       0
+#define DMA_CH_0_TDMA_DST_ROI_SIZE_2_VAL_MASK                        0xFFFFFFFF
+
+/* DMA_CH_0_TDMA_DST_VALID_ELEMENTS_2 */
+#define DMA_CH_0_TDMA_DST_VALID_ELEMENTS_2_VAL_SHIFT                 0
+#define DMA_CH_0_TDMA_DST_VALID_ELEMENTS_2_VAL_MASK                  0xFFFFFFFF
+
+/* DMA_CH_0_TDMA_DST_START_OFFSET_2 */
+#define DMA_CH_0_TDMA_DST_START_OFFSET_2_VAL_SHIFT                   0
+#define DMA_CH_0_TDMA_DST_START_OFFSET_2_VAL_MASK                    0xFFFFFFFF
+
+/* DMA_CH_0_TDMA_DST_STRIDE_2 */
+#define DMA_CH_0_TDMA_DST_STRIDE_2_VAL_SHIFT                         0
+#define DMA_CH_0_TDMA_DST_STRIDE_2_VAL_MASK                          0xFFFFFFFF
+
+/* DMA_CH_0_TDMA_DST_ROI_BASE_3 */
+#define DMA_CH_0_TDMA_DST_ROI_BASE_3_VAL_SHIFT                       0
+#define DMA_CH_0_TDMA_DST_ROI_BASE_3_VAL_MASK                        0xFFFFFFFF
+
+/* DMA_CH_0_TDMA_DST_ROI_SIZE_3 */
+#define DMA_CH_0_TDMA_DST_ROI_SIZE_3_VAL_SHIFT                       0
+#define DMA_CH_0_TDMA_DST_ROI_SIZE_3_VAL_MASK                        0xFFFFFFFF
+
+/* DMA_CH_0_TDMA_DST_VALID_ELEMENTS_3 */
+#define DMA_CH_0_TDMA_DST_VALID_ELEMENTS_3_VAL_SHIFT                 0
+#define DMA_CH_0_TDMA_DST_VALID_ELEMENTS_3_VAL_MASK                  0xFFFFFFFF
+
+/* DMA_CH_0_TDMA_DST_START_OFFSET_3 */
+#define DMA_CH_0_TDMA_DST_START_OFFSET_3_VAL_SHIFT                   0
+#define DMA_CH_0_TDMA_DST_START_OFFSET_3_VAL_MASK                    0xFFFFFFFF
+
+/* DMA_CH_0_TDMA_DST_STRIDE_3 */
+#define DMA_CH_0_TDMA_DST_STRIDE_3_VAL_SHIFT                         0
+#define DMA_CH_0_TDMA_DST_STRIDE_3_VAL_MASK                          0xFFFFFFFF
+
+/* DMA_CH_0_TDMA_DST_ROI_BASE_4 */
+#define DMA_CH_0_TDMA_DST_ROI_BASE_4_VAL_SHIFT                       0
+#define DMA_CH_0_TDMA_DST_ROI_BASE_4_VAL_MASK                        0xFFFFFFFF
+
+/* DMA_CH_0_TDMA_DST_ROI_SIZE_4 */
+#define DMA_CH_0_TDMA_DST_ROI_SIZE_4_VAL_SHIFT                       0
+#define DMA_CH_0_TDMA_DST_ROI_SIZE_4_VAL_MASK                        0xFFFFFFFF
+
+/* DMA_CH_0_TDMA_DST_VALID_ELEMENTS_4 */
+#define DMA_CH_0_TDMA_DST_VALID_ELEMENTS_4_VAL_SHIFT                 0
+#define DMA_CH_0_TDMA_DST_VALID_ELEMENTS_4_VAL_MASK                  0xFFFFFFFF
+
+/* DMA_CH_0_TDMA_DST_START_OFFSET_4 */
+#define DMA_CH_0_TDMA_DST_START_OFFSET_4_VAL_SHIFT                   0
+#define DMA_CH_0_TDMA_DST_START_OFFSET_4_VAL_MASK                    0xFFFFFFFF
+
+/* DMA_CH_0_TDMA_DST_STRIDE_4 */
+#define DMA_CH_0_TDMA_DST_STRIDE_4_VAL_SHIFT                         0
+#define DMA_CH_0_TDMA_DST_STRIDE_4_VAL_MASK                          0xFFFFFFFF
+
+/* DMA_CH_0_MEM_INIT_BUSY */
+#define DMA_CH_0_MEM_INIT_BUSY_SBC_DATA_SHIFT                        0
+#define DMA_CH_0_MEM_INIT_BUSY_SBC_DATA_MASK                         0xFF
+#define DMA_CH_0_MEM_INIT_BUSY_SBC_MD_SHIFT                          8
+#define DMA_CH_0_MEM_INIT_BUSY_SBC_MD_MASK                           0x100
+
+#endif /* ASIC_REG_DMA_CH_0_MASKS_H_ */
diff --git a/drivers/misc/habanalabs/include/goya/asic_reg/goya_regs.h b/drivers/misc/habanalabs/include/goya/asic_reg/goya_regs.h
index 506e71e201e1..19b0f0ef1d0b 100644
--- a/drivers/misc/habanalabs/include/goya/asic_reg/goya_regs.h
+++ b/drivers/misc/habanalabs/include/goya/asic_reg/goya_regs.h
@@ -88,6 +88,7 @@
 #include "psoc_global_conf_masks.h"
 #include "dma_macro_masks.h"
 #include "dma_qm_0_masks.h"
+#include "dma_ch_0_masks.h"
 #include "tpc0_qm_masks.h"
 #include "tpc0_cmdq_masks.h"
 #include "mme_qm_masks.h"
-- 
2.17.1

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ