lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [thread-next>] [day] [month] [year] [list]
Message-Id: <20210524084619.4680-1-ogabbay@kernel.org>
Date:   Mon, 24 May 2021 11:46:17 +0300
From:   Oded Gabbay <ogabbay@...nel.org>
To:     linux-kernel@...r.kernel.org
Cc:     Koby Elbaz <kelbaz@...ana.ai>
Subject: [PATCH 1/3] habanalabs/gaudi: use COMMS to reset device / halt CPU

From: Koby Elbaz <kelbaz@...ana.ai>

This is needed because legacy FW 'communication' protocol will soon
become obsolete.
Because COMMS is a boot protocol, communicating through it is supported
only until Linux is loaded to the device CPU, where in that case we
will fallback to the former implementation.

Signed-off-by: Koby Elbaz <kelbaz@...ana.ai>
Reviewed-by: Oded Gabbay <ogabbay@...nel.org>
Signed-off-by: Oded Gabbay <ogabbay@...nel.org>
---
 drivers/misc/habanalabs/common/firmware_if.c |  2 +-
 drivers/misc/habanalabs/common/habanalabs.h  |  5 ++-
 drivers/misc/habanalabs/gaudi/gaudi.c        | 36 ++++++++++++++++++--
 3 files changed, 39 insertions(+), 4 deletions(-)

diff --git a/drivers/misc/habanalabs/common/firmware_if.c b/drivers/misc/habanalabs/common/firmware_if.c
index 399d64e4f4c2..c19acefdb7e4 100644
--- a/drivers/misc/habanalabs/common/firmware_if.c
+++ b/drivers/misc/habanalabs/common/firmware_if.c
@@ -1390,7 +1390,7 @@ static int hl_fw_dynamic_send_clear_cmd(struct hl_device *hdev,
  *             leftovers between command
  * NOOP command: necessary to avoid loop on the clear command by the FW
  */
-static int hl_fw_dynamic_send_protocol_cmd(struct hl_device *hdev,
+int hl_fw_dynamic_send_protocol_cmd(struct hl_device *hdev,
 				struct fw_load_mgr *fw_loader,
 				enum comms_cmd cmd, unsigned int size,
 				bool wait_ok, u32 timeout)
diff --git a/drivers/misc/habanalabs/common/habanalabs.h b/drivers/misc/habanalabs/common/habanalabs.h
index 433262bfb7e6..f1ff4d503cf2 100644
--- a/drivers/misc/habanalabs/common/habanalabs.h
+++ b/drivers/misc/habanalabs/common/habanalabs.h
@@ -2574,7 +2574,10 @@ int hl_fw_read_preboot_status(struct hl_device *hdev, u32 cpu_boot_status_reg,
 				u32 sts_boot_dev_sts0_reg,
 				u32 sts_boot_dev_sts1_reg, u32 boot_err0_reg,
 				u32 boot_err1_reg, u32 timeout);
-
+int hl_fw_dynamic_send_protocol_cmd(struct hl_device *hdev,
+				struct fw_load_mgr *fw_loader,
+				enum comms_cmd cmd, unsigned int size,
+				bool wait_ok, u32 timeout);
 int hl_pci_bars_map(struct hl_device *hdev, const char * const name[3],
 			bool is_wc[3]);
 int hl_pci_elbi_read(struct hl_device *hdev, u64 addr, u32 *data);
diff --git a/drivers/misc/habanalabs/gaudi/gaudi.c b/drivers/misc/habanalabs/gaudi/gaudi.c
index 9be3809d4d0d..e155fae5edcb 100644
--- a/drivers/misc/habanalabs/gaudi/gaudi.c
+++ b/drivers/misc/habanalabs/gaudi/gaudi.c
@@ -1931,6 +1931,38 @@ static void gaudi_disable_msi(struct hl_device *hdev)
 	gaudi->hw_cap_initialized &= ~HW_CAP_MSI;
 }
 
+static void gaudi_fw_hard_reset(struct hl_device *hdev)
+{
+	int rc;
+
+	if (hdev->asic_prop.dynamic_fw_load && !hdev->fw_loader.linux_loaded) {
+		rc = hl_fw_dynamic_send_protocol_cmd(hdev, &hdev->fw_loader,
+				COMMS_RST_DEV, 0, false,
+				hdev->fw_loader.cpu_timeout);
+		if (rc)
+			dev_warn(hdev->dev, "Failed sending COMMS_RST_DEV\n");
+	} else {
+		WREG32(mmPSOC_GLOBAL_CONF_KMD_MSG_TO_CPU, KMD_MSG_RST_DEV);
+	}
+}
+
+static void gaudi_fw_halt_cpu(struct hl_device *hdev)
+{
+	int rc;
+
+	/* Stop device CPU to make sure nothing bad happens */
+	if (hdev->asic_prop.dynamic_fw_load && !hdev->fw_loader.linux_loaded) {
+		rc = hl_fw_dynamic_send_protocol_cmd(hdev, &hdev->fw_loader,
+				COMMS_GOTO_WFE, 0, true,
+				hdev->fw_loader.cpu_timeout);
+		if (rc)
+			dev_warn(hdev->dev, "Failed sending COMMS_GOTO_WFE\n");
+	} else {
+		WREG32(mmPSOC_GLOBAL_CONF_KMD_MSG_TO_CPU, KMD_MSG_GOTO_WFE);
+		msleep(GAUDI_CPU_RESET_WAIT_MSEC);
+	}
+}
+
 static void gaudi_init_scrambler_sram(struct hl_device *hdev)
 {
 	struct gaudi_device *gaudi = hdev->asic_specific;
@@ -4106,9 +4138,9 @@ static void gaudi_hw_fini(struct hl_device *hdev, bool hard_reset)
 	 * stopped in any means necessary
 	 */
 	if (hdev->asic_prop.hard_reset_done_by_fw)
-		WREG32(mmPSOC_GLOBAL_CONF_KMD_MSG_TO_CPU, KMD_MSG_RST_DEV);
+		gaudi_fw_hard_reset(hdev);
 	else
-		WREG32(mmPSOC_GLOBAL_CONF_KMD_MSG_TO_CPU, KMD_MSG_GOTO_WFE);
+		gaudi_fw_halt_cpu(hdev);
 
 	if (hdev->fw_loader.linux_loaded) {
 		irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
-- 
2.25.1

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ