lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite for Android: free password hash cracker in your pocket
[<prev] [next>] [<thread-prev] [day] [month] [year] [list]
Message-Id: <20220829092254.930753-7-ogabbay@kernel.org>
Date:   Mon, 29 Aug 2022 12:22:54 +0300
From:   Oded Gabbay <ogabbay@...nel.org>
To:     linux-kernel@...r.kernel.org
Cc:     farah kassabri <fkassabri@...ana.ai>
Subject: [PATCH 7/7] habanalabs: send device active message to f/w

From: farah kassabri <fkassabri@...ana.ai>

As part of the RAS that is done by the f/w, we should send a message
to the f/w when a user either acquires or releases the device.

Signed-off-by: farah kassabri <fkassabri@...ana.ai>
Reviewed-by: Oded Gabbay <ogabbay@...nel.org>
Signed-off-by: Oded Gabbay <ogabbay@...nel.org>
---
 drivers/misc/habanalabs/common/device.c           |  2 ++
 drivers/misc/habanalabs/common/firmware_if.c      | 15 +++++++++++++++
 drivers/misc/habanalabs/common/habanalabs.h       |  3 +++
 drivers/misc/habanalabs/common/habanalabs_drv.c   |  2 ++
 drivers/misc/habanalabs/gaudi/gaudi.c             |  6 ++++++
 drivers/misc/habanalabs/gaudi2/gaudi2.c           | 12 ++++++++++++
 drivers/misc/habanalabs/gaudi2/gaudi2P.h          |  1 +
 drivers/misc/habanalabs/goya/goya.c               |  6 ++++++
 drivers/misc/habanalabs/include/common/cpucp_if.h | 11 +++++++++++
 9 files changed, 58 insertions(+)

diff --git a/drivers/misc/habanalabs/common/device.c b/drivers/misc/habanalabs/common/device.c
index ea6238e614f1..986045de552e 100644
--- a/drivers/misc/habanalabs/common/device.c
+++ b/drivers/misc/habanalabs/common/device.c
@@ -492,6 +492,8 @@ static int hl_device_release(struct inode *inode, struct file *filp)
 	hdev->last_open_session_duration_jif =
 		jiffies - hdev->last_successful_open_jif;
 
+	hdev->asic_funcs->send_device_activity(hdev, false);
+
 	return 0;
 }
 
diff --git a/drivers/misc/habanalabs/common/firmware_if.c b/drivers/misc/habanalabs/common/firmware_if.c
index 4ede4bb03e8e..cd2eb7e73be5 100644
--- a/drivers/misc/habanalabs/common/firmware_if.c
+++ b/drivers/misc/habanalabs/common/firmware_if.c
@@ -454,6 +454,21 @@ void hl_fw_cpu_accessible_dma_pool_free(struct hl_device *hdev, size_t size,
 			size);
 }
 
+int hl_fw_send_device_activity(struct hl_device *hdev, bool open)
+{
+	struct cpucp_packet pkt;
+	int rc;
+
+	memset(&pkt, 0, sizeof(pkt));
+	pkt.ctl = cpu_to_le32(CPUCP_PACKET_ACTIVE_STATUS_SET <<	CPUCP_PKT_CTL_OPCODE_SHIFT);
+	pkt.value = cpu_to_le64(open);
+	rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), 0, NULL);
+	if (rc)
+		dev_err(hdev->dev, "failed to send device activity msg(%u)\n", open);
+
+	return rc;
+}
+
 int hl_fw_send_heartbeat(struct hl_device *hdev)
 {
 	struct cpucp_packet hb_pkt;
diff --git a/drivers/misc/habanalabs/common/habanalabs.h b/drivers/misc/habanalabs/common/habanalabs.h
index cf9cbbd09063..01426e1306ee 100644
--- a/drivers/misc/habanalabs/common/habanalabs.h
+++ b/drivers/misc/habanalabs/common/habanalabs.h
@@ -1528,6 +1528,7 @@ struct engines_data {
  * @access_dev_mem: access device memory
  * @set_dram_bar_base: set the base of the DRAM BAR
  * @set_engine_cores: set a config command to enigne cores
+ * @send_device_activity: indication to FW about device availability
  */
 struct hl_asic_funcs {
 	int (*early_init)(struct hl_device *hdev);
@@ -1664,6 +1665,7 @@ struct hl_asic_funcs {
 	u64 (*set_dram_bar_base)(struct hl_device *hdev, u64 addr);
 	int (*set_engine_cores)(struct hl_device *hdev, u32 *core_ids,
 					u32 num_cores, u32 core_command);
+	int (*send_device_activity)(struct hl_device *hdev, bool open);
 };
 
 
@@ -3717,6 +3719,7 @@ int hl_fw_dram_replaced_row_get(struct hl_device *hdev,
 				struct cpucp_hbm_row_info *info);
 int hl_fw_dram_pending_row_get(struct hl_device *hdev, u32 *pend_rows_num);
 int hl_fw_cpucp_engine_core_asid_set(struct hl_device *hdev, u32 asid);
+int hl_fw_send_device_activity(struct hl_device *hdev, bool open);
 int hl_pci_bars_map(struct hl_device *hdev, const char * const name[3],
 			bool is_wc[3]);
 int hl_pci_elbi_read(struct hl_device *hdev, u64 addr, u32 *data);
diff --git a/drivers/misc/habanalabs/common/habanalabs_drv.c b/drivers/misc/habanalabs/common/habanalabs_drv.c
index 7bf0dc245d39..f9db7b91bfe3 100644
--- a/drivers/misc/habanalabs/common/habanalabs_drv.c
+++ b/drivers/misc/habanalabs/common/habanalabs_drv.c
@@ -204,6 +204,8 @@ int hl_device_open(struct inode *inode, struct file *filp)
 		goto out_err;
 	}
 
+	rc = hdev->asic_funcs->send_device_activity(hdev, true);
+
 	list_add(&hpriv->dev_node, &hdev->fpriv_list);
 	mutex_unlock(&hdev->fpriv_list_lock);
 
diff --git a/drivers/misc/habanalabs/gaudi/gaudi.c b/drivers/misc/habanalabs/gaudi/gaudi.c
index 96020693ac29..87dbdbb220da 100644
--- a/drivers/misc/habanalabs/gaudi/gaudi.c
+++ b/drivers/misc/habanalabs/gaudi/gaudi.c
@@ -9132,6 +9132,11 @@ static void gaudi_add_device_attr(struct hl_device *hdev, struct attribute_group
 	dev_vrm_attr_grp->attrs = gaudi_vrm_dev_attrs;
 }
 
+static int gaudi_send_device_activity(struct hl_device *hdev, bool open)
+{
+	return 0;
+}
+
 static const struct hl_asic_funcs gaudi_funcs = {
 	.early_init = gaudi_early_init,
 	.early_fini = gaudi_early_fini,
@@ -9224,6 +9229,7 @@ static const struct hl_asic_funcs gaudi_funcs = {
 	.mmu_get_real_page_size = hl_mmu_get_real_page_size,
 	.access_dev_mem = hl_access_dev_mem,
 	.set_dram_bar_base = gaudi_set_hbm_bar_base,
+	.send_device_activity = gaudi_send_device_activity,
 };
 
 /**
diff --git a/drivers/misc/habanalabs/gaudi2/gaudi2.c b/drivers/misc/habanalabs/gaudi2/gaudi2.c
index 4696da7a57c1..330869cb4c0b 100644
--- a/drivers/misc/habanalabs/gaudi2/gaudi2.c
+++ b/drivers/misc/habanalabs/gaudi2/gaudi2.c
@@ -10031,6 +10031,17 @@ static int gaudi2_get_monitor_dump(struct hl_device *hdev, void *data)
 	return -EOPNOTSUPP;
 }
 
+int gaudi2_send_device_activity(struct hl_device *hdev, bool open)
+{
+	struct gaudi2_device *gaudi2 = hdev->asic_specific;
+
+	if (!(gaudi2->hw_cap_initialized & HW_CAP_CPU_Q) || hdev->fw_major_version < 37)
+		return 0;
+
+	/* TODO: add check for FW version using minor ver once it's known */
+	return hl_fw_send_device_activity(hdev, open);
+}
+
 static const struct hl_asic_funcs gaudi2_funcs = {
 	.early_init = gaudi2_early_init,
 	.early_fini = gaudi2_early_fini,
@@ -10127,6 +10138,7 @@ static const struct hl_asic_funcs gaudi2_funcs = {
 	.access_dev_mem = hl_access_dev_mem,
 	.set_dram_bar_base = gaudi2_set_hbm_bar_base,
 	.set_engine_cores = gaudi2_set_engine_cores,
+	.send_device_activity = gaudi2_send_device_activity,
 };
 
 void gaudi2_set_asic_funcs(struct hl_device *hdev)
diff --git a/drivers/misc/habanalabs/gaudi2/gaudi2P.h b/drivers/misc/habanalabs/gaudi2/gaudi2P.h
index 9094a702678d..a99c348bbf39 100644
--- a/drivers/misc/habanalabs/gaudi2/gaudi2P.h
+++ b/drivers/misc/habanalabs/gaudi2/gaudi2P.h
@@ -553,5 +553,6 @@ void gaudi2_pb_print_security_errors(struct hl_device *hdev, u32 block_addr, u32
 					u32 offended_addr);
 int gaudi2_init_security(struct hl_device *hdev);
 void gaudi2_ack_protection_bits_errors(struct hl_device *hdev);
+int gaudi2_send_device_activity(struct hl_device *hdev, bool open);
 
 #endif /* GAUDI2P_H_ */
diff --git a/drivers/misc/habanalabs/goya/goya.c b/drivers/misc/habanalabs/goya/goya.c
index d8fb91d257b9..5ef9e3ca97a6 100644
--- a/drivers/misc/habanalabs/goya/goya.c
+++ b/drivers/misc/habanalabs/goya/goya.c
@@ -5420,6 +5420,11 @@ static int goya_scrub_device_dram(struct hl_device *hdev, u64 val)
 	return -EOPNOTSUPP;
 }
 
+static int goya_send_device_activity(struct hl_device *hdev, bool open)
+{
+	return 0;
+}
+
 static const struct hl_asic_funcs goya_funcs = {
 	.early_init = goya_early_init,
 	.early_fini = goya_early_fini,
@@ -5512,6 +5517,7 @@ static const struct hl_asic_funcs goya_funcs = {
 	.mmu_get_real_page_size = hl_mmu_get_real_page_size,
 	.access_dev_mem = hl_access_dev_mem,
 	.set_dram_bar_base = goya_set_ddr_bar_base,
+	.send_device_activity = goya_send_device_activity,
 };
 
 /*
diff --git a/drivers/misc/habanalabs/include/common/cpucp_if.h b/drivers/misc/habanalabs/include/common/cpucp_if.h
index abf40e1c4965..b837bb1f4cd3 100644
--- a/drivers/misc/habanalabs/include/common/cpucp_if.h
+++ b/drivers/misc/habanalabs/include/common/cpucp_if.h
@@ -636,6 +636,10 @@ enum pq_init_status {
  *       passes the max size it allows the CpuCP to write to the structure, to prevent
  *       data corruption in case of mismatched driver/FW versions.
  *       Relevant only to Gaudi.
+ *
+ * CPUCP_PACKET_ACTIVE_STATUS_SET -
+ *       LKD sends FW indication whether device is free or in use, this indication is reported
+ *       also to the BMC.
  */
 
 enum cpucp_packet_id {
@@ -691,6 +695,13 @@ enum cpucp_packet_id {
 	CPUCP_PACKET_RESERVED4,			/* not used */
 	CPUCP_PACKET_RESERVED5,			/* not used */
 	CPUCP_PACKET_MONITOR_DUMP_GET,		/* debugfs */
+	CPUCP_PACKET_RESERVED6,			/* not used */
+	CPUCP_PACKET_RESERVED7,			/* not used */
+	CPUCP_PACKET_RESERVED8,			/* not used */
+	CPUCP_PACKET_RESERVED9,			/* not used */
+	CPUCP_PACKET_RESERVED10,		/* not used */
+	CPUCP_PACKET_ACTIVE_STATUS_SET,		/* internal */
+	CPUCP_PACKET_ID_MAX			/* must be last */
 };
 
 #define CPUCP_PACKET_FENCE_VAL	0xFE8CE7A5
-- 
2.25.1

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ