lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [day] [month] [year] [list]
Message-Id: <20211226124304.237107-5-ogabbay@kernel.org>
Date:   Sun, 26 Dec 2021 14:43:04 +0200
From:   Oded Gabbay <ogabbay@...nel.org>
To:     linux-kernel@...r.kernel.org
Cc:     Ofir Bitton <obitton@...ana.ai>
Subject: [PATCH 5/5] habanalabs: support hard-reset scheduling during soft-reset

From: Ofir Bitton <obitton@...ana.ai>

As hard-reset can be requested during soft-reset, driver must allow
it or else critical events received during soft-reset will be
ignored.

Signed-off-by: Ofir Bitton <obitton@...ana.ai>
Reviewed-by: Oded Gabbay <ogabbay@...nel.org>
Signed-off-by: Oded Gabbay <ogabbay@...nel.org>
---
 drivers/misc/habanalabs/common/device.c     | 31 +++++++++++++++++++--
 drivers/misc/habanalabs/common/habanalabs.h |  3 ++
 2 files changed, 31 insertions(+), 3 deletions(-)

diff --git a/drivers/misc/habanalabs/common/device.c b/drivers/misc/habanalabs/common/device.c
index 84621ad765bc..733338ab6f1d 100644
--- a/drivers/misc/habanalabs/common/device.c
+++ b/drivers/misc/habanalabs/common/device.c
@@ -978,7 +978,7 @@ static void handle_reset_trigger(struct hl_device *hdev, u32 flags)
 int hl_device_reset(struct hl_device *hdev, u32 flags)
 {
 	bool hard_reset, from_hard_reset_thread, fw_reset, hard_instead_soft = false,
-								reset_upon_device_release = false;
+			reset_upon_device_release = false, schedule_hard_reset = false;
 	u64 idle_mask[HL_BUSY_ENGINES_MASK_EXT_SIZE] = {0};
 	struct hl_ctx *ctx;
 	int i, rc;
@@ -1031,6 +1031,9 @@ int hl_device_reset(struct hl_device *hdev, u32 flags)
 		/* Block future CS/VM/JOB completion operations */
 		spin_lock(&hdev->reset_info.lock);
 		if (hdev->reset_info.in_reset) {
+			/* We only allow scheduling of a hard reset during soft reset */
+			if (hard_reset && hdev->reset_info.is_in_soft_reset)
+				hdev->reset_info.hard_reset_schedule_flags = flags;
 			spin_unlock(&hdev->reset_info.lock);
 			return 0;
 		}
@@ -1193,7 +1196,6 @@ int hl_device_reset(struct hl_device *hdev, u32 flags)
 	 * is required for the initialization itself
 	 */
 	hdev->disabled = false;
-	hdev->reset_info.is_in_soft_reset = false;
 
 	rc = hdev->asic_funcs->hw_init(hdev);
 	if (rc) {
@@ -1243,7 +1245,20 @@ int hl_device_reset(struct hl_device *hdev, u32 flags)
 		}
 	}
 
-	hdev->reset_info.in_reset = 0;
+	spin_lock(&hdev->reset_info.lock);
+	hdev->reset_info.is_in_soft_reset = false;
+
+	/* Schedule hard reset only if requested and if not already in hard reset.
+	 * We keep 'in_reset' enabled, so no other reset can go in during the hard
+	 * reset schedule
+	 */
+	if (!hard_reset && hdev->reset_info.hard_reset_schedule_flags)
+		schedule_hard_reset = true;
+	else
+		hdev->reset_info.in_reset = 0;
+
+	spin_unlock(&hdev->reset_info.lock);
+
 	hdev->reset_info.needs_reset = false;
 
 	dev_notice(hdev->dev, "Successfully finished resetting the device\n");
@@ -1261,6 +1276,16 @@ int hl_device_reset(struct hl_device *hdev, u32 flags)
 		hdev->reset_info.soft_reset_cnt++;
 	}
 
+	if (schedule_hard_reset) {
+		dev_info(hdev->dev, "Performing hard reset scheduled during soft reset\n");
+		flags = hdev->reset_info.hard_reset_schedule_flags;
+		hdev->reset_info.hard_reset_schedule_flags = 0;
+		hdev->disabled = true;
+		hard_reset = true;
+		handle_reset_trigger(hdev, flags);
+		goto again;
+	}
+
 	return 0;
 
 out_err:
diff --git a/drivers/misc/habanalabs/common/habanalabs.h b/drivers/misc/habanalabs/common/habanalabs.h
index 37a3a469b42f..cb710fd478b6 100644
--- a/drivers/misc/habanalabs/common/habanalabs.h
+++ b/drivers/misc/habanalabs/common/habanalabs.h
@@ -2460,6 +2460,8 @@ struct last_error_session_info {
  * @lock: lock to protect critical reset flows.
  * @soft_reset_cnt: number of soft reset since the driver was loaded.
  * @hard_reset_cnt: number of hard reset since the driver was loaded.
+ * @hard_reset_schedule_flags: hard reset is scheduled to after current soft reset,
+ *                             here we hold the hard reset flags.
  * @in_reset: is device in reset flow.
  * @is_in_soft_reset: Device is currently in soft reset process.
  * @needs_reset: true if reset_on_lockup is false and device should be reset
@@ -2478,6 +2480,7 @@ struct hl_reset_info {
 	spinlock_t	lock;
 	u32		soft_reset_cnt;
 	u32		hard_reset_cnt;
+	u32		hard_reset_schedule_flags;
 	u8		in_reset;
 	u8		is_in_soft_reset;
 	u8		needs_reset;
-- 
2.25.1

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ