[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <20190728112818.30397-8-oded.gabbay@gmail.com>
Date: Sun, 28 Jul 2019 14:28:16 +0300
From: Oded Gabbay <oded.gabbay@...il.com>
To: linux-kernel@...r.kernel.org, oshpigelman@...ana.ai,
ttayar@...ana.ai, gregkh@...uxfoundation.org
Subject: [PATCH 7/9] habanalabs: protect only pointer dereference in hard-reset
This patch changes where the fpriv_list_lock mutex is taken and released
during the hard-reset process of the ASIC.
The only section that needs protection is where we dereference pointers
that may go away if the user process aborts or closes the FD.
That way, the lock is not held while we wait, so the user process can
actually close its FD if we tell it that an error occurred.
Signed-off-by: Oded Gabbay <oded.gabbay@...il.com>
---
drivers/misc/habanalabs/device.c | 26 ++++++++++++++------------
1 file changed, 14 insertions(+), 12 deletions(-)
diff --git a/drivers/misc/habanalabs/device.c b/drivers/misc/habanalabs/device.c
index 5400e65ba5fa..471506b54217 100644
--- a/drivers/misc/habanalabs/device.c
+++ b/drivers/misc/habanalabs/device.c
@@ -574,20 +574,21 @@ static void device_kill_open_processes(struct hl_device *hdev)
else
pending_total = HL_PENDING_RESET_PER_SEC;
- pending_cnt = pending_total;
-
/* Flush all processes that are inside hl_open */
mutex_lock(&hdev->fpriv_list_lock);
+ mutex_unlock(&hdev->fpriv_list_lock);
- while ((!list_empty(&hdev->fpriv_list)) && (pending_cnt)) {
-
- pending_cnt--;
-
- dev_info(hdev->dev,
- "Can't HARD reset, waiting for user to close FD\n");
+ /* Giving time for user to close FD, and for processes that are inside
+ * hl_device_open to finish
+ */
+ if (!list_empty(&hdev->fpriv_list))
ssleep(1);
- }
+ mutex_lock(&hdev->fpriv_list_lock);
+
+ /* This section must be protected because we are dereferencing
+ * pointers that are freed if the process exits
+ */
if (!list_empty(&hdev->fpriv_list)) {
task = get_pid_task(hdev->compute_ctx->hpriv->taskpid,
PIDTYPE_PID);
@@ -600,6 +601,8 @@ static void device_kill_open_processes(struct hl_device *hdev)
}
}
+ mutex_unlock(&hdev->fpriv_list_lock);
+
/* We killed the open users, but because the driver cleans up after the
* user contexts are closed (e.g. mmu mappings), we need to wait again
* to make sure the cleaning phase is finished before continuing with
@@ -609,6 +612,8 @@ static void device_kill_open_processes(struct hl_device *hdev)
pending_cnt = pending_total;
while ((!list_empty(&hdev->fpriv_list)) && (pending_cnt)) {
+ dev_info(hdev->dev,
+ "Waiting for all unmap operations to finish before hard reset\n");
pending_cnt--;
@@ -618,9 +623,6 @@ static void device_kill_open_processes(struct hl_device *hdev)
if (!list_empty(&hdev->fpriv_list))
dev_crit(hdev->dev,
"Going to hard reset with open user contexts\n");
-
- mutex_unlock(&hdev->fpriv_list_lock);
-
}
static void device_hard_reset_pending(struct work_struct *work)
--
2.17.1
Powered by blists - more mailing lists