[<prev] [next>] [<thread-prev] [day] [month] [year] [list]
Message-Id: <20211205154236.2198481-9-ogabbay@kernel.org>
Date: Sun, 5 Dec 2021 17:42:36 +0200
From: Oded Gabbay <ogabbay@...nel.org>
To: linux-kernel@...r.kernel.org
Cc: Ohad Sharabi <osharabi@...ana.ai>,
Dani Liberman <dliberman@...ana.ai>
Subject: [PATCH 9/9] habanalabs: wait again for multi-CS if no CS completed
From: Ohad Sharabi <osharabi@...ana.ai>
The original multi-CS design assumption that stream masters are used
exclusively (i.e. multi-CS with set of stream master QIDs will not get
completed by CS not from the multi-CS set) is inaccurate.
Thus multi-CS behavior is now modified not to treat such case as an
error.
Instead, if we have multi-CS completion but we detect that no CS from
the list is actually completed we will do another multi-CS wait (with
modified timeout).
Signed-off-by: Ohad Sharabi <osharabi@...ana.ai>
Reviewed-by: Dani Liberman <dliberman@...ana.ai>
Reviewed-by: Oded Gabbay <ogabbay@...nel.org>
Signed-off-by: Oded Gabbay <ogabbay@...nel.org>
---
.../habanalabs/common/command_submission.c | 97 +++++++++----------
drivers/misc/habanalabs/common/habanalabs.h | 4 +-
2 files changed, 50 insertions(+), 51 deletions(-)
diff --git a/drivers/misc/habanalabs/common/command_submission.c b/drivers/misc/habanalabs/common/command_submission.c
index a63ebbc04787..f58fff3671d6 100644
--- a/drivers/misc/habanalabs/common/command_submission.c
+++ b/drivers/misc/habanalabs/common/command_submission.c
@@ -545,13 +545,6 @@ static void complete_multi_cs(struct hl_device *hdev, struct hl_cs *cs)
* mcs fences.
*/
fence->mcs_handling_done = true;
- /*
- * Since CS (and its related fence) can be associated with only one
- * multi CS context, once it triggered multi CS completion no need to
- * continue checking other multi CS contexts.
- */
- spin_unlock(&mcs_compl->lock);
- break;
}
spin_unlock(&mcs_compl->lock);
@@ -2498,6 +2491,21 @@ static int _hl_cs_wait_ioctl(struct hl_device *hdev, struct hl_ctx *ctx,
return rc;
}
+static inline unsigned long hl_usecs64_to_jiffies(const u64 usecs)
+{
+ if (usecs <= U32_MAX)
+ return usecs_to_jiffies(usecs);
+
+ /*
+ * If the value in nanoseconds is larger than 64 bit, use the largest
+ * 64 bit value.
+ */
+ if (usecs >= ((u64)(U64_MAX / NSEC_PER_USEC)))
+ return nsecs_to_jiffies(U64_MAX);
+
+ return nsecs_to_jiffies(usecs * NSEC_PER_USEC);
+}
+
/*
* hl_wait_multi_cs_completion_init - init completion structure
*
@@ -2534,8 +2542,7 @@ static struct multi_cs_completion *hl_wait_multi_cs_completion_init(
}
if (i == MULTI_CS_MAX_USER_CTX) {
- dev_err(hdev->dev,
- "no available multi-CS completion structure\n");
+ dev_err(hdev->dev, "no available multi-CS completion structure\n");
return ERR_PTR(-ENOMEM);
}
return mcs_compl;
@@ -2566,27 +2573,18 @@ static void hl_wait_multi_cs_completion_fini(
*
* @return 0 on success, otherwise non 0 error code
*/
-static int hl_wait_multi_cs_completion(struct multi_cs_data *mcs_data)
+static int hl_wait_multi_cs_completion(struct multi_cs_data *mcs_data,
+ struct multi_cs_completion *mcs_compl)
{
- struct hl_device *hdev = mcs_data->ctx->hdev;
- struct multi_cs_completion *mcs_compl;
long completion_rc;
- mcs_compl = hl_wait_multi_cs_completion_init(hdev,
- mcs_data->stream_master_qid_map);
- if (IS_ERR(mcs_compl))
- return PTR_ERR(mcs_compl);
-
- completion_rc = wait_for_completion_interruptible_timeout(
- &mcs_compl->completion,
- usecs_to_jiffies(mcs_data->timeout_us));
+ completion_rc = wait_for_completion_interruptible_timeout(&mcs_compl->completion,
+ mcs_data->timeout_jiffies);
/* update timestamp */
if (completion_rc > 0)
mcs_data->timestamp = mcs_compl->timestamp;
- hl_wait_multi_cs_completion_fini(mcs_compl);
-
mcs_data->wait_status = completion_rc;
return 0;
@@ -2619,6 +2617,7 @@ void hl_multi_cs_completion_init(struct hl_device *hdev)
*/
static int hl_multi_cs_wait_ioctl(struct hl_fpriv *hpriv, void *data)
{
+ struct multi_cs_completion *mcs_compl;
struct hl_device *hdev = hpriv->hdev;
struct multi_cs_data mcs_data = {0};
union hl_wait_cs_args *args = data;
@@ -2686,12 +2685,19 @@ static int hl_multi_cs_wait_ioctl(struct hl_fpriv *hpriv, void *data)
goto put_ctx;
/* wait (with timeout) for the first CS to be completed */
- mcs_data.timeout_us = args->in.timeout_us;
- rc = hl_wait_multi_cs_completion(&mcs_data);
- if (rc)
+ mcs_data.timeout_jiffies = hl_usecs64_to_jiffies(args->in.timeout_us);
+
+ mcs_compl = hl_wait_multi_cs_completion_init(hdev, mcs_data.stream_master_qid_map);
+ if (IS_ERR(mcs_compl)) {
+ rc = PTR_ERR(mcs_compl);
goto put_ctx;
+ }
+
+ while (true) {
+ rc = hl_wait_multi_cs_completion(&mcs_data, mcs_compl);
+ if (rc || (mcs_data.wait_status == 0))
+ break;
- if (mcs_data.wait_status > 0) {
/*
* poll fences once again to update the CS map.
* no timestamp should be updated this time.
@@ -2699,18 +2705,26 @@ static int hl_multi_cs_wait_ioctl(struct hl_fpriv *hpriv, void *data)
mcs_data.update_ts = false;
rc = hl_cs_poll_fences(&mcs_data);
+ if (mcs_data.completion_bitmap)
+ break;
+
/*
* if hl_wait_multi_cs_completion returned before timeout (i.e.
- * it got a completion) we expect to see at least one CS
- * completed after the poll function.
+ * it got a completion) it either got completed by CS in the multi CS list
+ * (in which case the indication will be non empty completion_bitmap) or it
+ * got completed by CS submitted to one of the shared stream master but
+ * not in the multi CS list (in which case we should wait again but reinit
+ * the completion, modify the timeout and set timestamp as zero to let a CS
+ * related to the current multi-CS set a new, relevant, timestamp)
*/
- if (!mcs_data.completion_bitmap) {
- dev_warn_ratelimited(hdev->dev,
- "Multi-CS got completion on wait but no CS completed\n");
- rc = -EFAULT;
- }
+ /* wait again with modified timeout */
+ mcs_data.timeout_jiffies = mcs_data.wait_status;
+ reinit_completion(&mcs_compl->completion);
+ mcs_compl->timestamp = 0;
}
+ hl_wait_multi_cs_completion_fini(mcs_compl);
+
put_ctx:
hl_ctx_put(ctx);
kfree(fence_arr);
@@ -2741,7 +2755,7 @@ static int hl_multi_cs_wait_ioctl(struct hl_fpriv *hpriv, void *data)
}
/* update if some CS was gone */
- if (mcs_data.timestamp)
+ if (!mcs_data.timestamp)
args->out.flags |= HL_WAIT_CS_STATUS_FLAG_GONE;
} else {
args->out.status = HL_WAIT_CS_STATUS_BUSY;
@@ -2807,21 +2821,6 @@ static int hl_cs_wait_ioctl(struct hl_fpriv *hpriv, void *data)
return 0;
}
-static inline unsigned long hl_usecs64_to_jiffies(const u64 usecs)
-{
- if (usecs <= U32_MAX)
- return usecs_to_jiffies(usecs);
-
- /*
- * If the value in nanoseconds is larger than 64 bit, use the largest
- * 64 bit value.
- */
- if (usecs >= ((u64)(U64_MAX / NSEC_PER_USEC)))
- return nsecs_to_jiffies(U64_MAX);
-
- return nsecs_to_jiffies(usecs * NSEC_PER_USEC);
-}
-
static int _hl_interrupt_wait_ioctl(struct hl_device *hdev, struct hl_ctx *ctx,
u64 timeout_us, u64 user_address,
u64 target_value, struct hl_user_interrupt *interrupt,
diff --git a/drivers/misc/habanalabs/common/habanalabs.h b/drivers/misc/habanalabs/common/habanalabs.h
index df1935952c28..eda1c70f6966 100644
--- a/drivers/misc/habanalabs/common/habanalabs.h
+++ b/drivers/misc/habanalabs/common/habanalabs.h
@@ -2362,7 +2362,7 @@ struct multi_cs_completion {
* @ctx: pointer to the context structure
* @fence_arr: array of fences of all CSs
* @seq_arr: array of CS sequence numbers
- * @timeout_us: timeout in usec for waiting for CS to complete
+ * @timeout_jiffies: timeout in jiffies for waiting for CS to complete
* @timestamp: timestamp of first completed CS
* @wait_status: wait for CS status
* @completion_bitmap: bitmap of completed CSs (1- completed, otherwise 0)
@@ -2376,7 +2376,7 @@ struct multi_cs_data {
struct hl_ctx *ctx;
struct hl_fence **fence_arr;
u64 *seq_arr;
- s64 timeout_us;
+ s64 timeout_jiffies;
s64 timestamp;
long wait_status;
u32 completion_bitmap;
--
2.25.1
Powered by blists - more mailing lists