lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Date:   Thu, 19 Jan 2023 12:33:35 +0200
From:   Oded Gabbay <ogabbay@...nel.org>
To:     linux-kernel@...r.kernel.org
Cc:     Ofir Bitton <obitton@...ana.ai>
Subject: [PATCH 06/10] habanalabs: optimize command submission completion timestamp

From: Ofir Bitton <obitton@...ana.ai>

The completion timestamp is currently taken during the actual command
submission release. Because the release runs in a work queue, the
timestamp taken there is not accurate. Instead, take the timestamp in
the interrupt handler itself and propagate it to the release function.

Signed-off-by: Ofir Bitton <obitton@...ana.ai>
Reviewed-by: Oded Gabbay <ogabbay@...nel.org>
Signed-off-by: Oded Gabbay <ogabbay@...nel.org>
---
 .../accel/habanalabs/common/command_submission.c    | 12 ++++++++++--
 drivers/accel/habanalabs/common/habanalabs.h        |  4 ++++
 drivers/accel/habanalabs/common/irq.c               | 13 +++++++++----
 3 files changed, 23 insertions(+), 6 deletions(-)

diff --git a/drivers/accel/habanalabs/common/command_submission.c b/drivers/accel/habanalabs/common/command_submission.c
index 00fedf2d8654..8270db0a72a2 100644
--- a/drivers/accel/habanalabs/common/command_submission.c
+++ b/drivers/accel/habanalabs/common/command_submission.c
@@ -398,8 +398,16 @@ static void hl_complete_job(struct hl_device *hdev, struct hl_cs_job *job)
 	 * flow by calling 'hl_hw_queue_update_ci'.
 	 */
 	if (cs_needs_completion(cs) &&
-		(job->queue_type == QUEUE_TYPE_EXT || job->queue_type == QUEUE_TYPE_HW))
+			(job->queue_type == QUEUE_TYPE_EXT || job->queue_type == QUEUE_TYPE_HW)) {
+
+		/* In CS based completions, the timestamp is already available,
+		 * so no need to extract it from job
+		 */
+		if (hdev->asic_prop.completion_mode == HL_COMPLETION_MODE_JOB)
+			cs->completion_timestamp = job->timestamp;
+
 		cs_put(cs);
+	}
 
 	hl_cs_job_put(job);
 }
@@ -776,7 +784,7 @@ static void cs_do_release(struct kref *ref)
 	}
 
 	if (cs->timestamp) {
-		cs->fence->timestamp = ktime_get();
+		cs->fence->timestamp = cs->completion_timestamp;
 		hl_push_cs_outcome(hdev, &cs->ctx->outcome_store, cs->sequence,
 				   cs->fence->timestamp, cs->fence->error);
 	}
diff --git a/drivers/accel/habanalabs/common/habanalabs.h b/drivers/accel/habanalabs/common/habanalabs.h
index a0dfbf4f6cbb..afc0c0d3f9e3 100644
--- a/drivers/accel/habanalabs/common/habanalabs.h
+++ b/drivers/accel/habanalabs/common/habanalabs.h
@@ -1940,6 +1940,7 @@ struct hl_userptr {
  * @type: CS_TYPE_*.
  * @jobs_cnt: counter of submitted jobs on all queues.
  * @encaps_sig_hdl_id: encaps signals handle id, set for the first staged cs.
+ * @completion_timestamp: timestamp of the last completed cs job.
  * @sob_addr_offset: sob offset from the configuration base address.
  * @initial_sob_count: count of completed signals in SOB before current submission of signal or
  *                     cs with encaps signals.
@@ -1972,6 +1973,7 @@ struct hl_cs {
 	struct list_head	staged_cs_node;
 	struct list_head	debugfs_list;
 	struct hl_cs_encaps_sig_handle *encaps_sig_hdl;
+	ktime_t			completion_timestamp;
 	u64			sequence;
 	u64			staged_sequence;
 	u64			timeout_jiffies;
@@ -2007,6 +2009,7 @@ struct hl_cs {
  * @debugfs_list: node in debugfs list of command submission jobs.
  * @refcount: reference counter for usage of the CS job.
  * @queue_type: the type of the H/W queue this job is submitted to.
+ * @timestamp: timestamp upon job completion
  * @id: the id of this job inside a CS.
  * @hw_queue_id: the id of the H/W queue this job is submitted to.
  * @user_cb_size: the actual size of the CB we got from the user.
@@ -2033,6 +2036,7 @@ struct hl_cs_job {
 	struct list_head	debugfs_list;
 	struct kref		refcount;
 	enum hl_queue_type	queue_type;
+	ktime_t			timestamp;
 	u32			id;
 	u32			hw_queue_id;
 	u32			user_cb_size;
diff --git a/drivers/accel/habanalabs/common/irq.c b/drivers/accel/habanalabs/common/irq.c
index a986d7dea453..04844e843a7b 100644
--- a/drivers/accel/habanalabs/common/irq.c
+++ b/drivers/accel/habanalabs/common/irq.c
@@ -72,15 +72,17 @@ static void irq_handle_eqe(struct work_struct *work)
  * @hdev: pointer to device structure
  * @cs_seq: command submission sequence
  * @cq: completion queue
+ * @timestamp: interrupt timestamp
  *
  */
-static void job_finish(struct hl_device *hdev, u32 cs_seq, struct hl_cq *cq)
+static void job_finish(struct hl_device *hdev, u32 cs_seq, struct hl_cq *cq, ktime_t timestamp)
 {
 	struct hl_hw_queue *queue;
 	struct hl_cs_job *job;
 
 	queue = &hdev->kernel_queues[cq->hw_queue_id];
 	job = queue->shadow_queue[hl_pi_2_offset(cs_seq)];
+	job->timestamp = timestamp;
 	queue_work(hdev->cq_wq[cq->cq_idx], &job->finish_work);
 
 	atomic_inc(&queue->ci);
@@ -91,9 +93,10 @@ static void job_finish(struct hl_device *hdev, u32 cs_seq, struct hl_cq *cq)
  *
  * @hdev: pointer to device structure
  * @cs_seq: command submission sequence
+ * @timestamp: interrupt timestamp
  *
  */
-static void cs_finish(struct hl_device *hdev, u16 cs_seq)
+static void cs_finish(struct hl_device *hdev, u16 cs_seq, ktime_t timestamp)
 {
 	struct asic_fixed_properties *prop = &hdev->asic_prop;
 	struct hl_hw_queue *queue;
@@ -113,6 +116,7 @@ static void cs_finish(struct hl_device *hdev, u16 cs_seq)
 		atomic_inc(&queue->ci);
 	}
 
+	cs->completion_timestamp = timestamp;
 	queue_work(hdev->cs_cmplt_wq, &cs->finish_work);
 }
 
@@ -130,6 +134,7 @@ irqreturn_t hl_irq_handler_cq(int irq, void *arg)
 	bool shadow_index_valid, entry_ready;
 	u16 shadow_index;
 	struct hl_cq_entry *cq_entry, *cq_base;
+	ktime_t timestamp = ktime_get();
 
 	if (hdev->disabled) {
 		dev_dbg(hdev->dev,
@@ -171,9 +176,9 @@ irqreturn_t hl_irq_handler_cq(int irq, void *arg)
 		if (shadow_index_valid && !hdev->disabled) {
 			if (hdev->asic_prop.completion_mode ==
 					HL_COMPLETION_MODE_CS)
-				cs_finish(hdev, shadow_index);
+				cs_finish(hdev, shadow_index, timestamp);
 			else
-				job_finish(hdev, shadow_index, cq);
+				job_finish(hdev, shadow_index, cq, timestamp);
 		}
 
 		/* Clear CQ entry ready bit */
-- 
2.25.1

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ