lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [day] [month] [year] [list]
Message-ID: <20191003084209.9547-4-ttayar@habana.ai>
Date:   Thu, 3 Oct 2019 08:42:18 +0000
From:   Tomer Tayar <ttayar@...ana.ai>
To:     "oded.gabbay@...il.com" <oded.gabbay@...il.com>
CC:     "linux-kernel@...r.kernel.org" <linux-kernel@...r.kernel.org>
Subject: [PATCH 4/4] habanalabs: Add IRQ handler for CS completions

This patch adds an IRQ handler for CS completions of CS jobs which are
sent on H/W queues.
The patch adds a CS shadow queue, from which the handler retrieves the
CS, and a dedicated workqueue, on which the handler queues a work to
free the CS jobs.

Signed-off-by: Tomer Tayar <ttayar@...ana.ai>
---
 drivers/misc/habanalabs/command_submission.c | 16 +++++++
 drivers/misc/habanalabs/device.c             | 27 +++++++++++-
 drivers/misc/habanalabs/habanalabs.h         | 18 ++++++++
 drivers/misc/habanalabs/hw_queue.c           |  2 +
 drivers/misc/habanalabs/irq.c                | 46 ++++++++++++++++++++
 5 files changed, 107 insertions(+), 2 deletions(-)

diff --git a/drivers/misc/habanalabs/command_submission.c b/drivers/misc/habanalabs/command_submission.c
index 25dc7308da19..b995a02a31dd 100644
--- a/drivers/misc/habanalabs/command_submission.c
+++ b/drivers/misc/habanalabs/command_submission.c
@@ -267,6 +267,8 @@ static void cs_do_release(struct kref *ref)
 
 	hl_ctx_put(cs->ctx);
 
+	hdev->shadow_cs_queue[cs->sequence & (HL_MAX_PENDING_CS - 1)] = NULL;
+
 	if (cs->timedout)
 		dma_fence_set_error(cs->fence, -ETIMEDOUT);
 	else if (cs->aborted)
@@ -391,6 +393,7 @@ void hl_cs_rollback_all(struct hl_device *hdev)
 
 	/* flush all completions */
 	flush_workqueue(hdev->cq_wq);
+	flush_workqueue(hdev->cs_cmplt_wq);
 
 	/* Make sure we don't have leftovers in the H/W queues mirror list */
 	list_for_each_entry_safe(cs, tmp, &hdev->hw_queues_mirror_list,
@@ -415,6 +418,16 @@ static void job_wq_completion(struct work_struct *work)
 	free_job(hdev, job);
 }
 
+static void cs_completion(struct work_struct *work)
+{
+	struct hl_cs *cs = container_of(work, struct hl_cs, finish_work);
+	struct hl_device *hdev = cs->ctx->hdev;
+	struct hl_cs_job *job, *tmp;
+
+	list_for_each_entry_safe(job, tmp, &cs->job_list, cs_node)
+		free_job(hdev, job);
+}
+
 static int validate_queue_index(struct hl_device *hdev,
 				struct hl_cs_chunk *chunk,
 				enum hl_queue_type *queue_type,
@@ -625,6 +638,9 @@ static int _hl_cs_ioctl(struct hl_fpriv *hpriv, void __user *chunks,
 		goto free_cs_object;
 	}
 
+	if (job->queue_type == QUEUE_TYPE_HW)
+		INIT_WORK(&cs->finish_work, cs_completion);
+
 	rc = hl_hw_queue_schedule_cs(cs);
 	if (rc) {
 		dev_err(hdev->dev,
diff --git a/drivers/misc/habanalabs/device.c b/drivers/misc/habanalabs/device.c
index 2f5a4da707e7..6c13f05c3120 100644
--- a/drivers/misc/habanalabs/device.c
+++ b/drivers/misc/habanalabs/device.c
@@ -284,11 +284,19 @@ static int device_early_init(struct hl_device *hdev)
 		goto free_cq_wq;
 	}
 
+	hdev->cs_cmplt_wq = alloc_workqueue("hl-cs-completions", WQ_UNBOUND, 0);
+	if (!hdev->cs_cmplt_wq) {
+		dev_err(hdev->dev,
+			"Failed to allocate CS completions workqueue\n");
+		rc = -ENOMEM;
+		goto free_eq_wq;
+	}
+
 	hdev->hl_chip_info = kzalloc(sizeof(struct hwmon_chip_info),
 					GFP_KERNEL);
 	if (!hdev->hl_chip_info) {
 		rc = -ENOMEM;
-		goto free_eq_wq;
+		goto free_cs_cmplt_wq;
 	}
 
 	hdev->idle_busy_ts_arr = kmalloc_array(HL_IDLE_BUSY_TS_ARR_SIZE,
@@ -314,6 +322,8 @@ static int device_early_init(struct hl_device *hdev)
 
 free_chip_info:
 	kfree(hdev->hl_chip_info);
+free_cs_cmplt_wq:
+	destroy_workqueue(hdev->cs_cmplt_wq);
 free_eq_wq:
 	destroy_workqueue(hdev->eq_wq);
 free_cq_wq:
@@ -346,6 +356,7 @@ static void device_early_fini(struct hl_device *hdev)
 	kfree(hdev->idle_busy_ts_arr);
 	kfree(hdev->hl_chip_info);
 
+	destroy_workqueue(hdev->cs_cmplt_wq);
 	destroy_workqueue(hdev->eq_wq);
 	destroy_workqueue(hdev->cq_wq);
 
@@ -1138,6 +1149,14 @@ int hl_device_init(struct hl_device *hdev, struct class *hclass)
 		}
 	}
 
+	hdev->shadow_cs_queue = kmalloc_array(HL_MAX_PENDING_CS,
+						sizeof(*hdev->shadow_cs_queue),
+						GFP_KERNEL | __GFP_ZERO);
+	if (!hdev->shadow_cs_queue) {
+		rc = -ENOMEM;
+		goto cq_fini;
+	}
+
 	/*
 	 * Initialize the event queue. Must be done before hw_init,
 	 * because there the address of the event queue is being
@@ -1146,7 +1165,7 @@ int hl_device_init(struct hl_device *hdev, struct class *hclass)
 	rc = hl_eq_init(hdev, &hdev->event_queue);
 	if (rc) {
 		dev_err(hdev->dev, "failed to initialize event queue\n");
-		goto cq_fini;
+		goto free_shadow_cs_queue;
 	}
 
 	/* MMU S/W must be initialized before kernel context is created */
@@ -1269,6 +1288,8 @@ int hl_device_init(struct hl_device *hdev, struct class *hclass)
 	hl_mmu_fini(hdev);
 eq_fini:
 	hl_eq_fini(hdev, &hdev->event_queue);
+free_shadow_cs_queue:
+	kfree(hdev->shadow_cs_queue);
 cq_fini:
 	for (i = 0 ; i < cq_ready_cnt ; i++)
 		hl_cq_fini(hdev, &hdev->completion_queue[i]);
@@ -1383,6 +1404,8 @@ void hl_device_fini(struct hl_device *hdev)
 
 	hl_eq_fini(hdev, &hdev->event_queue);
 
+	kfree(hdev->shadow_cs_queue);
+
 	for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++)
 		hl_cq_fini(hdev, &hdev->completion_queue[i]);
 	kfree(hdev->completion_queue);
diff --git a/drivers/misc/habanalabs/habanalabs.h b/drivers/misc/habanalabs/habanalabs.h
index c1af83f96415..2efb5e1e62cb 100644
--- a/drivers/misc/habanalabs/habanalabs.h
+++ b/drivers/misc/habanalabs/habanalabs.h
@@ -722,6 +722,7 @@ struct hl_userptr {
  * @job_lock: spinlock for the CS's jobs list. Needed for free_job.
  * @refcount: reference counter for usage of the CS.
  * @fence: pointer to the fence object of this CS.
+ * @finish_work: workqueue object to run when CS is completed by H/W.
  * @work_tdr: delayed work node for TDR.
  * @mirror_node : node in device mirror list of command submissions.
  * @debugfs_list: node in debugfs list of command submissions.
@@ -741,6 +742,7 @@ struct hl_cs {
 	spinlock_t		job_lock;
 	struct kref		refcount;
 	struct dma_fence	*fence;
+	struct work_struct	finish_work;
 	struct delayed_work	work_tdr;
 	struct list_head	mirror_node;
 	struct list_head	debugfs_list;
@@ -1203,8 +1205,12 @@ struct hl_device_idle_busy_ts {
  * @asic_name: ASIC specific nmae.
  * @asic_type: ASIC specific type.
  * @completion_queue: array of hl_cq.
+ * @shadow_cs_queue: pointer to a shadow queue that holds pointers to
+ *                   outstanding command submissions.
  * @cq_wq: work queue of completion queues for executing work in process context
  * @eq_wq: work queue of event queue for executing work in process context.
+ * @cs_cmplt_wq: work queue of CS completions for executing work in process
+ *               context.
  * @kernel_ctx: Kernel driver context structure.
  * @kernel_queues: array of hl_hw_queue.
  * @hw_queues_mirror_list: CS mirror list for TDR.
@@ -1284,8 +1290,10 @@ struct hl_device {
 	char				asic_name[16];
 	enum hl_asic_type		asic_type;
 	struct hl_cq			*completion_queue;
+	struct hl_cs			**shadow_cs_queue;
 	struct workqueue_struct		*cq_wq;
 	struct workqueue_struct		*eq_wq;
+	struct workqueue_struct		*cs_cmplt_wq;
 	struct hl_ctx			*kernel_ctx;
 	struct hl_hw_queue		*kernel_queues;
 	struct list_head		hw_queues_mirror_list;
@@ -1359,6 +1367,15 @@ struct hl_device {
 	u8				pldm;
 };
 
+/**
+ * struct hl_cs_irq_info - IRQ info structure for CS completion interrupt.
+ * @hdev: pointer to habanalabs device structure.
+ * @relative_idx: CS completion relative interrupt index (0-based).
+ */
+struct hl_cs_irq_info {
+	struct hl_device *hdev;
+	int relative_idx;
+};
 
 /*
  * IOCTLs
@@ -1470,6 +1487,7 @@ void hl_cq_reset(struct hl_device *hdev, struct hl_cq *q);
 void hl_eq_reset(struct hl_device *hdev, struct hl_eq *q);
 irqreturn_t hl_irq_handler_cq(int irq, void *arg);
 irqreturn_t hl_irq_handler_eq(int irq, void *arg);
+irqreturn_t hl_irq_handler_cs_cmplt(int irq, void *arg);
 u32 hl_cq_inc_ptr(u32 ptr);
 
 int hl_asid_init(struct hl_device *hdev);
diff --git a/drivers/misc/habanalabs/hw_queue.c b/drivers/misc/habanalabs/hw_queue.c
index a1205ae47250..7b80e571a27c 100644
--- a/drivers/misc/habanalabs/hw_queue.c
+++ b/drivers/misc/habanalabs/hw_queue.c
@@ -469,6 +469,8 @@ int hl_hw_queue_schedule_cs(struct hl_cs *cs)
 		goto unroll_cq_resv;
 	}
 
+	hdev->shadow_cs_queue[cs->sequence & (HL_MAX_PENDING_CS - 1)] = cs;
+
 	spin_lock(&hdev->hw_queues_mirror_lock);
 	list_add_tail(&cs->mirror_node, &hdev->hw_queues_mirror_list);
 
diff --git a/drivers/misc/habanalabs/irq.c b/drivers/misc/habanalabs/irq.c
index fac65fbd70e8..93fa13218dd4 100644
--- a/drivers/misc/habanalabs/irq.c
+++ b/drivers/misc/habanalabs/irq.c
@@ -205,6 +205,52 @@ irqreturn_t hl_irq_handler_eq(int irq, void *arg)
 	return IRQ_HANDLED;
 }
 
+/*
+ * hl_irq_handler_cs_cmplt() - irq handler for CS completions.
+ * @irq: IRQ number
+ * @arg: pointer to the hl_cs_irq_info structure of this interrupt.
+ */
+irqreturn_t hl_irq_handler_cs_cmplt(int irq, void *arg)
+{
+	struct hl_cs_irq_info *cs_irq_info = arg;
+	struct hl_device *hdev = cs_irq_info->hdev;
+	struct hl_cs *cs;
+	struct hl_cs_job *job;
+	struct hl_cq *cq;
+	int relative_idx = cs_irq_info->relative_idx;
+
+	if (hdev->disabled) {
+		dev_dbg(hdev->dev,
+			"Device disabled but received IRQ %d for CS completion\n",
+			irq);
+		goto out;
+	}
+
+	cs = hdev->shadow_cs_queue[relative_idx & (HL_MAX_PENDING_CS - 1)];
+	if (!cs) {
+		dev_warn(hdev->dev,
+			"No pointer to CS in shadow array at index %d\n",
+			relative_idx);
+		goto out;
+	}
+
+	/*
+	 * The same CQs can be accessed from parallel IRQ handlers that handle
+	 * the completion of different CSs. However, locking is not needed
+	 * because the "free_slots_cnt" variable is atomic.
+	 * There is no need to update the CI counters of the queues/CQs, as they
+	 * are not needed/used for the H/W queue type.
+	 */
+	list_for_each_entry(job, &cs->job_list, cs_node) {
+		cq = &hdev->completion_queue[job->hw_queue_id];
+		atomic_inc(&cq->free_slots_cnt);
+	}
+	/* Queue the work only now: it frees the jobs that were walked above */
+	queue_work(hdev->cs_cmplt_wq, &cs->finish_work);
+out:
+	return IRQ_HANDLED;
+}
+
 /*
  * hl_cq_init - main initialization function for an cq object
  *
-- 
2.17.1

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ