lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [thread-next>] [day] [month] [year] [list]
Message-Id: <20200716073129.5924-1-oded.gabbay@gmail.com>
Date:   Thu, 16 Jul 2020 10:31:28 +0300
From:   Oded Gabbay <oded.gabbay@...il.com>
To:     linux-kernel@...r.kernel.org, SW_Drivers@...ana.ai
Cc:     Ofir Bitton <obitton@...ana.ai>
Subject: [PATCH 1/2] habanalabs: create internal CB pool

From: Ofir Bitton <obitton@...ana.ai>

Create a device MMU-mapped internal command buffer (CB) pool, so that the
driver can allocate the CBs used for signal/wait operations. These CBs are
fetched by the queues when the queues are configured with the user's
address space ID.

We must pre-map this internal pool for performance reasons.

This pool is needed for future ASIC support and it is currently unused in
GOYA and GAUDI.

Signed-off-by: Ofir Bitton <obitton@...ana.ai>
Reviewed-by: Oded Gabbay <oded.gabbay@...il.com>
Signed-off-by: Oded Gabbay <oded.gabbay@...il.com>
---
 .../misc/habanalabs/common/command_buffer.c   | 82 ++++++++++++-------
 .../habanalabs/common/command_submission.c    | 13 +--
 drivers/misc/habanalabs/common/context.c      |  8 ++
 drivers/misc/habanalabs/common/habanalabs.h   | 18 +++-
 drivers/misc/habanalabs/gaudi/gaudi.c         | 20 +++--
 drivers/misc/habanalabs/goya/goya.c           | 18 ++--
 6 files changed, 106 insertions(+), 53 deletions(-)

diff --git a/drivers/misc/habanalabs/common/command_buffer.c b/drivers/misc/habanalabs/common/command_buffer.c
index 02d13f71b1df..7c38c4f7f9c0 100644
--- a/drivers/misc/habanalabs/common/command_buffer.c
+++ b/drivers/misc/habanalabs/common/command_buffer.c
@@ -10,12 +10,18 @@
 
 #include <linux/mm.h>
 #include <linux/slab.h>
+#include <linux/genalloc.h>
 
 static void cb_fini(struct hl_device *hdev, struct hl_cb *cb)
 {
-	hdev->asic_funcs->asic_dma_free_coherent(hdev, cb->size,
-			(void *) (uintptr_t) cb->kernel_address,
-			cb->bus_address);
+	if (cb->is_internal)
+		gen_pool_free(hdev->internal_cb_pool,
+				cb->kernel_address, cb->size);
+	else
+		hdev->asic_funcs->asic_dma_free_coherent(hdev, cb->size,
+				(void *) (uintptr_t) cb->kernel_address,
+				cb->bus_address);
+
 	kfree(cb);
 }
 
@@ -44,9 +50,10 @@ static void cb_release(struct kref *ref)
 }
 
 static struct hl_cb *hl_cb_alloc(struct hl_device *hdev, u32 cb_size,
-					int ctx_id)
+					int ctx_id, bool internal_cb)
 {
 	struct hl_cb *cb;
+	u32 cb_offset;
 	void *p;
 
 	/*
@@ -65,13 +72,25 @@ static struct hl_cb *hl_cb_alloc(struct hl_device *hdev, u32 cb_size,
 	if (!cb)
 		return NULL;
 
-	if (ctx_id == HL_KERNEL_ASID_ID)
+	if (internal_cb) {
+		p = (void *) gen_pool_alloc(hdev->internal_cb_pool, cb_size);
+		if (!p) {
+			kfree(cb);
+			return NULL;
+		}
+
+		cb_offset = p - hdev->internal_cb_pool_virt_addr;
+		cb->is_internal = true;
+		cb->bus_address =  hdev->internal_cb_va_base + cb_offset;
+	} else if (ctx_id == HL_KERNEL_ASID_ID) {
 		p = hdev->asic_funcs->asic_dma_alloc_coherent(hdev, cb_size,
 						&cb->bus_address, GFP_ATOMIC);
-	else
+	} else {
 		p = hdev->asic_funcs->asic_dma_alloc_coherent(hdev, cb_size,
 						&cb->bus_address,
 						GFP_USER | __GFP_ZERO);
+	}
+
 	if (!p) {
 		dev_err(hdev->dev,
 			"failed to allocate %d of dma memory for CB\n",
@@ -87,7 +106,7 @@ static struct hl_cb *hl_cb_alloc(struct hl_device *hdev, u32 cb_size,
 }
 
 int hl_cb_create(struct hl_device *hdev, struct hl_cb_mgr *mgr,
-			u32 cb_size, u64 *handle, int ctx_id)
+			u32 cb_size, u64 *handle, int ctx_id, bool internal_cb)
 {
 	struct hl_cb *cb;
 	bool alloc_new_cb = true;
@@ -112,28 +131,30 @@ int hl_cb_create(struct hl_device *hdev, struct hl_cb_mgr *mgr,
 		goto out_err;
 	}
 
-	/* Minimum allocation must be PAGE SIZE */
-	if (cb_size < PAGE_SIZE)
-		cb_size = PAGE_SIZE;
-
-	if (ctx_id == HL_KERNEL_ASID_ID &&
-			cb_size <= hdev->asic_prop.cb_pool_cb_size) {
-
-		spin_lock(&hdev->cb_pool_lock);
-		if (!list_empty(&hdev->cb_pool)) {
-			cb = list_first_entry(&hdev->cb_pool, typeof(*cb),
-					pool_list);
-			list_del(&cb->pool_list);
-			spin_unlock(&hdev->cb_pool_lock);
-			alloc_new_cb = false;
-		} else {
-			spin_unlock(&hdev->cb_pool_lock);
-			dev_dbg(hdev->dev, "CB pool is empty\n");
+	if (!internal_cb) {
+		/* Minimum allocation must be PAGE SIZE */
+		if (cb_size < PAGE_SIZE)
+			cb_size = PAGE_SIZE;
+
+		if (ctx_id == HL_KERNEL_ASID_ID &&
+				cb_size <= hdev->asic_prop.cb_pool_cb_size) {
+
+			spin_lock(&hdev->cb_pool_lock);
+			if (!list_empty(&hdev->cb_pool)) {
+				cb = list_first_entry(&hdev->cb_pool,
+						typeof(*cb), pool_list);
+				list_del(&cb->pool_list);
+				spin_unlock(&hdev->cb_pool_lock);
+				alloc_new_cb = false;
+			} else {
+				spin_unlock(&hdev->cb_pool_lock);
+				dev_dbg(hdev->dev, "CB pool is empty\n");
+			}
 		}
 	}
 
 	if (alloc_new_cb) {
-		cb = hl_cb_alloc(hdev, cb_size, ctx_id);
+		cb = hl_cb_alloc(hdev, cb_size, ctx_id, internal_cb);
 		if (!cb) {
 			rc = -ENOMEM;
 			goto out_err;
@@ -229,8 +250,8 @@ int hl_cb_ioctl(struct hl_fpriv *hpriv, void *data)
 			rc = -EINVAL;
 		} else {
 			rc = hl_cb_create(hdev, &hpriv->cb_mgr,
-						args->in.cb_size, &handle,
-						hpriv->ctx->asid);
+					args->in.cb_size, &handle,
+					hpriv->ctx->asid, false);
 		}
 
 		memset(args, 0, sizeof(*args));
@@ -398,14 +419,15 @@ void hl_cb_mgr_fini(struct hl_device *hdev, struct hl_cb_mgr *mgr)
 	idr_destroy(&mgr->cb_handles);
 }
 
-struct hl_cb *hl_cb_kernel_create(struct hl_device *hdev, u32 cb_size)
+struct hl_cb *hl_cb_kernel_create(struct hl_device *hdev, u32 cb_size,
+					bool internal_cb)
 {
 	u64 cb_handle;
 	struct hl_cb *cb;
 	int rc;
 
 	rc = hl_cb_create(hdev, &hdev->kernel_cb_mgr, cb_size, &cb_handle,
-			HL_KERNEL_ASID_ID);
+			HL_KERNEL_ASID_ID, internal_cb);
 	if (rc) {
 		dev_err(hdev->dev,
 			"Failed to allocate CB for the kernel driver %d\n", rc);
@@ -437,7 +459,7 @@ int hl_cb_pool_init(struct hl_device *hdev)
 
 	for (i = 0 ; i < hdev->asic_prop.cb_pool_cb_cnt ; i++) {
 		cb = hl_cb_alloc(hdev, hdev->asic_prop.cb_pool_cb_size,
-				HL_KERNEL_ASID_ID);
+				HL_KERNEL_ASID_ID, false);
 		if (cb) {
 			cb->is_pool = true;
 			list_add(&cb->pool_list, &hdev->cb_pool);
diff --git a/drivers/misc/habanalabs/common/command_submission.c b/drivers/misc/habanalabs/common/command_submission.c
index 54f2f5afdd2a..272d79256ed2 100644
--- a/drivers/misc/habanalabs/common/command_submission.c
+++ b/drivers/misc/habanalabs/common/command_submission.c
@@ -911,7 +911,13 @@ static int cs_ioctl_signal_wait(struct hl_fpriv *hpriv, enum hl_cs_type cs_type,
 		goto put_cs;
 	}
 
-	cb = hl_cb_kernel_create(hdev, PAGE_SIZE);
+	if (cs->type == CS_TYPE_WAIT)
+		cb_size = hdev->asic_funcs->get_wait_cb_size(hdev);
+	else
+		cb_size = hdev->asic_funcs->get_signal_cb_size(hdev);
+
+	cb = hl_cb_kernel_create(hdev, cb_size,
+				q_type == QUEUE_TYPE_HW && hdev->mmu_enable);
 	if (!cb) {
 		ctx->cs_counters.out_of_mem_drop_cnt++;
 		kfree(job);
@@ -919,11 +925,6 @@ static int cs_ioctl_signal_wait(struct hl_fpriv *hpriv, enum hl_cs_type cs_type,
 		goto put_cs;
 	}
 
-	if (cs->type == CS_TYPE_WAIT)
-		cb_size = hdev->asic_funcs->get_wait_cb_size(hdev);
-	else
-		cb_size = hdev->asic_funcs->get_signal_cb_size(hdev);
-
 	job->id = 0;
 	job->cs = cs;
 	job->user_cb = cb;
diff --git a/drivers/misc/habanalabs/common/context.c b/drivers/misc/habanalabs/common/context.c
index 1e3e5b19ecd9..b75a20364fad 100644
--- a/drivers/misc/habanalabs/common/context.c
+++ b/drivers/misc/habanalabs/common/context.c
@@ -153,10 +153,18 @@ int hl_ctx_init(struct hl_device *hdev, struct hl_ctx *ctx, bool is_kernel_ctx)
 			rc = -ENOMEM;
 			goto mem_ctx_err;
 		}
+
+		rc = hdev->asic_funcs->ctx_init(ctx);
+		if (rc) {
+			dev_err(hdev->dev, "ctx_init failed\n");
+			goto ctx_init_err;
+		}
 	}
 
 	return 0;
 
+ctx_init_err:
+	hl_vm_ctx_fini(ctx);
 mem_ctx_err:
 	if (ctx->asid != HL_KERNEL_ASID_ID)
 		hl_asid_free(hdev, ctx->asid);
diff --git a/drivers/misc/habanalabs/common/habanalabs.h b/drivers/misc/habanalabs/common/habanalabs.h
index 82532f1f94cb..bf9abfa47b7a 100644
--- a/drivers/misc/habanalabs/common/habanalabs.h
+++ b/drivers/misc/habanalabs/common/habanalabs.h
@@ -392,6 +392,7 @@ struct hl_cb_mgr {
  * @ctx_id: holds the ID of the owner's context.
  * @mmap: true if the CB is currently mmaped to user.
  * @is_pool: true if CB was acquired from the pool, false otherwise.
+ * @is_internal: true if CB was allocated from the internal CB pool.
  */
 struct hl_cb {
 	struct kref		refcount;
@@ -408,6 +409,7 @@ struct hl_cb {
 	u32			ctx_id;
 	u8			mmap;
 	u8			is_pool;
+	u8			is_internal;
 };
 
 
@@ -643,6 +645,7 @@ enum div_select_defs {
  * @rreg: Read a register. Needed for simulator support.
  * @wreg: Write a register. Needed for simulator support.
  * @halt_coresight: stop the ETF and ETR traces.
+ * @ctx_init: context-dependent initialization.
  * @get_clk_rate: Retrieve the ASIC current and maximum clock rate in MHz
  * @get_queue_id_for_cq: Get the H/W queue id related to the given CQ index.
  * @read_device_fw_version: read the device's firmware versions that are
@@ -745,6 +748,7 @@ struct hl_asic_funcs {
 	u32 (*rreg)(struct hl_device *hdev, u32 reg);
 	void (*wreg)(struct hl_device *hdev, u32 reg, u32 val);
 	void (*halt_coresight)(struct hl_device *hdev);
+	int (*ctx_init)(struct hl_ctx *ctx);
 	int (*get_clk_rate)(struct hl_device *hdev, u32 *cur_clk, u32 *max_clk);
 	u32 (*get_queue_id_for_cq)(struct hl_device *hdev, u32 cq_idx);
 	void (*read_device_fw_version)(struct hl_device *hdev,
@@ -1432,6 +1436,10 @@ struct hl_device_idle_busy_ts {
  * @hl_debugfs: device's debugfs manager.
  * @cb_pool: list of preallocated CBs.
  * @cb_pool_lock: protects the CB pool.
+ * @internal_cb_pool_virt_addr: internal command buffer pool virtual address.
+ * @internal_cb_pool_dma_addr: internal command buffer pool dma address.
+ * @internal_cb_pool: internal command buffer memory pool.
+ * @internal_cb_va_base: internal CB pool MMU virtual address base.
  * @fpriv_list: list of file private data structures. Each structure is created
  *              when a user opens the device
  * @fpriv_list_lock: protects the fpriv_list
@@ -1531,6 +1539,11 @@ struct hl_device {
 	struct list_head		cb_pool;
 	spinlock_t			cb_pool_lock;
 
+	void				*internal_cb_pool_virt_addr;
+	dma_addr_t			internal_cb_pool_dma_addr;
+	struct gen_pool			*internal_cb_pool;
+	u64				internal_cb_va_base;
+
 	struct list_head		fpriv_list;
 	struct mutex			fpriv_list_lock;
 
@@ -1741,7 +1754,7 @@ int hl_hwmon_init(struct hl_device *hdev);
 void hl_hwmon_fini(struct hl_device *hdev);
 
 int hl_cb_create(struct hl_device *hdev, struct hl_cb_mgr *mgr, u32 cb_size,
-		u64 *handle, int ctx_id);
+		u64 *handle, int ctx_id, bool internal_cb);
 int hl_cb_destroy(struct hl_device *hdev, struct hl_cb_mgr *mgr, u64 cb_handle);
 int hl_cb_mmap(struct hl_fpriv *hpriv, struct vm_area_struct *vma);
 struct hl_cb *hl_cb_get(struct hl_device *hdev,	struct hl_cb_mgr *mgr,
@@ -1749,7 +1762,8 @@ struct hl_cb *hl_cb_get(struct hl_device *hdev,	struct hl_cb_mgr *mgr,
 void hl_cb_put(struct hl_cb *cb);
 void hl_cb_mgr_init(struct hl_cb_mgr *mgr);
 void hl_cb_mgr_fini(struct hl_device *hdev, struct hl_cb_mgr *mgr);
-struct hl_cb *hl_cb_kernel_create(struct hl_device *hdev, u32 cb_size);
+struct hl_cb *hl_cb_kernel_create(struct hl_device *hdev, u32 cb_size,
+					bool internal_cb);
 int hl_cb_pool_init(struct hl_device *hdev);
 int hl_cb_pool_fini(struct hl_device *hdev);
 
diff --git a/drivers/misc/habanalabs/gaudi/gaudi.c b/drivers/misc/habanalabs/gaudi/gaudi.c
index 57b2b9392cb2..86cfaf73ad74 100644
--- a/drivers/misc/habanalabs/gaudi/gaudi.c
+++ b/drivers/misc/habanalabs/gaudi/gaudi.c
@@ -635,7 +635,7 @@ static int _gaudi_init_tpc_mem(struct hl_device *hdev,
 	u8 tpc_id;
 	int rc;
 
-	cb = hl_cb_kernel_create(hdev, PAGE_SIZE);
+	cb = hl_cb_kernel_create(hdev, PAGE_SIZE, false);
 	if (!cb)
 		return -EFAULT;
 
@@ -4048,9 +4048,8 @@ static int gaudi_parse_cb_mmu(struct hl_device *hdev,
 	parser->patched_cb_size = parser->user_cb_size +
 			sizeof(struct packet_msg_prot) * 2;
 
-	rc = hl_cb_create(hdev, &hdev->kernel_cb_mgr,
-				parser->patched_cb_size,
-				&patched_cb_handle, HL_KERNEL_ASID_ID);
+	rc = hl_cb_create(hdev, &hdev->kernel_cb_mgr, parser->patched_cb_size,
+			&patched_cb_handle, HL_KERNEL_ASID_ID, false);
 
 	if (rc) {
 		dev_err(hdev->dev,
@@ -4122,9 +4121,8 @@ static int gaudi_parse_cb_no_mmu(struct hl_device *hdev,
 	if (rc)
 		goto free_userptr;
 
-	rc = hl_cb_create(hdev, &hdev->kernel_cb_mgr,
-				parser->patched_cb_size,
-				&patched_cb_handle, HL_KERNEL_ASID_ID);
+	rc = hl_cb_create(hdev, &hdev->kernel_cb_mgr, parser->patched_cb_size,
+			&patched_cb_handle, HL_KERNEL_ASID_ID, false);
 	if (rc) {
 		dev_err(hdev->dev,
 			"Failed to allocate patched CB for DMA CS %d\n", rc);
@@ -4257,7 +4255,7 @@ static int gaudi_memset_device_memory(struct hl_device *hdev, u64 addr,
 	struct hl_cb *cb;
 	int rc;
 
-	cb = hl_cb_kernel_create(hdev, PAGE_SIZE);
+	cb = hl_cb_kernel_create(hdev, PAGE_SIZE, false);
 	if (!cb)
 		return -EFAULT;
 
@@ -6229,6 +6227,11 @@ static enum hl_device_hw_state gaudi_get_hw_state(struct hl_device *hdev)
 	return RREG32(mmHW_STATE);
 }
 
+int gaudi_ctx_init(struct hl_ctx *ctx)
+{
+	return 0;
+}
+
 static u32 gaudi_get_queue_id_for_cq(struct hl_device *hdev, u32 cq_idx)
 {
 	return gaudi_cq_assignment[cq_idx];
@@ -6532,6 +6535,7 @@ static const struct hl_asic_funcs gaudi_funcs = {
 	.rreg = hl_rreg,
 	.wreg = hl_wreg,
 	.halt_coresight = gaudi_halt_coresight,
+	.ctx_init = gaudi_ctx_init,
 	.get_clk_rate = gaudi_get_clk_rate,
 	.get_queue_id_for_cq = gaudi_get_queue_id_for_cq,
 	.read_device_fw_version = gaudi_read_device_fw_version,
diff --git a/drivers/misc/habanalabs/goya/goya.c b/drivers/misc/habanalabs/goya/goya.c
index 2b0937d950c1..4473ded313d6 100644
--- a/drivers/misc/habanalabs/goya/goya.c
+++ b/drivers/misc/habanalabs/goya/goya.c
@@ -3771,9 +3771,8 @@ static int goya_parse_cb_mmu(struct hl_device *hdev,
 	parser->patched_cb_size = parser->user_cb_size +
 			sizeof(struct packet_msg_prot) * 2;
 
-	rc = hl_cb_create(hdev, &hdev->kernel_cb_mgr,
-				parser->patched_cb_size,
-				&patched_cb_handle, HL_KERNEL_ASID_ID);
+	rc = hl_cb_create(hdev, &hdev->kernel_cb_mgr, parser->patched_cb_size,
+			&patched_cb_handle, HL_KERNEL_ASID_ID, false);
 
 	if (rc) {
 		dev_err(hdev->dev,
@@ -3845,9 +3844,8 @@ static int goya_parse_cb_no_mmu(struct hl_device *hdev,
 	if (rc)
 		goto free_userptr;
 
-	rc = hl_cb_create(hdev, &hdev->kernel_cb_mgr,
-				parser->patched_cb_size,
-				&patched_cb_handle, HL_KERNEL_ASID_ID);
+	rc = hl_cb_create(hdev, &hdev->kernel_cb_mgr, parser->patched_cb_size,
+			&patched_cb_handle, HL_KERNEL_ASID_ID, false);
 	if (rc) {
 		dev_err(hdev->dev,
 			"Failed to allocate patched CB for DMA CS %d\n", rc);
@@ -4693,7 +4691,7 @@ static int goya_memset_device_memory(struct hl_device *hdev, u64 addr, u64 size,
 	lin_dma_pkts_cnt = DIV_ROUND_UP_ULL(size, SZ_2G);
 	cb_size = lin_dma_pkts_cnt * sizeof(struct packet_lin_dma) +
 						sizeof(struct packet_msg_prot);
-	cb = hl_cb_kernel_create(hdev, cb_size);
+	cb = hl_cb_kernel_create(hdev, cb_size, false);
 	if (!cb)
 		return -ENOMEM;
 
@@ -5223,6 +5221,11 @@ static enum hl_device_hw_state goya_get_hw_state(struct hl_device *hdev)
 	return RREG32(mmHW_STATE);
 }
 
+int goya_ctx_init(struct hl_ctx *ctx)
+{
+	return 0;
+}
+
 u32 goya_get_queue_id_for_cq(struct hl_device *hdev, u32 cq_idx)
 {
 	return cq_idx;
@@ -5336,6 +5339,7 @@ static const struct hl_asic_funcs goya_funcs = {
 	.rreg = hl_rreg,
 	.wreg = hl_wreg,
 	.halt_coresight = goya_halt_coresight,
+	.ctx_init = goya_ctx_init,
 	.get_clk_rate = goya_get_clk_rate,
 	.get_queue_id_for_cq = goya_get_queue_id_for_cq,
 	.read_device_fw_version = goya_read_device_fw_version,
-- 
2.17.1

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ