Message-Id: <20221208151350.1833823-22-ogabbay@kernel.org>
Date:   Thu,  8 Dec 2022 17:13:46 +0200
From:   Oded Gabbay <ogabbay@...nel.org>
To:     linux-kernel@...r.kernel.org
Cc:     Ohad Sharabi <osharabi@...ana.ai>
Subject: [PATCH 22/26] habanalabs: fix dmabuf to export only required size

From: Ohad Sharabi <osharabi@...ana.ai>

This patch fixes a bug that was found in the dmabuf export flow.
Bug description, as observed on a Gaudi2 device:
1. User allocates 4MB of device memory.
    - Note that although the allocation size was 4MB, the HMMU allocated
      a full page of 768MB to back the request.
    - The user gets a memory handle that points to a single page (768MB).
    - Mapping the handle, the user gets a virtual address to the start
      of the page.
2. User exports the buffer.
3. User registers the exported buffer in the importer. This flow
   invokes a callback to the exporter, which in turn converts the
   phys_page_pack to an SG list for the importer. This SG list consists
   of a single entry of size 768MB, yet the size that was passed to the
   importer was only 4MB (see the small sketch after this list).
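
To make the mismatch concrete, here is a minimal standalone sketch
using the numbers above; it is illustrative only and is not driver
code:

#include <stdio.h>

int main(void)
{
	unsigned long long page_size = 768ULL << 20;   /* one 768MB HMMU page */
	unsigned long long npages = 1;                 /* single backing page */
	unsigned long long exported_size = 4ULL << 20; /* user asked for 4MB */

	/* Pre-fix, the exporter built the SG list from the full page
	 * pack, ignoring the size the user actually exported.
	 */
	unsigned long long sg_total = npages * page_size;

	printf("SG list covers %lluMB, importer was told %lluMB\n",
	       sg_total >> 20, exported_size >> 20);
	return 0;
}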

The solution is to make sure the importer is exposed only to the
exported size.

This is done by clamping the SG list created by the exporter to the
total size of the memory actually requested by the user, as sketched
below.
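
A minimal sketch of the clamping idea, with simplified names and local
min helpers standing in for the kernel's min()/min3(); the
authoritative change is the diff below:

#define min(a, b)	((a) < (b) ? (a) : (b))
#define min3(a, b, c)	min(min(a, b), (c))

/* Clamp the next SG chunk so the importer never sees more than the
 * exported size; a chunk also never exceeds the DMA max segment size.
 */
static unsigned long long next_chunk_size(unsigned long long size_left,
					  unsigned long long dma_max_seg_size,
					  unsigned long long *size_to_export)
{
	unsigned long long chunk = min3(size_left, dma_max_seg_size,
					*size_to_export);

	*size_to_export -= chunk;	/* account for what was handed out */
	return chunk;
}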

Signed-off-by: Ohad Sharabi <osharabi@...ana.ai>
Reviewed-by: Oded Gabbay <ogabbay@...nel.org>
Signed-off-by: Oded Gabbay <ogabbay@...nel.org>
---
 drivers/misc/habanalabs/common/habanalabs.h |  2 ++
 drivers/misc/habanalabs/common/memory.c     | 35 +++++++++++++++------
 2 files changed, 28 insertions(+), 9 deletions(-)

diff --git a/drivers/misc/habanalabs/common/habanalabs.h b/drivers/misc/habanalabs/common/habanalabs.h
index ef5a765f3313..de715c91a87e 100644
--- a/drivers/misc/habanalabs/common/habanalabs.h
+++ b/drivers/misc/habanalabs/common/habanalabs.h
@@ -2120,6 +2120,7 @@ struct hl_vm_hw_block_list_node {
  * @pages: the physical page array.
  * @npages: num physical pages in the pack.
  * @total_size: total size of all the pages in this list.
+ * @exported_size: buffer exported size.
  * @node: used to attach to deletion list that is used when all the allocations are cleared
  *        at the teardown of the context.
  * @mapping_cnt: number of shared mappings.
@@ -2136,6 +2137,7 @@ struct hl_vm_phys_pg_pack {
 	u64			*pages;
 	u64			npages;
 	u64			total_size;
+	u64			exported_size;
 	struct list_head	node;
 	atomic_t		mapping_cnt;
 	u32			asid;
diff --git a/drivers/misc/habanalabs/common/memory.c b/drivers/misc/habanalabs/common/memory.c
index c7c27ffa6309..3f05bb398c70 100644
--- a/drivers/misc/habanalabs/common/memory.c
+++ b/drivers/misc/habanalabs/common/memory.c
@@ -1548,10 +1548,10 @@ static int set_dma_sg(struct scatterlist *sg, u64 bar_address, u64 chunk_size,
 }
 
 static struct sg_table *alloc_sgt_from_device_pages(struct hl_device *hdev, u64 *pages, u64 npages,
-						u64 page_size, struct device *dev,
-						enum dma_data_direction dir)
+						u64 page_size, u64 exported_size,
+						struct device *dev, enum dma_data_direction dir)
 {
-	u64 chunk_size, bar_address, dma_max_seg_size;
+	u64 chunk_size, bar_address, dma_max_seg_size, cur_size_to_export, cur_npages;
 	struct asic_fixed_properties *prop;
 	int rc, i, j, nents, cur_page;
 	struct scatterlist *sg;
@@ -1577,16 +1577,23 @@ static struct sg_table *alloc_sgt_from_device_pages(struct hl_device *hdev, u64
 	if (!sgt)
 		return ERR_PTR(-ENOMEM);
 
+	/* remove export size restrictions in case not explicitly defined */
+	cur_size_to_export = exported_size ? exported_size : (npages * page_size);
+
 	/* If the size of each page is larger than the dma max segment size,
 	 * then we can't combine pages and the number of entries in the SGL
 	 * will just be the
 	 * <number of pages> * <chunks of max segment size in each page>
 	 */
-	if (page_size > dma_max_seg_size)
-		nents = npages * DIV_ROUND_UP_ULL(page_size, dma_max_seg_size);
-	else
+	if (page_size > dma_max_seg_size) {
+		/* we should limit number of pages according to the exported size */
+		cur_npages = DIV_ROUND_UP_SECTOR_T(cur_size_to_export, page_size);
+		nents = cur_npages * DIV_ROUND_UP_SECTOR_T(page_size, dma_max_seg_size);
+	} else {
+		cur_npages = npages;
+
 		/* Get number of non-contiguous chunks */
-		for (i = 1, nents = 1, chunk_size = page_size ; i < npages ; i++) {
+		for (i = 1, nents = 1, chunk_size = page_size ; i < cur_npages ; i++) {
 			if (pages[i - 1] + page_size != pages[i] ||
 					chunk_size + page_size > dma_max_seg_size) {
 				nents++;
@@ -1596,6 +1603,7 @@ static struct sg_table *alloc_sgt_from_device_pages(struct hl_device *hdev, u64
 
 			chunk_size += page_size;
 		}
+	}
 
 	rc = sg_alloc_table(sgt, nents, GFP_KERNEL | __GFP_ZERO);
 	if (rc)
@@ -1618,7 +1626,8 @@ static struct sg_table *alloc_sgt_from_device_pages(struct hl_device *hdev, u64
 			else
 				cur_device_address += dma_max_seg_size;
 
-			chunk_size = min(size_left, dma_max_seg_size);
+			/* make sure not to export over exported size */
+			chunk_size = min3(size_left, dma_max_seg_size, cur_size_to_export);
 
 			bar_address = hdev->dram_pci_bar_start + cur_device_address;
 
@@ -1626,6 +1635,8 @@ static struct sg_table *alloc_sgt_from_device_pages(struct hl_device *hdev, u64
 			if (rc)
 				goto error_unmap;
 
+			cur_size_to_export -= chunk_size;
+
 			if (size_left > dma_max_seg_size) {
 				size_left -= dma_max_seg_size;
 			} else {
@@ -1637,7 +1648,7 @@ static struct sg_table *alloc_sgt_from_device_pages(struct hl_device *hdev, u64
 		/* Merge pages and put them into the scatterlist */
 		for_each_sgtable_dma_sg(sgt, sg, i) {
 			chunk_size = page_size;
-			for (j = cur_page + 1 ; j < npages ; j++) {
+			for (j = cur_page + 1 ; j < cur_npages ; j++) {
 				if (pages[j - 1] + page_size != pages[j] ||
 						chunk_size + page_size > dma_max_seg_size)
 					break;
@@ -1648,10 +1659,13 @@ static struct sg_table *alloc_sgt_from_device_pages(struct hl_device *hdev, u64
 			bar_address = hdev->dram_pci_bar_start +
 					(pages[cur_page] - prop->dram_base_address);
 
+			/* make sure not to export over exported size */
+			chunk_size = min(chunk_size, cur_size_to_export);
 			rc = set_dma_sg(sg, bar_address, chunk_size, dev, dir);
 			if (rc)
 				goto error_unmap;
 
+			cur_size_to_export -= chunk_size;
 			cur_page = j;
 		}
 	}
@@ -1722,6 +1736,7 @@ static struct sg_table *hl_map_dmabuf(struct dma_buf_attachment *attachment,
 						phys_pg_pack->pages,
 						phys_pg_pack->npages,
 						phys_pg_pack->page_size,
+						phys_pg_pack->exported_size,
 						attachment->dev,
 						dir);
 	else
@@ -1729,6 +1744,7 @@ static struct sg_table *hl_map_dmabuf(struct dma_buf_attachment *attachment,
 						&hl_dmabuf->device_address,
 						1,
 						hl_dmabuf->dmabuf->size,
+						0,
 						attachment->dev,
 						dir);
 
@@ -2033,6 +2049,7 @@ static int export_dmabuf_from_addr(struct hl_ctx *ctx, u64 addr, u64 size, u64 o
 		if (rc)
 			goto dec_memhash_export_cnt;
 
+		phys_pg_pack->exported_size = size;
 		hl_dmabuf->phys_pg_pack = phys_pg_pack;
 		hl_dmabuf->memhash_hnode = hnode;
 	} else {
-- 
2.25.1
