[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20240805032550.3912454-5-link@vivo.com>
Date: Mon, 5 Aug 2024 11:25:46 +0800
From: Huan Yang <link@...o.com>
To: Gerd Hoffmann <kraxel@...hat.com>,
Sumit Semwal <sumit.semwal@...aro.org>,
Christian König <christian.koenig@....com>,
dri-devel@...ts.freedesktop.org,
linux-media@...r.kernel.org,
linaro-mm-sig@...ts.linaro.org,
linux-kernel@...r.kernel.org
Cc: opensource.kernel@...o.com,
Huan Yang <link@...o.com>
Subject: [PATCH v2 4/4] udmabuf: remove folio unpin list
Currently, udmabuf handles folios by creating an unpin list that records
each folio obtained from the memfd so they can be unpinned on release.
Maintaining this approach requires several extra data structures.
However, these structures consume a significant amount of memory, and
traversing the list adds substantial overhead that is unfriendly to the
CPU cache, TLB, and so on.
Therefore, this patch removes the relationship between the folio and its
offset in the linear address mapping.
As an alternative, udmabuf now maintains both a folio array and a page
array: the folio array is used for unpinning, while the page array is
used, as before, to satisfy per-page requirements.
So, udmabuf's folios array stores only the folio pointers, with
foliocount recording its size. The pages array stores the pages within
those folios, starting at the offsets given by the create list, with
pagecount recording its size.
Even though the pages array is reintroduced, its memory usage is smaller
than the combined usage of the offsets array (8 bytes per entry on a
64-bit machine) and the udmabuf_folio structures (24 bytes each on a
64-bit machine).
By doing this, we avoid the overhead of the udmabuf_folio structures
and the performance cost of traversing the list during unpinning.
Signed-off-by: Huan Yang <link@...o.com>
---
drivers/dma-buf/udmabuf.c | 167 ++++++++++++++------------------------
1 file changed, 61 insertions(+), 106 deletions(-)
diff --git a/drivers/dma-buf/udmabuf.c b/drivers/dma-buf/udmabuf.c
index 9737f063b6b3..442ed99d8b33 100644
--- a/drivers/dma-buf/udmabuf.c
+++ b/drivers/dma-buf/udmabuf.c
@@ -25,17 +25,24 @@ module_param(size_limit_mb, int, 0644);
MODULE_PARM_DESC(size_limit_mb, "Max size of a dmabuf, in megabytes. Default is 64.");
struct udmabuf {
+ /**
+ * Each page used by udmabuf in the folio. When obtaining a page from a
+ * folio, it does not necessarily begin from the head page. This is
+ * determined by the offset of the memfd when udmabuf created.
+ */
pgoff_t pagecount;
+ struct page **pages;
+
+ /**
+ * Each folio in memfd, when a udmabuf is created, it is pinned to
+ * ensure that the folio is not moved or reclaimed.
+ * folio array used to unpin all when releasing.
+ */
+ pgoff_t foliocount;
struct folio **folios;
+
struct sg_table *sg;
struct miscdevice *device;
- pgoff_t *offsets;
- struct list_head unpin_list;
-};
-
-struct udmabuf_folio {
- struct folio *folio;
- struct list_head list;
};
static int mmap_udmabuf(struct dma_buf *buf, struct vm_area_struct *vma)
@@ -51,9 +58,7 @@ static int mmap_udmabuf(struct dma_buf *buf, struct vm_area_struct *vma)
for (pgoff = vma->vm_pgoff, end = vma->vm_end, addr = vma->vm_start;
addr < end; pgoff++, addr += PAGE_SIZE) {
- struct page *page =
- folio_page(ubuf->folios[pgoff],
- ubuf->offsets[pgoff] >> PAGE_SHIFT);
+ struct page *page = ubuf->pages[pgoff];
ret = remap_pfn_range(vma, addr, page_to_pfn(page), PAGE_SIZE,
vma->vm_page_prot);
@@ -67,22 +72,11 @@ static int mmap_udmabuf(struct dma_buf *buf, struct vm_area_struct *vma)
static int vmap_udmabuf(struct dma_buf *buf, struct iosys_map *map)
{
struct udmabuf *ubuf = buf->priv;
- struct page **pages;
void *vaddr;
- pgoff_t pg;
dma_resv_assert_held(buf->resv);
- pages = kvmalloc_array(ubuf->pagecount, sizeof(*pages), GFP_KERNEL);
- if (!pages)
- return -ENOMEM;
-
- for (pg = 0; pg < ubuf->pagecount; pg++)
- pages[pg] = folio_page(ubuf->folios[pg],
- ubuf->offsets[pg] >> PAGE_SHIFT);
-
- vaddr = vm_map_ram(pages, ubuf->pagecount, -1);
- kvfree(pages);
+ vaddr = vm_map_ram(ubuf->pages, ubuf->pagecount, -1);
if (!vaddr)
return -EINVAL;
@@ -104,30 +98,25 @@ static struct sg_table *get_sg_table(struct device *dev, struct dma_buf *buf,
{
struct udmabuf *ubuf = buf->priv;
struct sg_table *sg;
- struct scatterlist *sgl;
- unsigned int i = 0;
int ret;
sg = kzalloc(sizeof(*sg), GFP_KERNEL);
if (!sg)
return ERR_PTR(-ENOMEM);
- ret = sg_alloc_table(sg, ubuf->pagecount, GFP_KERNEL);
+ ret = sg_alloc_table_from_pages(sg, ubuf->pages, ubuf->pagecount,
+ 0, ubuf->pagecount << PAGE_SHIFT,
+ GFP_KERNEL);
if (ret < 0)
- goto err_alloc;
-
- for_each_sg(sg->sgl, sgl, ubuf->pagecount, i)
- sg_set_folio(sgl, ubuf->folios[i], PAGE_SIZE,
- ubuf->offsets[i]);
+ goto err;
ret = dma_map_sgtable(dev, sg, direction, 0);
if (ret < 0)
- goto err_map;
+ goto err;
return sg;
-err_map:
+err:
sg_free_table(sg);
-err_alloc:
kfree(sg);
return ERR_PTR(ret);
}
@@ -153,34 +142,6 @@ static void unmap_udmabuf(struct dma_buf_attachment *at,
return put_sg_table(at->dev, sg, direction);
}
-static void unpin_all_folios(struct list_head *unpin_list)
-{
- struct udmabuf_folio *ubuf_folio;
-
- while (!list_empty(unpin_list)) {
- ubuf_folio = list_first_entry(unpin_list,
- struct udmabuf_folio, list);
- unpin_folio(ubuf_folio->folio);
-
- list_del(&ubuf_folio->list);
- kfree(ubuf_folio);
- }
-}
-
-static int add_to_unpin_list(struct list_head *unpin_list,
- struct folio *folio)
-{
- struct udmabuf_folio *ubuf_folio;
-
- ubuf_folio = kzalloc(sizeof(*ubuf_folio), GFP_KERNEL);
- if (!ubuf_folio)
- return -ENOMEM;
-
- ubuf_folio->folio = folio;
- list_add_tail(&ubuf_folio->list, unpin_list);
- return 0;
-}
-
static void release_udmabuf(struct dma_buf *buf)
{
struct udmabuf *ubuf = buf->priv;
@@ -189,9 +150,9 @@ static void release_udmabuf(struct dma_buf *buf)
if (ubuf->sg)
put_sg_table(dev, ubuf->sg, DMA_BIDIRECTIONAL);
- unpin_all_folios(&ubuf->unpin_list);
- kvfree(ubuf->offsets);
+ unpin_folios(ubuf->folios, ubuf->foliocount);
kvfree(ubuf->folios);
+ kvfree(ubuf->pages);
kfree(ubuf);
}
@@ -289,19 +250,18 @@ static long udmabuf_create(struct miscdevice *device,
struct udmabuf_create_list *head,
struct udmabuf_create_item *list)
{
- pgoff_t pgoff, pgcnt, pglimit, pgbuf = 0;
- long nr_folios, ret = -EINVAL;
+ pgoff_t pgoff, pgcnt, pglimit, nr_pages;
+ long nr_folios = 0, ret = -EINVAL;
struct file *memfd = NULL;
struct folio **folios;
struct udmabuf *ubuf;
- u32 i, j, k, flags;
+ u32 i, flags;
loff_t end;
ubuf = kzalloc(sizeof(*ubuf), GFP_KERNEL);
if (!ubuf)
return -ENOMEM;
- INIT_LIST_HEAD(&ubuf->unpin_list);
pglimit = (size_limit_mb * 1024 * 1024) >> PAGE_SHIFT;
for (i = 0; i < head->count; i++) {
if (!IS_ALIGNED(list[i].offset, PAGE_SIZE))
@@ -322,64 +282,58 @@ static long udmabuf_create(struct miscdevice *device,
ret = -ENOMEM;
goto err;
}
- ubuf->offsets =
- kvcalloc(ubuf->pagecount, sizeof(*ubuf->offsets), GFP_KERNEL);
- if (!ubuf->offsets) {
+ folios = ubuf->folios;
+
+ ubuf->pages = kvmalloc_array(ubuf->pagecount, sizeof(*ubuf->pages),
+ GFP_KERNEL);
+ if (!ubuf->pages) {
ret = -ENOMEM;
goto err;
}
- pgbuf = 0;
- for (i = 0; i < head->count; i++) {
+ for (i = 0, nr_pages = 0; i < head->count; i++) {
+ u32 j, pg;
+
memfd = fget(list[i].memfd);
ret = check_memfd_seals(memfd);
if (ret < 0)
goto err;
pgcnt = list[i].size >> PAGE_SHIFT;
- folios = kvmalloc_array(pgcnt, sizeof(*folios), GFP_KERNEL);
- if (!folios) {
- ret = -ENOMEM;
- goto err;
- }
end = list[i].offset + (pgcnt << PAGE_SHIFT) - 1;
- ret = memfd_pin_folios(memfd, list[i].offset, end,
- folios, pgcnt, &pgoff);
+ ret = memfd_pin_folios(memfd, list[i].offset, end, folios,
+ pgcnt, &pgoff);
if (ret <= 0) {
- kvfree(folios);
- if (!ret)
- ret = -EINVAL;
+ ret = ret ?: -EINVAL;
goto err;
}
- nr_folios = ret;
- pgoff >>= PAGE_SHIFT;
- for (j = 0, k = 0; j < pgcnt; j++) {
- ubuf->folios[pgbuf] = folios[k];
- ubuf->offsets[pgbuf] = pgoff << PAGE_SHIFT;
-
- if (j == 0 || ubuf->folios[pgbuf-1] != folios[k]) {
- ret = add_to_unpin_list(&ubuf->unpin_list,
- folios[k]);
- if (ret < 0) {
- kfree(folios);
- goto err;
- }
- }
-
- pgbuf++;
- if (++pgoff == folio_nr_pages(folios[k])) {
- pgoff = 0;
- if (++k == nr_folios)
- break;
+ /**
+ * Iter the pinned folios and record them for later unpin
+ * when releasing.
+ * memfd may start from any offset, so we need check it
+ * carefully at first.
+ */
+ for (j = 0, pgoff >>= PAGE_SHIFT, pg = 0; j < ret;
+ ++j, pgoff = 0) {
+ pgoff_t k;
+ struct folio *folio = folios[j];
+
+ for (k = pgoff; k < folio_nr_pages(folio); ++k) {
+ ubuf->pages[nr_pages++] = folio_page(folio, k);
+
+ if (++pg >= pgcnt)
+ goto end;
}
}
-
- kvfree(folios);
+end:
+ folios += ret;
+ nr_folios += ret;
fput(memfd);
memfd = NULL;
}
+ ubuf->foliocount = nr_folios;
flags = head->flags & UDMABUF_FLAGS_CLOEXEC ? O_CLOEXEC : 0;
ret = export_udmabuf(ubuf, device, flags);
@@ -391,8 +345,9 @@ static long udmabuf_create(struct miscdevice *device,
err:
if (memfd)
fput(memfd);
- unpin_all_folios(&ubuf->unpin_list);
- kvfree(ubuf->offsets);
+ if (nr_folios)
+ unpin_folios(ubuf->folios, nr_folios);
+ kvfree(ubuf->pages);
kvfree(ubuf->folios);
kfree(ubuf);
return ret;
--
2.45.2
Powered by blists - more mailing lists