lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [thread-next>] [day] [month] [year] [list]
Message-ID: <60d22fc6-88d6-c7c2-90bd-1e8eccb1fdcc@huawei.com>
Date:   Sat, 21 Nov 2020 15:58:37 +0800
From:   "xuxiaoyang (C)" <xuxiaoyang2@...wei.com>
To:     <linux-kernel@...r.kernel.org>, <kvm@...r.kernel.org>,
        Alex Williamson <alex.williamson@...hat.com>
CC:     <kwankhede@...dia.com>, <wu.wubin@...wei.com>,
        <maoming.maoming@...wei.com>, <xieyingtai@...wei.com>,
        <lizhengui@...wei.com>, <wubinfeng@...wei.com>,
        "xuxiaoyang (C)" <xuxiaoyang2@...wei.com>
Subject: [PATCH v2] vfio iommu type1: Improve vfio_iommu_type1_pin_pages
 performance

vfio_pin_pages() accepts an array of unrelated iova pfns and processes
each to return the physical pfn.  When dealing with large arrays of
contiguous iovas, vfio_iommu_type1_pin_pages is very inefficient because
it is processed page by page.In this case, we can divide the iova pfn
array into multiple continuous ranges and optimize them.  For example,
when the iova pfn array is {1,5,6,7,9}, it will be divided into three
groups {1}, {5,6,7}, {9} for processing.  When processing {5,6,7}, the
number of calls to pin_user_pages_remote is reduced from 3 times to once.
For single page or large array of discontinuous iovas, we still use
vfio_pin_page_external to deal with it to reduce the performance loss
caused by refactoring.

Signed-off-by: Xiaoyang Xu <xuxiaoyang2@...wei.com>
---
v1 -> v2:
 * make vfio_iommu_type1_pin_contiguous_pages use vfio_pin_page_external
 to pin single page when npage=1
 * make vfio_pin_contiguous_pages_external use set npage to mark
 consecutive pages as dirty. simplify the processing logic of unwind
 * remove unnecessary checks in vfio_get_contiguous_pages_length, put
 the least costly judgment logic at the top, and replace
 vfio_iova_get_vfio_pfn with vfio_find_vpfn

 drivers/vfio/vfio_iommu_type1.c | 231 ++++++++++++++++++++++++++++----
 1 file changed, 204 insertions(+), 27 deletions(-)

diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c
index 67e827638995..080727b531c6 100644
--- a/drivers/vfio/vfio_iommu_type1.c
+++ b/drivers/vfio/vfio_iommu_type1.c
@@ -628,6 +628,196 @@ static int vfio_unpin_page_external(struct vfio_dma *dma, dma_addr_t iova,
 	return unlocked;
 }

+static int contiguous_vaddr_get_pfn(struct mm_struct *mm, unsigned long vaddr,
+				    int prot, long npage, unsigned long *phys_pfn)
+{
+	struct page **pages = NULL;
+	unsigned int flags = 0;
+	int i, ret;
+
+	pages = kvmalloc_array(npage, sizeof(struct page *), GFP_KERNEL);
+	if (!pages)
+		return -ENOMEM;
+
+	if (prot & IOMMU_WRITE)
+		flags |= FOLL_WRITE;
+
+	mmap_read_lock(mm);
+	ret = pin_user_pages_remote(mm, vaddr, npage, flags | FOLL_LONGTERM,
+				    pages, NULL, NULL);
+	mmap_read_unlock(mm);
+
+	for (i = 0; i < ret; i++)
+		*(phys_pfn + i) = page_to_pfn(pages[i]);
+
+	kvfree(pages);
+
+	return ret;
+}
+
+static int vfio_pin_contiguous_pages_external(struct vfio_iommu *iommu,
+				    struct vfio_dma *dma,
+				    unsigned long *user_pfn,
+				    int npage, unsigned long *phys_pfn,
+				    bool do_accounting)
+{
+	int ret, i, j, lock_acct = 0;
+	unsigned long remote_vaddr;
+	dma_addr_t iova;
+	struct mm_struct *mm;
+	struct vfio_pfn *vpfn;
+
+	mm = get_task_mm(dma->task);
+	if (!mm)
+		return -ENODEV;
+
+	iova = user_pfn[0] << PAGE_SHIFT;
+	remote_vaddr = dma->vaddr + iova - dma->iova;
+	ret = contiguous_vaddr_get_pfn(mm, remote_vaddr, dma->prot,
+					    npage, phys_pfn);
+	mmput(mm);
+	if (ret <= 0)
+		return ret;
+
+	npage = ret;
+	for (i = 0; i < npage; i++) {
+		iova = user_pfn[i] << PAGE_SHIFT;
+		ret = vfio_add_to_pfn_list(dma, iova, phys_pfn[i]);
+		if (ret)
+			goto unwind;
+
+		if (!is_invalid_reserved_pfn(phys_pfn[i]))
+			lock_acct++;
+	}
+
+	if (do_accounting) {
+		ret = vfio_lock_acct(dma, lock_acct, true);
+		if (ret) {
+			if (ret == -ENOMEM)
+				pr_warn("%s: Task %s (%d) RLIMIT_MEMLOCK (%ld) exceeded\n",
+					__func__, dma->task->comm, task_pid_nr(dma->task),
+					task_rlimit(dma->task, RLIMIT_MEMLOCK));
+			goto unwind;
+		}
+	}
+
+	if (iommu->dirty_page_tracking) {
+		unsigned long pgshift = __ffs(iommu->pgsize_bitmap);
+
+		/*
+		 * Bitmap populated with the smallest supported page
+		 * size
+		 */
+		bitmap_set(dma->bitmap,
+			   ((user_pfn[0] << PAGE_SHIFT) - dma->iova) >> pgshift, npage);
+	}
+
+	return i;
+unwind:
+	for (j = 0; j < npage; j++) {
+		if (j < i) {
+			iova = user_pfn[j] << PAGE_SHIFT;
+			vpfn = vfio_find_vpfn(dma, iova);
+			vfio_iova_put_vfio_pfn(dma, vpfn);
+		} else {
+			put_pfn(phys_pfn[j], dma->prot);
+		}
+
+		phys_pfn[j] = 0;
+	}
+
+	return ret;
+}
+
+static int vfio_iommu_type1_pin_contiguous_pages(struct vfio_iommu *iommu,
+					    struct vfio_dma *dma,
+					    unsigned long *user_pfn,
+					    int npage, unsigned long *phys_pfn,
+					    bool do_accounting)
+{
+	int ret = 0, i, j;
+	unsigned long remote_vaddr;
+	dma_addr_t iova;
+
+	if (npage == 1)
+		goto pin_single_page;
+
+	ret = vfio_pin_contiguous_pages_external(iommu, dma, user_pfn, npage,
+				phys_pfn, do_accounting);
+	if (ret == npage)
+		return ret;
+
+	if (ret < 0)
+		ret = 0;
+
+pin_single_page:
+	for (i = ret; i < npage; i++) {
+		iova = user_pfn[i] << PAGE_SHIFT;
+		remote_vaddr = dma->vaddr + iova - dma->iova;
+
+		ret = vfio_pin_page_external(dma, remote_vaddr, &phys_pfn[i],
+			    do_accounting);
+		if (ret)
+			goto pin_unwind;
+
+		ret = vfio_add_to_pfn_list(dma, iova, phys_pfn[i]);
+		if (ret) {
+			if (put_pfn(phys_pfn[i], dma->prot) && do_accounting)
+				vfio_lock_acct(dma, -1, true);
+			goto pin_unwind;
+		}
+
+		if (iommu->dirty_page_tracking) {
+			unsigned long pgshift = __ffs(iommu->pgsize_bitmap);
+
+			/*
+			 * Bitmap populated with the smallest supported page
+			 * size
+			 */
+			bitmap_set(dma->bitmap,
+					   (iova - dma->iova) >> pgshift, 1);
+		}
+	}
+
+	return i;
+
+pin_unwind:
+	phys_pfn[i] = 0;
+	for (j = 0; j < i; j++) {
+		iova = user_pfn[j] << PAGE_SHIFT;
+		vfio_unpin_page_external(dma, iova, do_accounting);
+		phys_pfn[j] = 0;
+	}
+
+	return ret;
+}
+
+static int vfio_get_contiguous_pages_length(struct vfio_dma *dma,
+				    unsigned long *user_pfn, int npage)
+{
+	int i;
+	dma_addr_t iova = user_pfn[0] << PAGE_SHIFT;
+	struct vfio_pfn *vpfn;
+
+	if (npage <= 1)
+		return npage;
+
+	for (i = 1; i < npage; i++) {
+		if (user_pfn[i] != user_pfn[0] + i)
+			break;
+
+		iova = user_pfn[i] << PAGE_SHIFT;
+		if (iova >= dma->iova + dma->size ||
+				iova + PAGE_SIZE <= dma->iova)
+			break;
+
+		vpfn = vfio_find_vpfn(dma, iova);
+		if (vpfn)
+			break;
+	}
+	return i;
+}
+
 static int vfio_iommu_type1_pin_pages(void *iommu_data,
 				      struct iommu_group *iommu_group,
 				      unsigned long *user_pfn,
@@ -637,9 +827,9 @@ static int vfio_iommu_type1_pin_pages(void *iommu_data,
 	struct vfio_iommu *iommu = iommu_data;
 	struct vfio_group *group;
 	int i, j, ret;
-	unsigned long remote_vaddr;
 	struct vfio_dma *dma;
 	bool do_accounting;
+	int contiguous_npage;

 	if (!iommu || !user_pfn || !phys_pfn)
 		return -EINVAL;
@@ -663,7 +853,7 @@ static int vfio_iommu_type1_pin_pages(void *iommu_data,
 	 */
 	do_accounting = !IS_IOMMU_CAP_DOMAIN_IN_CONTAINER(iommu);

-	for (i = 0; i < npage; i++) {
+	for (i = 0; i < npage; i += contiguous_npage) {
 		dma_addr_t iova;
 		struct vfio_pfn *vpfn;

@@ -682,31 +872,18 @@ static int vfio_iommu_type1_pin_pages(void *iommu_data,
 		vpfn = vfio_iova_get_vfio_pfn(dma, iova);
 		if (vpfn) {
 			phys_pfn[i] = vpfn->pfn;
-			continue;
-		}
-
-		remote_vaddr = dma->vaddr + (iova - dma->iova);
-		ret = vfio_pin_page_external(dma, remote_vaddr, &phys_pfn[i],
-					     do_accounting);
-		if (ret)
-			goto pin_unwind;
-
-		ret = vfio_add_to_pfn_list(dma, iova, phys_pfn[i]);
-		if (ret) {
-			if (put_pfn(phys_pfn[i], dma->prot) && do_accounting)
-				vfio_lock_acct(dma, -1, true);
-			goto pin_unwind;
-		}
-
-		if (iommu->dirty_page_tracking) {
-			unsigned long pgshift = __ffs(iommu->pgsize_bitmap);
-
-			/*
-			 * Bitmap populated with the smallest supported page
-			 * size
-			 */
-			bitmap_set(dma->bitmap,
-				   (iova - dma->iova) >> pgshift, 1);
+			contiguous_npage = 1;
+		} else {
+			ret = vfio_get_contiguous_pages_length(dma,
+					&user_pfn[i], npage - i);
+			if (ret < 0)
+				goto pin_unwind;
+
+			ret = vfio_iommu_type1_pin_contiguous_pages(iommu,
+					dma, &user_pfn[i], ret, &phys_pfn[i], do_accounting);
+			if (ret < 0)
+				goto pin_unwind;
+			contiguous_npage = ret;
 		}
 	}
 	ret = i;
--
2.19.1

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ