lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20150513055809.GA3622@gwshan>
Date:	Wed, 13 May 2015 15:58:09 +1000
From:	Gavin Shan <gwshan@...ux.vnet.ibm.com>
To:	Alexey Kardashevskiy <aik@...abs.ru>
Cc:	linuxppc-dev@...ts.ozlabs.org,
	David Gibson <david@...son.dropbear.id.au>,
	Benjamin Herrenschmidt <benh@...nel.crashing.org>,
	Paul Mackerras <paulus@...ba.org>,
	Alex Williamson <alex.williamson@...hat.com>,
	Gavin Shan <gwshan@...ux.vnet.ibm.com>,
	Wei Yang <weiyang@...ux.vnet.ibm.com>,
	linux-kernel@...r.kernel.org
Subject: Re: [PATCH kernel v10 06/34] vfio: powerpc/spapr: Move page pinning
 from arch code to VFIO IOMMU driver

On Tue, May 12, 2015 at 01:38:55AM +1000, Alexey Kardashevskiy wrote:
>This moves the page pinning (get_user_pages_fast()/put_page()) code out of
>the platform IOMMU code and into the VFIO IOMMU driver, where it belongs,
>as the platform code does not deal with page pinning.
>
>This makes iommu_take_ownership()/iommu_release_ownership() deal with
>the IOMMU table bitmap only.
>
>This removes page unpinning from iommu_take_ownership() as the actual
>TCE table might contain garbage and doing put_page() on it is undefined
>behaviour.
>
>Besides the last part, the rest of the patch is mechanical.
>
>Signed-off-by: Alexey Kardashevskiy <aik@...abs.ru>
>[aw: for the vfio related changes]
>Acked-by: Alex Williamson <alex.williamson@...hat.com>
>Reviewed-by: David Gibson <david@...son.dropbear.id.au>

Reviewed-by: Gavin Shan <gwshan@...ux.vnet.ibm.com>

>---
>Changes:
>v9:
>* added missing tce_iommu_clear call after iommu_release_ownership()
>* brought @offset (a local variable) back to make patch even more
>mechanical
>
>v4:
>* s/iommu_tce_build(tbl, entry + 1/iommu_tce_build(tbl, entry + i/
>---
> arch/powerpc/include/asm/iommu.h    |  4 --
> arch/powerpc/kernel/iommu.c         | 55 -------------------------
> drivers/vfio/vfio_iommu_spapr_tce.c | 80 +++++++++++++++++++++++++++++++------
> 3 files changed, 67 insertions(+), 72 deletions(-)
>
>diff --git a/arch/powerpc/include/asm/iommu.h b/arch/powerpc/include/asm/iommu.h
>index 8353c86..e94a5e3 100644
>--- a/arch/powerpc/include/asm/iommu.h
>+++ b/arch/powerpc/include/asm/iommu.h
>@@ -194,10 +194,6 @@ extern int iommu_tce_build(struct iommu_table *tbl, unsigned long entry,
> 		unsigned long hwaddr, enum dma_data_direction direction);
> extern unsigned long iommu_clear_tce(struct iommu_table *tbl,
> 		unsigned long entry);
>-extern int iommu_clear_tces_and_put_pages(struct iommu_table *tbl,
>-		unsigned long entry, unsigned long pages);
>-extern int iommu_put_tce_user_mode(struct iommu_table *tbl,
>-		unsigned long entry, unsigned long tce);
>
> extern void iommu_flush_tce(struct iommu_table *tbl);
> extern int iommu_take_ownership(struct iommu_table *tbl);
>diff --git a/arch/powerpc/kernel/iommu.c b/arch/powerpc/kernel/iommu.c
>index 2c02d4c..8673c94 100644
>--- a/arch/powerpc/kernel/iommu.c
>+++ b/arch/powerpc/kernel/iommu.c
>@@ -983,30 +983,6 @@ unsigned long iommu_clear_tce(struct iommu_table *tbl, unsigned long entry)
> }
> EXPORT_SYMBOL_GPL(iommu_clear_tce);
>
>-int iommu_clear_tces_and_put_pages(struct iommu_table *tbl,
>-		unsigned long entry, unsigned long pages)
>-{
>-	unsigned long oldtce;
>-	struct page *page;
>-
>-	for ( ; pages; --pages, ++entry) {
>-		oldtce = iommu_clear_tce(tbl, entry);
>-		if (!oldtce)
>-			continue;
>-
>-		page = pfn_to_page(oldtce >> PAGE_SHIFT);
>-		WARN_ON(!page);
>-		if (page) {
>-			if (oldtce & TCE_PCI_WRITE)
>-				SetPageDirty(page);
>-			put_page(page);
>-		}
>-	}
>-
>-	return 0;
>-}
>-EXPORT_SYMBOL_GPL(iommu_clear_tces_and_put_pages);
>-
> /*
>  * hwaddr is a kernel virtual address here (0xc... bazillion),
>  * tce_build converts it to a physical address.
>@@ -1036,35 +1012,6 @@ int iommu_tce_build(struct iommu_table *tbl, unsigned long entry,
> }
> EXPORT_SYMBOL_GPL(iommu_tce_build);
>
>-int iommu_put_tce_user_mode(struct iommu_table *tbl, unsigned long entry,
>-		unsigned long tce)
>-{
>-	int ret;
>-	struct page *page = NULL;
>-	unsigned long hwaddr, offset = tce & IOMMU_PAGE_MASK(tbl) & ~PAGE_MASK;
>-	enum dma_data_direction direction = iommu_tce_direction(tce);
>-
>-	ret = get_user_pages_fast(tce & PAGE_MASK, 1,
>-			direction != DMA_TO_DEVICE, &page);
>-	if (unlikely(ret != 1)) {
>-		/* pr_err("iommu_tce: get_user_pages_fast failed tce=%lx ioba=%lx ret=%d\n",
>-				tce, entry << tbl->it_page_shift, ret); */
>-		return -EFAULT;
>-	}
>-	hwaddr = (unsigned long) page_address(page) + offset;
>-
>-	ret = iommu_tce_build(tbl, entry, hwaddr, direction);
>-	if (ret)
>-		put_page(page);
>-
>-	if (ret < 0)
>-		pr_err("iommu_tce: %s failed ioba=%lx, tce=%lx, ret=%d\n",
>-			__func__, entry << tbl->it_page_shift, tce, ret);
>-
>-	return ret;
>-}
>-EXPORT_SYMBOL_GPL(iommu_put_tce_user_mode);
>-
> int iommu_take_ownership(struct iommu_table *tbl)
> {
> 	unsigned long sz = (tbl->it_size + 7) >> 3;
>@@ -1078,7 +1025,6 @@ int iommu_take_ownership(struct iommu_table *tbl)
> 	}
>
> 	memset(tbl->it_map, 0xff, sz);
>-	iommu_clear_tces_and_put_pages(tbl, tbl->it_offset, tbl->it_size);
>
> 	/*
> 	 * Disable iommu bypass, otherwise the user can DMA to all of
>@@ -1096,7 +1042,6 @@ void iommu_release_ownership(struct iommu_table *tbl)
> {
> 	unsigned long sz = (tbl->it_size + 7) >> 3;
>
>-	iommu_clear_tces_and_put_pages(tbl, tbl->it_offset, tbl->it_size);
> 	memset(tbl->it_map, 0, sz);
>
> 	/* Restore bit#0 set by iommu_init_table() */
>diff --git a/drivers/vfio/vfio_iommu_spapr_tce.c b/drivers/vfio/vfio_iommu_spapr_tce.c
>index 730b4ef..b95fa2b 100644
>--- a/drivers/vfio/vfio_iommu_spapr_tce.c
>+++ b/drivers/vfio/vfio_iommu_spapr_tce.c
>@@ -147,6 +147,67 @@ static void tce_iommu_release(void *iommu_data)
> 	kfree(container);
> }
>
>+static int tce_iommu_clear(struct tce_container *container,
>+		struct iommu_table *tbl,
>+		unsigned long entry, unsigned long pages)
>+{
>+	unsigned long oldtce;
>+	struct page *page;
>+
>+	for ( ; pages; --pages, ++entry) {
>+		oldtce = iommu_clear_tce(tbl, entry);

It might be nice to rename iommu_clear_tce() to iommu_tce_free() in a
separate patch, for two reasons as I see it: first, iommu_tce_{build, free}
would then form a pair of functions doing opposite things; second,
iommu_tce_free() is implemented on top of ppc_md.tce_free(), just as
iommu_tce_build() depends on ppc_md.tce_build().

>+		if (!oldtce)
>+			continue;
>+
>+		page = pfn_to_page(oldtce >> PAGE_SHIFT);
>+		WARN_ON(!page);
>+		if (page) {
>+			if (oldtce & TCE_PCI_WRITE)
>+				SetPageDirty(page);
>+			put_page(page);
>+		}
>+	}
>+
>+	return 0;
>+}
>+
>+static long tce_iommu_build(struct tce_container *container,
>+		struct iommu_table *tbl,
>+		unsigned long entry, unsigned long tce, unsigned long pages)
>+{
>+	long i, ret = 0;
>+	struct page *page = NULL;
>+	unsigned long hva;
>+	enum dma_data_direction direction = iommu_tce_direction(tce);
>+
>+	for (i = 0; i < pages; ++i) {
>+		unsigned long offset = tce & IOMMU_PAGE_MASK(tbl) & ~PAGE_MASK;
>+
>+		ret = get_user_pages_fast(tce & PAGE_MASK, 1,
>+				direction != DMA_TO_DEVICE, &page);
>+		if (unlikely(ret != 1)) {
>+			ret = -EFAULT;
>+			break;
>+		}
>+		hva = (unsigned long) page_address(page) + offset;
>+
>+		ret = iommu_tce_build(tbl, entry + i, hva, direction);
>+		if (ret) {
>+			put_page(page);
>+			pr_err("iommu_tce: %s failed ioba=%lx, tce=%lx, ret=%ld\n",
>+					__func__, entry << tbl->it_page_shift,
>+					tce, ret);
>+			break;
>+		}
>+		tce += IOMMU_PAGE_SIZE_4K;
>+	}
>+
>+	if (ret)
>+		tce_iommu_clear(container, tbl, entry, i);
>+
>+	return ret;
>+}
>+
> static long tce_iommu_ioctl(void *iommu_data,
> 				 unsigned int cmd, unsigned long arg)
> {
>@@ -195,7 +256,7 @@ static long tce_iommu_ioctl(void *iommu_data,
> 	case VFIO_IOMMU_MAP_DMA: {
> 		struct vfio_iommu_type1_dma_map param;
> 		struct iommu_table *tbl = container->tbl;
>-		unsigned long tce, i;
>+		unsigned long tce;
>
> 		if (!tbl)
> 			return -ENXIO;
>@@ -229,17 +290,9 @@ static long tce_iommu_ioctl(void *iommu_data,
> 		if (ret)
> 			return ret;
>
>-		for (i = 0; i < (param.size >> IOMMU_PAGE_SHIFT_4K); ++i) {
>-			ret = iommu_put_tce_user_mode(tbl,
>-					(param.iova >> IOMMU_PAGE_SHIFT_4K) + i,
>-					tce);
>-			if (ret)
>-				break;
>-			tce += IOMMU_PAGE_SIZE_4K;
>-		}
>-		if (ret)
>-			iommu_clear_tces_and_put_pages(tbl,
>-					param.iova >> IOMMU_PAGE_SHIFT_4K, i);
>+		ret = tce_iommu_build(container, tbl,
>+				param.iova >> IOMMU_PAGE_SHIFT_4K,
>+				tce, param.size >> IOMMU_PAGE_SHIFT_4K);
>
> 		iommu_flush_tce(tbl);
>
>@@ -273,7 +326,7 @@ static long tce_iommu_ioctl(void *iommu_data,
> 		if (ret)
> 			return ret;
>
>-		ret = iommu_clear_tces_and_put_pages(tbl,
>+		ret = tce_iommu_clear(container, tbl,
> 				param.iova >> IOMMU_PAGE_SHIFT_4K,
> 				param.size >> IOMMU_PAGE_SHIFT_4K);
> 		iommu_flush_tce(tbl);
>@@ -357,6 +410,7 @@ static void tce_iommu_detach_group(void *iommu_data,
> 		/* pr_debug("tce_vfio: detaching group #%u from iommu %p\n",
> 				iommu_group_id(iommu_group), iommu_group); */
> 		container->tbl = NULL;
>+		tce_iommu_clear(container, tbl, tbl->it_offset, tbl->it_size);
> 		iommu_release_ownership(tbl);
> 	}
> 	mutex_unlock(&container->lock);

Thanks,
Gavin

>-- 
>2.4.0.rc3.8.gfb3e7d5
>

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ