Message-ID: <20240205120203.60312-17-jgowans@amazon.com>
Date: Mon, 5 Feb 2024 12:02:01 +0000
From: James Gowans <jgowans@...zon.com>
To: <linux-kernel@...r.kernel.org>
CC: Eric Biederman <ebiederm@...ssion.com>, <kexec@...ts.infradead.org>,
	"Joerg Roedel" <joro@...tes.org>, Will Deacon <will@...nel.org>,
	<iommu@...ts.linux.dev>, Alexander Viro <viro@...iv.linux.org.uk>, "Christian
 Brauner" <brauner@...nel.org>, <linux-fsdevel@...r.kernel.org>, Paolo Bonzini
	<pbonzini@...hat.com>, Sean Christopherson <seanjc@...gle.com>,
	<kvm@...r.kernel.org>, Andrew Morton <akpm@...ux-foundation.org>,
	<linux-mm@...ck.org>, Alexander Graf <graf@...zon.com>, David Woodhouse
	<dwmw@...zon.co.uk>, "Jan H . Schoenherr" <jschoenh@...zon.de>, Usama Arif
	<usama.arif@...edance.com>, Anthony Yznaga <anthony.yznaga@...cle.com>,
	Stanislav Kinsburskii <skinsburskii@...ux.microsoft.com>,
	<madvenka@...ux.microsoft.com>, <steven.sistare@...cle.com>,
	<yuleixzhang@...cent.com>
Subject: [RFC 16/18] vfio: support not mapping IOMMU pgtables on live-update

When restoring VMs after a live-update kexec, the IOVA mappings for the
guest VM are already present in the persisted IOMMU page tables.
Clobbering the existing pgtable entries is unnecessary, and re-writing
them may introduce races if pgtable modifications happen concurrently
with ongoing DMA.

Provide a new VFIO MAP_DMA flag which userspace can supply to inform
VFIO that the IOVAs are already mapped in the IOMMU. In that case VFIO
skips the call into the IOMMU driver to create the mapping; the MAP_DMA
ioctl is still required so that VFIO can set up its internal data
structures describing the mapping.
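
As a rough illustration (not part of this patch), a VMM restoring a
guest after the live-update kexec could re-issue its MAP_DMA calls with
the new flag set, along the lines of the sketch below. The helper name,
fd handling and addresses are made up for illustration, and
VFIO_DMA_MAP_FLAG_LIVE_UPDATE only exists once the uapi change in this
patch is applied:

#include <stdint.h>
#include <string.h>
#include <sys/ioctl.h>
#include <linux/vfio.h>

/* Hypothetical helper: rebuild VFIO's bookkeeping for one DMA range
 * after kexec without touching the already-persisted IOMMU pgtables.
 */
static int remap_after_live_update(int container_fd, uint64_t vaddr,
				   uint64_t iova, uint64_t size)
{
	struct vfio_iommu_type1_dma_map map;

	memset(&map, 0, sizeof(map));
	map.argsz = sizeof(map);
	/* IOVAs survived the kexec; only recreate VFIO's tracking. */
	map.flags = VFIO_DMA_MAP_FLAG_READ | VFIO_DMA_MAP_FLAG_WRITE |
		    VFIO_DMA_MAP_FLAG_LIVE_UPDATE;
	map.vaddr = vaddr;
	map.iova  = iova;
	map.size  = size;

	return ioctl(container_fd, VFIO_IOMMU_MAP_DMA, &map);
}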

It would probably be better to move the persistence one layer up and
persist the whole VFIO container in pkernfs. That way the container
could be picked up and reused after kexec without needing any MAP_DMA
ioctls at all.
---
 drivers/vfio/vfio_iommu_type1.c | 24 +++++++++++++-----------
 include/uapi/linux/vfio.h       |  1 +
 2 files changed, 14 insertions(+), 11 deletions(-)

diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c
index b36edfc5c9ef..dc2682fbda2e 100644
--- a/drivers/vfio/vfio_iommu_type1.c
+++ b/drivers/vfio/vfio_iommu_type1.c
@@ -1456,7 +1456,7 @@ static int vfio_iommu_map(struct vfio_iommu *iommu, dma_addr_t iova,
 }
 
 static int vfio_pin_map_dma(struct vfio_iommu *iommu, struct vfio_dma *dma,
-			    size_t map_size)
+			    size_t map_size, unsigned int flags)
 {
 	dma_addr_t iova = dma->iova;
 	unsigned long vaddr = dma->vaddr;
@@ -1479,14 +1479,16 @@ static int vfio_pin_map_dma(struct vfio_iommu *iommu, struct vfio_dma *dma,
 			break;
 		}
 
-		/* Map it! */
-		ret = vfio_iommu_map(iommu, iova + dma->size, pfn, npage,
-				     dma->prot);
-		if (ret) {
-			vfio_unpin_pages_remote(dma, iova + dma->size, pfn,
-						npage, true);
-			vfio_batch_unpin(&batch, dma);
-			break;
+		if (!(flags & VFIO_DMA_MAP_FLAG_LIVE_UPDATE)) {
+			/* Map it! */
+			ret = vfio_iommu_map(iommu, iova + dma->size, pfn, npage,
+					     dma->prot);
+			if (ret) {
+				vfio_unpin_pages_remote(dma, iova + dma->size, pfn,
+							npage, true);
+				vfio_batch_unpin(&batch, dma);
+				break;
+			}
 		}
 
 		size -= npage << PAGE_SHIFT;
@@ -1662,7 +1664,7 @@ static int vfio_dma_do_map(struct vfio_iommu *iommu,
 	if (list_empty(&iommu->domain_list))
 		dma->size = size;
 	else
-		ret = vfio_pin_map_dma(iommu, dma, size);
+		ret = vfio_pin_map_dma(iommu, dma, size, map->flags);
 
 	if (!ret && iommu->dirty_page_tracking) {
 		ret = vfio_dma_bitmap_alloc(dma, pgsize);
@@ -2836,7 +2838,7 @@ static int vfio_iommu_type1_map_dma(struct vfio_iommu *iommu,
 	struct vfio_iommu_type1_dma_map map;
 	unsigned long minsz;
 	uint32_t mask = VFIO_DMA_MAP_FLAG_READ | VFIO_DMA_MAP_FLAG_WRITE |
-			VFIO_DMA_MAP_FLAG_VADDR;
+			VFIO_DMA_MAP_FLAG_VADDR | VFIO_DMA_MAP_FLAG_LIVE_UPDATE;
 
 	minsz = offsetofend(struct vfio_iommu_type1_dma_map, size);
 
diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h
index fa9676bb4b26..d04d28e52110 100644
--- a/include/uapi/linux/vfio.h
+++ b/include/uapi/linux/vfio.h
@@ -1536,6 +1536,7 @@ struct vfio_iommu_type1_dma_map {
 #define VFIO_DMA_MAP_FLAG_READ (1 << 0)		/* readable from device */
 #define VFIO_DMA_MAP_FLAG_WRITE (1 << 1)	/* writable from device */
 #define VFIO_DMA_MAP_FLAG_VADDR (1 << 2)
+#define VFIO_DMA_MAP_FLAG_LIVE_UPDATE  (1 << 3) /* IOVAs already mapped in IOMMU before LU */
 	__u64	vaddr;				/* Process virtual address */
 	__u64	iova;				/* IO virtual address */
 	__u64	size;				/* Size of mapping (bytes) */
-- 
2.40.1

