Date: Tue,  7 May 2024 14:21:38 +0800
From: Yan Zhao <yan.y.zhao@...el.com>
To: kvm@...r.kernel.org,
	linux-kernel@...r.kernel.org,
	x86@...nel.org,
	alex.williamson@...hat.com,
	jgg@...dia.com,
	kevin.tian@...el.com
Cc: iommu@...ts.linux.dev,
	pbonzini@...hat.com,
	seanjc@...gle.com,
	dave.hansen@...ux.intel.com,
	luto@...nel.org,
	peterz@...radead.org,
	tglx@...utronix.de,
	mingo@...hat.com,
	bp@...en8.de,
	hpa@...or.com,
	corbet@....net,
	joro@...tes.org,
	will@...nel.org,
	robin.murphy@....com,
	baolu.lu@...ux.intel.com,
	yi.l.liu@...el.com,
	Yan Zhao <yan.y.zhao@...el.com>
Subject: [PATCH 4/5] vfio/type1: Flush CPU caches on DMA pages in non-coherent domains

Flush the CPU cache on DMA pages before mapping them into the first
non-coherent domain (a domain that does not enforce cache coherency, i.e.
CPU caches are not force-snooped) and after unmapping them from the last
domain.

Devices attached to non-coherent domains can execute non-coherent DMAs
(DMAs that lack CPU cache snooping) to access physical memory with CPU
caches bypassed.

A malicious guest could exploit this to read stale host data directly from
memory instead of the data the host initialized in the cache (e.g., zeros),
posing an information-leakage risk.

Furthermore, the host kernel (e.g. a ksm thread) might encounter
inconsistent data between the CPU cache and memory (left by a malicious
guest) after a page is unpinned for DMA but before it's recycled.

Therefore, the CPU cache must be flushed before a page becomes accessible
to non-coherent DMAs and after it becomes inaccessible to them.
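
arch_clean_nonsnoop_dma(), used by the hunks below, is introduced elsewhere
in this series. Purely for illustration (an assumption, not this series'
implementation), a cache clean over a physical range on x86 could look
roughly like:

  #include <linux/highmem.h>
  #include <asm/cacheflush.h>

  static void example_clean_phys_range(phys_addr_t phys, size_t size)
  {
          unsigned long pfn = PHYS_PFN(phys);
          unsigned long last = PHYS_PFN(phys + size - 1);

          for (; pfn <= last; pfn++) {
                  void *va = kmap_local_pfn(pfn);

                  /* Write back and invalidate the lines of this page */
                  clflush_cache_range(va, PAGE_SIZE);
                  kunmap_local(va);
          }
  }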

However, the CPU cache is not flushed immediately when the page is unmapped
from the last non-coherent domain. Instead, the flush is performed lazily,
right before the page is unpinned.

The following example illustrates the process; the CPU cache is flushed
right before step 2 and step 5 (sketched after the list):
1. A page is mapped into a coherent domain.
2. The page is mapped into a non-coherent domain.
3. The page is unmapped from the non-coherent domain, e.g. due to hot-unplug.
4. The page is unmapped from the coherent domain.
5. The page is unpinned.
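
In the sketch below, helper names other than arch_clean_nonsnoop_dma() are
hypothetical; it only marks where the two flushes land relative to the
five steps:

  map_page_into_coherent_domain(page);            /* step 1 */

  /* flush before the page becomes visible to non-coherent DMA */
  arch_clean_nonsnoop_dma(page_to_phys(page), PAGE_SIZE);
  map_page_into_noncoherent_domain(page);         /* step 2 */

  /* no flush on either unmap */
  unmap_page_from_noncoherent_domain(page);       /* step 3 */
  unmap_page_from_coherent_domain(page);          /* step 4 */

  /* lazy flush right before the page is unpinned */
  arch_clean_nonsnoop_dma(page_to_phys(page), PAGE_SIZE);
  unpin_page(page);                               /* step 5 */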

Reasons for adopting this lazy flushing design include:
- There are several unmap paths but only one unpin path. Flushing lazily
  before unpin wipes out the inconsistency between cache and physical
  memory before a page becomes globally visible again, and produces code
  that is simpler, more maintainable, and easier to backport.
- It avoids dividing a large unmap range into several smaller ones or
  allocating additional memory to hold the IOVA-to-HPA relationship.

Reported-by: Jason Gunthorpe <jgg@...dia.com>
Closes: https://lore.kernel.org/lkml/20240109002220.GA439767@nvidia.com
Fixes: 73fa0d10d077 ("vfio: Type1 IOMMU implementation")
Cc: Alex Williamson <alex.williamson@...hat.com>
Cc: Jason Gunthorpe <jgg@...dia.com>
Cc: Kevin Tian <kevin.tian@...el.com>
Signed-off-by: Yan Zhao <yan.y.zhao@...el.com>
---
 drivers/vfio/vfio_iommu_type1.c | 51 +++++++++++++++++++++++++++++++++
 1 file changed, 51 insertions(+)

diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c
index b5c15fe8f9fc..ce873f4220bf 100644
--- a/drivers/vfio/vfio_iommu_type1.c
+++ b/drivers/vfio/vfio_iommu_type1.c
@@ -74,6 +74,7 @@ struct vfio_iommu {
 	bool			v2;
 	bool			nesting;
 	bool			dirty_page_tracking;
+	bool			has_noncoherent_domain;
 	struct list_head	emulated_iommu_groups;
 };
 
@@ -99,6 +100,7 @@ struct vfio_dma {
 	unsigned long		*bitmap;
 	struct mm_struct	*mm;
 	size_t			locked_vm;
+	bool			cache_flush_required; /* For noncoherent domain */
 };
 
 struct vfio_batch {
@@ -716,6 +718,9 @@ static long vfio_unpin_pages_remote(struct vfio_dma *dma, dma_addr_t iova,
 	long unlocked = 0, locked = 0;
 	long i;
 
+	if (dma->cache_flush_required)
+		arch_clean_nonsnoop_dma(pfn << PAGE_SHIFT, npage << PAGE_SHIFT);
+
 	for (i = 0; i < npage; i++, iova += PAGE_SIZE) {
 		if (put_pfn(pfn++, dma->prot)) {
 			unlocked++;
@@ -1099,6 +1104,8 @@ static long vfio_unmap_unpin(struct vfio_iommu *iommu, struct vfio_dma *dma,
 					    &iotlb_gather);
 	}
 
+	dma->cache_flush_required = false;
+
 	if (do_accounting) {
 		vfio_lock_acct(dma, -unlocked, true);
 		return 0;
@@ -1120,6 +1127,21 @@ static void vfio_remove_dma(struct vfio_iommu *iommu, struct vfio_dma *dma)
 	iommu->dma_avail++;
 }
 
+static void vfio_update_noncoherent_domain_state(struct vfio_iommu *iommu)
+{
+	struct vfio_domain *domain;
+	bool has_noncoherent = false;
+
+	list_for_each_entry(domain, &iommu->domain_list, next) {
+		if (domain->enforce_cache_coherency)
+			continue;
+
+		has_noncoherent = true;
+		break;
+	}
+	iommu->has_noncoherent_domain = has_noncoherent;
+}
+
 static void vfio_update_pgsize_bitmap(struct vfio_iommu *iommu)
 {
 	struct vfio_domain *domain;
@@ -1455,6 +1477,12 @@ static int vfio_pin_map_dma(struct vfio_iommu *iommu, struct vfio_dma *dma,
 
 	vfio_batch_init(&batch);
 
+	/*
+	 * Record necessity to flush CPU cache to make sure CPU cache is flushed
+	 * for both pin & map and unmap & unpin (for unwind) paths.
+	 */
+	dma->cache_flush_required = iommu->has_noncoherent_domain;
+
 	while (size) {
 		/* Pin a contiguous chunk of memory */
 		npage = vfio_pin_pages_remote(dma, vaddr + dma->size,
@@ -1466,6 +1494,10 @@ static int vfio_pin_map_dma(struct vfio_iommu *iommu, struct vfio_dma *dma,
 			break;
 		}
 
+		if (dma->cache_flush_required)
+			arch_clean_nonsnoop_dma(pfn << PAGE_SHIFT,
+						npage << PAGE_SHIFT);
+
 		/* Map it! */
 		ret = vfio_iommu_map(iommu, iova + dma->size, pfn, npage,
 				     dma->prot);
@@ -1683,9 +1715,14 @@ static int vfio_iommu_replay(struct vfio_iommu *iommu,
 	for (; n; n = rb_next(n)) {
 		struct vfio_dma *dma;
 		dma_addr_t iova;
+		bool cache_flush_required;
 
 		dma = rb_entry(n, struct vfio_dma, node);
 		iova = dma->iova;
+		cache_flush_required = !domain->enforce_cache_coherency &&
+				       !dma->cache_flush_required;
+		if (cache_flush_required)
+			dma->cache_flush_required = true;
 
 		while (iova < dma->iova + dma->size) {
 			phys_addr_t phys;
@@ -1737,6 +1774,9 @@ static int vfio_iommu_replay(struct vfio_iommu *iommu,
 				size = npage << PAGE_SHIFT;
 			}
 
+			if (cache_flush_required)
+				arch_clean_nonsnoop_dma(phys, size);
+
 			ret = iommu_map(domain->domain, iova, phys, size,
 					dma->prot | IOMMU_CACHE,
 					GFP_KERNEL_ACCOUNT);
@@ -1801,6 +1841,7 @@ static int vfio_iommu_replay(struct vfio_iommu *iommu,
 			vfio_unpin_pages_remote(dma, iova, phys >> PAGE_SHIFT,
 						size >> PAGE_SHIFT, true);
 		}
+		dma->cache_flush_required = false;
 	}
 
 	vfio_batch_fini(&batch);
@@ -1828,6 +1869,9 @@ static void vfio_test_domain_fgsp(struct vfio_domain *domain, struct list_head *
 	if (!pages)
 		return;
 
+	if (!domain->enforce_cache_coherency)
+		arch_clean_nonsnoop_dma(page_to_phys(pages), PAGE_SIZE * 2);
+
 	list_for_each_entry(region, regions, list) {
 		start = ALIGN(region->start, PAGE_SIZE * 2);
 		if (start >= region->end || (region->end - start < PAGE_SIZE * 2))
@@ -1847,6 +1891,9 @@ static void vfio_test_domain_fgsp(struct vfio_domain *domain, struct list_head *
 		break;
 	}
 
+	if (!domain->enforce_cache_coherency)
+		arch_clean_nonsnoop_dma(page_to_phys(pages), PAGE_SIZE * 2);
+
 	__free_pages(pages, order);
 }
 
@@ -2308,6 +2355,8 @@ static int vfio_iommu_type1_attach_group(void *iommu_data,
 
 	list_add(&domain->next, &iommu->domain_list);
 	vfio_update_pgsize_bitmap(iommu);
+	if (!domain->enforce_cache_coherency)
+		vfio_update_noncoherent_domain_state(iommu);
 done:
 	/* Delete the old one and insert new iova list */
 	vfio_iommu_iova_insert_copy(iommu, &iova_copy);
@@ -2508,6 +2557,8 @@ static void vfio_iommu_type1_detach_group(void *iommu_data,
 			}
 			iommu_domain_free(domain->domain);
 			list_del(&domain->next);
+			if (!domain->enforce_cache_coherency)
+				vfio_update_noncoherent_domain_state(iommu);
 			kfree(domain);
 			vfio_iommu_aper_expand(iommu, &iova_copy);
 			vfio_update_pgsize_bitmap(iommu);
-- 
2.17.1

