Date: Mon, 24 Jun 2024 16:39:52 +0800
From: Zhang Zekun <zhangzekun11@...wei.com>
To: <robin.murphy@....com>, <joro@...tes.org>, <will@...nel.org>,
	<john.g.garry@...cle.com>
CC: <iommu@...ts.linux.dev>, <linux-kernel@...r.kernel.org>,
	<zhangzekun11@...wei.com>
Subject: [PATCH] iommu/iova: Better utilize cpu_rcaches in non-strict mode

Currently, when the IOMMU is working in non-strict mode, fq_flush_timeout()
always frees iovas on a single cpu. Freeing the iovas from all cpus on one
cpu is not cache-friendly to the iova_rcache: that cpu's cpu_rcache fills up
first, iovas are then pushed to the depot, and once depot_size exceeds
num_online_cpus() they finally end up in the underlying rbtree. Letting
fq_flush_timeout() free iovas on the cpus that called the dma_unmap* APIs
decreases the overall time spent in fq_flush_timeout() by better utilizing
the iova_rcache and minimizing contention on the iova_rbtree_lock.

Signed-off-by: Zhang Zekun <zhangzekun11@...wei.com>
---
 drivers/iommu/dma-iommu.c | 22 +++++++++++++---------
 drivers/iommu/iova.c      | 21 ++++++++++++++-------
 include/linux/iova.h      |  7 +++++++
 3 files changed, 34 insertions(+), 16 deletions(-)
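
As a rough illustration of the cache-pressure argument in the commit message,
below is a minimal userspace sketch (not kernel code; NR_CPUS, MAG_SIZE and
the depot limit are made-up constants) that models the loaded/prev magazines,
the depot, and the rbtree fallback, and compares freeing all deferred iovas on
one cpu against spreading the frees across the cpus that queued them:

#include <stdio.h>

#define NR_CPUS        4
#define MAG_SIZE       128      /* stand-in for IOVA_MAG_SIZE */
#define MAX_DEPOT_MAGS NR_CPUS  /* stand-in for the depot limit */

struct cpu_cache { int loaded; int prev; };

static struct cpu_cache cpu_rcache[NR_CPUS];
static int depot_mags;      /* full magazines parked in the global depot */
static long rbtree_frees;   /* frees that fell through to the rbtree path */

/* Insert one freed iova into the simplified cache hierarchy. */
static void rcache_insert(int cpu)
{
	struct cpu_cache *c = &cpu_rcache[cpu];

	if (c->loaded < MAG_SIZE) {
		c->loaded++;                    /* fits in the loaded magazine */
	} else if (c->prev == 0) {
		c->prev = c->loaded;            /* swap loaded with the empty prev */
		c->loaded = 1;
	} else if (depot_mags < MAX_DEPOT_MAGS) {
		depot_mags++;                   /* park the full magazine in the depot */
		c->loaded = 1;
	} else {
		rbtree_frees++;                 /* worst case: global rbtree + lock */
	}
}

static void reset(void)
{
	int i;

	for (i = 0; i < NR_CPUS; i++)
		cpu_rcache[i].loaded = cpu_rcache[i].prev = 0;
	depot_mags = 0;
	rbtree_frees = 0;
}

int main(void)
{
	long i, total = 4000;   /* deferred frees pending across all cpus */

	/* Case 1: fq_flush_timeout() frees everything on cpu 0. */
	reset();
	for (i = 0; i < total; i++)
		rcache_insert(0);
	printf("single cpu : rbtree frees = %ld\n", rbtree_frees);

	/* Case 2: frees go back to the cpus that issued the dma_unmap* calls. */
	reset();
	for (i = 0; i < total; i++)
		rcache_insert((int)(i % NR_CPUS));
	printf("per cpu    : rbtree frees = %ld\n", rbtree_frees);

	return 0;
}

With these toy numbers the single-cpu case pushes far more frees through to
the rbtree path; the real kernel dynamics differ, but this is the contention
the patch is trying to avoid.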

diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c
index 43520e7275cc..217b7c70d06c 100644
--- a/drivers/iommu/dma-iommu.c
+++ b/drivers/iommu/dma-iommu.c
@@ -145,7 +145,8 @@ static inline unsigned int fq_ring_add(struct iova_fq *fq)
 	return idx;
 }
 
-static void fq_ring_free_locked(struct iommu_dma_cookie *cookie, struct iova_fq *fq)
+static void fq_ring_free_locked(struct iommu_dma_cookie *cookie, struct iova_fq *fq,
+				int cpu)
 {
 	u64 counter = atomic64_read(&cookie->fq_flush_finish_cnt);
 	unsigned int idx;
@@ -158,20 +159,21 @@ static void fq_ring_free_locked(struct iommu_dma_cookie *cookie, struct iova_fq
 			break;
 
 		iommu_put_pages_list(&fq->entries[idx].freelist);
-		free_iova_fast(&cookie->iovad,
+		free_iova_fast_cpu(&cookie->iovad,
 			       fq->entries[idx].iova_pfn,
-			       fq->entries[idx].pages);
+			       fq->entries[idx].pages, cpu);
 
 		fq->head = (fq->head + 1) & fq->mod_mask;
 	}
 }
 
-static void fq_ring_free(struct iommu_dma_cookie *cookie, struct iova_fq *fq)
+static void fq_ring_free(struct iommu_dma_cookie *cookie, struct iova_fq *fq,
+			 int cpu)
 {
 	unsigned long flags;
 
 	spin_lock_irqsave(&fq->lock, flags);
-	fq_ring_free_locked(cookie, fq);
+	fq_ring_free_locked(cookie, fq, cpu);
 	spin_unlock_irqrestore(&fq->lock, flags);
 }
 
@@ -191,10 +193,11 @@ static void fq_flush_timeout(struct timer_list *t)
 	fq_flush_iotlb(cookie);
 
 	if (cookie->options.qt == IOMMU_DMA_OPTS_SINGLE_QUEUE) {
-		fq_ring_free(cookie, cookie->single_fq);
+		cpu = smp_processor_id();
+		fq_ring_free(cookie, cookie->single_fq, cpu);
 	} else {
 		for_each_possible_cpu(cpu)
-			fq_ring_free(cookie, per_cpu_ptr(cookie->percpu_fq, cpu));
+			fq_ring_free(cookie, per_cpu_ptr(cookie->percpu_fq, cpu), cpu);
 	}
 }
 
@@ -205,6 +208,7 @@ static void queue_iova(struct iommu_dma_cookie *cookie,
 	struct iova_fq *fq;
 	unsigned long flags;
 	unsigned int idx;
+	int cpu = smp_processor_id();
 
 	/*
 	 * Order against the IOMMU driver's pagetable update from unmapping
@@ -227,11 +231,11 @@ static void queue_iova(struct iommu_dma_cookie *cookie,
 	 * flushed out on another CPU. This makes the fq_full() check below less
 	 * likely to be true.
 	 */
-	fq_ring_free_locked(cookie, fq);
+	fq_ring_free_locked(cookie, fq, cpu);
 
 	if (fq_full(fq)) {
 		fq_flush_iotlb(cookie);
-		fq_ring_free_locked(cookie, fq);
+		fq_ring_free_locked(cookie, fq, cpu);
 	}
 
 	idx = fq_ring_add(fq);
diff --git a/drivers/iommu/iova.c b/drivers/iommu/iova.c
index d59d0ea2fd21..46a2188c263b 100644
--- a/drivers/iommu/iova.c
+++ b/drivers/iommu/iova.c
@@ -20,7 +20,7 @@
 
 static bool iova_rcache_insert(struct iova_domain *iovad,
 			       unsigned long pfn,
-			       unsigned long size);
+			       unsigned long size, int cpu);
 static unsigned long iova_rcache_get(struct iova_domain *iovad,
 				     unsigned long size,
 				     unsigned long limit_pfn);
@@ -423,12 +423,19 @@ EXPORT_SYMBOL_GPL(alloc_iova_fast);
 void
 free_iova_fast(struct iova_domain *iovad, unsigned long pfn, unsigned long size)
 {
-	if (iova_rcache_insert(iovad, pfn, size))
+	free_iova_fast_cpu(iovad, pfn, size, smp_processor_id());
+}
+EXPORT_SYMBOL_GPL(free_iova_fast);
+
+void
+free_iova_fast_cpu(struct iova_domain *iovad, unsigned long pfn,
+		   unsigned long size, int cpu)
+{
+	if (iova_rcache_insert(iovad, pfn, size, cpu))
 		return;
 
 	free_iova(iovad, pfn);
 }
-EXPORT_SYMBOL_GPL(free_iova_fast);
 
 static void iova_domain_free_rcaches(struct iova_domain *iovad)
 {
@@ -762,13 +769,13 @@ EXPORT_SYMBOL_GPL(iova_domain_init_rcaches);
  */
 static bool __iova_rcache_insert(struct iova_domain *iovad,
 				 struct iova_rcache *rcache,
-				 unsigned long iova_pfn)
+				 unsigned long iova_pfn, int cpu)
 {
 	struct iova_cpu_rcache *cpu_rcache;
 	bool can_insert = false;
 	unsigned long flags;
 
-	cpu_rcache = raw_cpu_ptr(rcache->cpu_rcaches);
+	cpu_rcache = per_cpu_ptr(rcache->cpu_rcaches, cpu);
 	spin_lock_irqsave(&cpu_rcache->lock, flags);
 
 	if (!iova_magazine_full(cpu_rcache->loaded)) {
@@ -799,14 +806,14 @@ static bool __iova_rcache_insert(struct iova_domain *iovad,
 }
 
 static bool iova_rcache_insert(struct iova_domain *iovad, unsigned long pfn,
-			       unsigned long size)
+			       unsigned long size, int cpu)
 {
 	unsigned int log_size = order_base_2(size);
 
 	if (log_size >= IOVA_RANGE_CACHE_MAX_SIZE)
 		return false;
 
-	return __iova_rcache_insert(iovad, &iovad->rcaches[log_size], pfn);
+	return __iova_rcache_insert(iovad, &iovad->rcaches[log_size], pfn, cpu);
 }
 
 /*
diff --git a/include/linux/iova.h b/include/linux/iova.h
index d2c4fd923efa..91e4e3d5bcb3 100644
--- a/include/linux/iova.h
+++ b/include/linux/iova.h
@@ -93,6 +93,8 @@ struct iova *alloc_iova(struct iova_domain *iovad, unsigned long size,
 	bool size_aligned);
 void free_iova_fast(struct iova_domain *iovad, unsigned long pfn,
 		    unsigned long size);
+void free_iova_fast_cpu(struct iova_domain *iovad, unsigned long pfn,
+			unsigned long size, int cpu);
 unsigned long alloc_iova_fast(struct iova_domain *iovad, unsigned long size,
 			      unsigned long limit_pfn, bool flush_rcache);
 struct iova *reserve_iova(struct iova_domain *iovad, unsigned long pfn_lo,
@@ -134,6 +136,11 @@ static inline void free_iova_fast(struct iova_domain *iovad,
 {
 }
 
+static inline void free_iova_fast_cpu(struct iova_domain *iovad, unsigned long pfn,
+				      unsigned long size, int cpu)
+{
+}
+
 static inline unsigned long alloc_iova_fast(struct iova_domain *iovad,
 					    unsigned long size,
 					    unsigned long limit_pfn,
-- 
2.17.1

