Message-ID: <000e01dbceeb$badbe050$3093a0f0$@samsung.com>
Date: Tue, 27 May 2025 18:42:58 +0900
From: <hyunki00.koo@...sung.com>
To: <john.garry@...wei.com>, <joro@...tes.org>, <will@...nel.org>,
	<mst@...hat.com>, <jasowang@...hat.com>, <robin.murphy@....com>
Cc: <iommu@...ts.linux-foundation.org>, <linux-kernel@...r.kernel.org>,
	<virtualization@...ts.linux-foundation.org>, <linuxarm@...wei.com>,
	<taeyang.choi@...sung.com>, <bumyong.lee@...sung.com>
Subject: Please backport 32e92d9f6f87
 ("iommu/iova: Separate out rcache init") to linux-5.15.y

Please backport 32e92d9f6f87 ("iommu/iova: Separate out rcache init") to
linux-5.15.y.

Commit 32e92d9f6f87 ("iommu/iova: Separate out rcache init") fixes the
issue described below.
It should be applied to all stable kernels that applied commit de53fd7aedb1.

Issue
=====

As mentioned in the commit message, failures in init_iova_rcaches() are not
handled safely,
and the problem actually occurs in practice.

Given the ordering of the two lines below,
the cpuhp callback can be invoked before the percpu rcaches are allocated:

cpuhp_state_add_instance_nocalls(CPUHP_IOMMU_IOVA_DEAD, &iovad->cpuhp_dead);
init_iova_rcaches(iovad);    
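
For illustration, a rough timeline of the window (a sketch only; the
callback chain on CPU B is taken from the panic log below, and the exact
interleaving depends on timing):

	CPU A: init_iova_domain()             CPU B: remove_cpu(cpu)
	-------------------------             ----------------------
	cpuhp_state_add_instance_nocalls()
	  /* callback now registered */
	                                      cpuhp_invoke_callback()
	                                        iova_cpuhp_dead()
	                                          free_cpu_cached_iovas()
	                                            /* dereferences the
	                                               still-unallocated
	                                               rcache->cpu_rcaches */
	init_iova_rcaches(iovad)   /* too late */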

The problem occurred on Linux kernel 5.15.144 when remove_cpu(cpu) was
called between these two lines.
The following is the panic log:
[    2.097125][    T1] Unable to handle kernel paging request at virtual address ffffffcb74a6b004
...
[    2.097226][    T1] Call trace:
[    2.097323][    T1]  do_raw_spin_lock+0x1c/0x12c
[    2.098469][    T1]  _raw_spin_lock_irqsave+0x30/0x60
[    2.118152][    T1]  free_cpu_cached_iovas+0x50/0xb0
[    2.118307][    T1]  iova_cpuhp_dead+0x1c/0x30
[    2.119447][    T1]  cpuhp_invoke_callback+0x2d8/0x5b0
[    2.119608][    T1]  _cpu_down+0x17c/0x4a0
[    2.139216][    T1]  cpu_device_down+0x44/0x70
[    2.139353][    T1]  cpu_subsys_offline+0x10/0x20
[    2.140503][    T1]  device_offline+0xf4/0x130
[    2.140640][    T1]  remove_cpu+0x24/0x40
[    2.160305][    T1]  init_iova_domain+0xec/0x1f0

Here is my backport, based on the current top of the linux-5.15.y tree:
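
For reference, after this change "fast" users follow the pattern below (a
simplified sketch of the dma-iommu.c hunk in the patch; real callers also
need to undo the init on their own error paths):

	init_iova_domain(iovad, 1UL << order, base_pfn);
	ret = iova_domain_init_rcaches(iovad);
	if (ret)
		return ret;	/* the cpuhp callback is registered only
				   after the percpu rcaches are allocated,
				   which closes the race above */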

From 8fc78c5bc321b1183a1edb9331250799e897c911 Mon Sep 17 00:00:00 2001
From: John Garry <john.garry@...wei.com>
Date: Thu, 3 Feb 2022 17:59:20 +0800
Subject: [PATCH] iommu/iova: Separate out rcache init

Currently the rcache structures are allocated for all IOVA domains, even if
they do not use "fast" alloc+free interface. This is wasteful of memory.

In addition, fails in init_iova_rcaches() are not handled safely, which is
less than ideal.

Make "fast" users call a separate rcache init explicitly, which includes
error checking.

Signed-off-by: John Garry <john.garry@...wei.com>
Reviewed-by: Robin Murphy <robin.murphy@....com>
Acked-by: Michael S. Tsirkin <mst@...hat.com>
Link: https://lore.kernel.org/r/1643882360-241739-1-git-send-email-john.garry@huawei.com
Signed-off-by: Joerg Roedel <jroedel@...e.de>
---
 drivers/iommu/dma-iommu.c            |  4 ++
 drivers/iommu/iova.c                 | 73 ++++++++++++++++++++++------
 drivers/vdpa/vdpa_user/iova_domain.c | 11 +++++
 include/linux/iova.h                 | 15 ++----
 4 files changed, 76 insertions(+), 27 deletions(-)

diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c
index 48c6f7ff4aef..f9f65ac9b3c0 100644
--- a/drivers/iommu/dma-iommu.c
+++ b/drivers/iommu/dma-iommu.c
@@ -364,6 +364,7 @@ static int iommu_dma_init_domain(struct iommu_domain *domain, dma_addr_t base,
 	struct iommu_dma_cookie *cookie = domain->iova_cookie;
 	unsigned long order, base_pfn;
 	struct iova_domain *iovad;
+	int ret;
 
 	if (!cookie || cookie->type != IOMMU_DMA_IOVA_COOKIE)
 		return -EINVAL;
@@ -398,6 +399,9 @@ static int iommu_dma_init_domain(struct iommu_domain *domain, dma_addr_t base,
 	}
 
 	init_iova_domain(iovad, 1UL << order, base_pfn);
+	ret = iova_domain_init_rcaches(iovad);
+	if (ret)
+		return ret;
 
 	/* If the FQ fails we can simply fall back to strict mode */
 	if (domain->type == IOMMU_DOMAIN_DMA_FQ && iommu_dma_init_fq(domain))
diff --git a/drivers/iommu/iova.c b/drivers/iommu/iova.c
index 0835f32e040a..ce79c840b9ed 100644
--- a/drivers/iommu/iova.c
+++ b/drivers/iommu/iova.c
@@ -21,7 +21,6 @@ static bool iova_rcache_insert(struct iova_domain *iovad,
 static unsigned long iova_rcache_get(struct iova_domain *iovad,
 				     unsigned long size,
 				     unsigned long limit_pfn);
-static void init_iova_rcaches(struct iova_domain *iovad);
 static void free_cpu_cached_iovas(unsigned int cpu, struct iova_domain *iovad);
 static void free_iova_rcaches(struct iova_domain *iovad);
 static void fq_destroy_all_entries(struct iova_domain *iovad);
@@ -68,8 +67,6 @@ init_iova_domain(struct iova_domain *iovad, unsigned long granule,
 	iovad->anchor.pfn_lo = iovad->anchor.pfn_hi = IOVA_ANCHOR;
 	rb_link_node(&iovad->anchor.node, NULL, &iovad->rbroot.rb_node);
 	rb_insert_color(&iovad->anchor.node, &iovad->rbroot);
-	cpuhp_state_add_instance_nocalls(CPUHP_IOMMU_IOVA_DEAD, &iovad->cpuhp_dead);
-	init_iova_rcaches(iovad);
 }
 EXPORT_SYMBOL_GPL(init_iova_domain);
 
@@ -539,6 +536,13 @@ free_iova_fast(struct iova_domain *iovad, unsigned long pfn, unsigned long size)
 }
 EXPORT_SYMBOL_GPL(free_iova_fast);
 
+static void iova_domain_free_rcaches(struct iova_domain *iovad)
+{
+	cpuhp_state_remove_instance_nocalls(CPUHP_IOMMU_IOVA_DEAD,
+					    &iovad->cpuhp_dead);
+	free_iova_rcaches(iovad);
+}
+
 #define fq_ring_for_each(i, fq) \
 	for ((i) = (fq)->head; (i) != (fq)->tail; (i) = ((i) + 1) % IOVA_FQ_SIZE)
 
@@ -686,11 +690,11 @@ void put_iova_domain(struct iova_domain *iovad)
 {
 	struct iova *iova, *tmp;
 
-	cpuhp_state_remove_instance_nocalls(CPUHP_IOMMU_IOVA_DEAD,
-					    &iovad->cpuhp_dead);
-
 	free_iova_flush_queue(iovad);
-	free_iova_rcaches(iovad);
+
+	if (iovad->rcaches)
+		iova_domain_free_rcaches(iovad);
+
 	rbtree_postorder_for_each_entry_safe(iova, tmp, &iovad->rbroot, node)
 		free_iova_mem(iova);
 }
@@ -799,6 +803,7 @@ EXPORT_SYMBOL_GPL(reserve_iova);
  */
 
 #define IOVA_MAG_SIZE 128
+#define MAX_GLOBAL_MAGS 32	/* magazines per bin */
 
 struct iova_magazine {
 	unsigned long size;
@@ -811,6 +816,13 @@ struct iova_cpu_rcache {
 	struct iova_magazine *prev;
 };
 
+struct iova_rcache {
+	spinlock_t lock;
+	unsigned long depot_size;
+	struct iova_magazine *depot[MAX_GLOBAL_MAGS];
+	struct iova_cpu_rcache __percpu *cpu_rcaches;
+};
+
 static struct iova_magazine *iova_magazine_alloc(gfp_t flags)
 {
 	return kzalloc(sizeof(struct iova_magazine), flags);
@@ -884,28 +896,54 @@ static void iova_magazine_push(struct iova_magazine *mag, unsigned long pfn)
 	mag->pfns[mag->size++] = pfn;
 }
 
-static void init_iova_rcaches(struct iova_domain *iovad)
+int iova_domain_init_rcaches(struct iova_domain *iovad)
 {
-	struct iova_cpu_rcache *cpu_rcache;
-	struct iova_rcache *rcache;
 	unsigned int cpu;
-	int i;
+	int i, ret;
+
+	iovad->rcaches = kcalloc(IOVA_RANGE_CACHE_MAX_SIZE,
+				 sizeof(struct iova_rcache),
+				 GFP_KERNEL);
+	if (!iovad->rcaches)
+		return -ENOMEM;
 
 	for (i = 0; i < IOVA_RANGE_CACHE_MAX_SIZE; ++i) {
+		struct iova_cpu_rcache *cpu_rcache;
+		struct iova_rcache *rcache;
+
 		rcache = &iovad->rcaches[i];
 		spin_lock_init(&rcache->lock);
 		rcache->depot_size = 0;
-		rcache->cpu_rcaches = __alloc_percpu(sizeof(*cpu_rcache), cache_line_size());
-		if (WARN_ON(!rcache->cpu_rcaches))
-			continue;
+		rcache->cpu_rcaches = __alloc_percpu(sizeof(*cpu_rcache),
+						     cache_line_size());
+		if (!rcache->cpu_rcaches) {
+			ret = -ENOMEM;
+			goto out_err;
+		}
 		for_each_possible_cpu(cpu) {
 			cpu_rcache = per_cpu_ptr(rcache->cpu_rcaches, cpu);
+
 			spin_lock_init(&cpu_rcache->lock);
 			cpu_rcache->loaded = iova_magazine_alloc(GFP_KERNEL);
 			cpu_rcache->prev = iova_magazine_alloc(GFP_KERNEL);
+			if (!cpu_rcache->loaded || !cpu_rcache->prev) {
+				ret = -ENOMEM;
+				goto out_err;
+			}
 		}
 	}
+
+	ret = cpuhp_state_add_instance_nocalls(CPUHP_IOMMU_IOVA_DEAD,
+					       &iovad->cpuhp_dead);
+	if (ret)
+		goto out_err;
+	return 0;
+
+out_err:
+	free_iova_rcaches(iovad);
+	return ret;
 }
+EXPORT_SYMBOL_GPL(iova_domain_init_rcaches);
 
 /*
  * Try inserting IOVA range starting with 'iova_pfn' into 'rcache', and
@@ -1022,7 +1060,7 @@ static unsigned long iova_rcache_get(struct iova_domain *iovad,
 {
 	unsigned int log_size = order_base_2(size);
 
-	if (log_size >= IOVA_RANGE_CACHE_MAX_SIZE)
+	if (log_size >= IOVA_RANGE_CACHE_MAX_SIZE || !iovad->rcaches)
 		return 0;
 
 	return __iova_rcache_get(&iovad->rcaches[log_size], limit_pfn - size);
@@ -1040,6 +1078,8 @@ static void free_iova_rcaches(struct iova_domain *iovad)
 
 	for (i = 0; i < IOVA_RANGE_CACHE_MAX_SIZE; ++i) {
 		rcache = &iovad->rcaches[i];
+		if (!rcache->cpu_rcaches)
+			break;
 		for_each_possible_cpu(cpu) {
 			cpu_rcache = per_cpu_ptr(rcache->cpu_rcaches, cpu);
 			iova_magazine_free(cpu_rcache->loaded);
@@ -1049,6 +1089,9 @@ static void free_iova_rcaches(struct iova_domain *iovad)
 		for (j = 0; j < rcache->depot_size; ++j)
 			iova_magazine_free(rcache->depot[j]);
 	}
+
+	kfree(iovad->rcaches);
+	iovad->rcaches = NULL;
 }
 
 /*
diff --git a/drivers/vdpa/vdpa_user/iova_domain.c b/drivers/vdpa/vdpa_user/iova_domain.c
index 0678c2514197..42122d11cbf3 100644
--- a/drivers/vdpa/vdpa_user/iova_domain.c
+++ b/drivers/vdpa/vdpa_user/iova_domain.c
@@ -488,6 +488,7 @@ vduse_domain_create(unsigned long iova_limit, size_t bounce_size)
 	struct file *file;
 	struct vduse_bounce_map *map;
 	unsigned long pfn, bounce_pfns;
+	int ret;
 
 	bounce_pfns = PAGE_ALIGN(bounce_size) >> PAGE_SHIFT;
 	if (iova_limit <= bounce_size)
@@ -521,10 +522,20 @@ vduse_domain_create(unsigned long iova_limit, size_t bounce_size)
 	spin_lock_init(&domain->iotlb_lock);
 	init_iova_domain(&domain->stream_iovad,
 			PAGE_SIZE, IOVA_START_PFN);
+	ret = iova_domain_init_rcaches(&domain->stream_iovad);
+	if (ret)
+		goto err_iovad_stream;
 	init_iova_domain(&domain->consistent_iovad,
 			PAGE_SIZE, bounce_pfns);
+	ret = iova_domain_init_rcaches(&domain->consistent_iovad);
+	if (ret)
+		goto err_iovad_consistent;
 
 	return domain;
+err_iovad_consistent:
+	put_iova_domain(&domain->stream_iovad);
+err_iovad_stream:
+	fput(file);
 err_file:
 	vfree(domain->bounce_maps);
 err_map:
diff --git a/include/linux/iova.h b/include/linux/iova.h
index 6b6cc104e300..8616489b97c1 100644
--- a/include/linux/iova.h
+++ b/include/linux/iova.h
@@ -22,20 +22,10 @@ struct iova {
 	unsigned long	pfn_lo; /* Lowest allocated pfn */
 };
 
-struct iova_magazine;
-struct iova_cpu_rcache;
-
 #define IOVA_RANGE_CACHE_MAX_SIZE 6	/* log of max cached IOVA range size (in pages) */
-#define MAX_GLOBAL_MAGS 32	/* magazines per bin */
-
-struct iova_rcache {
-	spinlock_t lock;
-	unsigned long depot_size;
-	struct iova_magazine *depot[MAX_GLOBAL_MAGS];
-	struct iova_cpu_rcache __percpu *cpu_rcaches;
-};
 
 struct iova_domain;
+struct iova_rcache;
 
 /* Call-Back from IOVA code into IOMMU drivers */
 typedef void (* iova_flush_cb)(struct iova_domain *domain);
@@ -83,7 +73,7 @@ struct iova_domain {
 						   have been finished */
 
 	struct iova	anchor;		/* rbtree lookup anchor */
-	struct iova_rcache rcaches[IOVA_RANGE_CACHE_MAX_SIZE];	/* IOVA range caches */
+	struct iova_rcache	*rcaches;
 
 	iova_flush_cb	flush_cb;	/* Call-Back function to flush IOMMU
 					   TLBs */
@@ -155,6 +145,7 @@ void init_iova_domain(struct iova_domain *iovad, unsigned long granule,
 	unsigned long start_pfn);
 int init_iova_flush_queue(struct iova_domain *iovad,
 			  iova_flush_cb flush_cb, iova_entry_dtor entry_dtor);
+int iova_domain_init_rcaches(struct iova_domain *iovad);
 struct iova *find_iova(struct iova_domain *iovad, unsigned long pfn);
 void put_iova_domain(struct iova_domain *iovad);
 #else
-- 
2.25.1

