lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [day] [month] [year] [list]
Message-ID: <20251013063529.108949-1-wangwensheng4@huawei.com>
Date: Mon, 13 Oct 2025 14:35:29 +0800
From: Wang Wensheng <wangwensheng4@...wei.com>
To: <will@...nel.org>, <robin.murphy@....com>, <joro@...tes.org>,
	<jgg@...pe.ca>, <nicolinc@...dia.com>, <kevin.tian@...el.com>,
	<praan@...gle.com>, <baolu.lu@...ux.intel.com>,
	<linux-arm-kernel@...ts.infradead.org>, <iommu@...ts.linux.dev>,
	<linux-kernel@...r.kernel.org>
CC: <chenjun102@...wei.com>, <wangwensheng4@...wei.com>
Subject: [RFC PATCH] iommu/arm-smmu-v3: Defer shutdown to syscore_ops

We hit several soft lockups while shutting down or rebooting the
system. The kernel log is below:

[  126.487508] arm-smmu-v3 a8000000.camera_smmu_controller0: CMD_SYNC timeout at 0x000001a3 [hwprod 0x000001a4, hwcons 0x00000016]
[  126.487675] (4375,3191)[drv_camera][hicam_buf] isp_smmu_cleanup_iova_dom cluster_id=0 unmap, key=0x0000000000000000, iova=0x0000000000000000, size=49152
[  127.300458] rcu: INFO: rcu_sched detected stalls on CPUs/tasks:
[  127.300464] rcu: 	3-...0: (8 ticks this GP) idle=086/1/0x4000000000000000 softirq=25646/25646 fqs=2475
[  127.300466] rcu: 	(detected by 0, t=5252 jiffies, g=30897, q=752)
[  127.300470] Sending NMI from CPU 0 to CPUs 3:
[  127.556735] arm-smmu-v3 a8000000.camera_smmu_controller0: CMD_SYNC timeout at 0x000001b0 [hwprod 0x000001b1, hwcons 0x00000016]
[  127.556966] (4375,3191)[drv_camera][hicam_buf] isp_smmu_cleanup_iova_dom cluster_id=0 unmap, key=0x0000000000000000, iova=0x0000000000000000, size=49152
[  128.626066] arm-smmu-v3 a8000000.camera_smmu_controller0: CMD_SYNC timeout at 0x000001bd [hwprod 0x000001be, hwcons 0x00000016]
[  128.626232] (4375,3191)[drv_camera][hicam_buf] isp_smmu_cleanup_iova_dom cluster_id=0 unmap, key=0x0000000000000000, iova=0x0000000000000000, size=49152
...
[  132.903350] watchdog: BUG: soft lockup - CPU#7 stuck for 23s! [dds_discovery:3191]
...
[  132.903564] Call trace:
[  132.903566]  arm_smmu_cmdq_issue_cmdlist+0x560/0x6c8
[  132.903568]  __arm_smmu_tlb_inv_range.isra.41+0x160/0x20c
[  132.903570]  arm_smmu_tlb_inv_range_domain+0x90/0x164
[  132.903572]  arm_smmu_iotlb_sync+0x3c/0x50
[  132.903576]  iommu_unmap+0x88/0xc0
[  132.903589]  isp_smmu_do_iommu_unmap.isra.6+0x5c/0x128 [drv_hicam_buf]
[  132.903594]  isp_smmu_unmap_iova+0x128/0x2f4 [drv_hicam_buf]
[  132.903598]  isp_smmu_cleanup_iova_dom+0xf0/0x1c8 [drv_hicam_buf]
[  132.903602]  hicambuf_check_and_ummap_remain_buffer+0x90/0xa0 [drv_hicam_buf]
[  132.903609]  himdcisp_release+0x1d0/0x228 [drv_himdcisp]
[  132.903615]  __fput+0xa4/0x2cc
[  132.903617]  ____fput+0x20/0x30
[  132.903620]  task_work_run+0x120/0x198
[  132.903623]  do_exit+0x444/0xd20
[  132.903625]  do_group_exit+0x40/0x140
[  132.903628]  get_signal+0x21c/0xab0
[  132.903630]  do_notify_resume+0x380/0x4a8

The direct cause of this soft lockup is that the driver tries to access
the SMMU device after it has been shut down. Here the driver calls
iommu_unmap() a few times and gets a CMD_SYNC timeout each time, costing
one second per call, so the CPU the driver runs on gets stuck. There is
another case where a process that was bound to several SMMU devices is
exiting: the process accesses the SMMU devices through the mmu_notifier
callbacks and gets stuck in a similar way.

[   93.161307] Call trace:
[   93.161309]  arm_smmu_cmdq_issue_cmdlist+0x58c/0x948
[   93.161313]  __arm_smmu_cmdq_issue_cmd+0x60/0xb0
[   93.161316]  arm_smmu_tlb_inv_asid+0x6c/0x98
[   93.161321]  arm_smmu_mm_release+0x70/0xd4
[   93.161325]  __mmu_notifier_release+0x88/0x268
[   93.161332]  exit_mmap+0x374/0x4b4
[   93.161339]  mmput+0x7c/0x1c4
[   93.161346]  xsmem_release+0x6a8/0x91c [xsmem]
[   93.161364]  __fput+0x21c/0x340
[   93.161369]  ____fput+0x20/0x30
[   93.161371]  task_work_run+0x104/0x1a0
[   93.161377]  do_exit+0x4c0/0xe60
[   93.161382]  do_group_exit+0x38/0x138

Normally the reboot/shutdown command kills all processes before calling
into the kernel. But a user process may not exit in time, so it could
still be running on the reboot_cpu while the reboot/shutdown command,
running on another CPU, enters the kernel and shuts down the SMMU
devices. The process running on the reboot_cpu then gets stuck and
blocks the reboot/shutdown command in migrate_to_reboot_cpu(). Move the
SMMU shutdown to syscore_ops to solve the issue. Because syscore_ops
are called after migrate_to_reboot_cpu(), even if another process
accesses the SMMU device on another CPU after the SMMU has been shut
down, it cannot block the reboot process.

Signed-off-by: Wang Wensheng <wangwensheng4@...wei.com>
---
 drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 39 ++++++++++++++++-----
 drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h |  2 ++
 2 files changed, 32 insertions(+), 9 deletions(-)

diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
index 2a8b46b948f0..cf43ea249f22 100644
--- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
+++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
@@ -29,6 +29,7 @@
 #include <linux/string_choices.h>
 #include <kunit/visibility.h>
 #include <uapi/linux/iommufd.h>
+#include <linux/syscore_ops.h>
 
 #include "arm-smmu-v3.h"
 #include "../../dma-iommu.h"
@@ -38,6 +39,9 @@ module_param(disable_msipolling, bool, 0444);
 MODULE_PARM_DESC(disable_msipolling,
 	"Disable MSI-based polling for CMD_SYNC completion.");
 
+static LIST_HEAD(smmu_device_list);
+static DEFINE_MUTEX(smmu_device_lock);
+
 static const struct iommu_ops arm_smmu_ops;
 static struct iommu_dirty_ops arm_smmu_dirty_ops;
 
@@ -4835,6 +4839,9 @@ static int arm_smmu_device_probe(struct platform_device *pdev)
 		goto err_free_sysfs;
 	}
 
+	scoped_guard(mutex, &smmu_device_lock)
+		list_add(&smmu->list_node, &smmu_device_list);
+
 	return 0;
 
 err_free_sysfs:
@@ -4850,6 +4857,8 @@ static void arm_smmu_device_remove(struct platform_device *pdev)
 {
 	struct arm_smmu_device *smmu = platform_get_drvdata(pdev);
 
+	scoped_guard(mutex, &smmu_device_lock)
+		list_del(&smmu->list_node);
 	iommu_device_unregister(&smmu->iommu);
 	iommu_device_sysfs_remove(&smmu->iommu);
 	arm_smmu_device_disable(smmu);
@@ -4857,23 +4866,36 @@ static void arm_smmu_device_remove(struct platform_device *pdev)
 	ida_destroy(&smmu->vmid_map);
 }
 
-static void arm_smmu_device_shutdown(struct platform_device *pdev)
-{
-	struct arm_smmu_device *smmu = platform_get_drvdata(pdev);
-
-	arm_smmu_device_disable(smmu);
-}
-
 static const struct of_device_id arm_smmu_of_match[] = {
 	{ .compatible = "arm,smmu-v3", },
 	{ },
 };
 MODULE_DEVICE_TABLE(of, arm_smmu_of_match);
 
+static void arm_smmu_shutdown(void)
+{
+	struct arm_smmu_device *smmu;
+
+	guard(mutex)(&smmu_device_lock);
+	list_for_each_entry(smmu, &smmu_device_list, list_node)
+		arm_smmu_device_disable(smmu);
+}
+
+static struct syscore_ops arm_smmu_syscore_ops = {
+	.shutdown = arm_smmu_shutdown,
+};
+
 static void arm_smmu_driver_unregister(struct platform_driver *drv)
 {
 	arm_smmu_sva_notifier_synchronize();
 	platform_driver_unregister(drv);
+	unregister_syscore_ops(&arm_smmu_syscore_ops);
+}
+
+static int arm_smmu_driver_register(struct platform_driver *drv)
+{
+	register_syscore_ops(&arm_smmu_syscore_ops);
+	return platform_driver_register(drv);
 }
 
 static struct platform_driver arm_smmu_driver = {
@@ -4884,9 +4906,8 @@ static struct platform_driver arm_smmu_driver = {
 	},
 	.probe	= arm_smmu_device_probe,
 	.remove = arm_smmu_device_remove,
-	.shutdown = arm_smmu_device_shutdown,
 };
-module_driver(arm_smmu_driver, platform_driver_register,
+module_driver(arm_smmu_driver, arm_smmu_driver_register,
 	      arm_smmu_driver_unregister);
 
 MODULE_DESCRIPTION("IOMMU API for ARM architected SMMUv3 implementations");
diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
index ae23aacc3840..1bb952e99676 100644
--- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
+++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
@@ -805,6 +805,8 @@ struct arm_smmu_device {
 
 	struct rb_root			streams;
 	struct mutex			streams_mutex;
+
+	struct list_head		list_node;
 };
 
 struct arm_smmu_stream {
-- 
2.22.0


Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ