lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20251006120944.7880-4-spasswolf@web.de>
Date: Mon,  6 Oct 2025 14:09:42 +0200
From: Bert Karwatzki <spasswolf@....de>
To: linux-kernel@...r.kernel.org
Cc: Bert Karwatzki <spasswolf@....de>,
	linux-next@...r.kernel.org,
	linux-stable@...r.kernel.org,
	regressions@...ts.linux.dev,
	linux-pci@...r.kernel.org,
	linux-acpi@...r.kernel.org,
	Mario Limonciello <superm1@...nel.org>,
	Christian König <christian.koenig@....com>,
	"Rafael J . Wysocki" <rafael.j.wysocki@...el.com>
Subject: [REGRESSION 03/04] Crash during resume of pcie bridge

In order to get a working crash I removed some of the monitoring again:

diff --git a/drivers/acpi/bus.c b/drivers/acpi/bus.c
index bc365c0dbe2f..a984ccd4a2a0 100644
--- a/drivers/acpi/bus.c
+++ b/drivers/acpi/bus.c
@@ -514,60 +514,46 @@ static void acpi_bus_notify(acpi_handle handle, u32 type, void *data)
 
 	switch (type) {
 	case ACPI_NOTIFY_BUS_CHECK:
-		printk(KERN_INFO "%s %d: ACPI_NOTIFY_BUS_CHECK\n", __func__, __LINE__);
 		acpi_handle_debug(handle, "ACPI_NOTIFY_BUS_CHECK event\n");
 		break;
 
 	case ACPI_NOTIFY_DEVICE_CHECK:
-		printk(KERN_INFO "%s %d: ACPI_NOTIFY_DEVICE_CHECK\n", __func__, __LINE__);
 		acpi_handle_debug(handle, "ACPI_NOTIFY_DEVICE_CHECK event\n");
 		break;
 
 	case ACPI_NOTIFY_DEVICE_WAKE:
-		printk(KERN_INFO "%s %d: ACPI_NOTIFY_DEVICE_WAKE\n", __func__, __LINE__);
 		acpi_handle_debug(handle, "ACPI_NOTIFY_DEVICE_WAKE event\n");
 		return;
 
 	case ACPI_NOTIFY_EJECT_REQUEST:
-		printk(KERN_INFO "%s %d: ACPI_NOTIFY_EJECT_REQUEST\n", __func__, __LINE__);
 		acpi_handle_debug(handle, "ACPI_NOTIFY_EJECT_REQUEST event\n");
 		break;
 
 	case ACPI_NOTIFY_DEVICE_CHECK_LIGHT:
-		printk(KERN_INFO "%s %d: ACPI_NOTIFY_DEVICE_CHECK_LIGHT\n", __func__, __LINE__);
 		acpi_handle_debug(handle, "ACPI_NOTIFY_DEVICE_CHECK_LIGHT event\n");
 		/* TBD: Exactly what does 'light' mean? */
 		return;
 
 	case ACPI_NOTIFY_FREQUENCY_MISMATCH:
-		printk(KERN_INFO "%s %d: ACPI_NOTIFY_FREQUENCY_MISMATCH\n", __func__, __LINE__);
 		acpi_handle_err(handle, "Device cannot be configured due "
 				"to a frequency mismatch\n");
 		return;
 
 	case ACPI_NOTIFY_BUS_MODE_MISMATCH:
-		printk(KERN_INFO "%s %d: ACPI_NOTIFY_BUS_MODE_MISMATCH\n", __func__, __LINE__);
 		acpi_handle_err(handle, "Device cannot be configured due "
 				"to a bus mode mismatch\n");
 		return;
 
 	case ACPI_NOTIFY_POWER_FAULT:
-		printk(KERN_INFO "%s %d: ACPI_NOTIFY_POWER_FAULT\n", __func__, __LINE__);
 		acpi_handle_err(handle, "Device has suffered a power fault\n");
 		return;
 
 	default:
-		printk(KERN_INFO "%s %d: acpi unknown event type\n", __func__, __LINE__);
 		acpi_handle_debug(handle, "Unknown event type 0x%x\n", type);
 		return;
 	}
 
 	adev = acpi_get_acpi_dev(handle);
-	if (adev)
-		dev_info(&adev->dev, "%s %d\n", __func__, __LINE__);
-	else
-		printk(KERN_INFO "%s %d: adev = NULL\n", __func__, __LINE__);
-		
 
 	if (adev && ACPI_SUCCESS(acpi_hotplug_schedule(adev, type)))
 		return;
diff --git a/drivers/acpi/device_pm.c b/drivers/acpi/device_pm.c
index 9a7dc432b50d..4e0583274b8f 100644
--- a/drivers/acpi/device_pm.c
+++ b/drivers/acpi/device_pm.c
@@ -539,7 +539,6 @@ static void acpi_pm_notify_handler(acpi_handle handle, u32 val, void *not_used)
 	if (!adev)
 		return;
 
-	dev_info(&adev->dev, "%s %d\n", __func__, __LINE__);
 	mutex_lock(&acpi_pm_notifier_lock);
 
 	if (adev->wakeup.flags.notifier_present) {
diff --git a/drivers/acpi/osl.c b/drivers/acpi/osl.c
index 0f6a16856119..5ff343096ece 100644
--- a/drivers/acpi/osl.c
+++ b/drivers/acpi/osl.c
@@ -1167,7 +1167,6 @@ void acpi_os_wait_events_complete(void)
 	 * Make sure the GPE handler or the fixed event handler is not used
 	 * on another CPU after removal.
 	 */
-	printk(KERN_INFO "%s %d\n", __func__, __LINE__);
 	if (acpi_sci_irq_valid())
 		synchronize_hardirq(acpi_sci_irq);
 	flush_workqueue(kacpid_wq);
@@ -1185,7 +1184,6 @@ static void acpi_hotplug_work_fn(struct work_struct *work)
 {
 	struct acpi_hp_work *hpw = container_of(work, struct acpi_hp_work, work);
 
-	printk(KERN_INFO "%s %d\n", __func__, __LINE__);
 	acpi_os_wait_events_complete();
 	acpi_device_hotplug(hpw->adev, hpw->src);
 	kfree(hpw);
@@ -1194,7 +1192,6 @@ static void acpi_hotplug_work_fn(struct work_struct *work)
 acpi_status acpi_hotplug_schedule(struct acpi_device *adev, u32 src)
 {
 	struct acpi_hp_work *hpw;
-	dev_info(&adev->dev, "%s %d\n", __func__, __LINE__);
 
 	acpi_handle_debug(adev->handle,
 			  "Scheduling hotplug event %u for deferred handling\n",
diff --git a/drivers/acpi/scan.c b/drivers/acpi/scan.c
index d53be7e0388d..065abe56f440 100644
--- a/drivers/acpi/scan.c
+++ b/drivers/acpi/scan.c
@@ -251,7 +251,6 @@ static int acpi_scan_check_and_detach(struct acpi_device *adev, void *p)
 {
 	struct acpi_scan_handler *handler = adev->handler;
 	uintptr_t flags = (uintptr_t)p;
-	dev_info(&adev->dev, "%s %d\n", __func__, __LINE__);
 
 	acpi_dev_for_each_child_reverse(adev, acpi_scan_check_and_detach, p);
 
@@ -315,7 +314,6 @@ static void acpi_scan_check_subtree(struct acpi_device *adev)
 {
 	uintptr_t flags = ACPI_SCAN_CHECK_FLAG_STATUS;
 
-	dev_info(&adev->dev, "%s %d\n", __func__, __LINE__);
 	acpi_scan_check_and_detach(adev, (void *)flags);
 }
 
@@ -371,7 +369,6 @@ static int acpi_scan_rescan_bus(struct acpi_device *adev)
 {
 	struct acpi_scan_handler *handler = adev->handler;
 	int ret;
-	dev_info(&adev->dev, "%s %d\n", __func__, __LINE__);
 
 	if (handler && handler->hotplug.scan_dependent)
 		ret = handler->hotplug.scan_dependent(adev);
@@ -388,7 +385,6 @@ static int acpi_scan_device_check(struct acpi_device *adev)
 {
 	struct acpi_device *parent;
 
-	dev_info(&adev->dev, "%s %d\n", __func__, __LINE__);
 	acpi_scan_check_subtree(adev);
 
 	if (!acpi_device_is_present(adev))
@@ -416,24 +412,19 @@ static int acpi_scan_device_check(struct acpi_device *adev)
 static int acpi_scan_bus_check(struct acpi_device *adev)
 {
 	acpi_scan_check_subtree(adev);
-	dev_info(&adev->dev, "%s %d\n", __func__, __LINE__);
 
 	return acpi_scan_rescan_bus(adev);
 }
 
 static int acpi_generic_hotplug_event(struct acpi_device *adev, u32 type)
 {
-	dev_info(&adev->dev, "%s %d\n", __func__, __LINE__);
 	switch (type) {
 	case ACPI_NOTIFY_BUS_CHECK:
-		dev_info(&adev->dev, "%s %d\n", __func__, __LINE__);
 		return acpi_scan_bus_check(adev);
 	case ACPI_NOTIFY_DEVICE_CHECK:
-		dev_info(&adev->dev, "%s %d\n", __func__, __LINE__);
 		return acpi_scan_device_check(adev);
 	case ACPI_NOTIFY_EJECT_REQUEST:
 	case ACPI_OST_EC_OSPM_EJECT:
-		dev_info(&adev->dev, "%s %d\n", __func__, __LINE__);
 		if (adev->handler && !adev->handler->hotplug.enabled) {
 			dev_info(&adev->dev, "Eject disabled\n");
 			return -EPERM;
@@ -450,7 +441,6 @@ void acpi_device_hotplug(struct acpi_device *adev, u32 src)
 	u32 ost_code = ACPI_OST_SC_NON_SPECIFIC_FAILURE;
 	int error = -ENODEV;
 
-	dev_info(&adev->dev, "%s %d\n", __func__, __LINE__);
 	lock_device_hotplug();
 	mutex_lock(&acpi_scan_lock);
 
@@ -476,10 +466,9 @@ void acpi_device_hotplug(struct acpi_device *adev, u32 src)
 		 * There may be additional notify handlers for device objects
 		 * without the .event() callback, so ignore them here.
 		 */
-		if (notify) {
-			dev_info(&adev->dev, "%s %d: calling notify = %px\n", __func__, __LINE__, (void *) notify);
+		if (notify)
 			error = notify(adev, src);
-		} else
+		else
 			goto out;
 	}
 	switch (error) {
diff --git a/drivers/base/power/runtime.c b/drivers/base/power/runtime.c
index 895898c3cd56..27cce7f1b1d3 100644
--- a/drivers/base/power/runtime.c
+++ b/drivers/base/power/runtime.c
@@ -142,6 +142,8 @@ EXPORT_SYMBOL_GPL(pm_runtime_suspended_time);
  */
 static void pm_runtime_deactivate_timer(struct device *dev)
 {
+	if (!strcmp(dev_name(dev), "0000:00:01.1"))
+		dev_info(dev, "%s %d\n", __func__, __LINE__);
 	if (dev->power.timer_expires > 0) {
 		hrtimer_try_to_cancel(&dev->power.suspend_timer);
 		dev->power.timer_expires = 0;
@@ -787,8 +789,6 @@ static int rpm_resume(struct device *dev, int rpmflags)
 	struct device *parent = NULL;
 	int retval = 0;
 
-	if (!strcmp(dev_name(dev), "0000:00:01.1"))
-		dev_info(dev, "%s %d\n", __func__, __LINE__);
 	trace_rpm_resume(dev, rpmflags);
 
  repeat:
@@ -815,7 +815,7 @@ static int rpm_resume(struct device *dev, int rpmflags)
 	 * future.
 	 */
 	if (!strcmp(dev_name(dev), "0000:00:01.1"))
-		dev_info(dev, "%s %d\n", __func__, __LINE__);
+		dev_info(dev, "%s %d dev = %px\n", __func__, __LINE__, dev);
 	dev->power.request = RPM_REQ_NONE;
 	if (!dev->power.timer_autosuspends)
 		pm_runtime_deactivate_timer(dev);
@@ -1231,22 +1231,16 @@ int __pm_runtime_resume(struct device *dev, int rpmflags)
 {
 	unsigned long flags;
 	int retval;
-	if (!strcmp(dev_name(dev), "0000:00:01.1"))
-		dev_info(dev, "%s %d\n", __func__, __LINE__);
 
 	might_sleep_if(!(rpmflags & RPM_ASYNC) && !dev->power.irq_safe &&
 			dev->power.runtime_status != RPM_ACTIVE);
 
-	if (!strcmp(dev_name(dev), "0000:00:01.1"))
-		dev_info(dev, "%s %d\n", __func__, __LINE__);
 	if (rpmflags & RPM_GET_PUT)
 		atomic_inc(&dev->power.usage_count);
 
-	if (!strcmp(dev_name(dev), "0000:00:01.1"))
-		dev_info(dev, "%s %d\n", __func__, __LINE__);
 	spin_lock_irqsave(&dev->power.lock, flags);
 	if (!strcmp(dev_name(dev), "0000:00:01.1"))
-		dev_info(dev, "%s %d\n", __func__, __LINE__);
+		dev_info(dev, "%s %d dev = %px\n", __func__, __LINE__, dev);
 	retval = rpm_resume(dev, rpmflags);
 	spin_unlock_irqrestore(&dev->power.lock, flags);
 
diff --git a/drivers/pci/hotplug/acpiphp_glue.c b/drivers/pci/hotplug/acpiphp_glue.c
index e56ab308da20..e21255b97251 100644
--- a/drivers/pci/hotplug/acpiphp_glue.c
+++ b/drivers/pci/hotplug/acpiphp_glue.c
@@ -484,7 +484,6 @@ static void enable_slot(struct acpiphp_slot *slot, bool bridge)
 	struct pci_dev *dev;
 	struct pci_bus *bus = slot->bus;
 	struct acpiphp_func *func;
-	printk(KERN_INFO "%s %d\n", __func__, __LINE__);
 
 	if (bridge && bus->self && hotplug_is_native(bus->self)) {
 		/*
@@ -495,14 +494,11 @@ static void enable_slot(struct acpiphp_slot *slot, bool bridge)
 		 * as a Thunderbolt host controller.
 		 */
 		for_each_pci_bridge(dev, bus) {
-			dev_info(&dev->dev, "%s %d\n", __func__, __LINE__);
 			if (PCI_SLOT(dev->devfn) == slot->device) {
-				dev_info(&dev->dev, "%s %d\n", __func__, __LINE__);
 				acpiphp_native_scan_bridge(dev);
 			}
 		}
 	} else {
-		printk(KERN_INFO "%s %d\n", __func__, __LINE__);
 		LIST_HEAD(add_list);
 		int max, pass;
 
@@ -510,15 +506,12 @@ static void enable_slot(struct acpiphp_slot *slot, bool bridge)
 		max = acpiphp_max_busnr(bus);
 		for (pass = 0; pass < 2; pass++) {
 			for_each_pci_bridge(dev, bus) {
-				dev_info(&dev->dev, "%s %d\n", __func__, __LINE__);
 				if (PCI_SLOT(dev->devfn) != slot->device) {
-					printk(KERN_INFO "%s %d\n", __func__, __LINE__);
 					continue;
 				}
 
 				max = pci_scan_bridge(bus, dev, max, pass);
 				if (pass && dev->subordinate) {
-					dev_info(&dev->dev, "%s %d\n", __func__, __LINE__);
 					check_hotplug_bridge(slot, dev);
 					pcibios_resource_survey_bus(dev->subordinate);
 					__pci_bus_size_bridges(dev->subordinate,
@@ -535,7 +528,6 @@ static void enable_slot(struct acpiphp_slot *slot, bool bridge)
 
 	list_for_each_entry(dev, &bus->devices, bus_list) {
 		/* Assume that newly added devices are powered on already. */
-		dev_info(&dev->dev, "%s %d\n", __func__, __LINE__);
 		if (!pci_dev_is_added(dev))
 			dev->current_state = PCI_D0;
 	}
@@ -554,7 +546,6 @@ static void enable_slot(struct acpiphp_slot *slot, bool bridge)
 		}
 		pci_dev_put(dev);
 	}
-	printk(KERN_INFO "%s %d\n", __func__, __LINE__);
 }
 
 /**
@@ -823,7 +814,6 @@ static void hotplug_event(u32 type, struct acpiphp_context *context)
 	switch (type) {
 	case ACPI_NOTIFY_BUS_CHECK:
 		/* bus re-enumerate */
-		printk(KERN_INFO "%s %d: ACPI_NOTIFY_BUS_CHECK\n", __func__, __LINE__);
 		acpi_handle_debug(handle, "Bus check in %s()\n", __func__);
 		if (bridge)
 			acpiphp_check_bridge(bridge);
@@ -834,7 +824,6 @@ static void hotplug_event(u32 type, struct acpiphp_context *context)
 
 	case ACPI_NOTIFY_DEVICE_CHECK:
 		/* device check */
-		printk(KERN_INFO "%s %d: ACPI_NOTIFY_DEVICE_CHECK\n", __func__, __LINE__);
 		acpi_handle_debug(handle, "Device check in %s()\n", __func__);
 		if (bridge) {
 			acpiphp_check_bridge(bridge);
@@ -850,23 +839,19 @@ static void hotplug_event(u32 type, struct acpiphp_context *context)
 
 	case ACPI_NOTIFY_EJECT_REQUEST:
 		/* request device eject */
-		printk(KERN_INFO "%s %d: ACPI_NOTIFY_EJECT_REQUEST\n", __func__, __LINE__);
 		acpi_handle_debug(handle, "Eject request in %s()\n", __func__);
 		acpiphp_disable_and_eject_slot(slot);
 		break;
 	}
 
 	pci_unlock_rescan_remove();
-	printk(KERN_INFO "%s %d:\n", __func__, __LINE__);
 	if (bridge)
 		put_bridge(bridge);
-	printk(KERN_INFO "%s %d:\n", __func__, __LINE__);
 }
 
 static int acpiphp_hotplug_notify(struct acpi_device *adev, u32 type)
 {
 	struct acpiphp_context *context;
-	dev_info(&adev->dev, "%s %d: %s = %px\n", __func__, __LINE__, __func__, (void *) acpiphp_hotplug_notify);
 
 	context = acpiphp_grab_context(adev);
 	if (!context)
diff --git a/include/linux/pm_runtime.h b/include/linux/pm_runtime.h
index 0888b0d5ec73..d88d6b6ccf5b 100644
--- a/include/linux/pm_runtime.h
+++ b/include/linux/pm_runtime.h
@@ -508,8 +508,6 @@ static inline int pm_runtime_get(struct device *dev)
  */
 static inline int pm_runtime_get_sync(struct device *dev)
 {
-	if (!strcmp(dev_name(dev), "0000:00:01.1"))
-		dev_info(dev, "%s\n", __func__);
 	return __pm_runtime_resume(dev, RPM_GET_PUT);
 }
 
This is the message from 6.17.0-rc6-next-20250917-gpudebug-00028-gf99cf81b1da7 crashing,
captured via netconsole:

2025-10-06T04:52:35.932429+02:00 [T248]evmisc-0132 ev_queue_notify_reques: Dispatching Notify on [GPP0] (Device) Value 0x00 (Bus Check) Node 0000000069c9623b
2025-10-06T04:52:35.932429+02:00 [T177395]pcieport 0000:00:01.1: acpiphp_check_bridge 708#012 SUBSYSTEM=pci#012 DEVICE=+pci:0000:00:01.1
2025-10-06T04:52:35.932429+02:00 [T177395]pcieport 0000:00:01.1: __pm_runtime_resume 1243 dev = ffff97c001c930c8#012 SUBSYSTEM=pci#012 DEVICE=+pci:0000:00:01.1
2025-10-06T04:52:35.932429+02:00 [177395]pcieport 0000:00:01.1: rpm_resume 818 dev = ffff97c001c930c8#012 SUBSYSTEM=pci#012 DEVICE=+pci:0000:00:01.1
2025-10-06T04:52:35.932429+02:00 [177395]pcieport 0000:00:01.1: pm_runtime_deactivate_timer 146#012 SUBSYSTEM=pci#012 DEVICE=+pci:0000:00:01.1
2025-10-06T04:52:35.932429+02:00 [177395]pcieport 0000:00:01.1: rpm_resume 930#012 SUBSYSTEM=pci#012 DEVICE=+pci:0000:00:01.1
2025-10-06T04:52:35.932429+02:00 [177395]pcieport 0000:00:01.1: rpm_resume 959#012 SUBSYSTEM=pci#012 DEVICE=+pci:0000:00:01.1
2025-10-06T04:52:35.932429+02:00 [177395]pcieport 0000:00:01.1: rpm_resume 818 dev = ffff97c001c930c8#012 SUBSYSTEM=pci#012 DEVICE=+pci:0000:00:01.1
2025-10-06T04:52:35.932429+02:00 [177395]pcieport 0000:00:01.1: pm_runtime_deactivate_timer 146#012 SUBSYSTEM=pci#012 DEVICE=+pci:0000:00:01.1
2025-10-06T04:52:35.932429+02:00 [177395]pcieport 0000:00:01.1: rpm_resume 965#012 SUBSYSTEM=pci#012 DEVICE=+pci:0000:00:01.1

So the crash seems to happen in this part of rpm_resume():

[...]
 skip_parent:

	if (!strcmp(dev_name(dev), "0000:00:01.1"))
		dev_info(dev, "%s %d\n", __func__, __LINE__); // this is the last reported line
	if (dev->power.no_callbacks)
		goto no_callback;	/* Assume success. */

	__update_runtime_status(dev, RPM_RESUMING);

	callback = RPM_GET_CALLBACK(dev, runtime_resume);

	dev_pm_disable_wake_irq_check(dev, false);
	retval = rpm_callback(callback, dev);
	if (retval) {
		if (!strcmp(dev_name(dev), "0000:00:01.1"))
			dev_info(dev, "%s %d\n", __func__, __LINE__);
		__update_runtime_status(dev, RPM_SUSPENDED);
		pm_runtime_cancel_pending(dev);
		dev_pm_enable_wake_irq_check(dev, false);
	} else {
 no_callback:
		if (!strcmp(dev_name(dev), "0000:00:01.1"))
			dev_info(dev, "%s %d\n", __func__, __LINE__);
[...]

Bert Karwatzki

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ