lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20251006120944.7880-2-spasswolf@web.de>
Date: Mon,  6 Oct 2025 14:09:40 +0200
From: Bert Karwatzki <spasswolf@....de>
To: linux-kernel@...r.kernel.org
Cc: Bert Karwatzki <spasswolf@....de>,
	linux-next@...r.kernel.org,
	linux-stable@...r.kernel.org,
	regressions@...ts.linux.dev,
	linux-pci@...r.kernel.org,
	linux-acpi@...r.kernel.org,
	Mario Limonciello <superm1@...nel.org>,
	Christian König <christian.koenig@....com>,
	"Rafael J . Wysocki" <rafael.j.wysocki@...el.com>
Subject: [REGRESSION 01/04] Crash during resume of pcie bridge

To further debug the issue I inserted calls to dev_info() and printk() into
the amdgpu suspend/resume code, and the acpi and pcie hotplug resume code.

This is the patch used in kernel version
6.17.0-rc6-next-20250917-gpudebug-00021-gab98d880e3c8 (see list in previous mail)
(on top of next-20250917)

diff --git a/drivers/acpi/bus.c b/drivers/acpi/bus.c
index a984ccd4a2a0..bc365c0dbe2f 100644
--- a/drivers/acpi/bus.c
+++ b/drivers/acpi/bus.c
@@ -514,46 +514,60 @@ static void acpi_bus_notify(acpi_handle handle, u32 type, void *data)
 
 	switch (type) {
 	case ACPI_NOTIFY_BUS_CHECK:
+		printk(KERN_INFO "%s %d: ACPI_NOTIFY_BUS_CHECK\n", __func__, __LINE__);
 		acpi_handle_debug(handle, "ACPI_NOTIFY_BUS_CHECK event\n");
 		break;
 
 	case ACPI_NOTIFY_DEVICE_CHECK:
+		printk(KERN_INFO "%s %d: ACPI_NOTIFY_DEVICE_CHECK\n", __func__, __LINE__);
 		acpi_handle_debug(handle, "ACPI_NOTIFY_DEVICE_CHECK event\n");
 		break;
 
 	case ACPI_NOTIFY_DEVICE_WAKE:
+		printk(KERN_INFO "%s %d: ACPI_NOTIFY_DEVICE_WAKE\n", __func__, __LINE__);
 		acpi_handle_debug(handle, "ACPI_NOTIFY_DEVICE_WAKE event\n");
 		return;
 
 	case ACPI_NOTIFY_EJECT_REQUEST:
+		printk(KERN_INFO "%s %d: ACPI_NOTIFY_EJECT_REQUEST\n", __func__, __LINE__);
 		acpi_handle_debug(handle, "ACPI_NOTIFY_EJECT_REQUEST event\n");
 		break;
 
 	case ACPI_NOTIFY_DEVICE_CHECK_LIGHT:
+		printk(KERN_INFO "%s %d: ACPI_NOTIFY_DEVICE_CHECK_LIGHT\n", __func__, __LINE__);
 		acpi_handle_debug(handle, "ACPI_NOTIFY_DEVICE_CHECK_LIGHT event\n");
 		/* TBD: Exactly what does 'light' mean? */
 		return;
 
 	case ACPI_NOTIFY_FREQUENCY_MISMATCH:
+		printk(KERN_INFO "%s %d: ACPI_NOTIFY_FREQUENCY_MISMATCH\n", __func__, __LINE__);
 		acpi_handle_err(handle, "Device cannot be configured due "
 				"to a frequency mismatch\n");
 		return;
 
 	case ACPI_NOTIFY_BUS_MODE_MISMATCH:
+		printk(KERN_INFO "%s %d: ACPI_NOTIFY_BUS_MODE_MISMATCH\n", __func__, __LINE__);
 		acpi_handle_err(handle, "Device cannot be configured due "
 				"to a bus mode mismatch\n");
 		return;
 
 	case ACPI_NOTIFY_POWER_FAULT:
+		printk(KERN_INFO "%s %d: ACPI_NOTIFY_POWER_FAULT\n", __func__, __LINE__);
 		acpi_handle_err(handle, "Device has suffered a power fault\n");
 		return;
 
 	default:
+		printk(KERN_INFO "%s %d: acpi unknown event type\n", __func__, __LINE__);
 		acpi_handle_debug(handle, "Unknown event type 0x%x\n", type);
 		return;
 	}
 
 	adev = acpi_get_acpi_dev(handle);
+	if (adev)
+		dev_info(&adev->dev, "%s %d\n", __func__, __LINE__);
+	else
+		printk(KERN_INFO "%s %d: adev = NULL\n", __func__, __LINE__);
+		
 
 	if (adev && ACPI_SUCCESS(acpi_hotplug_schedule(adev, type)))
 		return;
diff --git a/drivers/acpi/device_pm.c b/drivers/acpi/device_pm.c
index 4e0583274b8f..9a7dc432b50d 100644
--- a/drivers/acpi/device_pm.c
+++ b/drivers/acpi/device_pm.c
@@ -539,6 +539,7 @@ static void acpi_pm_notify_handler(acpi_handle handle, u32 val, void *not_used)
 	if (!adev)
 		return;
 
+	dev_info(&adev->dev, "%s %d\n", __func__, __LINE__);
 	mutex_lock(&acpi_pm_notifier_lock);
 
 	if (adev->wakeup.flags.notifier_present) {
diff --git a/drivers/acpi/osl.c b/drivers/acpi/osl.c
index 5ff343096ece..0f6a16856119 100644
--- a/drivers/acpi/osl.c
+++ b/drivers/acpi/osl.c
@@ -1167,6 +1167,7 @@ void acpi_os_wait_events_complete(void)
 	 * Make sure the GPE handler or the fixed event handler is not used
 	 * on another CPU after removal.
 	 */
+	printk(KERN_INFO "%s %d\n", __func__, __LINE__);
 	if (acpi_sci_irq_valid())
 		synchronize_hardirq(acpi_sci_irq);
 	flush_workqueue(kacpid_wq);
@@ -1184,6 +1185,7 @@ static void acpi_hotplug_work_fn(struct work_struct *work)
 {
 	struct acpi_hp_work *hpw = container_of(work, struct acpi_hp_work, work);
 
+	printk(KERN_INFO "%s %d\n", __func__, __LINE__);
 	acpi_os_wait_events_complete();
 	acpi_device_hotplug(hpw->adev, hpw->src);
 	kfree(hpw);
@@ -1192,6 +1194,7 @@ static void acpi_hotplug_work_fn(struct work_struct *work)
 acpi_status acpi_hotplug_schedule(struct acpi_device *adev, u32 src)
 {
 	struct acpi_hp_work *hpw;
+	dev_info(&adev->dev, "%s %d\n", __func__, __LINE__);
 
 	acpi_handle_debug(adev->handle,
 			  "Scheduling hotplug event %u for deferred handling\n",
diff --git a/drivers/acpi/scan.c b/drivers/acpi/scan.c
index 065abe56f440..d53be7e0388d 100644
--- a/drivers/acpi/scan.c
+++ b/drivers/acpi/scan.c
@@ -251,6 +251,7 @@ static int acpi_scan_check_and_detach(struct acpi_device *adev, void *p)
 {
 	struct acpi_scan_handler *handler = adev->handler;
 	uintptr_t flags = (uintptr_t)p;
+	dev_info(&adev->dev, "%s %d\n", __func__, __LINE__);
 
 	acpi_dev_for_each_child_reverse(adev, acpi_scan_check_and_detach, p);
 
@@ -314,6 +315,7 @@ static void acpi_scan_check_subtree(struct acpi_device *adev)
 {
 	uintptr_t flags = ACPI_SCAN_CHECK_FLAG_STATUS;
 
+	dev_info(&adev->dev, "%s %d\n", __func__, __LINE__);
 	acpi_scan_check_and_detach(adev, (void *)flags);
 }
 
@@ -369,6 +371,7 @@ static int acpi_scan_rescan_bus(struct acpi_device *adev)
 {
 	struct acpi_scan_handler *handler = adev->handler;
 	int ret;
+	dev_info(&adev->dev, "%s %d\n", __func__, __LINE__);
 
 	if (handler && handler->hotplug.scan_dependent)
 		ret = handler->hotplug.scan_dependent(adev);
@@ -385,6 +388,7 @@ static int acpi_scan_device_check(struct acpi_device *adev)
 {
 	struct acpi_device *parent;
 
+	dev_info(&adev->dev, "%s %d\n", __func__, __LINE__);
 	acpi_scan_check_subtree(adev);
 
 	if (!acpi_device_is_present(adev))
@@ -412,19 +416,24 @@ static int acpi_scan_device_check(struct acpi_device *adev)
 static int acpi_scan_bus_check(struct acpi_device *adev)
 {
 	acpi_scan_check_subtree(adev);
+	dev_info(&adev->dev, "%s %d\n", __func__, __LINE__);
 
 	return acpi_scan_rescan_bus(adev);
 }
 
 static int acpi_generic_hotplug_event(struct acpi_device *adev, u32 type)
 {
+	dev_info(&adev->dev, "%s %d\n", __func__, __LINE__);
 	switch (type) {
 	case ACPI_NOTIFY_BUS_CHECK:
+		dev_info(&adev->dev, "%s %d\n", __func__, __LINE__);
 		return acpi_scan_bus_check(adev);
 	case ACPI_NOTIFY_DEVICE_CHECK:
+		dev_info(&adev->dev, "%s %d\n", __func__, __LINE__);
 		return acpi_scan_device_check(adev);
 	case ACPI_NOTIFY_EJECT_REQUEST:
 	case ACPI_OST_EC_OSPM_EJECT:
+		dev_info(&adev->dev, "%s %d\n", __func__, __LINE__);
 		if (adev->handler && !adev->handler->hotplug.enabled) {
 			dev_info(&adev->dev, "Eject disabled\n");
 			return -EPERM;
@@ -441,6 +450,7 @@ void acpi_device_hotplug(struct acpi_device *adev, u32 src)
 	u32 ost_code = ACPI_OST_SC_NON_SPECIFIC_FAILURE;
 	int error = -ENODEV;
 
+	dev_info(&adev->dev, "%s %d\n", __func__, __LINE__);
 	lock_device_hotplug();
 	mutex_lock(&acpi_scan_lock);
 
@@ -466,9 +476,10 @@ void acpi_device_hotplug(struct acpi_device *adev, u32 src)
 		 * There may be additional notify handlers for device objects
 		 * without the .event() callback, so ignore them here.
 		 */
-		if (notify)
+		if (notify) {
+			dev_info(&adev->dev, "%s %d: calling notify = %px\n", __func__, __LINE__, (void *) notify);
 			error = notify(adev, src);
-		else
+		} else
 			goto out;
 	}
 	switch (error) {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c
index 6c62e27b9800..4f00e15e7759 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c
@@ -168,6 +168,7 @@ static union acpi_object *amdgpu_atif_call(struct amdgpu_atif *atif,
 		atif_arg_elements[1].integer.value = 0;
 	}
 
+	printk(KERN_INFO "%s %d\n", __func__, __LINE__);
 	status = acpi_evaluate_object(atif->handle, NULL, &atif_arg,
 				      &buffer);
 	obj = (union acpi_object *)buffer.pointer;
@@ -559,6 +560,7 @@ static union acpi_object *amdgpu_atcs_call(struct amdgpu_atcs *atcs,
 		atcs_arg_elements[1].integer.value = 0;
 	}
 
+	printk(KERN_INFO "%s %d\n", __func__, __LINE__);
 	status = acpi_evaluate_object(atcs->handle, NULL, &atcs_arg, &buffer);
 
 	/* Fail only if calling the method fails and ATIF is supported */
@@ -608,6 +610,7 @@ static int amdgpu_atcs_verify_interface(struct amdgpu_atcs *atcs)
 	size_t size;
 	int err = 0;
 
+	printk(KERN_INFO "%s %d\n", __func__, __LINE__);
 	info = amdgpu_atcs_call(atcs, ATCS_FUNCTION_VERIFY_INTERFACE, NULL);
 	if (!info)
 		return -EIO;
@@ -682,6 +685,7 @@ int amdgpu_acpi_pcie_notify_device_ready(struct amdgpu_device *adev)
 	if (!atcs->functions.pcie_dev_rdy)
 		return -EINVAL;
 
+	dev_info(adev->dev, "%s %d\n", __func__, __LINE__);
 	info = amdgpu_atcs_call(atcs, ATCS_FUNCTION_PCIE_DEVICE_READY_NOTIFICATION, NULL);
 	if (!info)
 		return -EIO;
@@ -733,6 +737,7 @@ int amdgpu_acpi_pcie_performance_request(struct amdgpu_device *adev,
 	params.pointer = &atcs_input;
 
 	while (retry--) {
+		dev_info(adev->dev, "%s %d\n", __func__, __LINE__);
 		info = amdgpu_atcs_call(atcs, ATCS_FUNCTION_PCIE_PERFORMANCE_REQUEST, &params);
 		if (!info)
 			return -EIO;
@@ -798,6 +803,7 @@ int amdgpu_acpi_power_shift_control(struct amdgpu_device *adev,
 	params.length = sizeof(struct atcs_pwr_shift_input);
 	params.pointer = &atcs_input;
 
+	dev_info(adev->dev, "%s %d\n", __func__, __LINE__);
 	info = amdgpu_atcs_call(atcs, ATCS_FUNCTION_POWER_SHIFT_CONTROL, &params);
 	if (!info) {
 		DRM_ERROR("ATCS PSC update failed\n");
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atpx_handler.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_atpx_handler.c
index 3893e6fc2f03..ed3063f09007 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atpx_handler.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atpx_handler.c
@@ -123,6 +123,7 @@ static union acpi_object *amdgpu_atpx_call(acpi_handle handle, int function,
 		atpx_arg_elements[1].integer.value = 0;
 	}
 
+	printk(KERN_INFO "%s %d\n", __func__, __LINE__);
 	status = acpi_evaluate_object(handle, NULL, &atpx_arg, &buffer);
 
 	/* Fail only if calling the method fails and ATPX is supported */
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_bios.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_bios.c
index 00e96419fcda..542d039cfd42 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_bios.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_bios.c
@@ -272,6 +272,7 @@ static int amdgpu_atrm_call(acpi_handle atrm_handle, uint8_t *bios,
 	atrm_arg_elements[1].type = ACPI_TYPE_INTEGER;
 	atrm_arg_elements[1].integer.value = len;
 
+	printk(KERN_INFO "%s %d\n", __func__, __LINE__);
 	status = acpi_evaluate_object(atrm_handle, NULL, &atrm_arg, &buffer);
 	if (ACPI_FAILURE(status)) {
 		DRM_ERROR("failed to evaluate ATRM got %s\n", acpi_format_exception(status));
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 0fdfde3dcb9f..bab504d1d24d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -5194,6 +5194,7 @@ int amdgpu_device_suspend(struct drm_device *dev, bool notify_clients)
 	struct amdgpu_device *adev = drm_to_adev(dev);
 	int r = 0;
 
+	dev_info(adev->dev, "%s %d\n", __func__, __LINE__);
 	if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
 		return 0;
 
@@ -5208,6 +5209,7 @@ int amdgpu_device_suspend(struct drm_device *dev, bool notify_clients)
 			return r;
 	}
 
+	dev_info(adev->dev, "%s %d\n", __func__, __LINE__);
 	if (amdgpu_acpi_smart_shift_update(adev, AMDGPU_SS_DEV_D3))
 		dev_warn(adev->dev, "smart shift update failed\n");
 
@@ -5286,6 +5288,7 @@ int amdgpu_device_resume(struct drm_device *dev, bool notify_clients)
 	struct amdgpu_device *adev = drm_to_adev(dev);
 	int r = 0;
 
+	dev_info(adev->dev, "%s %d\n", __func__, __LINE__);
 	if (amdgpu_sriov_vf(adev)) {
 		r = amdgpu_virt_request_full_gpu(adev, true);
 		if (r)
@@ -5379,6 +5382,7 @@ int amdgpu_device_resume(struct drm_device *dev, bool notify_clients)
 	amdgpu_vram_mgr_clear_reset_blocks(adev);
 	adev->in_suspend = false;
 
+	dev_info(adev->dev, "%s %d\n", __func__, __LINE__);
 	if (amdgpu_acpi_smart_shift_update(adev, AMDGPU_SS_DEV_D0))
 		dev_warn(adev->dev, "smart shift update failed\n");
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
index ece251cbe8c3..165bd79fce82 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -2795,6 +2795,7 @@ static int amdgpu_pmops_runtime_suspend(struct device *dev)
 	struct drm_device *drm_dev = pci_get_drvdata(pdev);
 	struct amdgpu_device *adev = drm_to_adev(drm_dev);
 	int ret, i;
+	dev_info(dev, "%s %d\n", __func__, __LINE__);
 
 	if (adev->pm.rpm_mode == AMDGPU_RUNPM_NONE) {
 		pm_runtime_forbid(dev);
@@ -2874,6 +2875,7 @@ static int amdgpu_pmops_runtime_resume(struct device *dev)
 	struct drm_device *drm_dev = pci_get_drvdata(pdev);
 	struct amdgpu_device *adev = drm_to_adev(drm_dev);
 	int ret;
+	dev_info(dev, "%s %d\n", __func__, __LINE__);
 
 	if (adev->pm.rpm_mode == AMDGPU_RUNPM_NONE)
 		return -EINVAL;
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
index 8841d7213de4..576ff827d80c 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
@@ -7475,6 +7475,7 @@ static int gfx_v10_0_hw_init(struct amdgpu_ip_block *ip_block)
 {
 	int r;
 	struct amdgpu_device *adev = ip_block->adev;
+	dev_info(adev->dev, "%s %d\n", __func__, __LINE__);
 
 	if (!amdgpu_emu_mode)
 		gfx_v10_0_init_golden_registers(adev);
@@ -7529,6 +7530,7 @@ static int gfx_v10_0_hw_init(struct amdgpu_ip_block *ip_block)
 static int gfx_v10_0_hw_fini(struct amdgpu_ip_block *ip_block)
 {
 	struct amdgpu_device *adev = ip_block->adev;
+	dev_info(adev->dev, "%s %d\n", __func__, __LINE__);
 
 	cancel_delayed_work_sync(&adev->gfx.idle_work);
 
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
index d7499be8c4bf..fd4062e97e11 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
@@ -983,6 +983,7 @@ static int gmc_v10_0_hw_init(struct amdgpu_ip_block *ip_block)
 {
 	struct amdgpu_device *adev = ip_block->adev;
 	int r;
+	dev_info(adev->dev, "%s %d\n", __func__, __LINE__);
 
 	adev->gmc.flush_pasid_uses_kiq = !amdgpu_emu_mode;
 
@@ -1029,6 +1030,7 @@ static void gmc_v10_0_gart_disable(struct amdgpu_device *adev)
 static int gmc_v10_0_hw_fini(struct amdgpu_ip_block *ip_block)
 {
 	struct amdgpu_device *adev = ip_block->adev;
+	dev_info(adev->dev, "%s %d\n", __func__, __LINE__);
 
 	gmc_v10_0_gart_disable(adev);
 
diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v3_0.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v3_0.c
index d1a011c40ba2..a181c9965282 100644
--- a/drivers/gpu/drm/amd/amdgpu/jpeg_v3_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v3_0.c
@@ -174,6 +174,7 @@ static int jpeg_v3_0_hw_init(struct amdgpu_ip_block *ip_block)
 {
 	struct amdgpu_device *adev = ip_block->adev;
 	struct amdgpu_ring *ring = adev->jpeg.inst->ring_dec;
+	dev_info(adev->dev, "%s %d\n", __func__, __LINE__);
 
 	adev->nbio.funcs->vcn_doorbell_range(adev, ring->use_doorbell,
 		(adev->doorbell_index.vcn.vcn_ring0_1 << 1), 0);
@@ -212,6 +213,7 @@ static int jpeg_v3_0_suspend(struct amdgpu_ip_block *ip_block)
 {
 	int r;
 
+	dev_info(ip_block->adev->dev, "%s %d\n", __func__, __LINE__);
 	r = jpeg_v3_0_hw_fini(ip_block);
 	if (r)
 		return r;
@@ -232,6 +234,7 @@ static int jpeg_v3_0_resume(struct amdgpu_ip_block *ip_block)
 {
 	int r;
 
+	dev_info(ip_block->adev->dev, "%s %d\n", __func__, __LINE__);
 	r = amdgpu_jpeg_resume(ip_block->adev);
 	if (r)
 		return r;
diff --git a/drivers/gpu/drm/amd/amdgpu/navi10_ih.c b/drivers/gpu/drm/amd/amdgpu/navi10_ih.c
index 4cd325149b63..f33f5e2e6e53 100644
--- a/drivers/gpu/drm/amd/amdgpu/navi10_ih.c
+++ b/drivers/gpu/drm/amd/amdgpu/navi10_ih.c
@@ -320,6 +320,7 @@ static int navi10_ih_irq_init(struct amdgpu_device *adev)
 	u32 ih_chicken;
 	int ret;
 	int i;
+	dev_info(adev->dev, "%s %d\n", __func__, __LINE__);
 
 	/* disable irqs */
 	ret = navi10_ih_toggle_interrupts(adev, false);
@@ -385,6 +386,7 @@ static int navi10_ih_irq_init(struct amdgpu_device *adev)
  */
 static void navi10_ih_irq_disable(struct amdgpu_device *adev)
 {
+	dev_info(adev->dev, "%s %d\n", __func__, __LINE__);
 	force_update_wptr_for_self_int(adev, 0, 8, false);
 	navi10_ih_toggle_interrupts(adev, false);
 
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c
index 3bd44c24f692..78f60da4f498 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c
@@ -697,6 +697,7 @@ static int sdma_v5_2_gfx_resume(struct amdgpu_device *adev)
 {
 	int i, r;
 
+	dev_info(adev->dev, "%s %d\n", __func__, __LINE__);
 	for (i = 0; i < adev->sdma.num_instances; i++) {
 		r = sdma_v5_2_gfx_resume_instance(adev, i, false);
 		if (r)
@@ -819,6 +820,7 @@ static int sdma_v5_2_start(struct amdgpu_device *adev)
 	int r = 0;
 	struct amdgpu_ip_block *ip_block;
 
+	dev_info(adev->dev, "%s %d\n", __func__, __LINE__);
 	if (amdgpu_sriov_vf(adev)) {
 		sdma_v5_2_ctx_switch_enable(adev, false);
 		sdma_v5_2_enable(adev, false);
@@ -1404,6 +1406,7 @@ static int sdma_v5_2_hw_fini(struct amdgpu_ip_block *ip_block)
 	if (amdgpu_sriov_vf(adev))
 		return 0;
 
+	dev_info(adev->dev, "%s %d\n", __func__, __LINE__);
 	sdma_v5_2_ctx_switch_enable(adev, false);
 	sdma_v5_2_enable(adev, false);
 
diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c
index d9cf8f0feeb3..b31062f212b5 100644
--- a/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c
@@ -367,6 +367,7 @@ static int vcn_v3_0_hw_init(struct amdgpu_ip_block *ip_block)
 	struct amdgpu_device *adev = ip_block->adev;
 	struct amdgpu_ring *ring;
 	int i, j, r;
+	dev_info(adev->dev, "%s %d\n", __func__, __LINE__);
 
 	if (amdgpu_sriov_vf(adev)) {
 		r = vcn_v3_0_start_sriov(adev);
@@ -441,6 +442,7 @@ static int vcn_v3_0_hw_fini(struct amdgpu_ip_block *ip_block)
 {
 	struct amdgpu_device *adev = ip_block->adev;
 	int i;
+	dev_info(adev->dev, "%s %d\n", __func__, __LINE__);
 
 	for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
 		struct amdgpu_vcn_inst *vinst = &adev->vcn.inst[i];
@@ -474,6 +476,7 @@ static int vcn_v3_0_suspend(struct amdgpu_ip_block *ip_block)
 	struct amdgpu_device *adev = ip_block->adev;
 	int r, i;
 
+	dev_info(adev->dev, "%s %d\n", __func__, __LINE__);
 	r = vcn_v3_0_hw_fini(ip_block);
 	if (r)
 		return r;
@@ -498,6 +501,7 @@ static int vcn_v3_0_resume(struct amdgpu_ip_block *ip_block)
 {
 	struct amdgpu_device *adev = ip_block->adev;
 	int r, i;
+	dev_info(adev->dev, "%s %d\n", __func__, __LINE__);
 
 	for (i = 0; i < adev->vcn.num_vcn_inst; i++) {
 		r = amdgpu_vcn_resume(ip_block->adev, i);
diff --git a/drivers/pci/hotplug/acpiphp_glue.c b/drivers/pci/hotplug/acpiphp_glue.c
index 5b1f271c6034..e56ab308da20 100644
--- a/drivers/pci/hotplug/acpiphp_glue.c
+++ b/drivers/pci/hotplug/acpiphp_glue.c
@@ -484,6 +484,7 @@ static void enable_slot(struct acpiphp_slot *slot, bool bridge)
 	struct pci_dev *dev;
 	struct pci_bus *bus = slot->bus;
 	struct acpiphp_func *func;
+	printk(KERN_INFO "%s %d\n", __func__, __LINE__);
 
 	if (bridge && bus->self && hotplug_is_native(bus->self)) {
 		/*
@@ -494,10 +495,14 @@ static void enable_slot(struct acpiphp_slot *slot, bool bridge)
 		 * as a Thunderbolt host controller.
 		 */
 		for_each_pci_bridge(dev, bus) {
-			if (PCI_SLOT(dev->devfn) == slot->device)
+			dev_info(&dev->dev, "%s %d\n", __func__, __LINE__);
+			if (PCI_SLOT(dev->devfn) == slot->device) {
+				dev_info(&dev->dev, "%s %d\n", __func__, __LINE__);
 				acpiphp_native_scan_bridge(dev);
+			}
 		}
 	} else {
+		printk(KERN_INFO "%s %d\n", __func__, __LINE__);
 		LIST_HEAD(add_list);
 		int max, pass;
 
@@ -505,11 +510,15 @@ static void enable_slot(struct acpiphp_slot *slot, bool bridge)
 		max = acpiphp_max_busnr(bus);
 		for (pass = 0; pass < 2; pass++) {
 			for_each_pci_bridge(dev, bus) {
-				if (PCI_SLOT(dev->devfn) != slot->device)
+				dev_info(&dev->dev, "%s %d\n", __func__, __LINE__);
+				if (PCI_SLOT(dev->devfn) != slot->device) {
+					printk(KERN_INFO "%s %d\n", __func__, __LINE__);
 					continue;
+				}
 
 				max = pci_scan_bridge(bus, dev, max, pass);
 				if (pass && dev->subordinate) {
+					dev_info(&dev->dev, "%s %d\n", __func__, __LINE__);
 					check_hotplug_bridge(slot, dev);
 					pcibios_resource_survey_bus(dev->subordinate);
 					__pci_bus_size_bridges(dev->subordinate,
@@ -526,6 +535,7 @@ static void enable_slot(struct acpiphp_slot *slot, bool bridge)
 
 	list_for_each_entry(dev, &bus->devices, bus_list) {
 		/* Assume that newly added devices are powered on already. */
+		dev_info(&dev->dev, "%s %d\n", __func__, __LINE__);
 		if (!pci_dev_is_added(dev))
 			dev->current_state = PCI_D0;
 	}
@@ -544,6 +554,7 @@ static void enable_slot(struct acpiphp_slot *slot, bool bridge)
 		}
 		pci_dev_put(dev);
 	}
+	printk(KERN_INFO "%s %d\n", __func__, __LINE__);
 }
 
 /**
@@ -702,31 +713,43 @@ static void acpiphp_check_bridge(struct acpiphp_bridge *bridge)
 	if (bridge->is_going_away)
 		return;
 
-	if (bridge->pci_dev)
+	if (bridge->pci_dev) {
+		dev_info(&bridge->pci_dev->dev, "%s %d\n", __func__, __LINE__);
 		pm_runtime_get_sync(&bridge->pci_dev->dev);
+	}
 
+	dev_info(&bridge->pci_dev->dev, "%s %d\n", __func__, __LINE__);
 	list_for_each_entry(slot, &bridge->slots, node) {
 		struct pci_bus *bus = slot->bus;
 		struct pci_dev *dev, *tmp;
+		dev_info(&bridge->pci_dev->dev, "%s %d\n", __func__, __LINE__);
 
 		if (slot_no_hotplug(slot)) {
-			; /* do nothing */
+			/* do nothing */
+			dev_info(&bridge->pci_dev->dev, "%s %d\n", __func__, __LINE__);
 		} else if (device_status_valid(get_slot_status(slot))) {
 			/* remove stale devices if any */
 			list_for_each_entry_safe_reverse(dev, tmp,
-							 &bus->devices, bus_list)
+							 &bus->devices, bus_list) {
+				dev_info(&dev->dev, "%s %d\n", __func__, __LINE__);
 				if (PCI_SLOT(dev->devfn) == slot->device)
 					trim_stale_devices(dev);
+			}
 
 			/* configure all functions */
+			dev_info(&bridge->pci_dev->dev, "%s %d\n", __func__, __LINE__);
 			enable_slot(slot, true);
 		} else {
+			dev_info(&bridge->pci_dev->dev, "%s %d\n", __func__, __LINE__);
 			disable_slot(slot);
 		}
 	}
 
-	if (bridge->pci_dev)
+	if (bridge->pci_dev) {
 		pm_runtime_put(&bridge->pci_dev->dev);
+		dev_info(&bridge->pci_dev->dev, "%s %d\n", __func__, __LINE__);
+	}
+		dev_info(&bridge->pci_dev->dev, "%s %d\n", __func__, __LINE__);
 }
 
 /*
@@ -760,6 +783,7 @@ static void acpiphp_sanitize_bus(struct pci_bus *bus)
 void acpiphp_check_host_bridge(struct acpi_device *adev)
 {
 	struct acpiphp_bridge *bridge = NULL;
+	dev_info(&adev->dev, "%s %d\n", __func__, __LINE__);
 
 	acpi_lock_hp_context();
 	if (adev->hp) {
@@ -799,6 +823,7 @@ static void hotplug_event(u32 type, struct acpiphp_context *context)
 	switch (type) {
 	case ACPI_NOTIFY_BUS_CHECK:
 		/* bus re-enumerate */
+		printk(KERN_INFO "%s %d: ACPI_NOTIFY_BUS_CHECK\n", __func__, __LINE__);
 		acpi_handle_debug(handle, "Bus check in %s()\n", __func__);
 		if (bridge)
 			acpiphp_check_bridge(bridge);
@@ -809,6 +834,7 @@ static void hotplug_event(u32 type, struct acpiphp_context *context)
 
 	case ACPI_NOTIFY_DEVICE_CHECK:
 		/* device check */
+		printk(KERN_INFO "%s %d: ACPI_NOTIFY_DEVICE_CHECK\n", __func__, __LINE__);
 		acpi_handle_debug(handle, "Device check in %s()\n", __func__);
 		if (bridge) {
 			acpiphp_check_bridge(bridge);
@@ -824,19 +850,23 @@ static void hotplug_event(u32 type, struct acpiphp_context *context)
 
 	case ACPI_NOTIFY_EJECT_REQUEST:
 		/* request device eject */
+		printk(KERN_INFO "%s %d: ACPI_NOTIFY_EJECT_REQUEST\n", __func__, __LINE__);
 		acpi_handle_debug(handle, "Eject request in %s()\n", __func__);
 		acpiphp_disable_and_eject_slot(slot);
 		break;
 	}
 
 	pci_unlock_rescan_remove();
+	printk(KERN_INFO "%s %d:\n", __func__, __LINE__);
 	if (bridge)
 		put_bridge(bridge);
+	printk(KERN_INFO "%s %d:\n", __func__, __LINE__);
 }
 
 static int acpiphp_hotplug_notify(struct acpi_device *adev, u32 type)
 {
 	struct acpiphp_context *context;
+	dev_info(&adev->dev, "%s %d: %s = %px\n", __func__, __LINE__, __func__, (void *) acpiphp_hotplug_notify);
 
 	context = acpiphp_grab_context(adev);
 	if (!context)
diff --git a/drivers/pci/hotplug/pciehp_core.c b/drivers/pci/hotplug/pciehp_core.c
index f59baa912970..8f90f91c0a07 100644
--- a/drivers/pci/hotplug/pciehp_core.c
+++ b/drivers/pci/hotplug/pciehp_core.c
@@ -266,6 +266,7 @@ static void pciehp_disable_interrupt(struct pcie_device *dev)
 	 * Disable hotplug interrupt so that it does not trigger
 	 * immediately when the downstream link goes down.
 	 */
+	dev_info(&dev->device, "%s %d\n", __func__, __LINE__);
 	if (pme_is_native(dev))
 		pcie_disable_interrupt(get_service_data(dev));
 }
@@ -273,6 +274,7 @@ static void pciehp_disable_interrupt(struct pcie_device *dev)
 #ifdef CONFIG_PM_SLEEP
 static int pciehp_suspend(struct pcie_device *dev)
 {
+	dev_info(&dev->device, "%s %d\n", __func__, __LINE__);
 	/*
 	 * If the port is already runtime suspended we can keep it that
 	 * way.
@@ -287,6 +289,7 @@ static int pciehp_suspend(struct pcie_device *dev)
 static int pciehp_resume_noirq(struct pcie_device *dev)
 {
 	struct controller *ctrl = get_service_data(dev);
+	dev_info(&dev->device, "%s %d\n", __func__, __LINE__);
 
 	/* pci_restore_state() just wrote to the Slot Control register */
 	ctrl->cmd_started = jiffies;
@@ -317,6 +320,7 @@ static int pciehp_resume_noirq(struct pcie_device *dev)
 static int pciehp_resume(struct pcie_device *dev)
 {
 	struct controller *ctrl = get_service_data(dev);
+	dev_info(&dev->device, "%s %d\n", __func__, __LINE__);
 
 	if (pme_is_native(dev))
 		pcie_enable_interrupt(ctrl);
@@ -328,6 +332,7 @@ static int pciehp_resume(struct pcie_device *dev)
 
 static int pciehp_runtime_suspend(struct pcie_device *dev)
 {
+	dev_info(&dev->device, "%s %d\n", __func__, __LINE__);
 	pciehp_disable_interrupt(dev);
 	return 0;
 }
@@ -335,6 +340,7 @@ static int pciehp_runtime_suspend(struct pcie_device *dev)
 static int pciehp_runtime_resume(struct pcie_device *dev)
 {
 	struct controller *ctrl = get_service_data(dev);
+	dev_info(&dev->device, "%s %d\n", __func__, __LINE__);
 
 	/* pci_restore_state() just wrote to the Slot Control register */
 	ctrl->cmd_started = jiffies;


This gives the following output when crashing (only the last few lines, which do not
appear in /var/log/kern.log, but are captured with netconsole)

The processes involved here are the following:

T254: [irq/40-ACPI:Event] (this is a threaded interrupt handler for ACPI events)
The other two processes are [kworker/mm_percpu_wq] workqueues.

2025-09-30T02:25:57.704378+02:00 [T254]evmisc-0132 ev_queue_notify_reques: Dispatching Notify on [GPP0] (Device) Value 0x00 (Bus Check) Node 0000000017caa1c9
2025-09-30T02:25:57.704378+02:00 [T61442]acpi_bus_notify 517: ACPI_NOTIFY_BUS_CHECK
2025-09-30T02:25:57.704378+02:00 [T61442]acpi device:00: acpi_bus_notify 567#012 SUBSYSTEM=acpi#012 DEVICE=+acpi:device:00
2025-09-30T02:25:57.704378+02:00 [T61442]acpi device:00: acpi_hotplug_schedule 1197#012 SUBSYSTEM=acpi#012 DEVICE=+acpi:device:00
2025-09-30T02:25:57.704378+02:00 [T77816]acpi_hotplug_work_fn 1188
2025-09-30T02:25:57.704378+02:00 [T77816]acpi_os_wait_events_complete 1170
2025-09-30T02:25:57.704378+02:00 [T77816]acpi device:00: acpi_device_hotplug 453#012 SUBSYSTEM=acpi#012 DEVICE=+acpi:device:00
2025-09-30T02:25:57.704378+02:00 [T77816]acpi device:00: acpi_device_hotplug 480: calling notify = ffffffffb8a24fc0#012 SUBSYSTEM=acpi#012 DEVICE=+acpi:device:00
2025-09-30T02:25:57.704378+02:00 [T77816]acpi device:00: acpiphp_hotplug_notify 869: acpiphp_hotplug_notify = ffffffffb8a24fc0#012 SUBSYSTEM=acpi#012 DEVICE=+acpi:device:00
2025-09-30T02:25:57.704378+02:00 [T77816]hotplug_event 826: ACPI_NOTIFY_BUS_CHECK
2025-09-30T02:25:57.704378+02:00 [T77816]pcieport 0000:00:01.1: acpiphp_check_bridge 717#012 SUBSYSTEM=pci#012 DEVICE=+pci:0000:00:01.1

So the problem appears to be happening inside acpiphp_check_bridge():

static void acpiphp_check_bridge(struct acpiphp_bridge *bridge)
{
	struct acpiphp_slot *slot;

	/* Bail out if the bridge is going away. */
	if (bridge->is_going_away)
		return;

	if (bridge->pci_dev) {
		dev_info(&bridge->pci_dev->dev, "%s %d\n", __func__, __LINE__); // This is the last reported line.
		pm_runtime_get_sync(&bridge->pci_dev->dev);
	}

	dev_info(&bridge->pci_dev->dev, "%s %d\n", __func__, __LINE__); // This line is not reported during a crash.


Bert Karwatzki



Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ