lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [day] [month] [year] [list]
Message-ID: <20250630072028.35178-1-fuqiang.wng@gmail.com>
Date: Mon, 30 Jun 2025 15:20:28 +0800
From: fuqiang wang <fuqiang.wng@...il.com>
To: Bjorn Helgaas <bhelgaas@...gle.com>,
	Thomas Gleixner <tglx@...utronix.de>,
	"Rafael J . Wysocki" <rafael@...nel.org>,
	Len Brown <lenb@...nel.org>,
	linux-pci@...r.kernel.org,
	linux-acpi@...r.kernel.org,
	linux-kernel@...r.kernel.org
Cc: wangfuqiang49 <wangfuqiang49@...com>,
	yaozhenguo <yaozhenguo@...com>,
	fuqiang wang <fuqiang.wng@...il.com>
Subject: [RFC PATCH] pci hotplug: fix hotplug bug during kernel boot

Performing a hotplug operation while a virtual machine is still booting
may cause the guest kernel to hit a BUG_ON() on the x86 architecture. The
resulting dmesg output is as follows:

  ------------[ cut here ]------------
  kernel BUG at kernel/resource.c:792!
  Oops: invalid opcode: 0000 [#1] PREEMPT SMP PTI
  CPU: 1 UID: 0 PID: 215 Comm: kworker/u128:5 Not tainted 6.14.0-rc1+ #17
  Hardware name: JD JCloud Iaas Jvirt, BIOS unknown 2/2/2022
  Workqueue: kacpi_hotplug acpi_hotplug_work_fn

  RIP: 0010:reallocate_resource+0x197/0x1d0
  Code: 20 48 8b 44 24 28 48 89 43 28 48 8b 44 24 30 48 89 43 30 48 8b 44 24 38 48 89 43 38 e8 12 db ff ff 48 85 c0 0f 84 5d ff ff ff <0f> 0b 48 8b 74 24 08 48 3b 73 08 0f 82 1c ff ff ff 48 89 0b 48 89
  RSP: 0000:ffffc900008479b0 EFLAGS: 00010282
  RAX: ffff8881020c73b0 RBX: ffff8881021813b0 RCX: 000000000000343f
  RDX: 0000000000003400 RSI: ffff8881021813b0 RDI: ffff8881020c73b0
  RBP: 0000000000000000 R08: ffff8881021863e0 R09: 0000000000000040
  R10: 0000000000000000 R11: 000000000000343f R12: ffff88810020d6f0
  R13: ffffc90000847a20 R14: ffff88810020d6f0 R15: ffffffff82edb970
  FS:  0000000000000000(0000) GS:ffff88842ee80000(0000) knlGS:0000000000000000
  CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
  CR2: 0000000000000000 CR3: 0000000003036001 CR4: 0000000000170ef0
  Call Trace:
   <TASK>
   ? die+0x32/0x80
   ? do_trap+0xd9/0x100
   ? reallocate_resource+0x197/0x1d0
   ? do_error_trap+0x65/0x80
   ? reallocate_resource+0x197/0x1d0
   ? exc_invalid_op+0x4c/0x60
   ? reallocate_resource+0x197/0x1d0
   ? asm_exc_invalid_op+0x16/0x20
   ? reallocate_resource+0x197/0x1d0
   allocate_resource+0x57/0xd0
   ? __pfx_pcibios_align_resource+0x10/0x10
   pci_bus_alloc_from_region+0x1df/0x240
   ? __pfx_pcibios_align_resource+0x10/0x10
   ? __pfx_pcibios_align_resource+0x10/0x10
   ? __pfx_pcibios_align_resource+0x10/0x10
   pci_bus_alloc_resource+0x86/0xb0
   ? __pfx_pcibios_align_resource+0x10/0x10
   _pci_assign_resource+0x9e/0x120
   ? __pfx_pcibios_align_resource+0x10/0x10
   pci_assign_resource+0xae/0x290
   assign_requested_resources_sorted+0x4a/0xb0
   __assign_resources_sorted+0x491/0x4d0
   ? __dev_sort_resources+0x9b/0x2a0
   __pci_bus_assign_resources+0x6f/0x1f0
   enable_slot+0x25e/0x440
   ? pci_device_is_present+0x49/0x70
   acpiphp_check_bridge.part.0+0x117/0x150
   hotplug_event+0x13d/0x220
   ? __pfx_acpiphp_hotplug_notify+0x10/0x10
   acpiphp_hotplug_notify+0x20/0x60
   acpi_device_hotplug+0xae/0x240
   acpi_hotplug_work_fn+0x1a/0x30
   process_one_work+0x184/0x3a0
   worker_thread+0x24d/0x360
   ? __pfx_worker_thread+0x10/0x10
   kthread+0xed/0x220
   ? finish_task_switch.isra.0+0x88/0x2b0
   ? __pfx_kthread+0x10/0x10
   ret_from_fork+0x30/0x50
   ? __pfx_kthread+0x10/0x10
   ret_from_fork_asm+0x1a/0x30
   </TASK>
  Modules linked in:

The root cause is a race between the hotplug enable_slot() path and
pcibios_init() during kernel initialization. In the enable_slot() path,
__dev_sort_resources() first links all the resources of the devices
downstream of the bridge into the head list (since they have no parent
yet). Subsequently, in the pcibios_init() path, pci_claim_resource()
allocates the BIOS-assigned ranges for these devices.

  hotplug CPU                              kernel init CPU
  enable_slot
  ...
   __dev_sort_resources

   //link all resources behind the bus
   //into head
                                           pcibios_init
                                           ...
                                             pcibios_allocate_bus_resources
                                           //alloc resource for all bus
   //resources linked into head have
   //sibling and parent

However, in the subsequent steps of enable_slot(), certain resources may
be reallocated due to the x86 alignment rule -- "0x000-0x0ff region
modulo 0x400" (see pcibios_align_resource). During this reallocation,
the alignment can leave gaps, so the allocation fails and the resource
is reset. Because this resource has already been linked into
bus->resource[]->child by the kernel initialization path, a bogus
resource range [0, 0] appears in that chain. This causes subsequent
devices to be allocated ranges that conflict with other resources. For a
detailed analysis, see [1].

This patch makes the hotplug path wait until pcibios_init() has
completed during kernel initialization. (However, I am not sure whether
this change is appropriate, so I would appreciate your advice.)

[1]: https://github.com/cai-fuqiang/md/blob/master/case/guestkernel_hotplug_BUG_ON/kernel_panic.md

Signed-off-by: fuqiang wang <fuqiang.wng@...il.com>
---
 arch/x86/pci/common.c | 16 ++++++++++++++++
 drivers/acpi/scan.c   |  6 ++++++
 include/linux/pci.h   |  1 +
 3 files changed, 23 insertions(+)

diff --git a/arch/x86/pci/common.c b/arch/x86/pci/common.c
index ddb798603201..06ff04ab2a26 100644
--- a/arch/x86/pci/common.c
+++ b/arch/x86/pci/common.c
@@ -37,6 +37,8 @@ unsigned long pirq_table_addr;
 const struct pci_raw_ops *__read_mostly raw_pci_ops;
 const struct pci_raw_ops *__read_mostly raw_pci_ext_ops;
 
+DECLARE_COMPLETION(pcibios_init_completion);
+
 int raw_pci_read(unsigned int domain, unsigned int bus, unsigned int devfn,
 						int reg, int len, u32 *val)
 {
@@ -498,6 +500,17 @@ void __init pcibios_set_cache_line_size(void)
 	}
 }
 
+static DEFINE_STATIC_KEY_FALSE(pcibios_init_done);
+
+void arch_wait_pcibios_init_complete(void)
+{
+	if (static_branch_likely(&pcibios_init_done))
+		return;
+
+	wait_for_completion(&pcibios_init_completion);
+	static_branch_enable(&pcibios_init_done);
+}
+
 int __init pcibios_init(void)
 {
 	if (!raw_pci_ops && !raw_pci_ext_ops) {
@@ -510,6 +523,9 @@ int __init pcibios_init(void)
 
 	if (pci_bf_sort >= pci_force_bf)
 		pci_sort_breadthfirst();
+
+	complete(&pcibios_init_completion);
+
 	return 0;
 }
 
diff --git a/drivers/acpi/scan.c b/drivers/acpi/scan.c
index 9f4efa8f75a6..a66fbc262fb8 100644
--- a/drivers/acpi/scan.c
+++ b/drivers/acpi/scan.c
@@ -21,6 +21,7 @@
 #include <linux/pgtable.h>
 #include <linux/crc32.h>
 #include <linux/dma-direct.h>
+#include <linux/pci.h>
 
 #include "internal.h"
 #include "sleep.h"
@@ -435,12 +436,17 @@ static int acpi_generic_hotplug_event(struct acpi_device *adev, u32 type)
 	return -EINVAL;
 }
 
+void __weak arch_wait_pcibios_init_complete(void) {}
+
 void acpi_device_hotplug(struct acpi_device *adev, u32 src)
 {
 	u32 ost_code = ACPI_OST_SC_NON_SPECIFIC_FAILURE;
 	int error = -ENODEV;
 
 	lock_device_hotplug();
+
+	arch_wait_pcibios_init_complete();
+
 	mutex_lock(&acpi_scan_lock);
 
 	/*
diff --git a/include/linux/pci.h b/include/linux/pci.h
index 47b31ad724fa..8078b68a9b0f 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -2356,6 +2356,7 @@ static inline void pcibios_penalize_isa_irq(int irq, int active) {}
 int pcibios_alloc_irq(struct pci_dev *dev);
 void pcibios_free_irq(struct pci_dev *dev);
 resource_size_t pcibios_default_alignment(void);
+void arch_wait_pcibios_init_complete(void);
 
 #if !defined(HAVE_PCI_MMAP) && !defined(ARCH_GENERIC_PCI_MMAP_RESOURCE)
 extern int pci_create_resource_files(struct pci_dev *dev);
-- 
2.47.0


Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ