[<prev] [next>] [thread-next>] [day] [month] [year] [list]
Message-Id: <20231016040132.23824-1-kai.heng.feng@canonical.com>
Date: Mon, 16 Oct 2023 12:01:31 +0800
From: Kai-Heng Feng <kai.heng.feng@...onical.com>
To: bhelgaas@...gle.com
Cc: linux-pm@...r.kernel.org,
linux-mmc@...r.kernel.org,
Kai-Heng Feng <kai.heng.feng@...onical.com>,
Ricky Wu <ricky_wu@...ltek.com>,
Kees Cook <keescook@...omium.org>,
Tony Luck <tony.luck@...el.com>,
"Guilherme G. Piccoli" <gpiccoli@...lia.com>,
Lukas Wunner <lukas@...ner.de>,
linux-pci@...r.kernel.org,
linux-kernel@...r.kernel.org,
linux-hardening@...r.kernel.org,
bpf@...r.kernel.org
Subject: [PATCH] PCI: pciehp: Prevent child devices from doing RPM on PCIe Link Down
Inserting an SD7.0 card into a Realtek card reader can trigger a PCI
slot Link Down event, which causes the following error:
[ 63.898861] pcieport 0000:00:1c.0: pciehp: Slot(8): Link Down
[ 63.912118] BUG: unable to handle page fault for address: ffffb24d403e5010
[ 63.912122] #PF: supervisor read access in kernel mode
[ 63.912125] #PF: error_code(0x0000) - not-present page
[ 63.912126] PGD 100000067 P4D 100000067 PUD 1001fe067 PMD 100d97067 PTE 0
[ 63.912131] Oops: 0000 [#1] PREEMPT SMP PTI
[ 63.912134] CPU: 3 PID: 534 Comm: kworker/3:10 Not tainted 6.4.0 #6
[ 63.912137] Hardware name: To Be Filled By O.E.M. To Be Filled By O.E.M./H370M Pro4, BIOS P3.40 10/25/2018
[ 63.912138] Workqueue: pm pm_runtime_work
[ 63.912144] RIP: 0010:ioread32+0x2e/0x70
[ 63.912148] Code: ff 03 00 77 25 48 81 ff 00 00 01 00 77 14 8b 15 08 d9 54 01 b8 ff ff ff ff 85 d2 75 14 c3 cc cc cc cc 89 fa ed c3 cc cc cc cc <8b> 07 c3 cc cc cc cc 55 83 ea 01 48 89 fe 48 c7 c7 98 6f 15 99 48
[ 63.912150] RSP: 0018:ffffb24d40a5bd78 EFLAGS: 00010296
[ 63.912152] RAX: ffffb24d403e5000 RBX: 0000000000000152 RCX: 000000000000007f
[ 63.912153] RDX: 000000000000ff00 RSI: ffffb24d403e5010 RDI: ffffb24d403e5010
[ 63.912155] RBP: ffffb24d40a5bd98 R08: ffffb24d403e5010 R09: 0000000000000000
[ 63.912156] R10: ffff9074cd95e7f4 R11: 0000000000000003 R12: 000000000000007f
[ 63.912158] R13: ffff9074e1a68c00 R14: ffff9074e1a68d00 R15: 0000000000009003
[ 63.912159] FS: 0000000000000000(0000) GS:ffff90752a180000(0000) knlGS:0000000000000000
[ 63.912161] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[ 63.912162] CR2: ffffb24d403e5010 CR3: 0000000152832006 CR4: 00000000003706e0
[ 63.912164] Call Trace:
[ 63.912165] <TASK>
[ 63.912167] ? show_regs+0x68/0x70
[ 63.912171] ? __die_body+0x20/0x70
[ 63.912173] ? __die+0x2b/0x40
[ 63.912175] ? page_fault_oops+0x160/0x480
[ 63.912177] ? search_bpf_extables+0x63/0x90
[ 63.912180] ? ioread32+0x2e/0x70
[ 63.912183] ? search_exception_tables+0x5f/0x70
[ 63.912186] ? kernelmode_fixup_or_oops+0xa2/0x120
[ 63.912189] ? __bad_area_nosemaphore+0x179/0x230
[ 63.912191] ? bad_area_nosemaphore+0x16/0x20
[ 63.912193] ? do_kern_addr_fault+0x8b/0xa0
[ 63.912195] ? exc_page_fault+0xe5/0x180
[ 63.912198] ? asm_exc_page_fault+0x27/0x30
[ 63.912203] ? ioread32+0x2e/0x70
[ 63.912206] ? rtsx_pci_write_register+0x5b/0x90 [rtsx_pci]
[ 63.912217] rtsx_set_l1off_sub+0x1c/0x30 [rtsx_pci]
[ 63.912226] rts5261_set_l1off_cfg_sub_d0+0x36/0x40 [rtsx_pci]
[ 63.912234] rtsx_pci_runtime_idle+0xc7/0x160 [rtsx_pci]
[ 63.912243] ? __pfx_pci_pm_runtime_idle+0x10/0x10
[ 63.912246] pci_pm_runtime_idle+0x34/0x70
[ 63.912248] rpm_idle+0xc4/0x2b0
[ 63.912251] pm_runtime_work+0x93/0xc0
[ 63.912254] process_one_work+0x21a/0x430
[ 63.912258] worker_thread+0x4a/0x3c0
[ 63.912261] ? __pfx_worker_thread+0x10/0x10
[ 63.912263] kthread+0x106/0x140
[ 63.912266] ? __pfx_kthread+0x10/0x10
[ 63.912268] ret_from_fork+0x29/0x50
[ 63.912273] </TASK>
[ 63.912274] Modules linked in: nvme nvme_core snd_hda_codec_hdmi snd_sof_pci_intel_cnl snd_sof_intel_hda_common snd_hda_codec_realtek snd_hda_codec_generic snd_soc_hdac_hda soundwire_intel ledtrig_audio nls_iso8859_1 soundwire_generic_allocation soundwire_cadence snd_sof_intel_hda_mlink snd_sof_intel_hda snd_sof_pci snd_sof_xtensa_dsp snd_sof snd_sof_utils snd_hda_ext_core snd_soc_acpi_intel_match snd_soc_acpi soundwire_bus snd_soc_core snd_compress ac97_bus snd_pcm_dmaengine snd_hda_intel i915 snd_intel_dspcfg snd_intel_sdw_acpi intel_rapl_msr snd_hda_codec intel_rapl_common snd_hda_core x86_pkg_temp_thermal intel_powerclamp snd_hwdep coretemp snd_pcm kvm_intel drm_buddy ttm mei_hdcp kvm drm_display_helper snd_seq_midi snd_seq_midi_event cec crct10dif_pclmul ghash_clmulni_intel sha512_ssse3 aesni_intel crypto_simd rc_core cryptd rapl snd_rawmidi drm_kms_helper binfmt_misc intel_cstate i2c_algo_bit joydev snd_seq snd_seq_device syscopyarea wmi_bmof snd_timer sysfillrect input_leds snd ee1004 sysimgblt mei_me soundcore
[ 63.912324] mei intel_pch_thermal mac_hid acpi_tad acpi_pad sch_fq_codel msr parport_pc ppdev lp ramoops drm parport reed_solomon efi_pstore ip_tables x_tables autofs4 hid_generic usbhid hid rtsx_pci_sdmmc crc32_pclmul ahci e1000e i2c_i801 i2c_smbus rtsx_pci xhci_pci libahci xhci_pci_renesas video wmi
[ 63.912346] CR2: ffffb24d403e5010
[ 63.912348] ---[ end trace 0000000000000000 ]---
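This happens because the scheduled pm_runtime_idle() is not cancelled, so
the driver's runtime idle callback still runs after the Link Down and
accesses the device's registers.
So use pm_runtime_barrier() to ensure that all devices on the bus stop
their runtime power management actions.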
Link: https://lore.kernel.org/all/2ce258f371234b1f8a1a470d5488d00e@realtek.com/
Tested-by: Ricky Wu <ricky_wu@...ltek.com>
Signed-off-by: Kai-Heng Feng <kai.heng.feng@...onical.com>
---
drivers/pci/hotplug/pciehp_pci.c | 11 ++++++++++-
1 file changed, 10 insertions(+), 1 deletion(-)
diff --git a/drivers/pci/hotplug/pciehp_pci.c b/drivers/pci/hotplug/pciehp_pci.c
index ad12515a4a12..9ae4fa95c8c1 100644
--- a/drivers/pci/hotplug/pciehp_pci.c
+++ b/drivers/pci/hotplug/pciehp_pci.c
@@ -18,9 +18,18 @@
#include <linux/kernel.h>
#include <linux/types.h>
#include <linux/pci.h>
+#include <linux/pm_runtime.h>
#include "../pci.h"
#include "pciehp.h"
+int pci_dev_disconnect(struct pci_dev *pdev, void *unused)
+{
+ pm_runtime_barrier(&pdev->dev);
+ pci_dev_set_disconnected(pdev, NULL);
+
+ return 0;
+}
+
/**
* pciehp_configure_device() - enumerate PCI devices below a hotplug bridge
* @ctrl: PCIe hotplug controller
@@ -98,7 +107,7 @@ void pciehp_unconfigure_device(struct controller *ctrl, bool presence)
__func__, pci_domain_nr(parent), parent->number);
if (!presence)
- pci_walk_bus(parent, pci_dev_set_disconnected, NULL);
+ pci_walk_bus(parent, pci_dev_disconnect, NULL);
pci_lock_rescan_remove();
--
2.34.1
Powered by blists - more mailing lists