[<prev] [next>] [thread-next>] [day] [month] [year] [list]
Message-ID: <20250115013753.49126-1-xueshuai@linux.alibaba.com>
Date: Wed, 15 Jan 2025 09:37:53 +0800
From: Shuai Xue <xueshuai@...ux.alibaba.com>
To: rostedt@...dmis.org,
lukas@...ner.de,
linux-pci@...r.kernel.org,
linux-kernel@...r.kernel.org,
linux-edac@...r.kernel.org,
linux-trace-kernel@...r.kernel.org,
helgaas@...nel.org
Cc: bhelgaas@...gle.com,
tony.luck@...el.com,
bp@...en8.de,
xueshuai@...ux.alibaba.com,
mhiramat@...nel.org,
mathieu.desnoyers@...icios.com,
oleg@...hat.com,
naveen@...nel.org,
davem@...emloft.net,
anil.s.keshavamurthy@...el.com,
mark.rutland@....com,
peterz@...radead.org,
tianruidong@...ux.alibaba.com
Subject: [PATCH v6] PCI: hotplug: Add a generic RAS tracepoint for hotplug event
Hotplug events are critical indicators for analyzing hardware health,
particularly in AI supercomputers where surprise link downs can
significantly impact system performance and reliability.
To this end, define a new TRACING_SYSTEM named pci, add a generic RAS
tracepoint for hotplug event to help healthy check, and generate
tracepoints for pcie hotplug event. Add enum pci_hotplug_event in
include/uapi/linux/pci.h so applications like rasdaemon can register
tracepoint event handlers for it.
The output like below:
$ echo 1 > /sys/kernel/debug/tracing/events/pci/pci_hp_event/enable
$ cat /sys/kernel/debug/tracing/trace_pipe
<...>-206 [001] ..... 40.373870: pci_hp_event: 0000:00:02.0 slot:10, event:Link Down
<...>-206 [001] ..... 40.374871: pci_hp_event: 0000:00:02.0 slot:10, event:Card not present
Suggested-by: Lukas Wunner <lukas@...ner.de>
Suggested-by: Steven Rostedt <rostedt@...dmis.org>
Signed-off-by: Shuai Xue <xueshuai@...ux.alibaba.com>
Reviewed-by: Lukas Wunner <lukas@...ner.de>
---
changes since v5:
- move define of enum to include/uapi/linux/pci.h
---
drivers/pci/hotplug/pciehp_ctrl.c | 33 ++++++++++++++++++-----
drivers/pci/hotplug/trace.h | 45 +++++++++++++++++++++++++++++++
include/uapi/linux/pci.h | 31 +++++++++++++++++++++
3 files changed, 103 insertions(+), 6 deletions(-)
create mode 100644 drivers/pci/hotplug/trace.h
diff --git a/drivers/pci/hotplug/pciehp_ctrl.c b/drivers/pci/hotplug/pciehp_ctrl.c
index d603a7aa7483..f9beb4d3a9b8 100644
--- a/drivers/pci/hotplug/pciehp_ctrl.c
+++ b/drivers/pci/hotplug/pciehp_ctrl.c
@@ -23,6 +23,9 @@
#include "../pci.h"
#include "pciehp.h"
+#define CREATE_TRACE_POINTS
+#include "trace.h"
+
/* The following routines constitute the bulk of the
hotplug controller logic
*/
@@ -244,12 +247,20 @@ void pciehp_handle_presence_or_link_change(struct controller *ctrl, u32 events)
case ON_STATE:
ctrl->state = POWEROFF_STATE;
mutex_unlock(&ctrl->state_lock);
- if (events & PCI_EXP_SLTSTA_DLLSC)
+ if (events & PCI_EXP_SLTSTA_DLLSC) {
ctrl_info(ctrl, "Slot(%s): Link Down\n",
slot_name(ctrl));
- if (events & PCI_EXP_SLTSTA_PDC)
+ trace_pci_hp_event(pci_name(ctrl->pcie->port),
+ slot_name(ctrl),
+ PCI_HOTPLUG_LINK_DOWN);
+ }
+ if (events & PCI_EXP_SLTSTA_PDC) {
ctrl_info(ctrl, "Slot(%s): Card not present\n",
slot_name(ctrl));
+ trace_pci_hp_event(pci_name(ctrl->pcie->port),
+ slot_name(ctrl),
+ PCI_HOTPLUG_CARD_NOT_PRESENT);
+ }
pciehp_disable_slot(ctrl, SURPRISE_REMOVAL);
break;
default:
@@ -269,6 +280,9 @@ void pciehp_handle_presence_or_link_change(struct controller *ctrl, u32 events)
INDICATOR_NOOP);
ctrl_info(ctrl, "Slot(%s): Card not present\n",
slot_name(ctrl));
+ trace_pci_hp_event(pci_name(ctrl->pcie->port),
+ slot_name(ctrl),
+ PCI_HOTPLUG_CARD_NOT_PRESENT);
}
mutex_unlock(&ctrl->state_lock);
return;
@@ -281,12 +295,19 @@ void pciehp_handle_presence_or_link_change(struct controller *ctrl, u32 events)
case OFF_STATE:
ctrl->state = POWERON_STATE;
mutex_unlock(&ctrl->state_lock);
- if (present)
+ if (present) {
ctrl_info(ctrl, "Slot(%s): Card present\n",
slot_name(ctrl));
- if (link_active)
- ctrl_info(ctrl, "Slot(%s): Link Up\n",
- slot_name(ctrl));
+ trace_pci_hp_event(pci_name(ctrl->pcie->port),
+ slot_name(ctrl),
+ PCI_HOTPLUG_CARD_PRESENT);
+ }
+ if (link_active) {
+ ctrl_info(ctrl, "Slot(%s): Link Up\n", slot_name(ctrl));
+ trace_pci_hp_event(pci_name(ctrl->pcie->port),
+ slot_name(ctrl),
+ PCI_HOTPLUG_LINK_UP);
+ }
ctrl->request_result = pciehp_enable_slot(ctrl);
break;
default:
diff --git a/drivers/pci/hotplug/trace.h b/drivers/pci/hotplug/trace.h
new file mode 100644
index 000000000000..1415ac505cb5
--- /dev/null
+++ b/drivers/pci/hotplug/trace.h
@@ -0,0 +1,45 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#if !defined(_TRACE_HW_EVENT_PCI_HP_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_HW_EVENT_PCI_HP_H
+
+#include <linux/tracepoint.h>
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM pci
+
+TRACE_EVENT(pci_hp_event,
+
+ TP_PROTO(const char *port_name,
+ const char *slot,
+ const int event),
+
+ TP_ARGS(port_name, slot, event),
+
+ TP_STRUCT__entry(
+ __string( port_name, port_name )
+ __string( slot, slot )
+ __field( int, event )
+ ),
+
+ TP_fast_assign(
+ __assign_str(port_name);
+ __assign_str(slot);
+ __entry->event = event;
+ ),
+
+ TP_printk("%s slot:%s, event:%s\n",
+ __get_str(port_name),
+ __get_str(slot),
+ __print_symbolic(__entry->event, PCI_HOTPLUG_EVENT)
+ )
+);
+
+#endif /* _TRACE_HW_EVENT_PCI_HP_H */
+
+#undef TRACE_INCLUDE_PATH
+#define TRACE_INCLUDE_PATH ../../drivers/pci/hotplug
+#undef TRACE_INCLUDE_FILE
+#define TRACE_INCLUDE_FILE trace
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
diff --git a/include/uapi/linux/pci.h b/include/uapi/linux/pci.h
index a769eefc5139..58a8ad9389e3 100644
--- a/include/uapi/linux/pci.h
+++ b/include/uapi/linux/pci.h
@@ -19,6 +19,7 @@
#define _UAPILINUX_PCI_H
#include <linux/pci_regs.h> /* The pci register defines */
+#include <linux/tracepoint.h>
/*
* The PCI interface treats multi-function devices as independent
@@ -39,4 +40,34 @@
#define PCIIOC_MMAP_IS_MEM (PCIIOC_BASE | 0x02) /* Set mmap state to MEM space. */
#define PCIIOC_WRITE_COMBINE (PCIIOC_BASE | 0x03) /* Enable/disable write-combining. */
+#define PCI_HOTPLUG_EVENT \
+ EM(PCI_HOTPLUG_LINK_UP, "Link Up") \
+ EM(PCI_HOTPLUG_LINK_DOWN, "Link Down") \
+ EM(PCI_HOTPLUG_CARD_PRESENT, "Card present") \
+ EMe(PCI_HOTPLUG_CARD_NOT_PRESENT, "Card not present")
+
+/* Enums require being exported to userspace, for user tool parsing */
+#undef EM
+#undef EMe
+#define EM(a, b) TRACE_DEFINE_ENUM(a);
+#define EMe(a, b) TRACE_DEFINE_ENUM(a);
+
+enum pci_hotplug_event {
+ PCI_HOTPLUG_LINK_UP,
+ PCI_HOTPLUG_LINK_DOWN,
+ PCI_HOTPLUG_CARD_PRESENT,
+ PCI_HOTPLUG_CARD_NOT_PRESENT,
+};
+
+PCI_HOTPLUG_EVENT
+
+/*
+ * Now redefine the EM() and EMe() macros to map the enums to the strings
+ * that will be printed in the output.
+ */
+#undef EM
+#undef EMe
+#define EM(a, b) {a, b},
+#define EMe(a, b) {a, b}
+
#endif /* _UAPILINUX_PCI_H */
--
2.39.3
Powered by blists - more mailing lists