diff mbox series

[v3] PCI: hotplug: Add a generic RAS tracepoint for hotplug event

Message ID 20241120124328.19111-1-xueshuai@linux.alibaba.com (mailing list archive)
State Handled Elsewhere
Headers show
Series [v3] PCI: hotplug: Add a generic RAS tracepoint for hotplug event | expand

Commit Message

Shuai Xue Nov. 20, 2024, 12:43 p.m. UTC
Hotplug events are critical indicators for analyzing hardware health,
particularly in AI supercomputers where surprise link downs can
significantly impact system performance and reliability. The failure
characterization analysis illustrates the significance of failures
caused by the Infiniband link errors. Meta observes that 2% in a machine
learning cluster and 6% in a vision application cluster of Infiniband
failures co-occur with GPU failures, such as falling off the bus, which
may indicate a correlation with PCIe.[1]

To enhance hardware health monitoring and diagnostics, introduces a generic
tracepoint for hotplug events. Additionally, specific tracepoints for PCIe
hotplug events are generated. To enable userspace monitoring of these
tracepoints, for instance using tools like rasdaemon, the
`pci_hotplug_event` is included in the UAPI (User API) header.

$ echo 1 > /sys/kernel/debug/tracing/events/hotplug/pci_hp_event/enable
$ cat /sys/kernel/debug/tracing/trace_pipe
           <...>-206     [001] .....    40.373870: pci_hp_event: 0000:00:02.0 slot:10, event:Link Down

           <...>-206     [001] .....    40.374871: pci_hp_event: 0000:00:02.0 slot:10, event:Card not present

[1]https://arxiv.org/abs/2410.21680

Signed-off-by: Shuai Xue <xueshuai@linux.alibaba.com>
---
changes since v2 per Lukas:
- rewrite commit log to note why add pci_hotplug_event in the UAPI header.
- define TRACE_SYSTEM as pci_hotplug to avoid conflict with other
- squash previous two commits into one.
- rename _TRANS_STATE as _EVENT
- rename PCI_HOTPLUG_CARD_NO_PRESENT as PCI_HOTPLUG_CARD_NOT_PRESENT
---
 drivers/pci/hotplug/pciehp_ctrl.c | 33 ++++++++++++---
 drivers/pci/hotplug/trace.h       | 68 +++++++++++++++++++++++++++++++
 include/uapi/linux/pci.h          |  7 ++++
 3 files changed, 102 insertions(+), 6 deletions(-)
 create mode 100644 drivers/pci/hotplug/trace.h

Comments

Lukas Wunner Nov. 21, 2024, 9:27 a.m. UTC | #1
On Wed, Nov 20, 2024 at 08:43:28PM +0800, Shuai Xue wrote:
> $ echo 1 > /sys/kernel/debug/tracing/events/hotplug/pci_hp_event/enable
                                              ^^^^^^^
I think this should now be "pci_hotplug" because you've renamed the
TRACE_SYSTEM in v3.

I'm wondering if we'll have other categories besides "pci_hp_event"
below "pci_hotplug".  Maybe not.  Is it possible to omit the "pci_hotplug"
and make "pci_hp_event" top level?  Or should this be grouped below "pci"
instead of "pci_hotplug"?  I'm somewhat at a loss here as I'm not
familiar with the conventions used in the tracing subsystem.

From a PCI hotplug perspective, this patch LGTM, so:

Reviewed-by: Lukas Wunner <lukas@wunner.de>
Shuai Xue Nov. 21, 2024, 11:34 a.m. UTC | #2
在 2024/11/21 17:27, Lukas Wunner 写道:
> On Wed, Nov 20, 2024 at 08:43:28PM +0800, Shuai Xue wrote:
>> $ echo 1 > /sys/kernel/debug/tracing/events/hotplug/pci_hp_event/enable
>                                                ^^^^^^^
> I think this should now be "pci_hotplug" because you've renamed the
> TRACE_SYSTEM in v3.

Yes, you are right. Will fix it.

> 
> I'm wondering if we'll have other categories besides "pci_hp_event"
> below "pci_hotplug".  Maybe not.  Is it possible to omit the "pci_hotplug"
> and make "pci_hp_event" top level?  Or should this be grouped below "pci"
> instead of "pci_hotplug"?  I'm somewhat at a loss here as I'm not
> familiar with the conventions used in the tracing subsystem.

Sure, I can reorganize the directory. You have two optional proposals:

1. /sys/kernel/debug/tracing/events/pci_hp_event
2. /sys/kernel/debug/tracing/events/pci/pci_hp_event

I personally prefer the second approach. However, I'll defer the final decision
to the tracing subsystem maintainer, considering their expertise and
familiarity with the existing conventions.

> 
>  From a PCI hotplug perspective, this patch LGTM, so:
> 
> Reviewed-by: Lukas Wunner <lukas@wunner.de>


Thank you.

Best Regards,
Shuai
diff mbox series

Patch

diff --git a/drivers/pci/hotplug/pciehp_ctrl.c b/drivers/pci/hotplug/pciehp_ctrl.c
index dcdbfcf404dd..c836462ff067 100644
--- a/drivers/pci/hotplug/pciehp_ctrl.c
+++ b/drivers/pci/hotplug/pciehp_ctrl.c
@@ -21,6 +21,9 @@ 
 #include <linux/pci.h>
 #include "pciehp.h"
 
+#define CREATE_TRACE_POINTS
+#include "trace.h"
+
 /* The following routines constitute the bulk of the
    hotplug controller logic
  */
@@ -239,12 +242,20 @@  void pciehp_handle_presence_or_link_change(struct controller *ctrl, u32 events)
 	case ON_STATE:
 		ctrl->state = POWEROFF_STATE;
 		mutex_unlock(&ctrl->state_lock);
-		if (events & PCI_EXP_SLTSTA_DLLSC)
+		if (events & PCI_EXP_SLTSTA_DLLSC) {
 			ctrl_info(ctrl, "Slot(%s): Link Down\n",
 				  slot_name(ctrl));
-		if (events & PCI_EXP_SLTSTA_PDC)
+			trace_pci_hp_event(pci_name(ctrl->pcie->port),
+					   slot_name(ctrl),
+					   PCI_HOTPLUG_LINK_DOWN);
+		}
+		if (events & PCI_EXP_SLTSTA_PDC) {
 			ctrl_info(ctrl, "Slot(%s): Card not present\n",
 				  slot_name(ctrl));
+			trace_pci_hp_event(pci_name(ctrl->pcie->port),
+					   slot_name(ctrl),
+					   PCI_HOTPLUG_CARD_NOT_PRESENT);
+		}
 		pciehp_disable_slot(ctrl, SURPRISE_REMOVAL);
 		break;
 	default:
@@ -264,6 +275,9 @@  void pciehp_handle_presence_or_link_change(struct controller *ctrl, u32 events)
 					      INDICATOR_NOOP);
 			ctrl_info(ctrl, "Slot(%s): Card not present\n",
 				  slot_name(ctrl));
+			trace_pci_hp_event(pci_name(ctrl->pcie->port),
+					   slot_name(ctrl),
+					   PCI_HOTPLUG_CARD_NOT_PRESENT);
 		}
 		mutex_unlock(&ctrl->state_lock);
 		return;
@@ -276,12 +290,19 @@  void pciehp_handle_presence_or_link_change(struct controller *ctrl, u32 events)
 	case OFF_STATE:
 		ctrl->state = POWERON_STATE;
 		mutex_unlock(&ctrl->state_lock);
-		if (present)
+		if (present) {
 			ctrl_info(ctrl, "Slot(%s): Card present\n",
 				  slot_name(ctrl));
-		if (link_active)
-			ctrl_info(ctrl, "Slot(%s): Link Up\n",
-				  slot_name(ctrl));
+			trace_pci_hp_event(pci_name(ctrl->pcie->port),
+					   slot_name(ctrl),
+					   PCI_HOTPLUG_CARD_PRESENT);
+		}
+		if (link_active) {
+			ctrl_info(ctrl, "Slot(%s): Link Up\n", slot_name(ctrl));
+			trace_pci_hp_event(pci_name(ctrl->pcie->port),
+					   slot_name(ctrl),
+					   PCI_HOTPLUG_LINK_UP);
+		}
 		ctrl->request_result = pciehp_enable_slot(ctrl);
 		break;
 	default:
diff --git a/drivers/pci/hotplug/trace.h b/drivers/pci/hotplug/trace.h
new file mode 100644
index 000000000000..45e4ca8b83c4
--- /dev/null
+++ b/drivers/pci/hotplug/trace.h
@@ -0,0 +1,68 @@ 
+/* SPDX-License-Identifier: GPL-2.0 */
+#if !defined(_TRACE_HW_EVENT_PCI_HP_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_HW_EVENT_PCI_HP_H
+
+#include <linux/tracepoint.h>
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM pci_hotplug
+
+#define PCI_HOTPLUG_EVENT					\
+	EM(PCI_HOTPLUG_LINK_UP,			"Link Up")	\
+	EM(PCI_HOTPLUG_LINK_DOWN,		"Link Down")	\
+	EM(PCI_HOTPLUG_CARD_PRESENT,		"Card present")	\
+	EMe(PCI_HOTPLUG_CARD_NOT_PRESENT,	"Card not present")
+
+/* Enums require being exported to userspace, for user tool parsing */
+#undef EM
+#undef EMe
+#define EM(a, b)	TRACE_DEFINE_ENUM(a);
+#define EMe(a, b)	TRACE_DEFINE_ENUM(a);
+
+PCI_HOTPLUG_EVENT
+
+/*
+ * Now redefine the EM() and EMe() macros to map the enums to the strings
+ * that will be printed in the output.
+ */
+#undef EM
+#undef EMe
+#define EM(a, b)	{a, b},
+#define EMe(a, b)	{a, b}
+
+TRACE_EVENT(pci_hp_event,
+
+	TP_PROTO(const char *port_name,
+		 const char *slot,
+		 const int event),
+
+	TP_ARGS(port_name, slot, event),
+
+	TP_STRUCT__entry(
+		__string(	port_name,	port_name	)
+		__string(	slot,		slot		)
+		__field(	int,		event	)
+	),
+
+	TP_fast_assign(
+		__assign_str(port_name);
+		__assign_str(slot);
+		__entry->event = event;
+	),
+
+	TP_printk("%s slot:%s, event:%s\n",
+		__get_str(port_name),
+		__get_str(slot),
+		__print_symbolic(__entry->event, PCI_HOTPLUG_EVENT)
+	)
+);
+
+#endif /* _TRACE_HW_EVENT_PCI_HP_H */
+
+#undef TRACE_INCLUDE_PATH
+#define TRACE_INCLUDE_PATH  ../../drivers/pci/hotplug
+#undef TRACE_INCLUDE_FILE
+#define TRACE_INCLUDE_FILE trace
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
diff --git a/include/uapi/linux/pci.h b/include/uapi/linux/pci.h
index a769eefc5139..4f150028965d 100644
--- a/include/uapi/linux/pci.h
+++ b/include/uapi/linux/pci.h
@@ -39,4 +39,11 @@ 
 #define PCIIOC_MMAP_IS_MEM	(PCIIOC_BASE | 0x02)	/* Set mmap state to MEM space. */
 #define PCIIOC_WRITE_COMBINE	(PCIIOC_BASE | 0x03)	/* Enable/disable write-combining. */
 
+enum pci_hotplug_event {
+	PCI_HOTPLUG_LINK_UP,
+	PCI_HOTPLUG_LINK_DOWN,
+	PCI_HOTPLUG_CARD_PRESENT,
+	PCI_HOTPLUG_CARD_NOT_PRESENT,
+};
+
 #endif /* _UAPILINUX_PCI_H */