Message ID | 20250327014717.2988633-3-terry.bowman@amd.com (mailing list archive) |
---|---|
State | New |
Delegated to: | Bjorn Helgaas |
Headers | show |
Series | Enable CXL PCIe port protocol error handling and logging | expand |
On Wed, Mar 26, 2025 at 08:47:03PM -0500, Terry Bowman wrote: > The AER service driver and aer_event tracing currently log 'PCIe Bus Type' > for all errors. Update the driver and aer_event tracing to log 'CXL Bus > Type' for CXL device errors. > > This requires the AER can identify and distinguish between PCIe errors and > CXL errors. > > Introduce boolean 'is_cxl' to 'struct aer_err_info'. Add assignment in > aer_get_device_error_info() and pci_print_aer(). > > Update the aer_event trace routine to accept a bus type string parameter. > +++ b/drivers/pci/pci.h > @@ -533,6 +533,7 @@ static inline bool pci_dev_test_and_set_removed(struct pci_dev *dev) > struct aer_err_info { > struct pci_dev *dev[AER_MAX_MULTI_ERR_DEVICES]; > int error_dev_num; > + bool is_cxl; > > unsigned int id:16; > > @@ -549,6 +550,11 @@ struct aer_err_info { > struct pcie_tlp_log tlp; /* TLP Header */ > }; > > +static inline const char *aer_err_bus(struct aer_err_info *info) > +{ > + return info->is_cxl ? "CXL" : "PCIe"; I don't really see the point in adding struct aer_err_info.is_cxl. Every place where we call aer_err_bus() to look at it, we also have the struct pci_dev pointer, so we could just as easily use pcie_is_cxl() here. > +} > + > int aer_get_device_error_info(struct pci_dev *dev, struct aer_err_info *info); > void aer_print_error(struct pci_dev *dev, struct aer_err_info *info); > > diff --git a/drivers/pci/pcie/aer.c b/drivers/pci/pcie/aer.c > index 508474e17183..83f2069f111e 100644 > --- a/drivers/pci/pcie/aer.c > +++ b/drivers/pci/pcie/aer.c > @@ -694,13 +694,14 @@ static void __aer_print_error(struct pci_dev *dev, > > void aer_print_error(struct pci_dev *dev, struct aer_err_info *info) > { > + const char *bus_type = aer_err_bus(info); > int layer, agent; > int id = pci_dev_id(dev); > const char *level; > > if (!info->status) { > - pci_err(dev, "PCIe Bus Error: severity=%s, type=Inaccessible, (Unregistered Agent ID)\n", > - aer_error_severity_string[info->severity]); > + pci_err(dev, "%s Bus Error: severity=%s, type=Inaccessible, (Unregistered Agent ID)\n", > + bus_type, aer_error_severity_string[info->severity]); > goto out; > } > > @@ -709,8 +710,8 @@ void aer_print_error(struct pci_dev *dev, struct aer_err_info *info) > > level = (info->severity == AER_CORRECTABLE) ? KERN_WARNING : KERN_ERR; > > - pci_printk(level, dev, "PCIe Bus Error: severity=%s, type=%s, (%s)\n", > - aer_error_severity_string[info->severity], > + pci_printk(level, dev, "%s Bus Error: severity=%s, type=%s, (%s)\n", > + bus_type, aer_error_severity_string[info->severity], > aer_error_layer[layer], aer_agent_string[agent]); > > pci_printk(level, dev, " device [%04x:%04x] error status/mask=%08x/%08x\n", > @@ -725,7 +726,7 @@ void aer_print_error(struct pci_dev *dev, struct aer_err_info *info) > if (info->id && info->error_dev_num > 1 && info->id == id) > pci_err(dev, " Error of this Agent is reported first\n"); > > - trace_aer_event(dev_name(&dev->dev), (info->status & ~info->mask), > + trace_aer_event(dev_name(&dev->dev), bus_type, (info->status & ~info->mask), > info->severity, info->tlp_header_valid, &info->tlp); > } > > @@ -759,6 +760,7 @@ EXPORT_SYMBOL_GPL(cper_severity_to_aer); > void pci_print_aer(struct pci_dev *dev, int aer_severity, > struct aer_capability_regs *aer) > { > + const char *bus_type; > int layer, agent, tlp_header_valid = 0; > u32 status, mask; > struct aer_err_info info; > @@ -780,6 +782,9 @@ void pci_print_aer(struct pci_dev *dev, int aer_severity, > info.status = status; > info.mask = mask; > info.first_error = PCI_ERR_CAP_FEP(aer->cap_control); > + info.is_cxl = pcie_is_cxl(dev); > + > + bus_type = aer_err_bus(&info); > > pci_err(dev, "aer_status: 0x%08x, aer_mask: 0x%08x\n", status, mask); > __aer_print_error(dev, &info); > @@ -793,7 +798,7 @@ void pci_print_aer(struct pci_dev *dev, int aer_severity, > if (tlp_header_valid) > pcie_print_tlp_log(dev, &aer->header_log, dev_fmt(" ")); > > - trace_aer_event(dev_name(&dev->dev), (status & ~mask), > + trace_aer_event(dev_name(&dev->dev), bus_type, (status & ~mask), > aer_severity, tlp_header_valid, &aer->header_log); > } > EXPORT_SYMBOL_NS_GPL(pci_print_aer, "CXL"); > @@ -1211,6 +1216,7 @@ int aer_get_device_error_info(struct pci_dev *dev, struct aer_err_info *info) > /* Must reset in this function */ > info->status = 0; > info->tlp_header_valid = 0; > + info->is_cxl = pcie_is_cxl(dev); > > /* The device might not support AER */ > if (!aer) > diff --git a/include/ras/ras_event.h b/include/ras/ras_event.h > index e5f7ee0864e7..1bf8e7050ba8 100644 > --- a/include/ras/ras_event.h > +++ b/include/ras/ras_event.h > @@ -297,15 +297,17 @@ TRACE_EVENT(non_standard_event, > > TRACE_EVENT(aer_event, > TP_PROTO(const char *dev_name, > + const char *bus_type, > const u32 status, > const u8 severity, > const u8 tlp_header_valid, > struct pcie_tlp_log *tlp), > > - TP_ARGS(dev_name, status, severity, tlp_header_valid, tlp), > + TP_ARGS(dev_name, bus_type, status, severity, tlp_header_valid, tlp), > > TP_STRUCT__entry( > __string( dev_name, dev_name ) > + __string( bus_type, bus_type ) > __field( u32, status ) > __field( u8, severity ) > __field( u8, tlp_header_valid) > @@ -314,6 +316,7 @@ TRACE_EVENT(aer_event, > > TP_fast_assign( > __assign_str(dev_name); > + __assign_str(bus_type); > __entry->status = status; > __entry->severity = severity; > __entry->tlp_header_valid = tlp_header_valid; > @@ -325,8 +328,8 @@ TRACE_EVENT(aer_event, > } > ), > > - TP_printk("%s PCIe Bus Error: severity=%s, %s, TLP Header=%s\n", > - __get_str(dev_name), > + TP_printk("%s %s Bus Error: severity=%s, %s, TLP Header=%s\n", > + __get_str(dev_name), __get_str(bus_type), > __entry->severity == AER_CORRECTABLE ? "Corrected" : > __entry->severity == AER_FATAL ? > "Fatal" : "Uncorrected, non-fatal", > -- > 2.34.1 >
Terry Bowman wrote: > The AER service driver and aer_event tracing currently log 'PCIe Bus Type' > for all errors. Update the driver and aer_event tracing to log 'CXL Bus > Type' for CXL device errors. > > This requires the AER can identify and distinguish between PCIe errors and > CXL errors. > > Introduce boolean 'is_cxl' to 'struct aer_err_info'. Add assignment in > aer_get_device_error_info() and pci_print_aer(). > > Update the aer_event trace routine to accept a bus type string parameter. > > Signed-off-by: Terry Bowman <terry.bowman@amd.com> I'm unsure how Karolina's new populate_aer_err_info() will get the new is_cxl flag.[1] Generally though this looks ok. Reviewed-by: Ira Weiny <ira.weiny@intel.com> [1] https://lore.kernel.org/all/81c040d54209627de2d8b150822636b415834c7f.1742900213.git.karolina.stolarek@oracle.com/ [snip]
On 3/27/2025 11:48 AM, Bjorn Helgaas wrote: > On Wed, Mar 26, 2025 at 08:47:03PM -0500, Terry Bowman wrote: >> The AER service driver and aer_event tracing currently log 'PCIe Bus Type' >> for all errors. Update the driver and aer_event tracing to log 'CXL Bus >> Type' for CXL device errors. >> >> This requires the AER can identify and distinguish between PCIe errors and >> CXL errors. >> >> Introduce boolean 'is_cxl' to 'struct aer_err_info'. Add assignment in >> aer_get_device_error_info() and pci_print_aer(). >> >> Update the aer_event trace routine to accept a bus type string parameter. >> +++ b/drivers/pci/pci.h >> @@ -533,6 +533,7 @@ static inline bool pci_dev_test_and_set_removed(struct pci_dev *dev) >> struct aer_err_info { >> struct pci_dev *dev[AER_MAX_MULTI_ERR_DEVICES]; >> int error_dev_num; >> + bool is_cxl; >> >> unsigned int id:16; >> >> @@ -549,6 +550,11 @@ struct aer_err_info { >> struct pcie_tlp_log tlp; /* TLP Header */ >> }; >> >> +static inline const char *aer_err_bus(struct aer_err_info *info) >> +{ >> + return info->is_cxl ? "CXL" : "PCIe"; > I don't really see the point in adding struct aer_err_info.is_cxl. > Every place where we call aer_err_bus() to look at it, we also have > the struct pci_dev pointer, so we could just as easily use > pcie_is_cxl() here. Hi Bjorn, My understanding is Dan wanted the decorated aer_err_info::is_cxl to capture the type of error (CXL or PCIe) and cache it because it could change. That is, the CXL device's alternate protocol could transition down before handling but the CXL driver must keep knowledge of the original state for reporting. But, the alternate protocol transition is not currently detected and used to update pci_dev::is_cxl. pci_dev::is_cxl is currently only assigned during device creation at the moment. DanW, please add detail and/or correct me. Regards, Terry >> +} >> + >> int aer_get_device_error_info(struct pci_dev *dev, struct aer_err_info *info); >> void aer_print_error(struct pci_dev *dev, struct aer_err_info *info); >> >> diff --git a/drivers/pci/pcie/aer.c b/drivers/pci/pcie/aer.c >> index 508474e17183..83f2069f111e 100644 >> --- a/drivers/pci/pcie/aer.c >> +++ b/drivers/pci/pcie/aer.c >> @@ -694,13 +694,14 @@ static void __aer_print_error(struct pci_dev *dev, >> >> void aer_print_error(struct pci_dev *dev, struct aer_err_info *info) >> { >> + const char *bus_type = aer_err_bus(info); >> int layer, agent; >> int id = pci_dev_id(dev); >> const char *level; >> >> if (!info->status) { >> - pci_err(dev, "PCIe Bus Error: severity=%s, type=Inaccessible, (Unregistered Agent ID)\n", >> - aer_error_severity_string[info->severity]); >> + pci_err(dev, "%s Bus Error: severity=%s, type=Inaccessible, (Unregistered Agent ID)\n", >> + bus_type, aer_error_severity_string[info->severity]); >> goto out; >> } >> >> @@ -709,8 +710,8 @@ void aer_print_error(struct pci_dev *dev, struct aer_err_info *info) >> >> level = (info->severity == AER_CORRECTABLE) ? KERN_WARNING : KERN_ERR; >> >> - pci_printk(level, dev, "PCIe Bus Error: severity=%s, type=%s, (%s)\n", >> - aer_error_severity_string[info->severity], >> + pci_printk(level, dev, "%s Bus Error: severity=%s, type=%s, (%s)\n", >> + bus_type, aer_error_severity_string[info->severity], >> aer_error_layer[layer], aer_agent_string[agent]); >> >> pci_printk(level, dev, " device [%04x:%04x] error status/mask=%08x/%08x\n", >> @@ -725,7 +726,7 @@ void aer_print_error(struct pci_dev *dev, struct aer_err_info *info) >> if (info->id && info->error_dev_num > 1 && info->id == id) >> pci_err(dev, " Error of this Agent is reported first\n"); >> >> - trace_aer_event(dev_name(&dev->dev), (info->status & ~info->mask), >> + trace_aer_event(dev_name(&dev->dev), bus_type, (info->status & ~info->mask), >> info->severity, info->tlp_header_valid, &info->tlp); >> } >> >> @@ -759,6 +760,7 @@ EXPORT_SYMBOL_GPL(cper_severity_to_aer); >> void pci_print_aer(struct pci_dev *dev, int aer_severity, >> struct aer_capability_regs *aer) >> { >> + const char *bus_type; >> int layer, agent, tlp_header_valid = 0; >> u32 status, mask; >> struct aer_err_info info; >> @@ -780,6 +782,9 @@ void pci_print_aer(struct pci_dev *dev, int aer_severity, >> info.status = status; >> info.mask = mask; >> info.first_error = PCI_ERR_CAP_FEP(aer->cap_control); >> + info.is_cxl = pcie_is_cxl(dev); >> + >> + bus_type = aer_err_bus(&info); >> >> pci_err(dev, "aer_status: 0x%08x, aer_mask: 0x%08x\n", status, mask); >> __aer_print_error(dev, &info); >> @@ -793,7 +798,7 @@ void pci_print_aer(struct pci_dev *dev, int aer_severity, >> if (tlp_header_valid) >> pcie_print_tlp_log(dev, &aer->header_log, dev_fmt(" ")); >> >> - trace_aer_event(dev_name(&dev->dev), (status & ~mask), >> + trace_aer_event(dev_name(&dev->dev), bus_type, (status & ~mask), >> aer_severity, tlp_header_valid, &aer->header_log); >> } >> EXPORT_SYMBOL_NS_GPL(pci_print_aer, "CXL"); >> @@ -1211,6 +1216,7 @@ int aer_get_device_error_info(struct pci_dev *dev, struct aer_err_info *info) >> /* Must reset in this function */ >> info->status = 0; >> info->tlp_header_valid = 0; >> + info->is_cxl = pcie_is_cxl(dev); >> >> /* The device might not support AER */ >> if (!aer) >> diff --git a/include/ras/ras_event.h b/include/ras/ras_event.h >> index e5f7ee0864e7..1bf8e7050ba8 100644 >> --- a/include/ras/ras_event.h >> +++ b/include/ras/ras_event.h >> @@ -297,15 +297,17 @@ TRACE_EVENT(non_standard_event, >> >> TRACE_EVENT(aer_event, >> TP_PROTO(const char *dev_name, >> + const char *bus_type, >> const u32 status, >> const u8 severity, >> const u8 tlp_header_valid, >> struct pcie_tlp_log *tlp), >> >> - TP_ARGS(dev_name, status, severity, tlp_header_valid, tlp), >> + TP_ARGS(dev_name, bus_type, status, severity, tlp_header_valid, tlp), >> >> TP_STRUCT__entry( >> __string( dev_name, dev_name ) >> + __string( bus_type, bus_type ) >> __field( u32, status ) >> __field( u8, severity ) >> __field( u8, tlp_header_valid) >> @@ -314,6 +316,7 @@ TRACE_EVENT(aer_event, >> >> TP_fast_assign( >> __assign_str(dev_name); >> + __assign_str(bus_type); >> __entry->status = status; >> __entry->severity = severity; >> __entry->tlp_header_valid = tlp_header_valid; >> @@ -325,8 +328,8 @@ TRACE_EVENT(aer_event, >> } >> ), >> >> - TP_printk("%s PCIe Bus Error: severity=%s, %s, TLP Header=%s\n", >> - __get_str(dev_name), >> + TP_printk("%s %s Bus Error: severity=%s, %s, TLP Header=%s\n", >> + __get_str(dev_name), __get_str(bus_type), >> __entry->severity == AER_CORRECTABLE ? "Corrected" : >> __entry->severity == AER_FATAL ? >> "Fatal" : "Uncorrected, non-fatal", >> -- >> 2.34.1 >>
On 3/27/2025 11:58 AM, Ira Weiny wrote: > Terry Bowman wrote: >> The AER service driver and aer_event tracing currently log 'PCIe Bus Type' >> for all errors. Update the driver and aer_event tracing to log 'CXL Bus >> Type' for CXL device errors. >> >> This requires the AER can identify and distinguish between PCIe errors and >> CXL errors. >> >> Introduce boolean 'is_cxl' to 'struct aer_err_info'. Add assignment in >> aer_get_device_error_info() and pci_print_aer(). >> >> Update the aer_event trace routine to accept a bus type string parameter. >> >> Signed-off-by: Terry Bowman <terry.bowman@amd.com> > I'm unsure how Karolina's new populate_aer_err_info() will get the new > is_cxl flag.[1] It looks like the Karolina's patchset and this will likely fall into the same merge window. I will need to take a closer look and possibly make changes for a simpler merge. > Generally though this looks ok. > > Reviewed-by: Ira Weiny <ira.weiny@intel.com> > > [1] https://lore.kernel.org/all/81c040d54209627de2d8b150822636b415834c7f.1742900213.git.karolina.stolarek@oracle.com/ > > [snip] Thanks Ira.
On Thu, Mar 27, 2025 at 12:15:10PM -0500, Bowman, Terry wrote: > On 3/27/2025 11:48 AM, Bjorn Helgaas wrote: > > On Wed, Mar 26, 2025 at 08:47:03PM -0500, Terry Bowman wrote: > >> The AER service driver and aer_event tracing currently log 'PCIe Bus Type' > >> for all errors. Update the driver and aer_event tracing to log 'CXL Bus > >> Type' for CXL device errors. > >> > >> This requires the AER can identify and distinguish between PCIe errors and > >> CXL errors. > >> > >> Introduce boolean 'is_cxl' to 'struct aer_err_info'. Add assignment in > >> aer_get_device_error_info() and pci_print_aer(). > >> > >> Update the aer_event trace routine to accept a bus type string parameter. > >> +++ b/drivers/pci/pci.h > >> @@ -533,6 +533,7 @@ static inline bool pci_dev_test_and_set_removed(struct pci_dev *dev) > >> struct aer_err_info { > >> struct pci_dev *dev[AER_MAX_MULTI_ERR_DEVICES]; > >> int error_dev_num; > >> + bool is_cxl; > >> > >> unsigned int id:16; > >> > >> @@ -549,6 +550,11 @@ struct aer_err_info { > >> struct pcie_tlp_log tlp; /* TLP Header */ > >> }; > >> > >> +static inline const char *aer_err_bus(struct aer_err_info *info) > >> +{ > >> + return info->is_cxl ? "CXL" : "PCIe"; > > > > I don't really see the point in adding struct aer_err_info.is_cxl. > > Every place where we call aer_err_bus() to look at it, we also have > > the struct pci_dev pointer, so we could just as easily use > > pcie_is_cxl() here. > > My understanding is Dan wanted the decorated aer_err_info::is_cxl to > capture the type of error (CXL or PCIe) and cache it because it > could change. That is, the CXL device's alternate protocol could > transition down before handling but the CXL driver must keep > knowledge of the original state for reporting. But, the alternate > protocol transition is not currently detected and used to update > pci_dev::is_cxl. pci_dev::is_cxl is currently only assigned during > device creation at the moment. Sounds like there's always a race here if the CXL/PCIe type isn't captured as part of a hardware error log register. Bjorn
diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h index 01e51db8d285..eed098c134a6 100644 --- a/drivers/pci/pci.h +++ b/drivers/pci/pci.h @@ -533,6 +533,7 @@ static inline bool pci_dev_test_and_set_removed(struct pci_dev *dev) struct aer_err_info { struct pci_dev *dev[AER_MAX_MULTI_ERR_DEVICES]; int error_dev_num; + bool is_cxl; unsigned int id:16; @@ -549,6 +550,11 @@ struct aer_err_info { struct pcie_tlp_log tlp; /* TLP Header */ }; +static inline const char *aer_err_bus(struct aer_err_info *info) +{ + return info->is_cxl ? "CXL" : "PCIe"; +} + int aer_get_device_error_info(struct pci_dev *dev, struct aer_err_info *info); void aer_print_error(struct pci_dev *dev, struct aer_err_info *info); diff --git a/drivers/pci/pcie/aer.c b/drivers/pci/pcie/aer.c index 508474e17183..83f2069f111e 100644 --- a/drivers/pci/pcie/aer.c +++ b/drivers/pci/pcie/aer.c @@ -694,13 +694,14 @@ static void __aer_print_error(struct pci_dev *dev, void aer_print_error(struct pci_dev *dev, struct aer_err_info *info) { + const char *bus_type = aer_err_bus(info); int layer, agent; int id = pci_dev_id(dev); const char *level; if (!info->status) { - pci_err(dev, "PCIe Bus Error: severity=%s, type=Inaccessible, (Unregistered Agent ID)\n", - aer_error_severity_string[info->severity]); + pci_err(dev, "%s Bus Error: severity=%s, type=Inaccessible, (Unregistered Agent ID)\n", + bus_type, aer_error_severity_string[info->severity]); goto out; } @@ -709,8 +710,8 @@ void aer_print_error(struct pci_dev *dev, struct aer_err_info *info) level = (info->severity == AER_CORRECTABLE) ? KERN_WARNING : KERN_ERR; - pci_printk(level, dev, "PCIe Bus Error: severity=%s, type=%s, (%s)\n", - aer_error_severity_string[info->severity], + pci_printk(level, dev, "%s Bus Error: severity=%s, type=%s, (%s)\n", + bus_type, aer_error_severity_string[info->severity], aer_error_layer[layer], aer_agent_string[agent]); pci_printk(level, dev, " device [%04x:%04x] error status/mask=%08x/%08x\n", @@ -725,7 +726,7 @@ void aer_print_error(struct pci_dev *dev, struct aer_err_info *info) if (info->id && info->error_dev_num > 1 && info->id == id) pci_err(dev, " Error of this Agent is reported first\n"); - trace_aer_event(dev_name(&dev->dev), (info->status & ~info->mask), + trace_aer_event(dev_name(&dev->dev), bus_type, (info->status & ~info->mask), info->severity, info->tlp_header_valid, &info->tlp); } @@ -759,6 +760,7 @@ EXPORT_SYMBOL_GPL(cper_severity_to_aer); void pci_print_aer(struct pci_dev *dev, int aer_severity, struct aer_capability_regs *aer) { + const char *bus_type; int layer, agent, tlp_header_valid = 0; u32 status, mask; struct aer_err_info info; @@ -780,6 +782,9 @@ void pci_print_aer(struct pci_dev *dev, int aer_severity, info.status = status; info.mask = mask; info.first_error = PCI_ERR_CAP_FEP(aer->cap_control); + info.is_cxl = pcie_is_cxl(dev); + + bus_type = aer_err_bus(&info); pci_err(dev, "aer_status: 0x%08x, aer_mask: 0x%08x\n", status, mask); __aer_print_error(dev, &info); @@ -793,7 +798,7 @@ void pci_print_aer(struct pci_dev *dev, int aer_severity, if (tlp_header_valid) pcie_print_tlp_log(dev, &aer->header_log, dev_fmt(" ")); - trace_aer_event(dev_name(&dev->dev), (status & ~mask), + trace_aer_event(dev_name(&dev->dev), bus_type, (status & ~mask), aer_severity, tlp_header_valid, &aer->header_log); } EXPORT_SYMBOL_NS_GPL(pci_print_aer, "CXL"); @@ -1211,6 +1216,7 @@ int aer_get_device_error_info(struct pci_dev *dev, struct aer_err_info *info) /* Must reset in this function */ info->status = 0; info->tlp_header_valid = 0; + info->is_cxl = pcie_is_cxl(dev); /* The device might not support AER */ if (!aer) diff --git a/include/ras/ras_event.h b/include/ras/ras_event.h index e5f7ee0864e7..1bf8e7050ba8 100644 --- a/include/ras/ras_event.h +++ b/include/ras/ras_event.h @@ -297,15 +297,17 @@ TRACE_EVENT(non_standard_event, TRACE_EVENT(aer_event, TP_PROTO(const char *dev_name, + const char *bus_type, const u32 status, const u8 severity, const u8 tlp_header_valid, struct pcie_tlp_log *tlp), - TP_ARGS(dev_name, status, severity, tlp_header_valid, tlp), + TP_ARGS(dev_name, bus_type, status, severity, tlp_header_valid, tlp), TP_STRUCT__entry( __string( dev_name, dev_name ) + __string( bus_type, bus_type ) __field( u32, status ) __field( u8, severity ) __field( u8, tlp_header_valid) @@ -314,6 +316,7 @@ TRACE_EVENT(aer_event, TP_fast_assign( __assign_str(dev_name); + __assign_str(bus_type); __entry->status = status; __entry->severity = severity; __entry->tlp_header_valid = tlp_header_valid; @@ -325,8 +328,8 @@ TRACE_EVENT(aer_event, } ), - TP_printk("%s PCIe Bus Error: severity=%s, %s, TLP Header=%s\n", - __get_str(dev_name), + TP_printk("%s %s Bus Error: severity=%s, %s, TLP Header=%s\n", + __get_str(dev_name), __get_str(bus_type), __entry->severity == AER_CORRECTABLE ? "Corrected" : __entry->severity == AER_FATAL ? "Fatal" : "Uncorrected, non-fatal",
The AER service driver and aer_event tracing currently log 'PCIe Bus Type' for all errors. Update the driver and aer_event tracing to log 'CXL Bus Type' for CXL device errors. This requires the AER can identify and distinguish between PCIe errors and CXL errors. Introduce boolean 'is_cxl' to 'struct aer_err_info'. Add assignment in aer_get_device_error_info() and pci_print_aer(). Update the aer_event trace routine to accept a bus type string parameter. Signed-off-by: Terry Bowman <terry.bowman@amd.com> --- drivers/pci/pci.h | 6 ++++++ drivers/pci/pcie/aer.c | 18 ++++++++++++------ include/ras/ras_event.h | 9 ++++++--- 3 files changed, 24 insertions(+), 9 deletions(-)