Message ID | 20231128160656.166609-5-Benjamin.Cheatham@amd.com |
---|---|
State | Superseded |
Headers | show |
Series | CXL, ACPI, APEI, EINJ: Update EINJ for CXL error types | expand |
Ben Cheatham wrote: > Add functions to the EINJ module to be used in the CXL module for CXL > protocol error injection. The callbacks implement the einj_types and > einj_inject files under /sys/kernel/debug/cxl/portX/dportY. These two > files work in the same way as the available_error_types and error_inject > files under /sys/kernel/debug/apei/einj, but only for CXL error types. > If the dport is enumerated in RCH (CXL 1.1) mode, a CXL 1.1 error is > injected into the dport; a CXL 2.0 error is injected otherwise. > > Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com> > Signed-off-by: Ben Cheatham <Benjamin.Cheatham@amd.com> > --- > drivers/acpi/apei/Kconfig | 3 ++ > drivers/acpi/apei/einj.c | 105 ++++++++++++++++++++++++++++++++++++++ > 2 files changed, 108 insertions(+) > > diff --git a/drivers/acpi/apei/Kconfig b/drivers/acpi/apei/Kconfig > index 6b18f8bc7be3..100c27beb581 100644 > --- a/drivers/acpi/apei/Kconfig > +++ b/drivers/acpi/apei/Kconfig > @@ -11,6 +11,9 @@ config ACPI_APEI > select PSTORE > select UEFI_CPER > depends on HAVE_ACPI_APEI > + imply CXL_BUS > + imply CXL_ACPI > + imply CXL_PORT This goes away with the CONFIG_CXL_EINJ scheme proposed on patch1. > help > APEI allows to report errors (for example from the chipset) > to the operating system. This improves NMI handling > diff --git a/drivers/acpi/apei/einj.c b/drivers/acpi/apei/einj.c > index 330329ac2f1f..98d5e6d60a02 100644 > --- a/drivers/acpi/apei/einj.c > +++ b/drivers/acpi/apei/einj.c > @@ -21,9 +21,11 @@ > #include <linux/nmi.h> > #include <linux/delay.h> > #include <linux/mm.h> > +#include <linux/pci.h> > #include <asm/unaligned.h> > > #include "apei-internal.h" > +#include "../../cxl/cxl.h" EINJ has no business knowing all of the details in cxl.h. In fact all EINJ cares about is dport->dport_dev and dport->rch. I think just add those to the calling convention, see below: > #undef pr_fmt > #define pr_fmt(fmt) "EINJ: " fmt > @@ -627,6 +629,25 @@ static int available_error_type_show(struct seq_file *m, void *v) > > DEFINE_SHOW_ATTRIBUTE(available_error_type); > > +static int cxl_einj_available_error_type(struct seq_file *m, void *v) > +{ > + int cxl_err, rc; > + u32 available_error_type = 0; > + > + rc = einj_get_available_error_type(&available_error_type); > + if (rc) > + return rc; > + > + for (int pos = 0; pos < ARRAY_SIZE(einj_cxl_error_type_string); pos++) { > + cxl_err = ACPI_EINJ_CXL_CACHE_CORRECTABLE << pos; > + > + if (available_error_type & cxl_err) > + seq_puts(m, einj_cxl_error_type_string[pos]); > + } > + > + return 0; > +} > + > static int validate_error_type(u64 type) > { > u32 tval, vendor, available_error_type = 0; > @@ -657,6 +678,68 @@ static int validate_error_type(u64 type) > return 0; > } > > +static int cxl_dport_get_sbdf(struct cxl_dport *dport, u64 *sbdf) > +{ > + struct pci_dev *pdev; > + struct pci_bus *pbus; > + struct pci_host_bridge *bridge; > + u64 seg = 0, bus; > + > + if (!dev_is_pci(dport->dport_dev)) > + return -EINVAL; > + > + pdev = to_pci_dev(dport->dport_dev); > + pbus = pdev->bus; > + bridge = pci_find_host_bridge(pbus); > + > + if (!bridge) > + return -ENODEV; > + > + if (bridge->domain_nr != PCI_DOMAIN_NR_NOT_SET) > + seg = bridge->domain_nr << 24; > + > + bus = pbus->number << 16; > + *sbdf = seg | bus | pdev->devfn; > + > + return 0; > +} > + > +static int cxl_einj_inject_error(struct cxl_dport *dport, u64 type) If you split this into cxl_einj_inject_error() and cxl_einj_inject_rch_error() with following prototypes: static int cxl_einj_inject_error(struct pci_dev *dport_dev, u64 type) static int cxl_einj_inject_rch_error(u64 rcrb, u64 type) ...then you don't need any of the cxl.h definitions. The dev_is_pci() and rch determination details can remain private to CXL. > +{ > + u64 param1 = 0, param2 = 0, param4 = 0; > + u32 flags; > + int rc; > + > + /* Only CXL error types can be specified */ > + if (type & ~CXL_ERROR_MASK || (type & ACPI5_VENDOR_BIT)) > + return -EINVAL; > + > + rc = validate_error_type(type); > + if (rc) > + return rc; > + > + /* > + * If dport is in restricted mode, inject a CXL 1.1 error, > + * otherwise inject a CXL 2.0 error > + */ > + if (dport->rch) { > + if (dport->rcrb.base == CXL_RESOURCE_NONE) > + return -EINVAL; > + > + param1 = (u64) dport->rcrb.base; > + param2 = 0xfffffffffffff000; > + flags = 0x2; > + } else { > + rc = cxl_dport_get_sbdf(dport, ¶m4); > + if (rc) > + return rc; > + > + flags = 0x4; > + } > + > + return einj_error_inject(type, flags, param1, param2, 0, param4); > +} > + > static int error_type_get(void *data, u64 *val) > { > *val = error_type; > @@ -668,6 +751,7 @@ static int error_type_set(void *data, u64 val) > { > int rc; > > + /* CXL error types have to be injected from cxl debugfs */ > if (val & CXL_ERROR_MASK && !(val & ACPI5_VENDOR_BIT)) > return -EINVAL; > > @@ -714,6 +798,7 @@ static int __init einj_init(void) > { > int rc; > acpi_status status; > + struct cxl_einj_ops cxl_ops; > struct apei_exec_context ctx; > > if (acpi_disabled) { > @@ -793,6 +878,15 @@ static int __init einj_init(void) > einj_debug_dir, &vendor_flags); > } > > + if (IS_REACHABLE(CONFIG_CXL_ACPI) || IS_REACHABLE(CONFIG_CXL_PORT)) { > + cxl_ops = (struct cxl_einj_ops) { > + .einj_type = cxl_einj_available_error_type, > + .einj_inject = cxl_einj_inject_error, > + }; > + > + cxl_einj_set_ops_cbs(&cxl_ops); > + } This goes away with the new Kconfig dependency scheme. > + > pr_info("Error INJection is initialized.\n"); > > return 0; > @@ -810,8 +904,18 @@ static int __init einj_init(void) > > static void __exit einj_exit(void) > { > + struct cxl_einj_ops cxl_ops; > struct apei_exec_context ctx; > > + if (IS_REACHABLE(CONFIG_CXL_ACPI) || IS_REACHABLE(CONFIG_CXL_PORT)) { > + cxl_ops = (struct cxl_einj_ops) { > + .einj_type = NULL, > + .einj_inject = NULL, > + }; > + > + cxl_einj_set_ops_cbs(&cxl_ops); > + } > + > if (einj_param) { > acpi_size size = (acpi5) ? > sizeof(struct set_error_type_with_address) : > @@ -832,4 +936,5 @@ module_exit(einj_exit); > > MODULE_AUTHOR("Huang Ying"); > MODULE_DESCRIPTION("APEI Error INJection support"); > +MODULE_IMPORT_NS(CXL); EINJ never references a CXL symbol in the new proposed scheme.
On 12/7/23 6:03 PM, Dan Williams wrote: > Ben Cheatham wrote: >> Add functions to the EINJ module to be used in the CXL module for CXL >> protocol error injection. The callbacks implement the einj_types and >> einj_inject files under /sys/kernel/debug/cxl/portX/dportY. These two >> files work in the same way as the available_error_types and error_inject >> files under /sys/kernel/debug/apei/einj, but only for CXL error types. >> If the dport is enumerated in RCH (CXL 1.1) mode, a CXL 1.1 error is >> injected into the dport; a CXL 2.0 error is injected otherwise. >> >> Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com> >> Signed-off-by: Ben Cheatham <Benjamin.Cheatham@amd.com> >> --- >> drivers/acpi/apei/Kconfig | 3 ++ >> drivers/acpi/apei/einj.c | 105 ++++++++++++++++++++++++++++++++++++++ >> 2 files changed, 108 insertions(+) >> >> diff --git a/drivers/acpi/apei/Kconfig b/drivers/acpi/apei/Kconfig >> index 6b18f8bc7be3..100c27beb581 100644 >> --- a/drivers/acpi/apei/Kconfig >> +++ b/drivers/acpi/apei/Kconfig >> @@ -11,6 +11,9 @@ config ACPI_APEI >> select PSTORE >> select UEFI_CPER >> depends on HAVE_ACPI_APEI >> + imply CXL_BUS >> + imply CXL_ACPI >> + imply CXL_PORT > > This goes away with the CONFIG_CXL_EINJ scheme proposed on patch1. > >> help >> APEI allows to report errors (for example from the chipset) >> to the operating system. This improves NMI handling >> diff --git a/drivers/acpi/apei/einj.c b/drivers/acpi/apei/einj.c >> index 330329ac2f1f..98d5e6d60a02 100644 >> --- a/drivers/acpi/apei/einj.c >> +++ b/drivers/acpi/apei/einj.c >> @@ -21,9 +21,11 @@ >> #include <linux/nmi.h> >> #include <linux/delay.h> >> #include <linux/mm.h> >> +#include <linux/pci.h> >> #include <asm/unaligned.h> >> >> #include "apei-internal.h" >> +#include "../../cxl/cxl.h" > > EINJ has no business knowing all of the details in cxl.h. In fact all > EINJ cares about is dport->dport_dev and dport->rch. I think just add > those to the calling convention, see below: > >> #undef pr_fmt >> #define pr_fmt(fmt) "EINJ: " fmt >> @@ -627,6 +629,25 @@ static int available_error_type_show(struct seq_file *m, void *v) >> >> DEFINE_SHOW_ATTRIBUTE(available_error_type); >> >> +static int cxl_einj_available_error_type(struct seq_file *m, void *v) >> +{ >> + int cxl_err, rc; >> + u32 available_error_type = 0; >> + >> + rc = einj_get_available_error_type(&available_error_type); >> + if (rc) >> + return rc; >> + >> + for (int pos = 0; pos < ARRAY_SIZE(einj_cxl_error_type_string); pos++) { >> + cxl_err = ACPI_EINJ_CXL_CACHE_CORRECTABLE << pos; >> + >> + if (available_error_type & cxl_err) >> + seq_puts(m, einj_cxl_error_type_string[pos]); >> + } >> + >> + return 0; >> +} >> + >> static int validate_error_type(u64 type) >> { >> u32 tval, vendor, available_error_type = 0; >> @@ -657,6 +678,68 @@ static int validate_error_type(u64 type) >> return 0; >> } >> >> +static int cxl_dport_get_sbdf(struct cxl_dport *dport, u64 *sbdf) >> +{ >> + struct pci_dev *pdev; >> + struct pci_bus *pbus; >> + struct pci_host_bridge *bridge; >> + u64 seg = 0, bus; >> + >> + if (!dev_is_pci(dport->dport_dev)) >> + return -EINVAL; >> + >> + pdev = to_pci_dev(dport->dport_dev); >> + pbus = pdev->bus; >> + bridge = pci_find_host_bridge(pbus); >> + >> + if (!bridge) >> + return -ENODEV; >> + >> + if (bridge->domain_nr != PCI_DOMAIN_NR_NOT_SET) >> + seg = bridge->domain_nr << 24; >> + >> + bus = pbus->number << 16; >> + *sbdf = seg | bus | pdev->devfn; >> + >> + return 0; >> +} >> + >> +static int cxl_einj_inject_error(struct cxl_dport *dport, u64 type) > > If you split this into cxl_einj_inject_error() and > cxl_einj_inject_rch_error() with following prototypes: > > static int cxl_einj_inject_error(struct pci_dev *dport_dev, u64 type) > static int cxl_einj_inject_rch_error(u64 rcrb, u64 type) > > ...then you don't need any of the cxl.h definitions. The dev_is_pci() > and rch determination details can remain private to CXL. > Good suggestion. Will do! Thanks, Ben >> +{ >> + u64 param1 = 0, param2 = 0, param4 = 0; >> + u32 flags; >> + int rc; >> + >> + /* Only CXL error types can be specified */ >> + if (type & ~CXL_ERROR_MASK || (type & ACPI5_VENDOR_BIT)) >> + return -EINVAL; >> + >> + rc = validate_error_type(type); >> + if (rc) >> + return rc; >> + >> + /* >> + * If dport is in restricted mode, inject a CXL 1.1 error, >> + * otherwise inject a CXL 2.0 error >> + */ >> + if (dport->rch) { >> + if (dport->rcrb.base == CXL_RESOURCE_NONE) >> + return -EINVAL; >> + >> + param1 = (u64) dport->rcrb.base; >> + param2 = 0xfffffffffffff000; >> + flags = 0x2; >> + } else { >> + rc = cxl_dport_get_sbdf(dport, ¶m4); >> + if (rc) >> + return rc; >> + >> + flags = 0x4; >> + } >> + >> + return einj_error_inject(type, flags, param1, param2, 0, param4); >> +} >> + >> static int error_type_get(void *data, u64 *val) >> { >> *val = error_type; >> @@ -668,6 +751,7 @@ static int error_type_set(void *data, u64 val) >> { >> int rc; >> >> + /* CXL error types have to be injected from cxl debugfs */ >> if (val & CXL_ERROR_MASK && !(val & ACPI5_VENDOR_BIT)) >> return -EINVAL; >> >> @@ -714,6 +798,7 @@ static int __init einj_init(void) >> { >> int rc; >> acpi_status status; >> + struct cxl_einj_ops cxl_ops; >> struct apei_exec_context ctx; >> >> if (acpi_disabled) { >> @@ -793,6 +878,15 @@ static int __init einj_init(void) >> einj_debug_dir, &vendor_flags); >> } >> >> + if (IS_REACHABLE(CONFIG_CXL_ACPI) || IS_REACHABLE(CONFIG_CXL_PORT)) { >> + cxl_ops = (struct cxl_einj_ops) { >> + .einj_type = cxl_einj_available_error_type, >> + .einj_inject = cxl_einj_inject_error, >> + }; >> + >> + cxl_einj_set_ops_cbs(&cxl_ops); >> + } > > This goes away with the new Kconfig dependency scheme. > >> + >> pr_info("Error INJection is initialized.\n"); >> >> return 0; >> @@ -810,8 +904,18 @@ static int __init einj_init(void) >> >> static void __exit einj_exit(void) >> { >> + struct cxl_einj_ops cxl_ops; >> struct apei_exec_context ctx; >> >> + if (IS_REACHABLE(CONFIG_CXL_ACPI) || IS_REACHABLE(CONFIG_CXL_PORT)) { >> + cxl_ops = (struct cxl_einj_ops) { >> + .einj_type = NULL, >> + .einj_inject = NULL, >> + }; >> + >> + cxl_einj_set_ops_cbs(&cxl_ops); >> + } >> + >> if (einj_param) { >> acpi_size size = (acpi5) ? >> sizeof(struct set_error_type_with_address) : >> @@ -832,4 +936,5 @@ module_exit(einj_exit); >> >> MODULE_AUTHOR("Huang Ying"); >> MODULE_DESCRIPTION("APEI Error INJection support"); >> +MODULE_IMPORT_NS(CXL); > > EINJ never references a CXL symbol in the new proposed scheme.
diff --git a/drivers/acpi/apei/Kconfig b/drivers/acpi/apei/Kconfig index 6b18f8bc7be3..100c27beb581 100644 --- a/drivers/acpi/apei/Kconfig +++ b/drivers/acpi/apei/Kconfig @@ -11,6 +11,9 @@ config ACPI_APEI select PSTORE select UEFI_CPER depends on HAVE_ACPI_APEI + imply CXL_BUS + imply CXL_ACPI + imply CXL_PORT help APEI allows to report errors (for example from the chipset) to the operating system. This improves NMI handling diff --git a/drivers/acpi/apei/einj.c b/drivers/acpi/apei/einj.c index 330329ac2f1f..98d5e6d60a02 100644 --- a/drivers/acpi/apei/einj.c +++ b/drivers/acpi/apei/einj.c @@ -21,9 +21,11 @@ #include <linux/nmi.h> #include <linux/delay.h> #include <linux/mm.h> +#include <linux/pci.h> #include <asm/unaligned.h> #include "apei-internal.h" +#include "../../cxl/cxl.h" #undef pr_fmt #define pr_fmt(fmt) "EINJ: " fmt @@ -627,6 +629,25 @@ static int available_error_type_show(struct seq_file *m, void *v) DEFINE_SHOW_ATTRIBUTE(available_error_type); +static int cxl_einj_available_error_type(struct seq_file *m, void *v) +{ + int cxl_err, rc; + u32 available_error_type = 0; + + rc = einj_get_available_error_type(&available_error_type); + if (rc) + return rc; + + for (int pos = 0; pos < ARRAY_SIZE(einj_cxl_error_type_string); pos++) { + cxl_err = ACPI_EINJ_CXL_CACHE_CORRECTABLE << pos; + + if (available_error_type & cxl_err) + seq_puts(m, einj_cxl_error_type_string[pos]); + } + + return 0; +} + static int validate_error_type(u64 type) { u32 tval, vendor, available_error_type = 0; @@ -657,6 +678,68 @@ static int validate_error_type(u64 type) return 0; } +static int cxl_dport_get_sbdf(struct cxl_dport *dport, u64 *sbdf) +{ + struct pci_dev *pdev; + struct pci_bus *pbus; + struct pci_host_bridge *bridge; + u64 seg = 0, bus; + + if (!dev_is_pci(dport->dport_dev)) + return -EINVAL; + + pdev = to_pci_dev(dport->dport_dev); + pbus = pdev->bus; + bridge = pci_find_host_bridge(pbus); + + if (!bridge) + return -ENODEV; + + if (bridge->domain_nr != PCI_DOMAIN_NR_NOT_SET) + seg = bridge->domain_nr << 24; + + bus = pbus->number << 16; + *sbdf = seg | bus | pdev->devfn; + + return 0; +} + +static int cxl_einj_inject_error(struct cxl_dport *dport, u64 type) +{ + u64 param1 = 0, param2 = 0, param4 = 0; + u32 flags; + int rc; + + /* Only CXL error types can be specified */ + if (type & ~CXL_ERROR_MASK || (type & ACPI5_VENDOR_BIT)) + return -EINVAL; + + rc = validate_error_type(type); + if (rc) + return rc; + + /* + * If dport is in restricted mode, inject a CXL 1.1 error, + * otherwise inject a CXL 2.0 error + */ + if (dport->rch) { + if (dport->rcrb.base == CXL_RESOURCE_NONE) + return -EINVAL; + + param1 = (u64) dport->rcrb.base; + param2 = 0xfffffffffffff000; + flags = 0x2; + } else { + rc = cxl_dport_get_sbdf(dport, ¶m4); + if (rc) + return rc; + + flags = 0x4; + } + + return einj_error_inject(type, flags, param1, param2, 0, param4); +} + static int error_type_get(void *data, u64 *val) { *val = error_type; @@ -668,6 +751,7 @@ static int error_type_set(void *data, u64 val) { int rc; + /* CXL error types have to be injected from cxl debugfs */ if (val & CXL_ERROR_MASK && !(val & ACPI5_VENDOR_BIT)) return -EINVAL; @@ -714,6 +798,7 @@ static int __init einj_init(void) { int rc; acpi_status status; + struct cxl_einj_ops cxl_ops; struct apei_exec_context ctx; if (acpi_disabled) { @@ -793,6 +878,15 @@ static int __init einj_init(void) einj_debug_dir, &vendor_flags); } + if (IS_REACHABLE(CONFIG_CXL_ACPI) || IS_REACHABLE(CONFIG_CXL_PORT)) { + cxl_ops = (struct cxl_einj_ops) { + .einj_type = cxl_einj_available_error_type, + .einj_inject = cxl_einj_inject_error, + }; + + cxl_einj_set_ops_cbs(&cxl_ops); + } + pr_info("Error INJection is initialized.\n"); return 0; @@ -810,8 +904,18 @@ static int __init einj_init(void) static void __exit einj_exit(void) { + struct cxl_einj_ops cxl_ops; struct apei_exec_context ctx; + if (IS_REACHABLE(CONFIG_CXL_ACPI) || IS_REACHABLE(CONFIG_CXL_PORT)) { + cxl_ops = (struct cxl_einj_ops) { + .einj_type = NULL, + .einj_inject = NULL, + }; + + cxl_einj_set_ops_cbs(&cxl_ops); + } + if (einj_param) { acpi_size size = (acpi5) ? sizeof(struct set_error_type_with_address) : @@ -832,4 +936,5 @@ module_exit(einj_exit); MODULE_AUTHOR("Huang Ying"); MODULE_DESCRIPTION("APEI Error INJection support"); +MODULE_IMPORT_NS(CXL); MODULE_LICENSE("GPL");