diff mbox series

[v8,4/5] cxl/core, EINJ: Add CXL debugfs files and EINJ functions

Message ID 20231213223702.543419-5-Benjamin.Cheatham@amd.com
State Superseded
Headers show
Series CXL, ACPI, APEI, EINJ: Update EINJ for CXL error types | expand

Commit Message

Ben Cheatham Dec. 13, 2023, 10:37 p.m. UTC
Implement CXL helper functions in the EINJ module for getting the
available CXL protocol error types and injecting CXL errors and export
them to sysfs under kernel/debug/cxl.

The kernel/debug/cxl/einj_types file will print the available CXL
protocol errors in the same format as the available_error_types
file provided by the EINJ module. The
kernel/debug/cxl/$dport_dev/einj_inject is functionally the same as the
error_type and error_inject files provided by the EINJ module, i.e.:
writing an error type into $dport_dev/einj_inject will inject said error
type into the CXL dport represented by $dport_dev.

Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
Signed-off-by: Ben Cheatham <Benjamin.Cheatham@amd.com>
---
 Documentation/ABI/testing/debugfs-cxl |  23 ++++
 drivers/acpi/apei/einj.c              | 144 ++++++++++++++++++++++++--
 drivers/cxl/core/port.c               |  33 ++++++
 drivers/cxl/einj.h                    |  58 +++++++++++
 4 files changed, 248 insertions(+), 10 deletions(-)
 create mode 100644 drivers/cxl/einj.h

Comments

Dan Williams Dec. 19, 2023, 4:47 a.m. UTC | #1
Ben Cheatham wrote:
> Implement CXL helper functions in the EINJ module for getting the
> available CXL protocol error types and injecting CXL errors and export
> them to sysfs under kernel/debug/cxl.
> 
> The kernel/debug/cxl/einj_types file will print the available CXL
> protocol errors in the same format as the available_error_types
> file provided by the EINJ module. The
> kernel/debug/cxl/$dport_dev/einj_inject is functionally the same as the
> error_type and error_inject files provided by the EINJ module, i.e.:
> writing an error type into $dport_dev/einj_inject will inject said error
> type into the CXL dport represented by $dport_dev.
> 
> Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
> Signed-off-by: Ben Cheatham <Benjamin.Cheatham@amd.com>
> ---
>  Documentation/ABI/testing/debugfs-cxl |  23 ++++
>  drivers/acpi/apei/einj.c              | 144 ++++++++++++++++++++++++--
>  drivers/cxl/core/port.c               |  33 ++++++
>  drivers/cxl/einj.h                    |  58 +++++++++++
>  4 files changed, 248 insertions(+), 10 deletions(-)
>  create mode 100644 drivers/cxl/einj.h
> 
> diff --git a/Documentation/ABI/testing/debugfs-cxl b/Documentation/ABI/testing/debugfs-cxl
> index fe61d372e3fa..97a8684bad84 100644
> --- a/Documentation/ABI/testing/debugfs-cxl
> +++ b/Documentation/ABI/testing/debugfs-cxl
> @@ -33,3 +33,26 @@ Description:
>  		device cannot clear poison from the address, -ENXIO is returned.
>  		The clear_poison attribute is only visible for devices
>  		supporting the capability.
> +
> +What:		/sys/kernel/debug/cxl/einj_types
> +Date:		November, 2023
> +KernelVersion:	v6.8
> +Contact:	linux-cxl@vger.kernel.org
> +Description:
> +		(RO) Prints the CXL protocol error types made available by
> +		the platform in the format "0x<error number>	<error type>".
> +		The <error number> can be written to einj_inject to inject
> +		<error type> into a chosen dport. This file is only visible if
> +		CONFIG_CXL_EINJ is enabled.

I don't think the Kconfig dependency needs to be mentioned here.

> +
> +What:		/sys/kernel/debug/cxl/$dport_dev/einj_inject
> +Date:		November, 2023
> +KernelVersion:	v6.8
> +Contact:	linux-cxl@vger.kernel.org
> +Description:
> +		(WO) Writing an integer to this file injects the corresponding
> +		CXL protocol error into $dport_dev (integer to type mapping is
> +		available by reading from einj_types). 

Maybe mention that $dport_dev is a device name from /sys/bus/pci/devices?

> +		enumerated in RCH mode, a CXL 1.1 error is injected, otherwise
> +		a CXL 2.0 error is injected. This file is only visible if
> +		CONFIG_CXL_EINJ is enabled.

Same "drop the Kconfig mention" here.

> diff --git a/drivers/acpi/apei/einj.c b/drivers/acpi/apei/einj.c
> index 26a887d2a5cd..1a2195779b52 100644
> --- a/drivers/acpi/apei/einj.c
> +++ b/drivers/acpi/apei/einj.c
> @@ -24,6 +24,7 @@
>  #include <asm/unaligned.h>
>  
>  #include "apei-internal.h"
> +#include "../../cxl/einj.h"

I thought this was going to move to a top-level header?

>  
>  #undef pr_fmt
>  #define pr_fmt(fmt) "EINJ: " fmt
> @@ -36,6 +37,12 @@
>  #define MEM_ERROR_MASK		(ACPI_EINJ_MEMORY_CORRECTABLE | \
>  				ACPI_EINJ_MEMORY_UNCORRECTABLE | \
>  				ACPI_EINJ_MEMORY_FATAL)
> +#define CXL_ERROR_MASK		(ACPI_EINJ_CXL_CACHE_CORRECTABLE | \
> +				ACPI_EINJ_CXL_CACHE_UNCORRECTABLE | \
> +				ACPI_EINJ_CXL_CACHE_FATAL | \
> +				ACPI_EINJ_CXL_MEM_CORRECTABLE | \
> +				ACPI_EINJ_CXL_MEM_UNCORRECTABLE | \
> +				ACPI_EINJ_CXL_MEM_FATAL)
>  
>  /*
>   * ACPI version 5 provides a SET_ERROR_TYPE_WITH_ADDRESS action.
> @@ -537,8 +544,11 @@ static int einj_error_inject(u32 type, u32 flags, u64 param1, u64 param2,
>  	if (type & ACPI5_VENDOR_BIT) {
>  		if (vendor_flags != SETWA_FLAGS_MEM)
>  			goto inject;
> -	} else if (!(type & MEM_ERROR_MASK) && !(flags & SETWA_FLAGS_MEM))
> +	} else if (!(type & MEM_ERROR_MASK) && !(flags & SETWA_FLAGS_MEM)) {
>  		goto inject;
> +	} else if ((type & CXL_ERROR_MASK) && (flags & SETWA_FLAGS_MEM)) {
> +		goto inject;
> +	}
>  
>  	/*
>  	 * Disallow crazy address masks that give BIOS leeway to pick
> @@ -590,6 +600,9 @@ static const char * const einj_error_type_string[] = {
>  	"0x00000200\tPlatform Correctable\n",
>  	"0x00000400\tPlatform Uncorrectable non-fatal\n",
>  	"0x00000800\tPlatform Uncorrectable fatal\n",
> +};
> +
> +static const char * const einj_cxl_error_type_string[] = {
>  	"0x00001000\tCXL.cache Protocol Correctable\n",
>  	"0x00002000\tCXL.cache Protocol Uncorrectable non-fatal\n",
>  	"0x00004000\tCXL.cache Protocol Uncorrectable fatal\n",
> @@ -617,29 +630,44 @@ DEFINE_SHOW_ATTRIBUTE(available_error_type);
>  
>  static bool einj_initialized;
>  
> -static int error_type_get(void *data, u64 *val)
> +int einj_cxl_available_error_type_show(struct seq_file *m, void *v)

Oh nice an einj_cxl_ prefix in the global namespace, looks appropriate. I
mention this here because I don't think a later cxl_einj_ wrapper makes
sense, see below.

>  {
> -	*val = error_type;
> +	int cxl_err, rc;
> +	u32 available_error_type = 0;
> +
> +	if (!einj_initialized)
> +		return -ENXIO;
> +
> +	rc = einj_get_available_error_type(&available_error_type);
> +	if (rc)
> +		return rc;
> +
> +	for (int pos = 0; pos < ARRAY_SIZE(einj_cxl_error_type_string); pos++) {
> +		cxl_err = ACPI_EINJ_CXL_CACHE_CORRECTABLE << pos;
> +
> +		if (available_error_type & cxl_err)
> +			seq_puts(m, einj_cxl_error_type_string[pos]);
> +	}
>  
>  	return 0;
>  }
> +EXPORT_SYMBOL_NS_GPL(einj_cxl_available_error_type_show, CXL);
>  
> -static int error_type_set(void *data, u64 val)
> +static int validate_error_type(u64 type)
>  {
> +	u32 tval, vendor, available_error_type = 0;
>  	int rc;
> -	u32 available_error_type = 0;
> -	u32 tval, vendor;
>  
>  	/* Only low 32 bits for error type are valid */
> -	if (val & GENMASK_ULL(63, 32))
> +	if (type & GENMASK_ULL(63, 32))
>  		return -EINVAL;
>  
>  	/*
>  	 * Vendor defined types have 0x80000000 bit set, and
>  	 * are not enumerated by ACPI_EINJ_GET_ERROR_TYPE
>  	 */
> -	vendor = val & ACPI5_VENDOR_BIT;
> -	tval = val & 0x7fffffff;
> +	vendor = type & ACPI5_VENDOR_BIT;
> +	tval = type & 0x7fffffff;
>  
>  	/* Only one error type can be specified */
>  	if (tval & (tval - 1))
> @@ -648,9 +676,105 @@ static int error_type_set(void *data, u64 val)
>  		rc = einj_get_available_error_type(&available_error_type);
>  		if (rc)
>  			return rc;
> -		if (!(val & available_error_type))
> +		if (!(type & available_error_type))
>  			return -EINVAL;
>  	}
> +
> +	return 0;
> +}
> +
> +static int cxl_dport_get_sbdf(struct pci_dev *dport_dev, u64 *sbdf)
> +{
> +	struct pci_bus *pbus;
> +	struct pci_host_bridge *bridge;
> +	u64 seg = 0, bus;
> +
> +	pbus = dport_dev->bus;
> +	bridge = pci_find_host_bridge(pbus);
> +
> +	if (!bridge)
> +		return -ENODEV;
> +
> +	if (bridge->domain_nr != PCI_DOMAIN_NR_NOT_SET)
> +		seg = bridge->domain_nr << 24;
> +
> +	bus = pbus->number << 16;
> +	*sbdf = seg | bus | dport_dev->devfn;
> +
> +	return 0;
> +}
> +
> +int einj_cxl_inject_rch_error(u64 rcrb, u64 type)
> +{
> +	u64 param1 = 0, param2 = 0, param4 = 0;
> +	u32 flags;
> +	int rc;
> +
> +	if (!einj_initialized)
> +		return -ENXIO;
> +
> +	/* Only CXL error types can be specified */
> +	if (type & ~CXL_ERROR_MASK || (type & ACPI5_VENDOR_BIT))
> +		return -EINVAL;
> +
> +	rc = validate_error_type(type);
> +	if (rc)
> +		return rc;
> +
> +	param1 = (u64) rcrb;
> +	param2 = 0xfffffffffffff000;
> +	flags = 0x2;
> +
> +	return einj_error_inject(type, flags, param1, param2, 0, param4);
> +}
> +EXPORT_SYMBOL_NS_GPL(einj_cxl_inject_rch_error, CXL);
> +
> +int einj_cxl_inject_error(struct pci_dev *dport, u64 type)
> +{
> +	u64 param1 = 0, param2 = 0, param4 = 0;
> +	u32 flags;
> +	int rc;
> +
> +	if (!einj_initialized)
> +		return -ENXIO;
> +
> +	/* Only CXL error types can be specified */
> +	if (type & ~CXL_ERROR_MASK || (type & ACPI5_VENDOR_BIT))
> +		return -EINVAL;
> +
> +	rc = validate_error_type(type);
> +	if (rc)
> +		return rc;
> +
> +	rc = cxl_dport_get_sbdf(dport, &param4);
> +	if (rc)
> +		return rc;
> +
> +	flags = 0x4;
> +
> +	return einj_error_inject(type, flags, param1, param2, 0, param4);
> +}
> +EXPORT_SYMBOL_NS_GPL(einj_cxl_inject_error, CXL);
> +
> +static int error_type_get(void *data, u64 *val)
> +{
> +	*val = error_type;
> +
> +	return 0;
> +}
> +
> +static int error_type_set(void *data, u64 val)
> +{
> +	int rc;
> +
> +	/* CXL error types have to be injected from cxl debugfs */
> +	if (val & CXL_ERROR_MASK && !(val & ACPI5_VENDOR_BIT))
> +		return -EINVAL;
> +
> +	rc = validate_error_type(val);
> +	if (rc)
> +		return rc;
> +
>  	error_type = val;
>  
>  	return 0;
> diff --git a/drivers/cxl/core/port.c b/drivers/cxl/core/port.c
> index 38441634e4c6..4ed4a24138c3 100644
> --- a/drivers/cxl/core/port.c
> +++ b/drivers/cxl/core/port.c
> @@ -11,6 +11,7 @@
>  #include <linux/idr.h>
>  #include <cxlmem.h>
>  #include <cxlpci.h>
> +#include <einj.h>
>  #include <cxl.h>
>  #include "core.h"
>  
> @@ -783,6 +784,32 @@ static int cxl_dport_setup_regs(struct device *host, struct cxl_dport *dport,
>  	return rc;
>  }
>  
> +DEFINE_SHOW_ATTRIBUTE(cxl_einj_available_error_type);
> +
> +static int cxl_einj_inject(void *data, u64 type)
> +{
> +	struct cxl_dport *dport = data;
> +
> +	if (dport->rch)
> +		return cxl_einj_inject_rch_error(dport->rcrb.base, type);
> +
> +	if (!dev_is_pci(dport->dport_dev))
> +		return -EINVAL;
> +
> +	return cxl_einj_inject_error(to_pci_dev(dport->dport_dev), type);
> +}
> +DEFINE_DEBUGFS_ATTRIBUTE(cxl_einj_inject_fops, NULL, cxl_einj_inject, "%llx\n");
> +
> +static void cxl_debugfs_create_dport_dir(struct cxl_dport *dport)
> +{
> +	struct dentry *dir;

I think an "IS_ENABLED(CONFIG_CXL_EINJ)" is warranted here.

> +
> +	dir = cxl_debugfs_create_dir(dev_name(dport->dport_dev));
> +
> +	debugfs_create_file("einj_inject", 0200, dir, dport,
> +			    &cxl_einj_inject_fops);

I will note that I am little bit uneasy about this ACPI'ism escaping
into the common core, but he mitigation for me is that if some other
platform firmware invented a platform-firmware method for error inject
it would simply need to reuse the einj_cxl_ namespace to make it common.


> +}
> +
>  static struct cxl_port *__devm_cxl_add_port(struct device *host,
>  					    struct device *uport_dev,
>  					    resource_size_t component_reg_phys,
> @@ -1136,6 +1163,8 @@ struct cxl_dport *devm_cxl_add_dport(struct cxl_port *port,
>  	} else {
>  		dev_dbg(dport_dev, "dport added to %s\n",
>  			dev_name(&port->dev));
> +
> +		cxl_debugfs_create_dport_dir(dport);
>  	}
>  
>  	return dport;
> @@ -1170,6 +1199,8 @@ struct cxl_dport *devm_cxl_add_rch_dport(struct cxl_port *port,
>  	} else {
>  		dev_dbg(dport_dev, "RCH dport added to %s\n",
>  			dev_name(&port->dev));
> +
> +		cxl_debugfs_create_dport_dir(dport);
>  	}

Move the above 2 invocations into 1 common call from
__devm_cxl_add_dport().

>  
>  	return dport;
> @@ -2109,6 +2140,8 @@ static __init int cxl_core_init(void)
>  	int rc;
>  
>  	cxl_debugfs = debugfs_create_dir("cxl", NULL);
> +	debugfs_create_file("einj_types", 0400, cxl_debugfs, NULL,
> +			    &cxl_einj_available_error_type_fops);

This should be gated by "IS_ENABLED(CONFIG_CXL_EINJ)"

>  
>  	cxl_mbox_init();
>  
> diff --git a/drivers/cxl/einj.h b/drivers/cxl/einj.h
> new file mode 100644
> index 000000000000..b913763c3238
> --- /dev/null
> +++ b/drivers/cxl/einj.h
> @@ -0,0 +1,58 @@
> +/* SPDX-License-Identifier: GPL-2.0-or-later */
> +/*
> + * CXL protocol Error INJection support.
> + *
> + * Copyright (c) 2023 Advanced Micro Devices, Inc.
> + * All Rights Reserved.
> + *
> + * Author: Ben Cheatham <benjamin.cheatham@amd.com>
> + */
> +#ifndef CXL_EINJ_H
> +#define CXL_EINJ_H
> +#include <linux/pci.h>
> +
> +int einj_cxl_available_error_type_show(struct seq_file *m, void *v);
> +int einj_cxl_inject_error(struct pci_dev *dport_dev, u64 type);
> +int einj_cxl_inject_rch_error(u64 rcrb, u64 type);

See below, these should be within the ifdef guard.

> +
> +#if IS_ENABLED(CONFIG_CXL_EINJ)

This should be

#ifdef CONFIG_ACPI_APEI_EINJ

...because that is the module defining the exported public interface.

> +static inline int cxl_einj_available_error_type_show(struct seq_file *m,
> +						     void *v)
> +{
> +	return einj_cxl_available_error_type_show(m, v);
> +}
> +
> +static inline int cxl_einj_inject_error(struct pci_dev *dport_dev, u64 type)
> +{
> +	return einj_cxl_inject_error(dport_dev, type);
> +}
> +
> +static inline int cxl_einj_inject_rch_error(u64 rcrb, u64 type)
> +{
> +	return einj_cxl_inject_rch_error(rcrb, type);
> +}

Above 3 static inlines can be deleted, this section of the ifdef would
just be:

int einj_cxl_available_error_type_show(struct seq_file *m, void *v);
int einj_cxl_inject_error(struct pci_dev *dport_dev, u64 type);
int einj_cxl_inject_rch_error(u64 rcrb, u64 type);

> +
> +#else
> +static inline int cxl_einj_available_error_type_show(struct seq_file *m,
> +						     void *v)
> +{
> +	return -ENXIO;
> +}
> +
> +static inline int cxl_einj_type_show(struct seq_file *m, void *data)
> +{
> +	return -ENXIO;
> +}
> +
> +static inline int cxl_einj_inject_error(struct pci_dev *dport_dev, u64 type)
> +{
> +	return -ENXIO;
> +}
> +
> +static inline int cxl_einj_inject_rch_error(u64 rcrb, u64 type)
> +{
> +	return -ENXIO;
> +}
> +#endif

Rename these to their einj_cxl_ equivalent.
Ben Cheatham Jan. 10, 2024, 8:31 p.m. UTC | #2
On 12/18/23 10:47 PM, Dan Williams wrote:
> Ben Cheatham wrote:
>> Implement CXL helper functions in the EINJ module for getting the
>> available CXL protocol error types and injecting CXL errors and export
>> them to sysfs under kernel/debug/cxl.
>>
>> The kernel/debug/cxl/einj_types file will print the available CXL
>> protocol errors in the same format as the available_error_types
>> file provided by the EINJ module. The
>> kernel/debug/cxl/$dport_dev/einj_inject is functionally the same as the
>> error_type and error_inject files provided by the EINJ module, i.e.:
>> writing an error type into $dport_dev/einj_inject will inject said error
>> type into the CXL dport represented by $dport_dev.
>>
>> Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
>> Signed-off-by: Ben Cheatham <Benjamin.Cheatham@amd.com>
>> ---
>>  Documentation/ABI/testing/debugfs-cxl |  23 ++++
>>  drivers/acpi/apei/einj.c              | 144 ++++++++++++++++++++++++--
>>  drivers/cxl/core/port.c               |  33 ++++++
>>  drivers/cxl/einj.h                    |  58 +++++++++++
>>  4 files changed, 248 insertions(+), 10 deletions(-)
>>  create mode 100644 drivers/cxl/einj.h
>>
>> diff --git a/Documentation/ABI/testing/debugfs-cxl b/Documentation/ABI/testing/debugfs-cxl
>> index fe61d372e3fa..97a8684bad84 100644
>> --- a/Documentation/ABI/testing/debugfs-cxl
>> +++ b/Documentation/ABI/testing/debugfs-cxl
>> @@ -33,3 +33,26 @@ Description:
>>  		device cannot clear poison from the address, -ENXIO is returned.
>>  		The clear_poison attribute is only visible for devices
>>  		supporting the capability.
>> +
>> +What:		/sys/kernel/debug/cxl/einj_types
>> +Date:		November, 2023
>> +KernelVersion:	v6.8
>> +Contact:	linux-cxl@vger.kernel.org
>> +Description:
>> +		(RO) Prints the CXL protocol error types made available by
>> +		the platform in the format "0x<error number>	<error type>".
>> +		The <error number> can be written to einj_inject to inject
>> +		<error type> into a chosen dport. This file is only visible if
>> +		CONFIG_CXL_EINJ is enabled.
> 
> I don't think the Kconfig dependency needs to be mentioned here.
> 
I'll remove it.

>> +
>> +What:		/sys/kernel/debug/cxl/$dport_dev/einj_inject
>> +Date:		November, 2023
>> +KernelVersion:	v6.8
>> +Contact:	linux-cxl@vger.kernel.org
>> +Description:
>> +		(WO) Writing an integer to this file injects the corresponding
>> +		CXL protocol error into $dport_dev (integer to type mapping is
>> +		available by reading from einj_types). 
> 
> Maybe mention that $dport_dev is a device name from /sys/bus/pci/devices?
> 

Will do!

>> +		enumerated in RCH mode, a CXL 1.1 error is injected, otherwise
>> +		a CXL 2.0 error is injected. This file is only visible if
>> +		CONFIG_CXL_EINJ is enabled.
> 
> Same "drop the Kconfig mention" here.
> 
>> diff --git a/drivers/acpi/apei/einj.c b/drivers/acpi/apei/einj.c
>> index 26a887d2a5cd..1a2195779b52 100644
>> --- a/drivers/acpi/apei/einj.c
>> +++ b/drivers/acpi/apei/einj.c
>> @@ -24,6 +24,7 @@
>>  #include <asm/unaligned.h>
>>  
>>  #include "apei-internal.h"
>> +#include "../../cxl/einj.h"
> 
> I thought this was going to move to a top-level header?
> 

Sorry, I either misunderstood you a couple of versions ago or misread the original request.
I'll move it under include/linux.

>>  
>>  #undef pr_fmt
>>  #define pr_fmt(fmt) "EINJ: " fmt
>> @@ -36,6 +37,12 @@
>>  #define MEM_ERROR_MASK		(ACPI_EINJ_MEMORY_CORRECTABLE | \
>>  				ACPI_EINJ_MEMORY_UNCORRECTABLE | \
>>  				ACPI_EINJ_MEMORY_FATAL)
>> +#define CXL_ERROR_MASK		(ACPI_EINJ_CXL_CACHE_CORRECTABLE | \
>> +				ACPI_EINJ_CXL_CACHE_UNCORRECTABLE | \
>> +				ACPI_EINJ_CXL_CACHE_FATAL | \
>> +				ACPI_EINJ_CXL_MEM_CORRECTABLE | \
>> +				ACPI_EINJ_CXL_MEM_UNCORRECTABLE | \
>> +				ACPI_EINJ_CXL_MEM_FATAL)
>>  
>>  /*
>>   * ACPI version 5 provides a SET_ERROR_TYPE_WITH_ADDRESS action.
>> @@ -537,8 +544,11 @@ static int einj_error_inject(u32 type, u32 flags, u64 param1, u64 param2,
>>  	if (type & ACPI5_VENDOR_BIT) {
>>  		if (vendor_flags != SETWA_FLAGS_MEM)
>>  			goto inject;
>> -	} else if (!(type & MEM_ERROR_MASK) && !(flags & SETWA_FLAGS_MEM))
>> +	} else if (!(type & MEM_ERROR_MASK) && !(flags & SETWA_FLAGS_MEM)) {
>>  		goto inject;
>> +	} else if ((type & CXL_ERROR_MASK) && (flags & SETWA_FLAGS_MEM)) {
>> +		goto inject;
>> +	}
>>  
>>  	/*
>>  	 * Disallow crazy address masks that give BIOS leeway to pick
>> @@ -590,6 +600,9 @@ static const char * const einj_error_type_string[] = {
>>  	"0x00000200\tPlatform Correctable\n",
>>  	"0x00000400\tPlatform Uncorrectable non-fatal\n",
>>  	"0x00000800\tPlatform Uncorrectable fatal\n",
>> +};
>> +
>> +static const char * const einj_cxl_error_type_string[] = {
>>  	"0x00001000\tCXL.cache Protocol Correctable\n",
>>  	"0x00002000\tCXL.cache Protocol Uncorrectable non-fatal\n",
>>  	"0x00004000\tCXL.cache Protocol Uncorrectable fatal\n",
>> @@ -617,29 +630,44 @@ DEFINE_SHOW_ATTRIBUTE(available_error_type);
>>  
>>  static bool einj_initialized;
>>  
>> -static int error_type_get(void *data, u64 *val)
>> +int einj_cxl_available_error_type_show(struct seq_file *m, void *v)
> 
> Oh nice an einj_cxl_ prefix in the global namespace, looks appropriate. I
> mention this here because I don't think a later cxl_einj_ wrapper makes
> sense, see below.
> 
>>  {
>> -	*val = error_type;
>> +	int cxl_err, rc;
>> +	u32 available_error_type = 0;
>> +
>> +	if (!einj_initialized)
>> +		return -ENXIO;
>> +
>> +	rc = einj_get_available_error_type(&available_error_type);
>> +	if (rc)
>> +		return rc;
>> +
>> +	for (int pos = 0; pos < ARRAY_SIZE(einj_cxl_error_type_string); pos++) {
>> +		cxl_err = ACPI_EINJ_CXL_CACHE_CORRECTABLE << pos;
>> +
>> +		if (available_error_type & cxl_err)
>> +			seq_puts(m, einj_cxl_error_type_string[pos]);
>> +	}
>>  
>>  	return 0;
>>  }
>> +EXPORT_SYMBOL_NS_GPL(einj_cxl_available_error_type_show, CXL);
>>  
>> -static int error_type_set(void *data, u64 val)
>> +static int validate_error_type(u64 type)
>>  {
>> +	u32 tval, vendor, available_error_type = 0;
>>  	int rc;
>> -	u32 available_error_type = 0;
>> -	u32 tval, vendor;
>>  
>>  	/* Only low 32 bits for error type are valid */
>> -	if (val & GENMASK_ULL(63, 32))
>> +	if (type & GENMASK_ULL(63, 32))
>>  		return -EINVAL;
>>  
>>  	/*
>>  	 * Vendor defined types have 0x80000000 bit set, and
>>  	 * are not enumerated by ACPI_EINJ_GET_ERROR_TYPE
>>  	 */
>> -	vendor = val & ACPI5_VENDOR_BIT;
>> -	tval = val & 0x7fffffff;
>> +	vendor = type & ACPI5_VENDOR_BIT;
>> +	tval = type & 0x7fffffff;
>>  
>>  	/* Only one error type can be specified */
>>  	if (tval & (tval - 1))
>> @@ -648,9 +676,105 @@ static int error_type_set(void *data, u64 val)
>>  		rc = einj_get_available_error_type(&available_error_type);
>>  		if (rc)
>>  			return rc;
>> -		if (!(val & available_error_type))
>> +		if (!(type & available_error_type))
>>  			return -EINVAL;
>>  	}
>> +
>> +	return 0;
>> +}
>> +
>> +static int cxl_dport_get_sbdf(struct pci_dev *dport_dev, u64 *sbdf)
>> +{
>> +	struct pci_bus *pbus;
>> +	struct pci_host_bridge *bridge;
>> +	u64 seg = 0, bus;
>> +
>> +	pbus = dport_dev->bus;
>> +	bridge = pci_find_host_bridge(pbus);
>> +
>> +	if (!bridge)
>> +		return -ENODEV;
>> +
>> +	if (bridge->domain_nr != PCI_DOMAIN_NR_NOT_SET)
>> +		seg = bridge->domain_nr << 24;
>> +
>> +	bus = pbus->number << 16;
>> +	*sbdf = seg | bus | dport_dev->devfn;
>> +
>> +	return 0;
>> +}
>> +
>> +int einj_cxl_inject_rch_error(u64 rcrb, u64 type)
>> +{
>> +	u64 param1 = 0, param2 = 0, param4 = 0;
>> +	u32 flags;
>> +	int rc;
>> +
>> +	if (!einj_initialized)
>> +		return -ENXIO;
>> +
>> +	/* Only CXL error types can be specified */
>> +	if (type & ~CXL_ERROR_MASK || (type & ACPI5_VENDOR_BIT))
>> +		return -EINVAL;
>> +
>> +	rc = validate_error_type(type);
>> +	if (rc)
>> +		return rc;
>> +
>> +	param1 = (u64) rcrb;
>> +	param2 = 0xfffffffffffff000;
>> +	flags = 0x2;
>> +
>> +	return einj_error_inject(type, flags, param1, param2, 0, param4);
>> +}
>> +EXPORT_SYMBOL_NS_GPL(einj_cxl_inject_rch_error, CXL);
>> +
>> +int einj_cxl_inject_error(struct pci_dev *dport, u64 type)
>> +{
>> +	u64 param1 = 0, param2 = 0, param4 = 0;
>> +	u32 flags;
>> +	int rc;
>> +
>> +	if (!einj_initialized)
>> +		return -ENXIO;
>> +
>> +	/* Only CXL error types can be specified */
>> +	if (type & ~CXL_ERROR_MASK || (type & ACPI5_VENDOR_BIT))
>> +		return -EINVAL;
>> +
>> +	rc = validate_error_type(type);
>> +	if (rc)
>> +		return rc;
>> +
>> +	rc = cxl_dport_get_sbdf(dport, &param4);
>> +	if (rc)
>> +		return rc;
>> +
>> +	flags = 0x4;
>> +
>> +	return einj_error_inject(type, flags, param1, param2, 0, param4);
>> +}
>> +EXPORT_SYMBOL_NS_GPL(einj_cxl_inject_error, CXL);
>> +
>> +static int error_type_get(void *data, u64 *val)
>> +{
>> +	*val = error_type;
>> +
>> +	return 0;
>> +}
>> +
>> +static int error_type_set(void *data, u64 val)
>> +{
>> +	int rc;
>> +
>> +	/* CXL error types have to be injected from cxl debugfs */
>> +	if (val & CXL_ERROR_MASK && !(val & ACPI5_VENDOR_BIT))
>> +		return -EINVAL;
>> +
>> +	rc = validate_error_type(val);
>> +	if (rc)
>> +		return rc;
>> +
>>  	error_type = val;
>>  
>>  	return 0;
>> diff --git a/drivers/cxl/core/port.c b/drivers/cxl/core/port.c
>> index 38441634e4c6..4ed4a24138c3 100644
>> --- a/drivers/cxl/core/port.c
>> +++ b/drivers/cxl/core/port.c
>> @@ -11,6 +11,7 @@
>>  #include <linux/idr.h>
>>  #include <cxlmem.h>
>>  #include <cxlpci.h>
>> +#include <einj.h>
>>  #include <cxl.h>
>>  #include "core.h"
>>  
>> @@ -783,6 +784,32 @@ static int cxl_dport_setup_regs(struct device *host, struct cxl_dport *dport,
>>  	return rc;
>>  }
>>  
>> +DEFINE_SHOW_ATTRIBUTE(cxl_einj_available_error_type);
>> +
>> +static int cxl_einj_inject(void *data, u64 type)
>> +{
>> +	struct cxl_dport *dport = data;
>> +
>> +	if (dport->rch)
>> +		return cxl_einj_inject_rch_error(dport->rcrb.base, type);
>> +
>> +	if (!dev_is_pci(dport->dport_dev))
>> +		return -EINVAL;
>> +
>> +	return cxl_einj_inject_error(to_pci_dev(dport->dport_dev), type);
>> +}
>> +DEFINE_DEBUGFS_ATTRIBUTE(cxl_einj_inject_fops, NULL, cxl_einj_inject, "%llx\n");
>> +
>> +static void cxl_debugfs_create_dport_dir(struct cxl_dport *dport)
>> +{
>> +	struct dentry *dir;
> 
> I think an "IS_ENABLED(CONFIG_CXL_EINJ)" is warranted here.
> 

I agree. I'll add one.

>> +
>> +	dir = cxl_debugfs_create_dir(dev_name(dport->dport_dev));
>> +
>> +	debugfs_create_file("einj_inject", 0200, dir, dport,
>> +			    &cxl_einj_inject_fops);
> 
> I will note that I am little bit uneasy about this ACPI'ism escaping
> into the common core, but he mitigation for me is that if some other
> platform firmware invented a platform-firmware method for error inject
> it would simply need to reuse the einj_cxl_ namespace to make it common.
> 

I'll be honest; I'm not sure I understand the concern here, but that's probably
just inexperience on my part. That being said, I don't mind changing it if you
have any suggestions!

> 
>> +}
>> +
>>  static struct cxl_port *__devm_cxl_add_port(struct device *host,
>>  					    struct device *uport_dev,
>>  					    resource_size_t component_reg_phys,
>> @@ -1136,6 +1163,8 @@ struct cxl_dport *devm_cxl_add_dport(struct cxl_port *port,
>>  	} else {
>>  		dev_dbg(dport_dev, "dport added to %s\n",
>>  			dev_name(&port->dev));
>> +
>> +		cxl_debugfs_create_dport_dir(dport);
>>  	}
>>  
>>  	return dport;
>> @@ -1170,6 +1199,8 @@ struct cxl_dport *devm_cxl_add_rch_dport(struct cxl_port *port,
>>  	} else {
>>  		dev_dbg(dport_dev, "RCH dport added to %s\n",
>>  			dev_name(&port->dev));
>> +
>> +		cxl_debugfs_create_dport_dir(dport);
>>  	}
> 
> Move the above 2 invocations into 1 common call from
> __devm_cxl_add_dport().
> 

Will do.

>>  
>>  	return dport;
>> @@ -2109,6 +2140,8 @@ static __init int cxl_core_init(void)
>>  	int rc;
>>  
>>  	cxl_debugfs = debugfs_create_dir("cxl", NULL);
>> +	debugfs_create_file("einj_types", 0400, cxl_debugfs, NULL,
>> +			    &cxl_einj_available_error_type_fops);
> 
> This should be gated by "IS_ENABLED(CONFIG_CXL_EINJ)"
> 

Will do.

>>  
>>  	cxl_mbox_init();
>>  
>> diff --git a/drivers/cxl/einj.h b/drivers/cxl/einj.h
>> new file mode 100644
>> index 000000000000..b913763c3238
>> --- /dev/null
>> +++ b/drivers/cxl/einj.h
>> @@ -0,0 +1,58 @@
>> +/* SPDX-License-Identifier: GPL-2.0-or-later */
>> +/*
>> + * CXL protocol Error INJection support.
>> + *
>> + * Copyright (c) 2023 Advanced Micro Devices, Inc.
>> + * All Rights Reserved.
>> + *
>> + * Author: Ben Cheatham <benjamin.cheatham@amd.com>
>> + */
>> +#ifndef CXL_EINJ_H
>> +#define CXL_EINJ_H
>> +#include <linux/pci.h>
>> +
>> +int einj_cxl_available_error_type_show(struct seq_file *m, void *v);
>> +int einj_cxl_inject_error(struct pci_dev *dport_dev, u64 type);
>> +int einj_cxl_inject_rch_error(u64 rcrb, u64 type);
> 
> See below, these should be within the ifdef guard.
> 
>> +
>> +#if IS_ENABLED(CONFIG_CXL_EINJ)
> 
> This should be
> 
> #ifdef CONFIG_ACPI_APEI_EINJ
> 
> ...because that is the module defining the exported public interface.
> 

Makes sense, I'll change it.

>> +static inline int cxl_einj_available_error_type_show(struct seq_file *m,
>> +						     void *v)
>> +{
>> +	return einj_cxl_available_error_type_show(m, v);
>> +}
>> +
>> +static inline int cxl_einj_inject_error(struct pci_dev *dport_dev, u64 type)
>> +{
>> +	return einj_cxl_inject_error(dport_dev, type);
>> +}
>> +
>> +static inline int cxl_einj_inject_rch_error(u64 rcrb, u64 type)
>> +{
>> +	return einj_cxl_inject_rch_error(rcrb, type);
>> +}
> 
> Above 3 static inlines can be deleted, this section of the ifdef would
> just be:
> 
> int einj_cxl_available_error_type_show(struct seq_file *m, void *v);
> int einj_cxl_inject_error(struct pci_dev *dport_dev, u64 type);
> int einj_cxl_inject_rch_error(u64 rcrb, u64 type);
> 

Sounds good. I originally tried this but was getting compile-time errors in einj.c
most likely due to gating around CONFIG_CXL_EINJ instead of CONFIG_ACPI_APEI_EINJ.

>> +
>> +#else
>> +static inline int cxl_einj_available_error_type_show(struct seq_file *m,
>> +						     void *v)
>> +{
>> +	return -ENXIO;
>> +}
>> +
>> +static inline int cxl_einj_type_show(struct seq_file *m, void *data)
>> +{
>> +	return -ENXIO;
>> +}
>> +
>> +static inline int cxl_einj_inject_error(struct pci_dev *dport_dev, u64 type)
>> +{
>> +	return -ENXIO;
>> +}
>> +
>> +static inline int cxl_einj_inject_rch_error(u64 rcrb, u64 type)
>> +{
>> +	return -ENXIO;
>> +}
>> +#endif
> 
> Rename these to their einj_cxl_ equivalent.

Gladly! I wasn't a fan of using cxl_einj_ and einj_cxl_ at the same time since the prefixes are so similiar :/.

Thanks again,
Ben
Dan Williams Jan. 10, 2024, 10:10 p.m. UTC | #3
Ben Cheatham wrote:
[..]
> >> +
> >> +	dir = cxl_debugfs_create_dir(dev_name(dport->dport_dev));
> >> +
> >> +	debugfs_create_file("einj_inject", 0200, dir, dport,
> >> +			    &cxl_einj_inject_fops);
> > 
> > I will note that I am little bit uneasy about this ACPI'ism escaping
> > into the common core, but he mitigation for me is that if some other
> > platform firmware invented a platform-firmware method for error inject
> > it would simply need to reuse the einj_cxl_ namespace to make it common.
> > 
> 
> I'll be honest; I'm not sure I understand the concern here, but that's probably
> just inexperience on my part. That being said, I don't mind changing it if you
> have any suggestions!
> 

Notice how the CXL subsystem organizes all the ACPI interface concerns
into drivers/cxl/acpi.c. There are some generic callbacks like the
@calc_hb argument to cxl_root_decoder_alloc(), but there are no direct
ties of the CXL core code to ACPI details. This separation allows, in
principle, a non-ACPI platform (like PowerPC OpenFirmware) to support
CXL without needing to unwind a bunch of CXL-ACPI specific stuff.

This "cxl_einj" work is leaking an ACPI specific term "EINJ" into the
core code. The reason I am ok with this is that the ABI is still gated
on CONFIG_ACPI_APEI_EINJ. Also, we still have the option to require that
OpenFirmware error injection just call their user facing ABI "EINJ" as
well, even if it is a different name in the OpenFirmware specification.
To be clear though no one has sent patches for CXL support on anything
other than ACPI based platforms.
Ben Cheatham Jan. 10, 2024, 10:17 p.m. UTC | #4
On 1/10/24 4:10 PM, Dan Williams wrote:
> Ben Cheatham wrote:
> [..]
>>>> +
>>>> +	dir = cxl_debugfs_create_dir(dev_name(dport->dport_dev));
>>>> +
>>>> +	debugfs_create_file("einj_inject", 0200, dir, dport,
>>>> +			    &cxl_einj_inject_fops);
>>>
>>> I will note that I am little bit uneasy about this ACPI'ism escaping
>>> into the common core, but he mitigation for me is that if some other
>>> platform firmware invented a platform-firmware method for error inject
>>> it would simply need to reuse the einj_cxl_ namespace to make it common.
>>>
>>
>> I'll be honest; I'm not sure I understand the concern here, but that's probably
>> just inexperience on my part. That being said, I don't mind changing it if you
>> have any suggestions!
>>
> 
> Notice how the CXL subsystem organizes all the ACPI interface concerns
> into drivers/cxl/acpi.c. There are some generic callbacks like the
> @calc_hb argument to cxl_root_decoder_alloc(), but there are no direct
> ties of the CXL core code to ACPI details. This separation allows, in
> principle, a non-ACPI platform (like PowerPC OpenFirmware) to support
> CXL without needing to unwind a bunch of CXL-ACPI specific stuff.
> 
> This "cxl_einj" work is leaking an ACPI specific term "EINJ" into the
> core code. The reason I am ok with this is that the ABI is still gated
> on CONFIG_ACPI_APEI_EINJ. Also, we still have the option to require that
> OpenFirmware error injection just call their user facing ABI "EINJ" as
> well, even if it is a different name in the OpenFirmware specification.
> To be clear though no one has sent patches for CXL support on anything
> other than ACPI based platforms.

Ok that makes sense, thanks for the explanation!
diff mbox series

Patch

diff --git a/Documentation/ABI/testing/debugfs-cxl b/Documentation/ABI/testing/debugfs-cxl
index fe61d372e3fa..97a8684bad84 100644
--- a/Documentation/ABI/testing/debugfs-cxl
+++ b/Documentation/ABI/testing/debugfs-cxl
@@ -33,3 +33,26 @@  Description:
 		device cannot clear poison from the address, -ENXIO is returned.
 		The clear_poison attribute is only visible for devices
 		supporting the capability.
+
+What:		/sys/kernel/debug/cxl/einj_types
+Date:		November, 2023
+KernelVersion:	v6.8
+Contact:	linux-cxl@vger.kernel.org
+Description:
+		(RO) Prints the CXL protocol error types made available by
+		the platform in the format "0x<error number>	<error type>".
+		The <error number> can be written to einj_inject to inject
+		<error type> into a chosen dport. This file is only visible if
+		CONFIG_CXL_EINJ is enabled.
+
+What:		/sys/kernel/debug/cxl/$dport_dev/einj_inject
+Date:		November, 2023
+KernelVersion:	v6.8
+Contact:	linux-cxl@vger.kernel.org
+Description:
+		(WO) Writing an integer to this file injects the corresponding
+		CXL protocol error into $dport_dev (integer to type mapping is
+		available by reading from einj_types). If the dport was
+		enumerated in RCH mode, a CXL 1.1 error is injected, otherwise
+		a CXL 2.0 error is injected. This file is only visible if
+		CONFIG_CXL_EINJ is enabled.
diff --git a/drivers/acpi/apei/einj.c b/drivers/acpi/apei/einj.c
index 26a887d2a5cd..1a2195779b52 100644
--- a/drivers/acpi/apei/einj.c
+++ b/drivers/acpi/apei/einj.c
@@ -24,6 +24,7 @@ 
 #include <asm/unaligned.h>
 
 #include "apei-internal.h"
+#include "../../cxl/einj.h"
 
 #undef pr_fmt
 #define pr_fmt(fmt) "EINJ: " fmt
@@ -36,6 +37,12 @@ 
 #define MEM_ERROR_MASK		(ACPI_EINJ_MEMORY_CORRECTABLE | \
 				ACPI_EINJ_MEMORY_UNCORRECTABLE | \
 				ACPI_EINJ_MEMORY_FATAL)
+#define CXL_ERROR_MASK		(ACPI_EINJ_CXL_CACHE_CORRECTABLE | \
+				ACPI_EINJ_CXL_CACHE_UNCORRECTABLE | \
+				ACPI_EINJ_CXL_CACHE_FATAL | \
+				ACPI_EINJ_CXL_MEM_CORRECTABLE | \
+				ACPI_EINJ_CXL_MEM_UNCORRECTABLE | \
+				ACPI_EINJ_CXL_MEM_FATAL)
 
 /*
  * ACPI version 5 provides a SET_ERROR_TYPE_WITH_ADDRESS action.
@@ -537,8 +544,11 @@  static int einj_error_inject(u32 type, u32 flags, u64 param1, u64 param2,
 	if (type & ACPI5_VENDOR_BIT) {
 		if (vendor_flags != SETWA_FLAGS_MEM)
 			goto inject;
-	} else if (!(type & MEM_ERROR_MASK) && !(flags & SETWA_FLAGS_MEM))
+	} else if (!(type & MEM_ERROR_MASK) && !(flags & SETWA_FLAGS_MEM)) {
 		goto inject;
+	} else if ((type & CXL_ERROR_MASK) && (flags & SETWA_FLAGS_MEM)) {
+		goto inject;
+	}
 
 	/*
 	 * Disallow crazy address masks that give BIOS leeway to pick
@@ -590,6 +600,9 @@  static const char * const einj_error_type_string[] = {
 	"0x00000200\tPlatform Correctable\n",
 	"0x00000400\tPlatform Uncorrectable non-fatal\n",
 	"0x00000800\tPlatform Uncorrectable fatal\n",
+};
+
+static const char * const einj_cxl_error_type_string[] = {
 	"0x00001000\tCXL.cache Protocol Correctable\n",
 	"0x00002000\tCXL.cache Protocol Uncorrectable non-fatal\n",
 	"0x00004000\tCXL.cache Protocol Uncorrectable fatal\n",
@@ -617,29 +630,44 @@  DEFINE_SHOW_ATTRIBUTE(available_error_type);
 
 static bool einj_initialized;
 
-static int error_type_get(void *data, u64 *val)
+int einj_cxl_available_error_type_show(struct seq_file *m, void *v)
 {
-	*val = error_type;
+	int cxl_err, rc;
+	u32 available_error_type = 0;
+
+	if (!einj_initialized)
+		return -ENXIO;
+
+	rc = einj_get_available_error_type(&available_error_type);
+	if (rc)
+		return rc;
+
+	for (int pos = 0; pos < ARRAY_SIZE(einj_cxl_error_type_string); pos++) {
+		cxl_err = ACPI_EINJ_CXL_CACHE_CORRECTABLE << pos;
+
+		if (available_error_type & cxl_err)
+			seq_puts(m, einj_cxl_error_type_string[pos]);
+	}
 
 	return 0;
 }
+EXPORT_SYMBOL_NS_GPL(einj_cxl_available_error_type_show, CXL);
 
-static int error_type_set(void *data, u64 val)
+static int validate_error_type(u64 type)
 {
+	u32 tval, vendor, available_error_type = 0;
 	int rc;
-	u32 available_error_type = 0;
-	u32 tval, vendor;
 
 	/* Only low 32 bits for error type are valid */
-	if (val & GENMASK_ULL(63, 32))
+	if (type & GENMASK_ULL(63, 32))
 		return -EINVAL;
 
 	/*
 	 * Vendor defined types have 0x80000000 bit set, and
 	 * are not enumerated by ACPI_EINJ_GET_ERROR_TYPE
 	 */
-	vendor = val & ACPI5_VENDOR_BIT;
-	tval = val & 0x7fffffff;
+	vendor = type & ACPI5_VENDOR_BIT;
+	tval = type & 0x7fffffff;
 
 	/* Only one error type can be specified */
 	if (tval & (tval - 1))
@@ -648,9 +676,105 @@  static int error_type_set(void *data, u64 val)
 		rc = einj_get_available_error_type(&available_error_type);
 		if (rc)
 			return rc;
-		if (!(val & available_error_type))
+		if (!(type & available_error_type))
 			return -EINVAL;
 	}
+
+	return 0;
+}
+
+static int cxl_dport_get_sbdf(struct pci_dev *dport_dev, u64 *sbdf)
+{
+	struct pci_bus *pbus;
+	struct pci_host_bridge *bridge;
+	u64 seg = 0, bus;
+
+	pbus = dport_dev->bus;
+	bridge = pci_find_host_bridge(pbus);
+
+	if (!bridge)
+		return -ENODEV;
+
+	if (bridge->domain_nr != PCI_DOMAIN_NR_NOT_SET)
+		seg = bridge->domain_nr << 24;
+
+	bus = pbus->number << 16;
+	*sbdf = seg | bus | dport_dev->devfn;
+
+	return 0;
+}
+
+int einj_cxl_inject_rch_error(u64 rcrb, u64 type)
+{
+	u64 param1 = 0, param2 = 0, param4 = 0;
+	u32 flags;
+	int rc;
+
+	if (!einj_initialized)
+		return -ENXIO;
+
+	/* Only CXL error types can be specified */
+	if (type & ~CXL_ERROR_MASK || (type & ACPI5_VENDOR_BIT))
+		return -EINVAL;
+
+	rc = validate_error_type(type);
+	if (rc)
+		return rc;
+
+	param1 = (u64) rcrb;
+	param2 = 0xfffffffffffff000;
+	flags = 0x2;
+
+	return einj_error_inject(type, flags, param1, param2, 0, param4);
+}
+EXPORT_SYMBOL_NS_GPL(einj_cxl_inject_rch_error, CXL);
+
+int einj_cxl_inject_error(struct pci_dev *dport, u64 type)
+{
+	u64 param1 = 0, param2 = 0, param4 = 0;
+	u32 flags;
+	int rc;
+
+	if (!einj_initialized)
+		return -ENXIO;
+
+	/* Only CXL error types can be specified */
+	if (type & ~CXL_ERROR_MASK || (type & ACPI5_VENDOR_BIT))
+		return -EINVAL;
+
+	rc = validate_error_type(type);
+	if (rc)
+		return rc;
+
+	rc = cxl_dport_get_sbdf(dport, &param4);
+	if (rc)
+		return rc;
+
+	flags = 0x4;
+
+	return einj_error_inject(type, flags, param1, param2, 0, param4);
+}
+EXPORT_SYMBOL_NS_GPL(einj_cxl_inject_error, CXL);
+
+static int error_type_get(void *data, u64 *val)
+{
+	*val = error_type;
+
+	return 0;
+}
+
+static int error_type_set(void *data, u64 val)
+{
+	int rc;
+
+	/* CXL error types have to be injected from cxl debugfs */
+	if (val & CXL_ERROR_MASK && !(val & ACPI5_VENDOR_BIT))
+		return -EINVAL;
+
+	rc = validate_error_type(val);
+	if (rc)
+		return rc;
+
 	error_type = val;
 
 	return 0;
diff --git a/drivers/cxl/core/port.c b/drivers/cxl/core/port.c
index 38441634e4c6..4ed4a24138c3 100644
--- a/drivers/cxl/core/port.c
+++ b/drivers/cxl/core/port.c
@@ -11,6 +11,7 @@ 
 #include <linux/idr.h>
 #include <cxlmem.h>
 #include <cxlpci.h>
+#include <einj.h>
 #include <cxl.h>
 #include "core.h"
 
@@ -783,6 +784,32 @@  static int cxl_dport_setup_regs(struct device *host, struct cxl_dport *dport,
 	return rc;
 }
 
+DEFINE_SHOW_ATTRIBUTE(cxl_einj_available_error_type);
+
+static int cxl_einj_inject(void *data, u64 type)
+{
+	struct cxl_dport *dport = data;
+
+	if (dport->rch)
+		return cxl_einj_inject_rch_error(dport->rcrb.base, type);
+
+	if (!dev_is_pci(dport->dport_dev))
+		return -EINVAL;
+
+	return cxl_einj_inject_error(to_pci_dev(dport->dport_dev), type);
+}
+DEFINE_DEBUGFS_ATTRIBUTE(cxl_einj_inject_fops, NULL, cxl_einj_inject, "%llx\n");
+
+static void cxl_debugfs_create_dport_dir(struct cxl_dport *dport)
+{
+	struct dentry *dir;
+
+	dir = cxl_debugfs_create_dir(dev_name(dport->dport_dev));
+
+	debugfs_create_file("einj_inject", 0200, dir, dport,
+			    &cxl_einj_inject_fops);
+}
+
 static struct cxl_port *__devm_cxl_add_port(struct device *host,
 					    struct device *uport_dev,
 					    resource_size_t component_reg_phys,
@@ -1136,6 +1163,8 @@  struct cxl_dport *devm_cxl_add_dport(struct cxl_port *port,
 	} else {
 		dev_dbg(dport_dev, "dport added to %s\n",
 			dev_name(&port->dev));
+
+		cxl_debugfs_create_dport_dir(dport);
 	}
 
 	return dport;
@@ -1170,6 +1199,8 @@  struct cxl_dport *devm_cxl_add_rch_dport(struct cxl_port *port,
 	} else {
 		dev_dbg(dport_dev, "RCH dport added to %s\n",
 			dev_name(&port->dev));
+
+		cxl_debugfs_create_dport_dir(dport);
 	}
 
 	return dport;
@@ -2109,6 +2140,8 @@  static __init int cxl_core_init(void)
 	int rc;
 
 	cxl_debugfs = debugfs_create_dir("cxl", NULL);
+	debugfs_create_file("einj_types", 0400, cxl_debugfs, NULL,
+			    &cxl_einj_available_error_type_fops);
 
 	cxl_mbox_init();
 
diff --git a/drivers/cxl/einj.h b/drivers/cxl/einj.h
new file mode 100644
index 000000000000..b913763c3238
--- /dev/null
+++ b/drivers/cxl/einj.h
@@ -0,0 +1,58 @@ 
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * CXL protocol Error INJection support.
+ *
+ * Copyright (c) 2023 Advanced Micro Devices, Inc.
+ * All Rights Reserved.
+ *
+ * Author: Ben Cheatham <benjamin.cheatham@amd.com>
+ */
+#ifndef CXL_EINJ_H
+#define CXL_EINJ_H
+#include <linux/pci.h>
+
+int einj_cxl_available_error_type_show(struct seq_file *m, void *v);
+int einj_cxl_inject_error(struct pci_dev *dport_dev, u64 type);
+int einj_cxl_inject_rch_error(u64 rcrb, u64 type);
+
+#if IS_ENABLED(CONFIG_CXL_EINJ)
+static inline int cxl_einj_available_error_type_show(struct seq_file *m,
+						     void *v)
+{
+	return einj_cxl_available_error_type_show(m, v);
+}
+
+static inline int cxl_einj_inject_error(struct pci_dev *dport_dev, u64 type)
+{
+	return einj_cxl_inject_error(dport_dev, type);
+}
+
+static inline int cxl_einj_inject_rch_error(u64 rcrb, u64 type)
+{
+	return einj_cxl_inject_rch_error(rcrb, type);
+}
+
+#else
+static inline int cxl_einj_available_error_type_show(struct seq_file *m,
+						     void *v)
+{
+	return -ENXIO;
+}
+
+static inline int cxl_einj_type_show(struct seq_file *m, void *data)
+{
+	return -ENXIO;
+}
+
+static inline int cxl_einj_inject_error(struct pci_dev *dport_dev, u64 type)
+{
+	return -ENXIO;
+}
+
+static inline int cxl_einj_inject_rch_error(u64 rcrb, u64 type)
+{
+	return -ENXIO;
+}
+#endif
+
+#endif