diff mbox series

ACPI, APEI, EINJ: Remove memory range validation for CXL error types

Message ID 20230329143813.25849-1-Benjamin.Cheatham@amd.com
State Superseded
Headers show
Series ACPI, APEI, EINJ: Remove memory range validation for CXL error types | expand

Commit Message

Ben Cheatham March 29, 2023, 2:38 p.m. UTC
From: Yazen Ghannam <yazen.ghannam@amd.com>

This patch is a follow-up to the discussion at [1], and builds on Tony's
CXL error patch at [2].

The new CXL error types will use the Memory Address field in the
SET_ERROR_TYPE_WITH_ADDRESS structure in order to target a CXL
1.1-compliant memory-mapped Downstream port. The value of the Memory
Address will be in the port's MMIO range, and it will not represent
physical (normal or persistent) memory.

Allow error injection for CXL 1.1 systems by skipping memory range
validation for CXL error injection types. 

Output when trying to inject a CXL.mem error without this patch:

# echo 0x8000 > error_type
# echo 6 > flags
# echo 0x3cd5d2000000 > param1
# echo 0xFFFFFFFFFFFFF000 > param2
# echo 0 > param3
# echo 0x400000 > param4
# echo 1 > error_inject
-bash: echo: write error: Invalid argument

[1]: https://lore.kernel.org/linux-acpi/20221206205234.606073-1-Benjamin.Cheatham@amd.com/
[2]: https://lore.kernel.org/linux-cxl/CAJZ5v0hNQUfWViqxbJ5B4JCGJUuHpWWSpqpCFWPNpGuagoFbsQ@mail.gmail.com/T/#t

Signed-off-by: Yazen Ghannam <yazen.ghannam@amd.com>
Signed-off-by: Ben Cheatham <benjamin.cheatham@amd.com>
---
 drivers/acpi/apei/einj.c | 13 ++++++++++++-
 include/acpi/actbl1.h    |  6 ++++++
 2 files changed, 18 insertions(+), 1 deletion(-)

Comments

Rafael J. Wysocki March 29, 2023, 2:41 p.m. UTC | #1
On Wed, Mar 29, 2023 at 4:38 PM Ben Cheatham <Benjamin.Cheatham@amd.com> wrote:
>
> From: Yazen Ghannam <yazen.ghannam@amd.com>
>
> This patch is a follow up to the discussion at [1], and builds on Tony's
> CXL error patch at [2].
>
> The new CXL error types will use the Memory Address field in the
> SET_ERROR_TYPE_WITH_ADDRESS structure in order to target a CXL 1.1
> compliant memory-mapped Downstream port. The value of the Memory Address
> will be in the port's MMIO range, and it will not represent physical
> (normal or persistent) memory.
>
> Allow error injection for CXL 1.1 systems by skipping memory range
> validation for CXL error injection types.
>
> Output trying to inject CXL.mem error without patch:
>
> # echo 0x8000 > error_type
> # echo 6 > flags
> # echo 0x3cd5d2000000 > param1
> # echo 0xFFFFFFFFFFFFF000 > param2
> # echo 0 > param3
> # echo 0x400000 > param4
> # echo 1 > error_inject
> -bash: echo: write error: Invalid argument
>
> [1]: https://lore.kernel.org/linux-acpi/20221206205234.606073-1-Benjamin.Cheatham@amd.com/
> [2]: https://lore.kernel.org/linux-cxl/CAJZ5v0hNQUfWViqxbJ5B4JCGJUuHpWWSpqpCFWPNpGuagoFbsQ@mail.gmail.com/T/#t

Link tags for the above, please, and an ACK from the CXL side,
preferably from Dan.

> Signed-off-by: Yazen Ghannam <yazen.ghannam@amd.com>
> Signed-off-by: Ben Cheatham <benjamin.cheatham@amd.com>
> ---
>  drivers/acpi/apei/einj.c | 13 ++++++++++++-
>  include/acpi/actbl1.h    |  6 ++++++
>  2 files changed, 18 insertions(+), 1 deletion(-)
>
> diff --git a/drivers/acpi/apei/einj.c b/drivers/acpi/apei/einj.c
> index 82004abb9643..4e201dfb7d29 100644
> --- a/drivers/acpi/apei/einj.c
> +++ b/drivers/acpi/apei/einj.c
> @@ -37,6 +37,13 @@
>                                 ACPI_EINJ_MEMORY_UNCORRECTABLE | \
>                                 ACPI_EINJ_MEMORY_FATAL)
>
> +#define CXL_ERROR_MASK         (ACPI_EINJ_CXL_CACHE_CORRECTABLE        | \
> +                               ACPI_EINJ_CXL_CACHE_UNCORRECTABLE       | \
> +                               ACPI_EINJ_CXL_CACHE_FATAL               | \
> +                               ACPI_EINJ_CXL_MEM_CORRECTABLE           | \
> +                               ACPI_EINJ_CXL_MEM_UNCORRECTABLE         | \
> +                               ACPI_EINJ_CXL_MEM_FATAL)
> +
>  /*
>   * ACPI version 5 provides a SET_ERROR_TYPE_WITH_ADDRESS action.
>   */
> @@ -511,6 +518,7 @@ static int einj_error_inject(u32 type, u32 flags, u64 param1, u64 param2,
>                              u64 param3, u64 param4)
>  {
>         int rc;
> +       u32 available_error_types = 0;
>         u64 base_addr, size;
>
>         /* If user manually set "flags", make sure it is legal */
> @@ -531,8 +539,11 @@ static int einj_error_inject(u32 type, u32 flags, u64 param1, u64 param2,
>         if (type & ACPI5_VENDOR_BIT) {
>                 if (vendor_flags != SETWA_FLAGS_MEM)
>                         goto inject;
> -       } else if (!(type & MEM_ERROR_MASK) && !(flags & SETWA_FLAGS_MEM))
> +       } else if (!(type & MEM_ERROR_MASK) && !(flags & SETWA_FLAGS_MEM)) {
> +               goto inject;
> +       } else if (type & CXL_ERROR_MASK) {
>                 goto inject;
> +       }
>
>         /*
>          * Disallow crazy address masks that give BIOS leeway to pick
> diff --git a/include/acpi/actbl1.h b/include/acpi/actbl1.h
> index 15c78678c5d3..68588b2be716 100644
> --- a/include/acpi/actbl1.h
> +++ b/include/acpi/actbl1.h
> @@ -895,6 +895,12 @@ enum acpi_einj_command_status {
>  #define ACPI_EINJ_PLATFORM_CORRECTABLE      (1<<9)
>  #define ACPI_EINJ_PLATFORM_UNCORRECTABLE    (1<<10)
>  #define ACPI_EINJ_PLATFORM_FATAL            (1<<11)
> +#define ACPI_EINJ_CXL_CACHE_CORRECTABLE     BIT(12)
> +#define ACPI_EINJ_CXL_CACHE_UNCORRECTABLE   BIT(13)
> +#define ACPI_EINJ_CXL_CACHE_FATAL           BIT(14)
> +#define ACPI_EINJ_CXL_MEM_CORRECTABLE       BIT(15)
> +#define ACPI_EINJ_CXL_MEM_UNCORRECTABLE     BIT(16)
> +#define ACPI_EINJ_CXL_MEM_FATAL             BIT(17)
>  #define ACPI_EINJ_VENDOR_DEFINED            (1<<31)
>
>  /*******************************************************************************
> --
> 2.34.1
>
kernel test robot March 29, 2023, 7:28 p.m. UTC | #2
Hi Ben,

I love your patch! Perhaps something to improve:

[auto build test WARNING on rafael-pm/linux-next]
[also build test WARNING on linus/master v6.3-rc4 next-20230329]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting a patch, we suggest using '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]

url:    https://github.com/intel-lab-lkp/linux/commits/Ben-Cheatham/ACPI-APEI-EINJ-Remove-memory-range-validation-for-CXL-error-types/20230329-224405
base:   https://git.kernel.org/pub/scm/linux/kernel/git/rafael/linux-pm.git linux-next
patch link:    https://lore.kernel.org/r/20230329143813.25849-1-Benjamin.Cheatham%40amd.com
patch subject: [PATCH] ACPI, APEI, EINJ: Remove memory range validation for CXL error types
config: i386-allyesconfig (https://download.01.org/0day-ci/archive/20230330/202303300345.QZXN7i0i-lkp@intel.com/config)
compiler: gcc-11 (Debian 11.3.0-8) 11.3.0
reproduce (this is a W=1 build):
        # https://github.com/intel-lab-lkp/linux/commit/f0c48248d842ab224de7ac3d62d260e9f411e80c
        git remote add linux-review https://github.com/intel-lab-lkp/linux
        git fetch --no-tags linux-review Ben-Cheatham/ACPI-APEI-EINJ-Remove-memory-range-validation-for-CXL-error-types/20230329-224405
        git checkout f0c48248d842ab224de7ac3d62d260e9f411e80c
        # save the config file
        mkdir build_dir && cp config build_dir/.config
        make W=1 O=build_dir ARCH=i386 olddefconfig
        make W=1 O=build_dir ARCH=i386 SHELL=/bin/bash drivers/acpi/apei/

If you fix the issue, kindly add the following tags where applicable
| Reported-by: kernel test robot <lkp@intel.com>
| Link: https://lore.kernel.org/oe-kbuild-all/202303300345.QZXN7i0i-lkp@intel.com/

All warnings (new ones prefixed by >>):

   drivers/acpi/apei/einj.c: In function 'einj_error_inject':
>> drivers/acpi/apei/einj.c:527:13: warning: unused variable 'available_error_types' [-Wunused-variable]
     527 |         u32 available_error_types = 0;
         |             ^~~~~~~~~~~~~~~~~~~~~


vim +/available_error_types +527 drivers/acpi/apei/einj.c

   521	
   522	/* Inject the specified hardware error */
   523	static int einj_error_inject(u32 type, u32 flags, u64 param1, u64 param2,
   524				     u64 param3, u64 param4)
   525	{
   526		int rc;
 > 527		u32 available_error_types = 0;
   528		u64 base_addr, size;
   529	
   530		/* If user manually set "flags", make sure it is legal */
   531		if (flags && (flags &
   532			~(SETWA_FLAGS_APICID|SETWA_FLAGS_MEM|SETWA_FLAGS_PCIE_SBDF)))
   533			return -EINVAL;
   534	
   535		/*
   536		 * We need extra sanity checks for memory errors.
   537		 * Other types leap directly to injection.
   538		 */
   539	
   540		/* ensure param1/param2 existed */
   541		if (!(param_extension || acpi5))
   542			goto inject;
   543	
   544		/* ensure injection is memory related */
   545		if (type & ACPI5_VENDOR_BIT) {
   546			if (vendor_flags != SETWA_FLAGS_MEM)
   547				goto inject;
   548		} else if (!(type & MEM_ERROR_MASK) && !(flags & SETWA_FLAGS_MEM)) {
   549			goto inject;
   550		} else if (type & CXL_ERROR_MASK) {
   551			goto inject;
   552		}
   553	
   554		/*
   555		 * Disallow crazy address masks that give BIOS leeway to pick
   556		 * injection address almost anywhere. Insist on page or
   557		 * better granularity and that target address is normal RAM or
   558		 * NVDIMM.
   559		 */
   560		base_addr = param1 & param2;
   561		size = ~param2 + 1;
   562	
   563		if (((param2 & PAGE_MASK) != PAGE_MASK) ||
   564		    ((region_intersects(base_addr, size, IORESOURCE_SYSTEM_RAM, IORES_DESC_NONE)
   565					!= REGION_INTERSECTS) &&
   566		     (region_intersects(base_addr, size, IORESOURCE_MEM, IORES_DESC_PERSISTENT_MEMORY)
   567					!= REGION_INTERSECTS) &&
   568		     (region_intersects(base_addr, size, IORESOURCE_MEM, IORES_DESC_SOFT_RESERVED)
   569					!= REGION_INTERSECTS) &&
   570		     !arch_is_platform_page(base_addr)))
   571			return -EINVAL;
   572	
   573		if (is_zero_pfn(base_addr >> PAGE_SHIFT))
   574			return -EADDRINUSE;
   575	
   576	inject:
   577		mutex_lock(&einj_mutex);
   578		rc = __einj_error_inject(type, flags, param1, param2, param3, param4);
   579		mutex_unlock(&einj_mutex);
   580	
   581		return rc;
   582	}
   583
diff mbox series

Patch

diff --git a/drivers/acpi/apei/einj.c b/drivers/acpi/apei/einj.c
index 82004abb9643..4e201dfb7d29 100644
--- a/drivers/acpi/apei/einj.c
+++ b/drivers/acpi/apei/einj.c
@@ -37,6 +37,13 @@ 
 				ACPI_EINJ_MEMORY_UNCORRECTABLE | \
 				ACPI_EINJ_MEMORY_FATAL)
 
+#define CXL_ERROR_MASK		(ACPI_EINJ_CXL_CACHE_CORRECTABLE	| \
+				ACPI_EINJ_CXL_CACHE_UNCORRECTABLE	| \
+				ACPI_EINJ_CXL_CACHE_FATAL		| \
+				ACPI_EINJ_CXL_MEM_CORRECTABLE		| \
+				ACPI_EINJ_CXL_MEM_UNCORRECTABLE		| \
+				ACPI_EINJ_CXL_MEM_FATAL)
+
 /*
  * ACPI version 5 provides a SET_ERROR_TYPE_WITH_ADDRESS action.
  */
@@ -511,6 +518,7 @@  static int einj_error_inject(u32 type, u32 flags, u64 param1, u64 param2,
 			     u64 param3, u64 param4)
 {
 	int rc;
+	u32 available_error_types = 0;
 	u64 base_addr, size;
 
 	/* If user manually set "flags", make sure it is legal */
@@ -531,8 +539,11 @@  static int einj_error_inject(u32 type, u32 flags, u64 param1, u64 param2,
 	if (type & ACPI5_VENDOR_BIT) {
 		if (vendor_flags != SETWA_FLAGS_MEM)
 			goto inject;
-	} else if (!(type & MEM_ERROR_MASK) && !(flags & SETWA_FLAGS_MEM))
+	} else if (!(type & MEM_ERROR_MASK) && !(flags & SETWA_FLAGS_MEM)) {
+		goto inject;
+	} else if (type & CXL_ERROR_MASK) {
 		goto inject;
+	}
 
 	/*
 	 * Disallow crazy address masks that give BIOS leeway to pick
diff --git a/include/acpi/actbl1.h b/include/acpi/actbl1.h
index 15c78678c5d3..68588b2be716 100644
--- a/include/acpi/actbl1.h
+++ b/include/acpi/actbl1.h
@@ -895,6 +895,12 @@  enum acpi_einj_command_status {
 #define ACPI_EINJ_PLATFORM_CORRECTABLE      (1<<9)
 #define ACPI_EINJ_PLATFORM_UNCORRECTABLE    (1<<10)
 #define ACPI_EINJ_PLATFORM_FATAL            (1<<11)
+#define ACPI_EINJ_CXL_CACHE_CORRECTABLE     BIT(12)
+#define ACPI_EINJ_CXL_CACHE_UNCORRECTABLE   BIT(13)
+#define ACPI_EINJ_CXL_CACHE_FATAL           BIT(14)
+#define ACPI_EINJ_CXL_MEM_CORRECTABLE       BIT(15)
+#define ACPI_EINJ_CXL_MEM_UNCORRECTABLE     BIT(16)
+#define ACPI_EINJ_CXL_MEM_FATAL             BIT(17)
 #define ACPI_EINJ_VENDOR_DEFINED            (1<<31)
 
 /*******************************************************************************