Message ID | 20240521211036.227674-8-zaidal@os.amperecomputing.com (mailing list archive) |
---|---|
State | RFC, archived |
Headers | show |
Series | Enable EINJv2 support | expand |
On 5/21/24 4:10 PM, Zaid Alali wrote: > Enable the driver to inject EINJv2 type errors. The component > array values are parsed from user_input and expected to contain > hex values for component id and syndrome separated by space, > and multiple components are separated by new line as follows: > > component_id1 component_syndrome1 > component_id2 component_syndrome2 > : > component_id(n) component_syndrome(n) > > for example: > > $comp_arr="0x1 0x2 >> 0x1 0x4 >> 0x2 0x4" > $cd /sys/kernel/debug/apei/einj/ > $echo "$comp_arr" > einjv2_component_array > I think it would be good to change this from being newline-delimited to comma-delimited instead. So instead of your first example above it would be: component_id1 component_syndrome1, component_id2 component_syndrome2, ... My reasoning here is that it's less error-prone. For example, if I run your example but forget to quote the comp_arr variable in the last line, i.e.: $ echo $comp_arr > einjv2_component_array I would effectively be running (at least in my stock Ubuntu 22.04 terminal): $ echo "0x1 0x2 0x1 0x4 0x2 0x4" > einjv2_component_array Which would result in an error for something that isn't necessarily readily apparent. I also think keeping the input on a single line is nicer on the eyes, but that's a subjective thing and I'd understand if you think differently. > Signed-off-by: Zaid Alali <zaidal@os.amperecomputing.com> > --- > drivers/acpi/apei/einj-core.c | 81 ++++++++++++++++++++++++++++++++--- > 1 file changed, 75 insertions(+), 6 deletions(-) > > diff --git a/drivers/acpi/apei/einj-core.c b/drivers/acpi/apei/einj-core.c > index 2e30ebed079b..2e5c00b34a4b 100644 > --- a/drivers/acpi/apei/einj-core.c > +++ b/drivers/acpi/apei/einj-core.c > @@ -87,6 +87,13 @@ enum { > SETWA_FLAGS_APICID = 1, > SETWA_FLAGS_MEM = 2, > SETWA_FLAGS_PCIE_SBDF = 4, > + SETWA_FLAGS_EINJV2 = 8, > +}; > + > +enum { > + EINJV2_PROCESSOR_ERROR = 0x1, > + EINJV2_MEMORY_ERROR = 0x2, > + EINJV2_PCIE_ERROR = 0x4, > }; > > /* > @@ -111,6 +118,7 @@ static char vendor_dev[64]; > static struct debugfs_blob_wrapper einjv2_component_arr; > static u64 component_count; > static void *user_input; > +static int nr_components; > static u32 available_error_type; > static u32 available_error_type_v2; > > @@ -287,8 +295,18 @@ static void *einj_get_parameter_address(void) > > v5param = acpi_os_map_iomem(pa_v5, sizeof(*v5param)); > if (v5param) { > + int offset, len; > + > acpi5 = 1; > check_vendor_extension(pa_v5, v5param); > + if (available_error_type & ACPI65_EINJV2_SUPP) { > + len = v5param->einjv2_struct.length; > + offset = offsetof(struct einjv2_extension_struct, component_arr); > + nr_components = (len - offset) / 32; > + acpi_os_unmap_iomem(v5param, sizeof(*v5param)); > + v5param = acpi_os_map_iomem(pa_v5, sizeof(*v5param) + ( > + (nr_components) * sizeof(struct syndrome_array))); > + } > return v5param; > } > } > @@ -494,10 +512,52 @@ static int __einj_error_inject(u32 type, u32 flags, u64 param1, u64 param2, > v5param->flags = vendor_flags; > } else if (flags) { > v5param->flags = flags; > - v5param->memory_address = param1; > - v5param->memory_address_range = param2; > - v5param->apicid = param3; > - v5param->pcie_sbdf = param4; > + if (flags & SETWA_FLAGS_MEM) { > + v5param->memory_address = param1; > + v5param->memory_address_range = param2; > + } I don't think you need this if statement since the values will be ignored when the SETWA_FLAGS_MEM bit isn't set anyway as per the spec. > + if (flags & SETWA_FLAGS_EINJV2) { > + int count = 0, bytes_read, pos = 0; > + unsigned int comp, synd; > + struct syndrome_array *component_arr; > + > + if (component_count > nr_components) > + goto err_out; > + > + v5param->einjv2_struct.component_arr_count = component_count; > + component_arr = v5param->einjv2_struct.component_arr; > + > + while (sscanf(user_input+pos, "%x %x\n%n", &comp, &synd, > + &bytes_read) == 2) { > + count++; > + pos += bytes_read; > + if (count > component_count) > + goto err_out; > + > + switch (type) { > + case EINJV2_PROCESSOR_ERROR: > + component_arr[count-1].comp_id.acpi_id = comp; > + component_arr[count-1].comp_synd.proc_synd = synd; > + break; > + case EINJV2_MEMORY_ERROR: > + component_arr[count-1].comp_id.device_id = comp; > + component_arr[count-1].comp_synd.mem_synd = synd; > + break; > + case EINJV2_PCIE_ERROR: > + component_arr[count-1].comp_id.pcie_sbdf = comp; > + component_arr[count-1].comp_synd.pcie_synd = synd; > + break; > + } > + } > + if (count != component_count) > + goto err_out; Nitpick here, but you could use count directly instead of count-1 when indexing component_arr[] if you move the count++ to the bottom of the loop and change the above if to: if (count != component_count - 1) goto err_out; or use count + 1 instead of component_count - 1. > + > + /* clear buffer after user input for next injection */ > + memset(user_input, 0, COMP_ARR_SIZE); > + } else { > + v5param->apicid = param3; > + v5param->pcie_sbdf = param4; > + } > } else { > switch (type) { > case ACPI_EINJ_PROCESSOR_CORRECTABLE: > @@ -570,6 +630,9 @@ static int __einj_error_inject(u32 type, u32 flags, u64 param1, u64 param2, > rc = apei_exec_run_optional(&ctx, ACPI_EINJ_END_OPERATION); > > return rc; > +err_out: > + memset(user_input, 0, COMP_ARR_SIZE); > + return -EINVAL; > } > > /* Inject the specified hardware error */ > @@ -581,9 +644,14 @@ int einj_error_inject(u32 type, u32 flags, u64 param1, u64 param2, u64 param3, > > /* If user manually set "flags", make sure it is legal */ > if (flags && (flags & > - ~(SETWA_FLAGS_APICID|SETWA_FLAGS_MEM|SETWA_FLAGS_PCIE_SBDF))) > + ~(SETWA_FLAGS_APICID|SETWA_FLAGS_MEM|SETWA_FLAGS_PCIE_SBDF|SETWA_FLAGS_EINJV2))) > return -EINVAL; > > + /*check if type is a valid EINJv2 error type*/ > + if (flags & SETWA_FLAGS_EINJV2) { > + if (!(type & available_error_type_v2)) > + return -EINVAL; > + } > /* > * We need extra sanity checks for memory errors. > * Other types leap directly to injection. > @@ -915,7 +983,8 @@ static void __exit einj_remove(struct platform_device *pdev) > sizeof(struct set_error_type_with_address) : > sizeof(struct einj_parameter); > > - acpi_os_unmap_iomem(einj_param, size); > + acpi_os_unmap_iomem(einj_param, > + size + (nr_components * sizeof(struct syndrome_array))); > if (vendor_errors.size) > acpi_os_unmap_memory(vendor_errors.data, vendor_errors.size); > }
diff --git a/drivers/acpi/apei/einj-core.c b/drivers/acpi/apei/einj-core.c index 2e30ebed079b..2e5c00b34a4b 100644 --- a/drivers/acpi/apei/einj-core.c +++ b/drivers/acpi/apei/einj-core.c @@ -87,6 +87,13 @@ enum { SETWA_FLAGS_APICID = 1, SETWA_FLAGS_MEM = 2, SETWA_FLAGS_PCIE_SBDF = 4, + SETWA_FLAGS_EINJV2 = 8, +}; + +enum { + EINJV2_PROCESSOR_ERROR = 0x1, + EINJV2_MEMORY_ERROR = 0x2, + EINJV2_PCIE_ERROR = 0x4, }; /* @@ -111,6 +118,7 @@ static char vendor_dev[64]; static struct debugfs_blob_wrapper einjv2_component_arr; static u64 component_count; static void *user_input; +static int nr_components; static u32 available_error_type; static u32 available_error_type_v2; @@ -287,8 +295,18 @@ static void *einj_get_parameter_address(void) v5param = acpi_os_map_iomem(pa_v5, sizeof(*v5param)); if (v5param) { + int offset, len; + acpi5 = 1; check_vendor_extension(pa_v5, v5param); + if (available_error_type & ACPI65_EINJV2_SUPP) { + len = v5param->einjv2_struct.length; + offset = offsetof(struct einjv2_extension_struct, component_arr); + nr_components = (len - offset) / 32; + acpi_os_unmap_iomem(v5param, sizeof(*v5param)); + v5param = acpi_os_map_iomem(pa_v5, sizeof(*v5param) + ( + (nr_components) * sizeof(struct syndrome_array))); + } return v5param; } } @@ -494,10 +512,52 @@ static int __einj_error_inject(u32 type, u32 flags, u64 param1, u64 param2, v5param->flags = vendor_flags; } else if (flags) { v5param->flags = flags; - v5param->memory_address = param1; - v5param->memory_address_range = param2; - v5param->apicid = param3; - v5param->pcie_sbdf = param4; + if (flags & SETWA_FLAGS_MEM) { + v5param->memory_address = param1; + v5param->memory_address_range = param2; + } + if (flags & SETWA_FLAGS_EINJV2) { + int count = 0, bytes_read, pos = 0; + unsigned int comp, synd; + struct syndrome_array *component_arr; + + if (component_count > nr_components) + goto err_out; + + v5param->einjv2_struct.component_arr_count = component_count; + component_arr = v5param->einjv2_struct.component_arr; + + while (sscanf(user_input+pos, "%x %x\n%n", &comp, &synd, + &bytes_read) == 2) { + count++; + pos += bytes_read; + if (count > component_count) + goto err_out; + + switch (type) { + case EINJV2_PROCESSOR_ERROR: + component_arr[count-1].comp_id.acpi_id = comp; + component_arr[count-1].comp_synd.proc_synd = synd; + break; + case EINJV2_MEMORY_ERROR: + component_arr[count-1].comp_id.device_id = comp; + component_arr[count-1].comp_synd.mem_synd = synd; + break; + case EINJV2_PCIE_ERROR: + component_arr[count-1].comp_id.pcie_sbdf = comp; + component_arr[count-1].comp_synd.pcie_synd = synd; + break; + } + } + if (count != component_count) + goto err_out; + + /* clear buffer after user input for next injection */ + memset(user_input, 0, COMP_ARR_SIZE); + } else { + v5param->apicid = param3; + v5param->pcie_sbdf = param4; + } } else { switch (type) { case ACPI_EINJ_PROCESSOR_CORRECTABLE: @@ -570,6 +630,9 @@ static int __einj_error_inject(u32 type, u32 flags, u64 param1, u64 param2, rc = apei_exec_run_optional(&ctx, ACPI_EINJ_END_OPERATION); return rc; +err_out: + memset(user_input, 0, COMP_ARR_SIZE); + return -EINVAL; } /* Inject the specified hardware error */ @@ -581,9 +644,14 @@ int einj_error_inject(u32 type, u32 flags, u64 param1, u64 param2, u64 param3, /* If user manually set "flags", make sure it is legal */ if (flags && (flags & - ~(SETWA_FLAGS_APICID|SETWA_FLAGS_MEM|SETWA_FLAGS_PCIE_SBDF))) + ~(SETWA_FLAGS_APICID|SETWA_FLAGS_MEM|SETWA_FLAGS_PCIE_SBDF|SETWA_FLAGS_EINJV2))) return -EINVAL; + /*check if type is a valid EINJv2 error type*/ + if (flags & SETWA_FLAGS_EINJV2) { + if (!(type & available_error_type_v2)) + return -EINVAL; + } /* * We need extra sanity checks for memory errors. * Other types leap directly to injection. @@ -915,7 +983,8 @@ static void __exit einj_remove(struct platform_device *pdev) sizeof(struct set_error_type_with_address) : sizeof(struct einj_parameter); - acpi_os_unmap_iomem(einj_param, size); + acpi_os_unmap_iomem(einj_param, + size + (nr_components * sizeof(struct syndrome_array))); if (vendor_errors.size) acpi_os_unmap_memory(vendor_errors.data, vendor_errors.size); }