diff mbox series

[v4] cper, apei, mce: Pass x86 CPER through the MCA handling chain

Message ID 20200904140444.161291-1-Smita.KoralahalliChannabasappa@amd.com (mailing list archive)
State New, archived
Headers show
Series [v4] cper, apei, mce: Pass x86 CPER through the MCA handling chain | expand

Commit Message

Smita Koralahalli Sept. 4, 2020, 2:04 p.m. UTC
Linux Kernel uses ACPI Boot Error Record Table (BERT) to report fatal
errors that occurred in a previous boot. The MCA errors in the BERT are
reported using the x86 Processor Error Common Platform Error Record (CPER)
format. Currently, the record prints out the raw MSR values and AMD relies
on the raw record to provide MCA information.

Extract the raw MSR values of MCA registers from the BERT and feed it into
the standard mce_log() function through the existing x86/MCA RAS
infrastructure. This will result in better decoding from the EDAC MCE
decoder or the default notifier.

The implementation is SMCA specific as the raw MCA register values are
given in the register offset order of the MCAX address space.

[ Build error in patch v1. ]

Reported-by: kernel test robot <lkp@intel.com>
Signed-off-by: Smita Koralahalli <Smita.KoralahalliChannabasappa@amd.com>
---
Link:
https://lkml.kernel.org/r/20200903234531.162484-2-Smita.KoralahalliChannabasappa@amd.com

v4:
	Included what kernel test robot reported.
	Changed function name from apei_mce_report_x86_error ->
	apei_smca_report_x86_error.
	Added comment for MASK_MCA_STATUS definition.
	Wrapped apei_smca_report_x86_error() with CONFIG_X86_MCE in
	arch/x86/include/asm/mce.h
v3:
	Moved arch specific declarations from generic headers to arch
	specific headers.
	Cleaned additional declarations which are unnecessary.
	Included the check for context type.
	Added additional check to verify for appropriate MSR address in
	the register layout.
v2:
	Fixed build error reported by kernel test robot.
	Passed struct variable as function argument instead of entire struct.
---
 arch/x86/include/asm/acpi.h     | 11 ++++++++
 arch/x86/include/asm/mce.h      |  5 ++++
 arch/x86/kernel/acpi/apei.c     |  5 ++++
 arch/x86/kernel/cpu/mce/apei.c  | 49 +++++++++++++++++++++++++++++++++
 drivers/firmware/efi/cper-x86.c | 10 +++++--
 5 files changed, 77 insertions(+), 3 deletions(-)

Comments

Punit Agrawal Sept. 23, 2020, 10:07 a.m. UTC | #1
Hi Smita,

A few comments below.

Smita Koralahalli <Smita.KoralahalliChannabasappa@amd.com> writes:

> Linux Kernel uses ACPI Boot Error Record Table (BERT) to report fatal
> errors that occurred in a previous boot. The MCA errors in the BERT are
> reported using the x86 Processor Error Common Platform Error Record (CPER)
> format. Currently, the record prints out the raw MSR values and AMD relies
> on the raw record to provide MCA information.
>
> Extract the raw MSR values of MCA registers from the BERT and feed it into
> the standard mce_log() function through the existing x86/MCA RAS
> infrastructure. This will result in better decoding from the EDAC MCE
> decoder or the default notifier.
>
> The implementation is SMCA specific as the raw MCA register values are
> given in the register offset order of the MCAX address space.
>
> [ Build error in patch v1. ]
>
> Reported-by: kernel test robot <lkp@intel.com>

I know Boris asked you to add the reason for the Reported-by, but
usually we don't track version differences in the committed patch.

Boris, can you confirm if you want the Reported-by to be retained?

> Signed-off-by: Smita Koralahalli <Smita.KoralahalliChannabasappa@amd.com>
> ---
> Link:
> https://lkml.kernel.org/r/20200903234531.162484-2-Smita.KoralahalliChannabasappa@amd.com
>
> v4:
> 	Included what kernel test robot reported.
> 	Changed function name from apei_mce_report_x86_error ->
> 	apei_smca_report_x86_error.
> 	Added comment for MASK_MCA_STATUS definition.
> 	Wrapped apei_smca_report_x86_error() with CONFIG_X86_MCE in
> 	arch/x86/include/asm/mce.h
> v3:
> 	Moved arch specific declarations from generic headers to arch
> 	specific headers.
> 	Cleaned additional declarations which are unnecessary.
> 	Included the check for context type.
> 	Added additional check to verify for appropriate MSR address in
> 	the register layout.
> v2:
> 	Fixed build error reported by kernel test robot.
> 	Passed struct variable as function argument instead of entire struct.
> ---
>  arch/x86/include/asm/acpi.h     | 11 ++++++++
>  arch/x86/include/asm/mce.h      |  5 ++++
>  arch/x86/kernel/acpi/apei.c     |  5 ++++
>  arch/x86/kernel/cpu/mce/apei.c  | 49 +++++++++++++++++++++++++++++++++
>  drivers/firmware/efi/cper-x86.c | 10 +++++--
>  5 files changed, 77 insertions(+), 3 deletions(-)
>
> diff --git a/arch/x86/include/asm/acpi.h b/arch/x86/include/asm/acpi.h
> index 6d2df1ee427b..65064d9f7fa6 100644
> --- a/arch/x86/include/asm/acpi.h
> +++ b/arch/x86/include/asm/acpi.h
> @@ -159,6 +159,8 @@ static inline u64 x86_default_get_root_pointer(void)
>  extern int x86_acpi_numa_init(void);
>  #endif /* CONFIG_ACPI_NUMA */
>  
> +struct cper_ia_proc_ctx;
> +
>  #ifdef CONFIG_ACPI_APEI
>  static inline pgprot_t arch_apei_get_mem_attribute(phys_addr_t addr)
>  {
> @@ -177,6 +179,15 @@ static inline pgprot_t arch_apei_get_mem_attribute(phys_addr_t addr)
>  	 */
>  	return PAGE_KERNEL_NOENC;
>  }
> +
> +int arch_apei_report_x86_error(struct cper_ia_proc_ctx *ctx_info,
> +			       u64 lapic_id);
> +#else
> +static inline int arch_apei_report_x86_error(struct cper_ia_proc_ctx *ctx_info,
> +					     u64 lapic_id)
> +{
> +	return -EINVAL;
> +}
>  #endif
>  
>  #define ACPI_TABLE_UPGRADE_MAX_PHYS (max_low_pfn_mapped << PAGE_SHIFT)
> diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h
> index 109af5c7f515..d07bd635acfd 100644
> --- a/arch/x86/include/asm/mce.h
> +++ b/arch/x86/include/asm/mce.h
> @@ -173,17 +173,22 @@ extern void mce_unregister_decode_chain(struct notifier_block *nb);
>  #include <linux/atomic.h>
>  
>  extern int mce_p5_enabled;
> +struct cper_ia_proc_ctx;
>  
>  #ifdef CONFIG_X86_MCE
>  int mcheck_init(void);
>  void mcheck_cpu_init(struct cpuinfo_x86 *c);
>  void mcheck_cpu_clear(struct cpuinfo_x86 *c);
>  void mcheck_vendor_init_severity(void);
> +int apei_smca_report_x86_error(struct cper_ia_proc_ctx *ctx_info,
> +			       u64 lapic_id);
>  #else
>  static inline int mcheck_init(void) { return 0; }
>  static inline void mcheck_cpu_init(struct cpuinfo_x86 *c) {}
>  static inline void mcheck_cpu_clear(struct cpuinfo_x86 *c) {}
>  static inline void mcheck_vendor_init_severity(void) {}
> +static inline int apei_smca_report_x86_error(struct cper_ia_proc_ctx *ctx_info,
> +					     u64 lapic_id) { return -EINVAL; }
>  #endif
>  
>  #ifdef CONFIG_X86_ANCIENT_MCE
> diff --git a/arch/x86/kernel/acpi/apei.c b/arch/x86/kernel/acpi/apei.c
> index c22fb55abcfd..0916f00a992e 100644
> --- a/arch/x86/kernel/acpi/apei.c
> +++ b/arch/x86/kernel/acpi/apei.c
> @@ -43,3 +43,8 @@ void arch_apei_report_mem_error(int sev, struct cper_sec_mem_err *mem_err)
>  	apei_mce_report_mem_error(sev, mem_err);
>  #endif
>  }
> +
> +int arch_apei_report_x86_error(struct cper_ia_proc_ctx *ctx_info, u64 lapic_id)
> +{
> +	return apei_smca_report_x86_error(ctx_info, lapic_id);
> +}
> diff --git a/arch/x86/kernel/cpu/mce/apei.c b/arch/x86/kernel/cpu/mce/apei.c
> index af8d37962586..d4b3a2053eef 100644
> --- a/arch/x86/kernel/cpu/mce/apei.c
> +++ b/arch/x86/kernel/cpu/mce/apei.c
> @@ -51,6 +51,55 @@ void apei_mce_report_mem_error(int severity, struct cper_sec_mem_err *mem_err)
>  }
>  EXPORT_SYMBOL_GPL(apei_mce_report_mem_error);
>  
> +/*
> + * The first expected register in the register layout of MCAX address space.
> + * The address defined must match with the first MSR address extracted from
> + * BERT which in SMCA systems is the bank's MCA_STATUS register.
> + *
> + * Note that the decoding of the raw MSR values in BERT is implementation
> + * specific and follows register offset order of MCAX address space.
> + */
> +#define MASK_MCA_STATUS 0xC0002001

The macro value is already defined in mce.h as
MSR_AMD64_SMCA_MC0_STATUS.  Is there any reason to not use it?

You can move the comment to where you check the status register.


> +
> +int apei_smca_report_x86_error(struct cper_ia_proc_ctx *ctx_info, u64 lapic_id)
> +{
> +	const u64 *i_mce = ((const void *) (ctx_info + 1));

This can directly be cast into (const u64 *).

> +	unsigned int cpu;
> +	struct mce m;
> +
> +	if (!boot_cpu_has(X86_FEATURE_SMCA))
> +		return -EINVAL;
> +
> +	if ((ctx_info->msr_addr & MASK_MCA_STATUS) != MASK_MCA_STATUS)
> +		return -EINVAL;
> +
> +	mce_setup(&m);
> +
> +	m.extcpu = -1;
> +	m.socketid = -1;
> +
> +	for_each_possible_cpu(cpu) {
> +		if (cpu_data(cpu).initial_apicid == lapic_id) {
> +			m.extcpu = cpu;
> +			m.socketid = cpu_data(m.extcpu).phys_proc_id;
> +			break;
> +		}
> +	}
> +
> +	m.apicid = lapic_id;
> +	m.bank = (ctx_info->msr_addr >> 4) & 0xFF;
> +	m.status = *i_mce;
> +	m.addr = *(i_mce + 1);
> +	m.misc = *(i_mce + 2);
> +	/* Skipping MCA_CONFIG */
> +	m.ipid = *(i_mce + 4);
> +	m.synd = *(i_mce + 5);

Instead of using the raw pointer arithmetic, it is better to define a
structure for the MCA registers? Something like -

    struct {
        u64 addr;
        u64 misc;
        u64 config;
        u64 ipid;
        ...
    }

Checking back, this was mentioned in the previous review comments as
well. Please address all comments before posting a new version - either
by following the suggestion or explaining why it is not a good idea.

Thanks,
Punit

> +
> +	mce_log(&m);
> +
> +	return 0;
> +}
> +
>  #define CPER_CREATOR_MCE						\
>  	GUID_INIT(0x75a574e3, 0x5052, 0x4b29, 0x8a, 0x8e, 0xbe, 0x2c,	\
>  		  0x64, 0x90, 0xb8, 0x9d)
> diff --git a/drivers/firmware/efi/cper-x86.c b/drivers/firmware/efi/cper-x86.c
> index 2531de49f56c..2f2b0c431c18 100644
> --- a/drivers/firmware/efi/cper-x86.c
> +++ b/drivers/firmware/efi/cper-x86.c
> @@ -2,6 +2,7 @@
>  // Copyright (C) 2018, Advanced Micro Devices, Inc.
>  
>  #include <linux/cper.h>
> +#include <linux/acpi.h>
>  
>  /*
>   * We don't need a "CPER_IA" prefix since these are all locally defined.
> @@ -347,9 +348,12 @@ void cper_print_proc_ia(const char *pfx, const struct cper_sec_proc_ia *proc)
>  			       ctx_info->mm_reg_addr);
>  		}
>  
> -		printk("%sRegister Array:\n", newpfx);
> -		print_hex_dump(newpfx, "", DUMP_PREFIX_OFFSET, 16, groupsize,
> -			       (ctx_info + 1), ctx_info->reg_arr_size, 0);
> +		if (ctx_info->reg_ctx_type != CTX_TYPE_MSR ||
> +		    arch_apei_report_x86_error(ctx_info, proc->lapic_id)) {
> +			printk("%sRegister Array:\n", newpfx);
> +			print_hex_dump(newpfx, "", DUMP_PREFIX_OFFSET, 16, groupsize,
> +				       (ctx_info + 1), ctx_info->reg_arr_size, 0);
> +		}
>  
>  		ctx_info = (struct cper_ia_proc_ctx *)((long)ctx_info + size);
>  	}
Borislav Petkov Sept. 23, 2020, 2:05 p.m. UTC | #2
Smita,

pls sync the time of the box where you create the patch:

 Date: Fri,  4 Sep 2020 09:04:44 -0500

but your mail headers have:

 Received: from ... with mapi id 15.20.3370.019; Fri, 18 Sep 2020 14:49:12 +0000
 						^^^^^^^^^^^^^^^^^^

On Wed, Sep 23, 2020 at 07:07:17PM +0900, Punit Agrawal wrote:
> I know Boris asked you to add the reason for the Reported-by, but
> usually we don't track version differences in the committed patch.
> 
> Boris, can you confirm if you want the Reported-by to be retained?

How else would you explain what the Reported-by: tag is for on a patch
which adds a feature?

> > + * The first expected register in the register layout of MCAX address space.
> > + * The address defined must match with the first MSR address extracted from
> > + * BERT which in SMCA systems is the bank's MCA_STATUS register.
> > + *
> > + * Note that the decoding of the raw MSR values in BERT is implementation
> > + * specific and follows register offset order of MCAX address space.
> > + */
> > +#define MASK_MCA_STATUS 0xC0002001
> 
> The macro value is already defined in mce.h as
> MSR_AMD64_SMCA_MC0_STATUS.  Is there any reason to not use it?

Good point.

> You can move the comment to where you check the status register.

No need if he really wants to use the first MCi_STATUS address.

> > +	m.apicid = lapic_id;
> > +	m.bank = (ctx_info->msr_addr >> 4) & 0xFF;
> > +	m.status = *i_mce;
> > +	m.addr = *(i_mce + 1);
> > +	m.misc = *(i_mce + 2);
> > +	/* Skipping MCA_CONFIG */
> > +	m.ipid = *(i_mce + 4);
> > +	m.synd = *(i_mce + 5);
> 
> Instead of using the raw pointer arithmetic, it is better to define a
> structure for the MCA registers? Something like -
> 
>     struct {
>         u64 addr;
>         u64 misc;
>         u64 config;
>         u64 ipid;
>         ...
>     }
> 
> Checking back, this was mentioned in the previous review comments as
> well. Please address all comments before posting a new version - either
> by following the suggestion or explaining why it is not a good idea.

Well, that was addressed in his reply last time:

https://lkml.kernel.org/r/a28aa613-8353-0052-31f6-34bc733abf59@amd.com

You might've missed it because you weren't CCed directly.
Ard Biesheuvel Sept. 23, 2020, 2:52 p.m. UTC | #3
On Wed, 23 Sep 2020 at 16:05, Borislav Petkov <bp@alien8.de> wrote:
>
> Smita,
>
> pls sync the time of the box where you create the patch:
>
>  Date: Fri,  4 Sep 2020 09:04:44 -0500
>
> but your mail headers have:
>
>  Received: from ... with mapi id 15.20.3370.019; Fri, 18 Sep 2020 14:49:12 +0000
>                                                 ^^^^^^^^^^^^^^^^^^
>
> On Wed, Sep 23, 2020 at 07:07:17PM +0900, Punit Agrawal wrote:
> > I know Boris asked you to add the reason for the Reported-by, but
> > usually we don't track version differences in the committed patch.
> >
> > Boris, can you confirm if you want the Reported-by to be retained?
>
> How else would you explain what the Reported-by: tag is for on a patch
> which adds a feature?
>

I think the question is why we are retaining this Reported-by header
to begin with. Even though the early feedback is appreciated,
crediting the bot for eternity for a version of the patch that never
got merged seems a bit excessive. Also, it may suggest that the bot
was involved in reporting an issue that the patch aims to fix but that
is not the case. The last thing we want is Sasha's bot to jump on
patches adding new functionality just because it has a reported-by
line.

So I suggest dropping the Reported-by credit as well as the [] context
regarding v1
Borislav Petkov Sept. 23, 2020, 3:39 p.m. UTC | #4
On Wed, Sep 23, 2020 at 04:52:18PM +0200, Ard Biesheuvel wrote:
> I think the question is why we are retaining this Reported-by header
> to begin with. Even though the early feedback is appreciated,
> crediting the bot for eternity for a version of the patch that never
> got merged seems a bit excessive. Also, it may suggest that the bot
> was involved in reporting an issue that the patch aims to fix but that
> is not the case.

That is supposed to be explained in [] properly so that there's no
misreading of why that tag's there.

> The last thing we want is Sasha's bot to jump on patches adding new
> functionality just because it has a reported-by line.

It should jump on patches which have Fixes: tags. But Sasha's bot is
nuts regardless. :-)

> So I suggest dropping the Reported-by credit as well as the [] context
> regarding v1

So I don't mind having a Reported-by: tag with an explanation of what
it reported. We slap all kinds of tags so having some attribution for
the work the 0day bot does to catch such errors is reasonable. I presume
they track this way how "useful" it is, by counting the Reported-by's or
so, as they suggest one should add a Reported-by in their reports.

And without any attribution what the 0day bot reported, it might decide
not to report anything next time, I'd venture a guess.

And the same argument can be had for Suggested-by: tags: one could
decide not to add that tag and the person who's doing the suggesting
might decide not to suggest anymore.

So I think something like:

 [ Fix a build breakage in an earlier version. ]
Reported-by: 0day bot

is fine as long as it makes it perfectly clear what Reported-by tag
is for and as long as ts purpose for being present there is clear, I
don't see an issue...
Ard Biesheuvel Sept. 23, 2020, 6:24 p.m. UTC | #5
On Wed, 23 Sep 2020 at 17:39, Borislav Petkov <bp@alien8.de> wrote:
>
> On Wed, Sep 23, 2020 at 04:52:18PM +0200, Ard Biesheuvel wrote:
> > I think the question is why we are retaining this Reported-by header
> > to begin with. Even though the early feedback is appreciated,
> > crediting the bot for eternity for a version of the patch that never
> > got merged seems a bit excessive. Also, it may suggest that the bot
> > was involved in reporting an issue that the patch aims to fix but that
> > is not the case.
>
> That is supposed to be explained in [] properly so that there's no
> misreading of why that tag's there.
>
> > The last thing we want is Sasha's bot to jump on patches adding new
> > functionality just because it has a reported-by line.
>
> It should jump on patches which have Fixes: tags. But Sasha's bot is
> nuts regardless. :-)
>
> > So I suggest dropping the Reported-by credit as well as the [] context
> > regarding v1
>
> So I don't mind having a Reported-by: tag with an explanation of what
> it reported. We slap all kinds of tags so having some attribution for
> the work the 0day bot does to catch such errors is reasonable. I presume
> they track this way how "useful" it is, by counting the Reported-by's or
> so, as they suggest one should add a Reported-by in their reports.
>
> And without any attribution what the 0day bot reported, it might decide
> not to report anything next time, I'd venture a guess.
>
> And the same argument can be had for Suggested-by: tags: one could
> decide not to add that tag and the person who's doing the suggesting
> might decide not to suggest anymore.
>
> So I think something like:
>
>  [ Fix a build breakage in an earlier version. ]
> Reported-by: 0day bot
>
> is fine as long as it makes it perfectly clear what Reported-by tag
> is for and as long as ts purpose for being present there is clear, I
> don't see an issue...
>

I don't think it adds much value tbh, but I am not going to obsess
about it either.
Punit Agrawal Sept. 24, 2020, 12:02 a.m. UTC | #6
Borislav Petkov <bp@alien8.de> writes:

> Smita,
>
> pls sync the time of the box where you create the patch:
>
>  Date: Fri,  4 Sep 2020 09:04:44 -0500
>
> but your mail headers have:
>
>  Received: from ... with mapi id 15.20.3370.019; Fri, 18 Sep 2020 14:49:12 +0000
>  						^^^^^^^^^^^^^^^^^^
>
> On Wed, Sep 23, 2020 at 07:07:17PM +0900, Punit Agrawal wrote:
>> I know Boris asked you to add the reason for the Reported-by, but
>> usually we don't track version differences in the committed patch.
>> 
>> Boris, can you confirm if you want the Reported-by to be retained?
>
> How else would you explain what the Reported-by: tag is for on a patch
> which adds a feature?

As Ard clarified, I was questioning the inclusion of the Reported-by:
tag in the patch itself. But I also don't have enough of a strong
opinion to obsess about it.

[ Aside: One interesting consequence of this though is that by the same
argument, changes resulting from comments on earlier versions are also
legitimate content for the final patch. Not saying I agree. ]

>
>> > + * The first expected register in the register layout of MCAX address space.
>> > + * The address defined must match with the first MSR address extracted from
>> > + * BERT which in SMCA systems is the bank's MCA_STATUS register.
>> > + *
>> > + * Note that the decoding of the raw MSR values in BERT is implementation
>> > + * specific and follows register offset order of MCAX address space.
>> > + */
>> > +#define MASK_MCA_STATUS 0xC0002001
>> 
>> The macro value is already defined in mce.h as
>> MSR_AMD64_SMCA_MC0_STATUS.  Is there any reason to not use it?
>
> Good point.
>
>> You can move the comment to where you check the status register.
>
> No need if he really wants to use the first MCi_STATUS address.
>
>> > +	m.apicid = lapic_id;
>> > +	m.bank = (ctx_info->msr_addr >> 4) & 0xFF;
>> > +	m.status = *i_mce;
>> > +	m.addr = *(i_mce + 1);
>> > +	m.misc = *(i_mce + 2);
>> > +	/* Skipping MCA_CONFIG */
>> > +	m.ipid = *(i_mce + 4);
>> > +	m.synd = *(i_mce + 5);
>> 
>> Instead of using the raw pointer arithmetic, it is better to define a
>> structure for the MCA registers? Something like -
>> 
>>     struct {
>>         u64 addr;
>>         u64 misc;
>>         u64 config;
>>         u64 ipid;
>>         ...
>>     }
>> 
>> Checking back, this was mentioned in the previous review comments as
>> well. Please address all comments before posting a new version - either
>> by following the suggestion or explaining why it is not a good idea.
>
> Well, that was addressed in his reply last time:
>
> https://lkml.kernel.org/r/a28aa613-8353-0052-31f6-34bc733abf59@amd.com

Oops. My bad - sorry I missed the response.

Copying the relevant comment here for discussion -

>>> The registers here are implementation specific and applies only for
>>> SMCA systems. So I have used pointer arithmetic as it is not defined
>>> in the spec.

Even though it's not defined in the UEFI spec, it doesn't mean a
structure definition cannot be created. After all, the patch is relying
on some guarantee of the meaning of the values and their ordering.

If the patch is relying on the definitions in the SMCA spec it is a good
idea to reference it here - both for review and providing relevant
context for future developers.

> You might've missed it because you weren't CCed directly.

Indeed, I missed it. Thanks for the pointer.

Cheers,
Punit
Koralahalli Channabasappa, Smita Sept. 24, 2020, 5:23 p.m. UTC | #7
On 9/23/20 7:02 PM, Punit Agrawal wrote:

> Borislav Petkov <bp@alien8.de> writes:
>
>> Smita,
>>
>> pls sync the time of the box where you create the patch:
>>
>>   Date: Fri,  4 Sep 2020 09:04:44 -0500
>>
>> but your mail headers have:
>>
>>   Received: from ... with mapi id 15.20.3370.019; Fri, 18 Sep 2020 14:49:12 +0000
>>   						^^^^^^^^^^^^^^^^^^

Sorry for the trouble. I have fixed this.

>>> I know Boris asked you to add the reason for the Reported-by, but
>>> usually we don't track version differences in the committed patch.
>>>
>>> Boris, can you confirm if you want the Reported-by to be retained?
>> How else would you explain what the Reported-by: tag is for on a patch
>> which adds a feature?
> As Ard clarified, I was questioning the inclusion of the Reported-by:
> tag in the patch itself. But I also don't have enough of a strong
> opinion to obsess about it.
>
> [ Aside: One interesting consequence of this though is that by the same
> argument, changes resulting from comments on earlier versions are also
> legitimate content for the final patch. Not saying I agree. ]
>
>>>> + * The first expected register in the register layout of MCAX address space.
>>>> + * The address defined must match with the first MSR address extracted from
>>>> + * BERT which in SMCA systems is the bank's MCA_STATUS register.
>>>> + *
>>>> + * Note that the decoding of the raw MSR values in BERT is implementation
>>>> + * specific and follows register offset order of MCAX address space.
>>>> + */
>>>> +#define MASK_MCA_STATUS 0xC0002001
>>> The macro value is already defined in mce.h as
>>> MSR_AMD64_SMCA_MC0_STATUS.  Is there any reason to not use it?
>> Good point.

I indeed missed it. thanks!

>>> You can move the comment to where you check the status register.
>> No need if he really wants to use the first MCi_STATUS address.

Okay!

>>>> +	m.apicid = lapic_id;
>>>> +	m.bank = (ctx_info->msr_addr >> 4) & 0xFF;
>>>> +	m.status = *i_mce;
>>>> +	m.addr = *(i_mce + 1);
>>>> +	m.misc = *(i_mce + 2);
>>>> +	/* Skipping MCA_CONFIG */
>>>> +	m.ipid = *(i_mce + 4);
>>>> +	m.synd = *(i_mce + 5);
>>> Instead of using the raw pointer arithmetic, it is better to define a
>>> structure for the MCA registers? Something like -
>>>
>>>      struct {
>>>          u64 addr;
>>>          u64 misc;
>>>          u64 config;
>>>          u64 ipid;
>>>          ...
>>>      }
>>>
>>> Checking back, this was mentioned in the previous review comments as
>>> well. Please address all comments before posting a new version - either
>>> by following the suggestion or explaining why it is not a good idea.
>> Well, that was addressed in his reply last time:
>>
>> https://nam11.safelinks.protection.outlook.com/?url=https%3A%2F%2Flkml.kernel.org%2Fr%2Fa28aa613-8353-0052-31f6-34bc733abf59%40amd.com&amp;data=02%7C01%7CSmita.KoralahalliChannabasappa%40amd.com%7C1e8d8042158141af2c0a08d8601d31d7%7C3dd8961fe4884e608e11a82d994e183d%7C0%7C0%7C637365025808391248&amp;sdata=C71Gp1ZNQhtckegVJbYPA%2FTNi6np%2Fl1Xl4BvI4kGX4Y%3D&amp;reserved=0
> Oops. My bad - sorry I missed the response.
>
> Copying the relevant comment here for discussion -
>
>>>> The registers here are implementation specific and applies only for
>>>> SMCA systems. So I have used pointer arithmetic as it is not defined
>>>> in the spec.
> Even though it's not defined in the UEFI spec, it doesn't mean a
> structure definition cannot be created. After all, the patch is relying
> on some guarantee of the meaning of the values and their ordering.
>
> If the patch is relying on the definitions in the SMCA spec it is a good
> idea to reference it here - both for review and providing relevant
> context for future developers.

Okay, I agree the structure definition will make the code less arbitrary
and provides relevant context compared to pointer arithmetic. I did not
think this way. I can try this out if no objections.

>> You might've missed it because you weren't CCed directly.
> Indeed, I missed it. Thanks for the pointer.

Sorry, I missed including you on CC. Will include henceforth!

Thanks,
Smita
Borislav Petkov Sept. 24, 2020, 5:50 p.m. UTC | #8
On Thu, Sep 24, 2020 at 12:23:27PM -0500, Smita Koralahalli Channabasappa wrote:
> > Even though it's not defined in the UEFI spec, it doesn't mean a
> > structure definition cannot be created.

Created for what? That structure better have a big fat comment above it, what
firmware generates its layout.

> > After all, the patch is relying on some guarantee of the meaning of
> > the values and their ordering.

AFAICT, this looks like an ad-hoc definition and the moment they change
it in some future revision, that struct of yours becomes invalid so we'd
need to add another one.

> > If the patch is relying on the definitions in the SMCA spec it is a good

Yes, what SMCA spec is that?

> > idea to reference it here - both for review and providing relevant
> > context for future developers.
> 
> Okay, I agree the structure definition will make the code less arbitrary
> and provides relevant context compared to pointer arithmetic. I did not
> think this way. I can try this out if no objections.

Again, this struct better have "versioning" info because the moment your
fw people change it in some future platform, this code needs touching
again.

It probably would need touching even with the offsets if those offsets
change but at least not having it adhere to some slow-moving spec is
probably easier in case they wanna add/change fields.

So Smita, you probably should talk to fw people about how stable that
layout at ctx_info + 1 is going to be wrt future platforms so that
we make sure we only access the correct offsets, now and on future
platforms.

Thx.
Punit Agrawal Sept. 25, 2020, 12:54 a.m. UTC | #9
Borislav Petkov <bp@alien8.de> writes:

> On Thu, Sep 24, 2020 at 12:23:27PM -0500, Smita Koralahalli Channabasappa wrote:
>> > Even though it's not defined in the UEFI spec, it doesn't mean a
>> > structure definition cannot be created.
>
> Created for what? That structure better have a big fat comment above it, what
> firmware generates its layout.

Maybe I could've used a better choice of words - I meant to define a
structure with meaningful member names to replace the *(ptr + i)
accesses in the patch.

The requirement for documenting the record layout doesn't change -
whether using raw pointer arithmetic vs a structure definition.

>> > After all, the patch is relying on some guarantee of the meaning of
>> > the values and their ordering.
>
> AFAICT, this looks like an ad-hoc definition and the moment they change
> it in some future revision, that struct of yours becomes invalid so we'd
> need to add another one.

If there's no spec backing the current layout, then it'll indeed be an
ad-hoc definition of a structure in the kernel. But considering that
it's part of firmware / OS interface for an important part of the RAS
story I would hope that the code is based on a spec - having that
reference included would help maintainability.

Incompatible changes will indeed break the assumptions in the kernel and
code will need to be updated - regardless of the choice of kernel
implementation; pointer arithmetic, structure definition - ad-hoc or
spec provided.

Having versioning will allow running older kernels on newer hardware and
vice versa - but I don't see why that is important only when using a
structure based access.

>
>> > If the patch is relying on the definitions in the SMCA spec it is a good
>
> Yes, what SMCA spec is that?
>
>> > idea to reference it here - both for review and providing relevant
>> > context for future developers.
>> 
>> Okay, I agree the structure definition will make the code less arbitrary
>> and provides relevant context compared to pointer arithmetic. I did not
>> think this way. I can try this out if no objections.
>
> Again, this struct better have "versioning" info because the moment your
> fw people change it in some future platform, this code needs touching
> again.
>
> It probably would need touching even with the offsets if those offsets
> change but at least not having it adhere to some slow-moving spec is
> probably easier in case they wanna add/change fields.
>
> So Smita, you probably should talk to fw people about how stable that
> layout at ctx_info + 1 is going to be wrt future platforms so that
> we make sure we only access the correct offsets, now and on future
> platforms.
>
> Thx.
Borislav Petkov Sept. 25, 2020, 7:07 a.m. UTC | #10
On Fri, Sep 25, 2020 at 09:54:06AM +0900, Punit Agrawal wrote:
> Maybe I could've used a better choice of words - I meant to define a
> structure with meaningful member names to replace the *(ptr + i)
> accesses in the patch.

I know exactly what you mean - I had the same question during last
review.
Yazen Ghannam Sept. 25, 2020, 4:19 p.m. UTC | #11
On Fri, Sep 25, 2020 at 09:54:06AM +0900, Punit Agrawal wrote:
> Borislav Petkov <bp@alien8.de> writes:
> 
> > On Thu, Sep 24, 2020 at 12:23:27PM -0500, Smita Koralahalli Channabasappa wrote:
> >> > Even though it's not defined in the UEFI spec, it doesn't mean a
> >> > structure definition cannot be created.
> >
> > Created for what? That structure better have a big fat comment above it, what
> > firmware generates its layout.
> 
> Maybe I could've used a better choice of words - I meant to define a
> structure with meaningful member names to replace the *(ptr + i)
> accesses in the patch.
> 
> The requirement for documenting the record layout doesn't change -
> whether using raw pointer arithmetic vs a structure definition.
> 
> >> > After all, the patch is relying on some guarantee of the meaning of
> >> > the values and their ordering.
> >
> > AFAICT, this looks like an ad-hoc definition and the moment they change
> > it in some future revision, that struct of yours becomes invalid so we'd
> > need to add another one.
> 
> If there's no spec backing the current layout, then it'll indeed be an
> ad-hoc definition of a structure in the kernel. But considering that
> it's part of firmware / OS interface for an important part of the RAS
> story I would hope that the code is based on a spec - having that
> reference included would help maintainability.
> 
> Incompatible changes will indeed break the assumptions in the kernel and
> code will need to be updated - regardless of the choice of kernel
> implementation; pointer arithmetic, structure definition - ad-hoc or
> spec provided.
> 
> Having versioning will allow running older kernels on newer hardware and
> vice versa - but I don't see why that is important only when using a
> structure based access.
>

There is no versioning option for the x86 context info structure in the
UEFI spec, so I don't think there'd be a clean way to include version
information.

The format of the data in the context info is not totally ad-hoc, and it
does follow the UEFI spec. The "Register Array" field is raw data. This
may follow one of the predefined formats in the UEFI spec like the "X64
Register State", etc. Or, in the case of MSR and Memory Mapped
Registers, this is a raw dump of the registers starting from the address
shown in the structure. The two values that can be changed are the
starting address and the array size. These two together provide a window
to the registers. The registers are fixed, so a single context info
struture should include a single contiguous range of registers. Multiple
context info structures can be provided to include registers from
different, non-contiguous ranges.

This patch is checking if an MSR context info structure lines up with
the MCAX register space used on Scalable MCA systems. This register
space is defined in the AMD Processor Programming Reference for various
products. This is considered a hardware feature extension, so the
existing register layout won't change though new registers may be added.
A layout change would require moving to another register space which is
what happened going from legacy MCA (starting at address 0x400) to MCAX
(starting at address 0xC0002000) registers.

The only two things firmware can change are from what address does the
info start and where does the info end. So the implementation-specific
details here are that currently the starting address is MCA_STATUS (in
MCAX space) for a bank and the remaining info includes the other MCA
registers for this bank.

So I think the kernel can be strict with this format, i.e. the two
variables match what we're looking for. This patch already has a check
on the starting address. It should also include a check that "Register
Array Size" is large enough to include all the registers we want to
extract. If the format doesn't match, then we fall back to a raw dump
of the data like we have today.

Or the kernel can be more flexible and try to find the window of
registers based on the starting address. I think this is really
open-ended though.

Does this sound reasonable?

Thanks,
Yazen
Borislav Petkov Sept. 25, 2020, 4:27 p.m. UTC | #12
On Fri, Sep 25, 2020 at 11:19:40AM -0500, Yazen Ghannam wrote:
> This patch is checking if an MSR context info structure lines up with
> the MCAX register space used on Scalable MCA systems. This register
> space is defined in the AMD Processor Programming Reference for various
> products. This is considered a hardware feature extension, so the
> existing register layout won't change though new registers may be added.

Yeah, and exactly for that there's no need to add a special structure
because if new registers get added, you'd need to add a new struct
definition too. Let's keep it simple and do the offsets thing.

Thx.
Punit Agrawal Sept. 28, 2020, 8:06 a.m. UTC | #13
Yazen Ghannam <yazen.ghannam@amd.com> writes:

> On Fri, Sep 25, 2020 at 09:54:06AM +0900, Punit Agrawal wrote:
>> Borislav Petkov <bp@alien8.de> writes:
>> 
>> > On Thu, Sep 24, 2020 at 12:23:27PM -0500, Smita Koralahalli Channabasappa wrote:
>> >> > Even though it's not defined in the UEFI spec, it doesn't mean a
>> >> > structure definition cannot be created.
>> >
>> > Created for what? That structure better have a big fat comment above it, what
>> > firmware generates its layout.
>> 
>> Maybe I could've used a better choice of words - I meant to define a
>> structure with meaningful member names to replace the *(ptr + i)
>> accesses in the patch.
>> 
>> The requirement for documenting the record layout doesn't change -
>> whether using raw pointer arithmetic vs a structure definition.
>> 
>> >> > After all, the patch is relying on some guarantee of the meaning of
>> >> > the values and their ordering.
>> >
>> > AFAICT, this looks like an ad-hoc definition and the moment they change
>> > it in some future revision, that struct of yours becomes invalid so we'd
>> > need to add another one.
>> 
>> If there's no spec backing the current layout, then it'll indeed be an
>> ad-hoc definition of a structure in the kernel. But considering that
>> it's part of firmware / OS interface for an important part of the RAS
>> story I would hope that the code is based on a spec - having that
>> reference included would help maintainability.
>> 
>> Incompatible changes will indeed break the assumptions in the kernel and
>> code will need to be updated - regardless of the choice of kernel
>> implementation; pointer arithmetic, structure definition - ad-hoc or
>> spec provided.
>> 
>> Having versioning will allow running older kernels on newer hardware and
>> vice versa - but I don't see why that is important only when using a
>> structure based access.
>>
>
> There is no versioning option for the x86 context info structure in the
> UEFI spec, so I don't think there'd be a clean way to include version
> information.
>
> The format of the data in the context info is not totally ad-hoc, and it
> does follow the UEFI spec. The "Register Array" field is raw data. This
> may follow one of the predefined formats in the UEFI spec like the "X64
> Register State", etc. Or, in the case of MSR and Memory Mapped
> Registers, this is a raw dump of the registers starting from the address
> shown in the structure. The two values that can be changed are the
> starting address and the array size. These two together provide a window
> to the registers. The registers are fixed, so a single context info
> struture should include a single contiguous range of registers. Multiple
> context info structures can be provided to include registers from
> different, non-contiguous ranges.
>
> This patch is checking if an MSR context info structure lines up with
> the MCAX register space used on Scalable MCA systems. This register
> space is defined in the AMD Processor Programming Reference for various
> products. This is considered a hardware feature extension, so the
> existing register layout won't change though new registers may be added.
> A layout change would require moving to another register space which is
> what happened going from legacy MCA (starting at address 0x400) to MCAX
> (starting at address 0xC0002000) registers.

Thanks for the SMCA related background.
>
> The only two things firmware can change are from what address does the
> info start and where does the info end. So the implementation-specific
> details here are that currently the starting address is MCA_STATUS (in
> MCAX space) for a bank and the remaining info includes the other MCA
> registers for this bank.
>
> So I think the kernel can be strict with this format, i.e. the two
> variables match what we're looking for. This patch already has a check
> on the starting address. It should also include a check that "Register
> Array Size" is large enough to include all the registers we want to
> extract. If the format doesn't match, then we fall back to a raw dump
> of the data like we have today.
>
> Or the kernel can be more flexible and try to find the window of
> registers based on the starting address. I think this is really
> open-ended though.

I think I understand the hesitancy here if the firmware can arbitrarily
move the starting address. Though I hope that doesn't happen as it would
break the feature introduced in $SUBJECT.

The way I read the code / spec led me to believe that the MSR context
info records in the SMCA space are just encoding the layout of MC Bank
registers[0] and making it explicit can only help.

But Boris seems to think the current approach is good enough. So no
objections from me.

Thanks,
Punit

[0] AMD Processor Programming Reference for Family 17H, Sec 3.1.5
diff mbox series

Patch

diff --git a/arch/x86/include/asm/acpi.h b/arch/x86/include/asm/acpi.h
index 6d2df1ee427b..65064d9f7fa6 100644
--- a/arch/x86/include/asm/acpi.h
+++ b/arch/x86/include/asm/acpi.h
@@ -159,6 +159,8 @@  static inline u64 x86_default_get_root_pointer(void)
 extern int x86_acpi_numa_init(void);
 #endif /* CONFIG_ACPI_NUMA */
 
+struct cper_ia_proc_ctx;
+
 #ifdef CONFIG_ACPI_APEI
 static inline pgprot_t arch_apei_get_mem_attribute(phys_addr_t addr)
 {
@@ -177,6 +179,15 @@  static inline pgprot_t arch_apei_get_mem_attribute(phys_addr_t addr)
 	 */
 	return PAGE_KERNEL_NOENC;
 }
+
+int arch_apei_report_x86_error(struct cper_ia_proc_ctx *ctx_info,
+			       u64 lapic_id);
+#else
+static inline int arch_apei_report_x86_error(struct cper_ia_proc_ctx *ctx_info,
+					     u64 lapic_id)
+{
+	return -EINVAL;
+}
 #endif
 
 #define ACPI_TABLE_UPGRADE_MAX_PHYS (max_low_pfn_mapped << PAGE_SHIFT)
diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h
index 109af5c7f515..d07bd635acfd 100644
--- a/arch/x86/include/asm/mce.h
+++ b/arch/x86/include/asm/mce.h
@@ -173,17 +173,22 @@  extern void mce_unregister_decode_chain(struct notifier_block *nb);
 #include <linux/atomic.h>
 
 extern int mce_p5_enabled;
+struct cper_ia_proc_ctx;
 
 #ifdef CONFIG_X86_MCE
 int mcheck_init(void);
 void mcheck_cpu_init(struct cpuinfo_x86 *c);
 void mcheck_cpu_clear(struct cpuinfo_x86 *c);
 void mcheck_vendor_init_severity(void);
+int apei_smca_report_x86_error(struct cper_ia_proc_ctx *ctx_info,
+			       u64 lapic_id);
 #else
 static inline int mcheck_init(void) { return 0; }
 static inline void mcheck_cpu_init(struct cpuinfo_x86 *c) {}
 static inline void mcheck_cpu_clear(struct cpuinfo_x86 *c) {}
 static inline void mcheck_vendor_init_severity(void) {}
+static inline int apei_smca_report_x86_error(struct cper_ia_proc_ctx *ctx_info,
+					     u64 lapic_id) { return -EINVAL; }
 #endif
 
 #ifdef CONFIG_X86_ANCIENT_MCE
diff --git a/arch/x86/kernel/acpi/apei.c b/arch/x86/kernel/acpi/apei.c
index c22fb55abcfd..0916f00a992e 100644
--- a/arch/x86/kernel/acpi/apei.c
+++ b/arch/x86/kernel/acpi/apei.c
@@ -43,3 +43,8 @@  void arch_apei_report_mem_error(int sev, struct cper_sec_mem_err *mem_err)
 	apei_mce_report_mem_error(sev, mem_err);
 #endif
 }
+
+int arch_apei_report_x86_error(struct cper_ia_proc_ctx *ctx_info, u64 lapic_id)
+{
+	return apei_smca_report_x86_error(ctx_info, lapic_id);
+}
diff --git a/arch/x86/kernel/cpu/mce/apei.c b/arch/x86/kernel/cpu/mce/apei.c
index af8d37962586..d4b3a2053eef 100644
--- a/arch/x86/kernel/cpu/mce/apei.c
+++ b/arch/x86/kernel/cpu/mce/apei.c
@@ -51,6 +51,55 @@  void apei_mce_report_mem_error(int severity, struct cper_sec_mem_err *mem_err)
 }
 EXPORT_SYMBOL_GPL(apei_mce_report_mem_error);
 
+/*
+ * The first expected register in the register layout of MCAX address space.
+ * The address defined must match with the first MSR address extracted from
+ * BERT which in SMCA systems is the bank's MCA_STATUS register.
+ *
+ * Note that the decoding of the raw MSR values in BERT is implementation
+ * specific and follows register offset order of MCAX address space.
+ */
+#define MASK_MCA_STATUS 0xC0002001
+
+int apei_smca_report_x86_error(struct cper_ia_proc_ctx *ctx_info, u64 lapic_id)
+{
+	const u64 *i_mce = ((const void *) (ctx_info + 1));
+	unsigned int cpu;
+	struct mce m;
+
+	if (!boot_cpu_has(X86_FEATURE_SMCA))
+		return -EINVAL;
+
+	if ((ctx_info->msr_addr & MASK_MCA_STATUS) != MASK_MCA_STATUS)
+		return -EINVAL;
+
+	mce_setup(&m);
+
+	m.extcpu = -1;
+	m.socketid = -1;
+
+	for_each_possible_cpu(cpu) {
+		if (cpu_data(cpu).initial_apicid == lapic_id) {
+			m.extcpu = cpu;
+			m.socketid = cpu_data(m.extcpu).phys_proc_id;
+			break;
+		}
+	}
+
+	m.apicid = lapic_id;
+	m.bank = (ctx_info->msr_addr >> 4) & 0xFF;
+	m.status = *i_mce;
+	m.addr = *(i_mce + 1);
+	m.misc = *(i_mce + 2);
+	/* Skipping MCA_CONFIG */
+	m.ipid = *(i_mce + 4);
+	m.synd = *(i_mce + 5);
+
+	mce_log(&m);
+
+	return 0;
+}
+
 #define CPER_CREATOR_MCE						\
 	GUID_INIT(0x75a574e3, 0x5052, 0x4b29, 0x8a, 0x8e, 0xbe, 0x2c,	\
 		  0x64, 0x90, 0xb8, 0x9d)
diff --git a/drivers/firmware/efi/cper-x86.c b/drivers/firmware/efi/cper-x86.c
index 2531de49f56c..2f2b0c431c18 100644
--- a/drivers/firmware/efi/cper-x86.c
+++ b/drivers/firmware/efi/cper-x86.c
@@ -2,6 +2,7 @@ 
 // Copyright (C) 2018, Advanced Micro Devices, Inc.
 
 #include <linux/cper.h>
+#include <linux/acpi.h>
 
 /*
  * We don't need a "CPER_IA" prefix since these are all locally defined.
@@ -347,9 +348,12 @@  void cper_print_proc_ia(const char *pfx, const struct cper_sec_proc_ia *proc)
 			       ctx_info->mm_reg_addr);
 		}
 
-		printk("%sRegister Array:\n", newpfx);
-		print_hex_dump(newpfx, "", DUMP_PREFIX_OFFSET, 16, groupsize,
-			       (ctx_info + 1), ctx_info->reg_arr_size, 0);
+		if (ctx_info->reg_ctx_type != CTX_TYPE_MSR ||
+		    arch_apei_report_x86_error(ctx_info, proc->lapic_id)) {
+			printk("%sRegister Array:\n", newpfx);
+			print_hex_dump(newpfx, "", DUMP_PREFIX_OFFSET, 16, groupsize,
+				       (ctx_info + 1), ctx_info->reg_arr_size, 0);
+		}
 
 		ctx_info = (struct cper_ia_proc_ctx *)((long)ctx_info + size);
 	}