diff mbox series

[v2,01/16] x86/mce: Define mce_setup() helpers for common and per-CPU fields

Message ID 20240404151359.47970-2-yazen.ghannam@amd.com (mailing list archive)
State New
Headers show
Series MCA Updates | expand

Commit Message

Yazen Ghannam April 4, 2024, 3:13 p.m. UTC
Generally, MCA information for an error is gathered on the CPU that
reported the error. In this case, CPU-specific information from the
running CPU will be correct.

However, this will be incorrect if the MCA information is gathered while
running on a CPU that didn't report the error. One example is creating
an MCA record using mce_setup() for errors reported from ACPI.

Split mce_setup() so that there is a helper function to gather common,
i.e. not CPU-specific, information and another helper for CPU-specific
information.

Leave mce_setup() defined as-is for the common case when running on the
reporting CPU.

Get MCG_CAP in the global helper even though the register is per-CPU.
This value is not already cached per-CPU like other values. And it does
not assist with any per-CPU decoding or handling.

Signed-off-by: Yazen Ghannam <yazen.ghannam@amd.com>
---

Notes:
    Link:
    https://lkml.kernel.org/r/20231118193248.1296798-3-yazen.ghannam@amd.com
    
    v1->v2:
    * Change helper names and pass-in CPU number (Boris)

 arch/x86/kernel/cpu/mce/core.c     | 34 ++++++++++++++++++++----------
 arch/x86/kernel/cpu/mce/internal.h |  2 ++
 2 files changed, 25 insertions(+), 11 deletions(-)

Comments

Borislav Petkov April 16, 2024, 10:02 a.m. UTC | #1
On Thu, Apr 04, 2024 at 10:13:44AM -0500, Yazen Ghannam wrote:
> diff --git a/arch/x86/kernel/cpu/mce/core.c b/arch/x86/kernel/cpu/mce/core.c
> index b5cc557cfc37..7a857b33f515 100644
> --- a/arch/x86/kernel/cpu/mce/core.c
> +++ b/arch/x86/kernel/cpu/mce/core.c
> @@ -117,20 +117,32 @@ static struct irq_work mce_irq_work;
>   */
>  BLOCKING_NOTIFIER_HEAD(x86_mce_decoder_chain);
>  
> -/* Do initial initialization of a struct mce */
> -void mce_setup(struct mce *m)
> +void mce_setup_common(struct mce *m)

Since we're touching this...

mce_setup() is a perfectly wrong name for what it does. So let's clean
it up. Diff on top below.

* mce_prep_record() - the name says what the function does.

* mce_prep_record_per_cpu() - "per_cpu" as this is a common kernel
concept and we do use per_cpu data in there.

Please do this in two patches:

- the first one renames mce_setup() only without adding the additional
  functionality

- the second one does the split

Thx.

diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h
index dfd2e9699bd7..491f3d78c46a 100644
--- a/arch/x86/include/asm/mce.h
+++ b/arch/x86/include/asm/mce.h
@@ -221,7 +221,7 @@ static inline int apei_smca_report_x86_error(struct cper_ia_proc_ctx *ctx_info,
 					     u64 lapic_id) { return -EINVAL; }
 #endif
 
-void mce_setup(struct mce *m);
+void mce_prep_record(struct mce *m);
 void mce_log(struct mce *m);
 DECLARE_PER_CPU(struct device *, mce_device);
 
diff --git a/arch/x86/kernel/cpu/mce/amd.c b/arch/x86/kernel/cpu/mce/amd.c
index 9a0133ef7e20..14bf8c232e45 100644
--- a/arch/x86/kernel/cpu/mce/amd.c
+++ b/arch/x86/kernel/cpu/mce/amd.c
@@ -780,7 +780,7 @@ static void __log_error(unsigned int bank, u64 status, u64 addr, u64 misc)
 {
 	struct mce m;
 
-	mce_setup(&m);
+	mce_prep_record(&m);
 
 	m.status = status;
 	m.misc   = misc;
diff --git a/arch/x86/kernel/cpu/mce/apei.c b/arch/x86/kernel/cpu/mce/apei.c
index 7f7309ff67d0..8f509c8a4e98 100644
--- a/arch/x86/kernel/cpu/mce/apei.c
+++ b/arch/x86/kernel/cpu/mce/apei.c
@@ -44,7 +44,7 @@ void apei_mce_report_mem_error(int severity, struct cper_sec_mem_err *mem_err)
 	else
 		lsb = PAGE_SHIFT;
 
-	mce_setup(&m);
+	mce_prep_record(&m);
 	m.bank = -1;
 	/* Fake a memory read error with unknown channel */
 	m.status = MCI_STATUS_VAL | MCI_STATUS_EN | MCI_STATUS_ADDRV | MCI_STATUS_MISCV | 0x9f;
@@ -97,7 +97,7 @@ int apei_smca_report_x86_error(struct cper_ia_proc_ctx *ctx_info, u64 lapic_id)
 	if (ctx_info->reg_arr_size < 48)
 		return -EINVAL;
 
-	mce_setup(&m);
+	mce_prep_record(&m);
 
 	m.extcpu = -1;
 	m.socketid = -1;
diff --git a/arch/x86/kernel/cpu/mce/core.c b/arch/x86/kernel/cpu/mce/core.c
index c0ce2de7fb51..a89508327b0d 100644
--- a/arch/x86/kernel/cpu/mce/core.c
+++ b/arch/x86/kernel/cpu/mce/core.c
@@ -117,7 +117,7 @@ static struct irq_work mce_irq_work;
  */
 BLOCKING_NOTIFIER_HEAD(x86_mce_decoder_chain);
 
-void mce_setup_common(struct mce *m)
+void mce_prep_record_common(struct mce *m)
 {
 	memset(m, 0, sizeof(struct mce));
 
@@ -128,21 +128,21 @@ void mce_setup_common(struct mce *m)
 	m->time		= __ktime_get_real_seconds();
 }
 
-void mce_setup_for_cpu(unsigned int cpu, struct mce *m)
+void mce_prep_record_per_cpu(unsigned int cpu, struct mce *m)
 {
-	m->cpu			= cpu;
-	m->extcpu		= cpu;
-	m->apicid		= cpu_data(m->extcpu).topo.initial_apicid;
-	m->microcode		= cpu_data(m->extcpu).microcode;
-	m->ppin			= cpu_data(m->extcpu).ppin;
-	m->socketid		= cpu_data(m->extcpu).topo.pkg_id;
+	m->cpu		= cpu;
+	m->extcpu	= cpu;
+	m->apicid	= cpu_data(m->extcpu).topo.initial_apicid;
+	m->microcode	= cpu_data(m->extcpu).microcode;
+	m->ppin		= cpu_data(m->extcpu).ppin;
+	m->socketid	= cpu_data(m->extcpu).topo.pkg_id;
 }
 
 /* Do initial initialization of a struct mce */
-void mce_setup(struct mce *m)
+void mce_prep_record(struct mce *m)
 {
-	mce_setup_common(m);
-	mce_setup_for_cpu(smp_processor_id(), m);
+	mce_prep_record_common(m);
+	mce_prep_record_per_cpu(smp_processor_id(), m);
 }
 
 DEFINE_PER_CPU(struct mce, injectm);
@@ -448,11 +448,11 @@ static noinstr void mce_wrmsrl(u32 msr, u64 v)
 static noinstr void mce_gather_info(struct mce *m, struct pt_regs *regs)
 {
 	/*
-	 * Enable instrumentation around mce_setup() which calls external
+	 * Enable instrumentation around mce_prep_record() which calls external
 	 * facilities.
 	 */
 	instrumentation_begin();
-	mce_setup(m);
+	mce_prep_record(m);
 	instrumentation_end();
 
 	m->mcgstatus = mce_rdmsrl(MSR_IA32_MCG_STATUS);
diff --git a/arch/x86/kernel/cpu/mce/internal.h b/arch/x86/kernel/cpu/mce/internal.h
index e86e53695828..43c7f3b71df5 100644
--- a/arch/x86/kernel/cpu/mce/internal.h
+++ b/arch/x86/kernel/cpu/mce/internal.h
@@ -261,8 +261,8 @@ enum mca_msr {
 
 /* Decide whether to add MCE record to MCE event pool or filter it out. */
 extern bool filter_mce(struct mce *m);
-void mce_setup_common(struct mce *m);
-void mce_setup_for_cpu(unsigned int cpu, struct mce *m);
+void mce_prep_record_common(struct mce *m);
+void mce_prep_record_per_cpu(unsigned int cpu, struct mce *m);
 
 #ifdef CONFIG_X86_MCE_AMD
 extern bool amd_filter_mce(struct mce *m);
Yazen Ghannam April 17, 2024, 1:50 p.m. UTC | #2
On 4/16/24 06:02, Borislav Petkov wrote:
> On Thu, Apr 04, 2024 at 10:13:44AM -0500, Yazen Ghannam wrote:
>> diff --git a/arch/x86/kernel/cpu/mce/core.c b/arch/x86/kernel/cpu/mce/core.c
>> index b5cc557cfc37..7a857b33f515 100644
>> --- a/arch/x86/kernel/cpu/mce/core.c
>> +++ b/arch/x86/kernel/cpu/mce/core.c
>> @@ -117,20 +117,32 @@ static struct irq_work mce_irq_work;
>>    */
>>   BLOCKING_NOTIFIER_HEAD(x86_mce_decoder_chain);
>>   
>> -/* Do initial initialization of a struct mce */
>> -void mce_setup(struct mce *m)
>> +void mce_setup_common(struct mce *m)
> 
> Since we're touching this...
> 
> mce_setup() is a perfectly wrong name for what it does. So let's clean
> it up. Diff on top below.
> 
> * mce_prep_record() - the name says what the function does.
> 
> * mce_prep_record_per_cpu() - "per_cpu" as this is a common kernel
> concept and we do use per_cpu data in there.
> 
> Please do this in two patches:
> 
> - the first one renames mce_setup() only without adding the additional
>    functionality
> 
> - the second one does the split
> 
> Thx.
>

Okay, will do.

Should I send another revision of this entire set? Or should I split out
the mce_setup() patches?

Thanks,
Yazen
Borislav Petkov April 22, 2024, 8:13 a.m. UTC | #3
On Wed, Apr 17, 2024 at 09:50:58AM -0400, Yazen Ghannam wrote:
> Should I send another revision of this entire set? Or should I split out
> the mce_setup() patches?

I leave it up to you.

If it makes sense to keep it all together then wait for me to go through
the rest first and send a whole new set or if you want to break this
out, that's fine too.

Thx.
diff mbox series

Patch

diff --git a/arch/x86/kernel/cpu/mce/core.c b/arch/x86/kernel/cpu/mce/core.c
index b5cc557cfc37..7a857b33f515 100644
--- a/arch/x86/kernel/cpu/mce/core.c
+++ b/arch/x86/kernel/cpu/mce/core.c
@@ -117,20 +117,32 @@  static struct irq_work mce_irq_work;
  */
 BLOCKING_NOTIFIER_HEAD(x86_mce_decoder_chain);
 
-/* Do initial initialization of a struct mce */
-void mce_setup(struct mce *m)
+void mce_setup_common(struct mce *m)
 {
 	memset(m, 0, sizeof(struct mce));
-	m->cpu = m->extcpu = smp_processor_id();
+
+	m->cpuid	= cpuid_eax(1);
+	m->cpuvendor	= boot_cpu_data.x86_vendor;
+	m->mcgcap	= __rdmsr(MSR_IA32_MCG_CAP);
 	/* need the internal __ version to avoid deadlocks */
-	m->time = __ktime_get_real_seconds();
-	m->cpuvendor = boot_cpu_data.x86_vendor;
-	m->cpuid = cpuid_eax(1);
-	m->socketid = cpu_data(m->extcpu).topo.pkg_id;
-	m->apicid = cpu_data(m->extcpu).topo.initial_apicid;
-	m->mcgcap = __rdmsr(MSR_IA32_MCG_CAP);
-	m->ppin = cpu_data(m->extcpu).ppin;
-	m->microcode = boot_cpu_data.microcode;
+	m->time		= __ktime_get_real_seconds();
+}
+
+void mce_setup_for_cpu(unsigned int cpu, struct mce *m)
+{
+	m->cpu			= cpu;
+	m->extcpu		= cpu;
+	m->apicid		= cpu_data(m->extcpu).topo.initial_apicid;
+	m->microcode		= cpu_data(m->extcpu).microcode;
+	m->ppin			= cpu_data(m->extcpu).ppin;
+	m->socketid		= cpu_data(m->extcpu).topo.pkg_id;
+}
+
+/* Do initial initialization of a struct mce */
+void mce_setup(struct mce *m)
+{
+	mce_setup_common(m);
+	mce_setup_for_cpu(smp_processor_id(), m);
 }
 
 DEFINE_PER_CPU(struct mce, injectm);
diff --git a/arch/x86/kernel/cpu/mce/internal.h b/arch/x86/kernel/cpu/mce/internal.h
index 01f8f03969e6..e86e53695828 100644
--- a/arch/x86/kernel/cpu/mce/internal.h
+++ b/arch/x86/kernel/cpu/mce/internal.h
@@ -261,6 +261,8 @@  enum mca_msr {
 
 /* Decide whether to add MCE record to MCE event pool or filter it out. */
 extern bool filter_mce(struct mce *m);
+void mce_setup_common(struct mce *m);
+void mce_setup_for_cpu(unsigned int cpu, struct mce *m);
 
 #ifdef CONFIG_X86_MCE_AMD
 extern bool amd_filter_mce(struct mce *m);