diff mbox series

[v2,4/4] x86/mce: Add Zhaoxin LMCE support

Message ID 5f4f8dee1fb24d38aa0ee136c5e98c72@zhaoxin.com (mailing list archive)
State New, archived
Headers show
Series [v2,1/4] x86/mce: Add Zhaoxin MCE support | expand

Commit Message

Tony W Wang-oc Sept. 10, 2019, 8:20 a.m. UTC
Newer Zhaoxin CPUs support LMCE that is compatible with Intel's
"Machine-Check Architecture", so add support for Zhaoxin LMCE
in mce/core.c.

Signed-off-by: Tony W Wang-oc <TonyWWang-oc@zhaoxin.com>
---
v1->v2:
 - Fix redefinition of "mce_zhaoxin_feature_clear"

 arch/x86/include/asm/mce.h     |  2 ++
 arch/x86/kernel/cpu/mce/core.c | 25 +++++++++++++++++++++++--
 2 files changed, 25 insertions(+), 2 deletions(-)

Comments

Borislav Petkov Sept. 10, 2019, 12:36 p.m. UTC | #1
On Tue, Sep 10, 2019 at 08:20:07AM +0000, Tony W Wang-oc wrote:
> Zhaoxin newer CPUs support LMCE that compatible with Intel's
> "Machine-Check Architecture", so add support for Zhaoxin LMCE
> in mce/core.c.
> 
> Signed-off-by: Tony W Wang-oc <TonyWWang-oc@zhaoxin.com>
> ---
> v1->v2:
>  - Fix redefinition of "mce_zhaoxin_feature_clear"
> 
>  arch/x86/include/asm/mce.h     |  2 ++
>  arch/x86/kernel/cpu/mce/core.c | 25 +++++++++++++++++++++++--
>  2 files changed, 25 insertions(+), 2 deletions(-)
> 
> diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h
> index 0986a11..01840ec 100644
> --- a/arch/x86/include/asm/mce.h
> +++ b/arch/x86/include/asm/mce.h
> @@ -352,8 +352,10 @@ static inline void mce_hygon_feature_init(struct cpuinfo_x86 *c)	{ return mce_am
>  
>  #ifdef CONFIG_CPU_SUP_ZHAOXIN
>  void mce_zhaoxin_feature_init(struct cpuinfo_x86 *c);
> +void mce_zhaoxin_feature_clear(struct cpuinfo_x86 *c);
>  #else
>  static inline void mce_zhaoxin_feature_init(struct cpuinfo_x86 *c) { }
> +static inline void mce_zhaoxin_feature_clear(struct cpuinfo_x86 *c) { }
>  #endif
>  
>  #endif /* _ASM_X86_MCE_H */
> diff --git a/arch/x86/kernel/cpu/mce/core.c b/arch/x86/kernel/cpu/mce/core.c
> index 8a36833..595d3af7ac 100644
> --- a/arch/x86/kernel/cpu/mce/core.c
> +++ b/arch/x86/kernel/cpu/mce/core.c
> @@ -1129,6 +1129,17 @@ static bool __mc_check_crashing_cpu(int cpu)
>  		u64 mcgstatus;
>  
>  		mcgstatus = mce_rdmsrl(MSR_IA32_MCG_STATUS);
> +
> +		if (boot_cpu_data.x86_vendor == X86_VENDOR_ZHAOXIN) {
> +			if (mcgstatus & MCG_STATUS_LMCES) {
> +				return false;
> +			} else {
> +				if (mcgstatus & MCG_STATUS_RIPV)
> +					mce_wrmsrl(MSR_IA32_MCG_STATUS, 0);
> +				return true;
> +			}
> +		}

Simplify:

                if (boot_cpu_data.x86_vendor == X86_VENDOR_ZHAOXIN) {
                        if (mcgstatus & MCG_STATUS_LMCES)
                                return false;
		}

		<--- Now here, on your CPUs which don't set MCG_STATUS_LMCES,
it will fall back to clearing the status register, i.e., what you do in the else
clause.
Tony W Wang-oc Sept. 11, 2019, 10:13 a.m. UTC | #2
On Tue, Sep 10, 2019, Borislav Petkov wrote:
>On Tue, Sep 10, 2019 at 08:20:07AM +0000, Tony W Wang-oc wrote:
>> Zhaoxin newer CPUs support LMCE that compatible with Intel's
>> "Machine-Check Architecture", so add support for Zhaoxin LMCE
>> in mce/core.c.
>>
>> Signed-off-by: Tony W Wang-oc <TonyWWang-oc@zhaoxin.com>
>> ---
>> v1->v2:
>>  - Fix redefinition of "mce_zhaoxin_feature_clear"
>>
>>  arch/x86/include/asm/mce.h     |  2 ++
>>  arch/x86/kernel/cpu/mce/core.c | 25 +++++++++++++++++++++++--
>>  2 files changed, 25 insertions(+), 2 deletions(-)
>>
>> diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h
>> index 0986a11..01840ec 100644
>> --- a/arch/x86/include/asm/mce.h
>> +++ b/arch/x86/include/asm/mce.h
>> @@ -352,8 +352,10 @@ static inline void mce_hygon_feature_init(struct
>cpuinfo_x86 *c)	{ return mce_am
>>
>>  #ifdef CONFIG_CPU_SUP_ZHAOXIN
>>  void mce_zhaoxin_feature_init(struct cpuinfo_x86 *c);
>> +void mce_zhaoxin_feature_clear(struct cpuinfo_x86 *c);
>>  #else
>>  static inline void mce_zhaoxin_feature_init(struct cpuinfo_x86 *c) { }
>> +static inline void mce_zhaoxin_feature_clear(struct cpuinfo_x86 *c) { }
>>  #endif
>>
>>  #endif /* _ASM_X86_MCE_H */
>> diff --git a/arch/x86/kernel/cpu/mce/core.c b/arch/x86/kernel/cpu/mce/core.c
>> index 8a36833..595d3af7ac 100644
>> --- a/arch/x86/kernel/cpu/mce/core.c
>> +++ b/arch/x86/kernel/cpu/mce/core.c
>> @@ -1129,6 +1129,17 @@ static bool __mc_check_crashing_cpu(int cpu)
>>  		u64 mcgstatus;
>>
>>  		mcgstatus = mce_rdmsrl(MSR_IA32_MCG_STATUS);
>> +
>> +		if (boot_cpu_data.x86_vendor == X86_VENDOR_ZHAOXIN) {
>> +			if (mcgstatus & MCG_STATUS_LMCES) {
>> +				return false;
>> +			} else {
>> +				if (mcgstatus & MCG_STATUS_RIPV)
>> +					mce_wrmsrl(MSR_IA32_MCG_STATUS, 0);
>> +				return true;
>> +			}
>> +		}
>
>Simplify:
>
>                if (boot_cpu_data.x86_vendor == X86_VENDOR_ZHAOXIN) {
>                        if (mcgstatus & MCG_STATUS_LMCES)
>                                return false;
>		}
>
>		<--- Now here, on your CPUs which don't set MCG_STATUS_LMCES,
>it will fallback to clearing the status register. I.e., what you do in the else
>clause.
>

On Zhaoxin CPUs, when MCG_STATUS_LMCES is not set, we want to return true
regardless of the MCG_STATUS_RIPV setting, to avoid a rendezvous timeout if
this CPU is offline or crashing_cpu was set.

Without my else clause, the original code returns true only when
MCG_STATUS_RIPV is set.

For better readability, I will add a comment and change the coding style in v3.

Sincerely
TonyWWang-oc
Tony Luck Sept. 12, 2019, 6:48 p.m. UTC | #3
On Tue, Sep 10, 2019 at 08:20:07AM +0000, Tony W Wang-oc wrote:
> Zhaoxin newer CPUs support LMCE that compatible with Intel's
> "Machine-Check Architecture", so add support for Zhaoxin LMCE
> in mce/core.c.

Your mailer included a header:

	Content-Language: zh-CN

which seems to have made my e-mail client (mutt) very
confused when saving some parts of this series.

Unsure whether that is a problem for you to fix, or an issue
at my end. Posting here to see if it rings any bells for
someone else.

-Tony
diff mbox series

Patch

diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h
index 0986a11..01840ec 100644
--- a/arch/x86/include/asm/mce.h
+++ b/arch/x86/include/asm/mce.h
@@ -352,8 +352,10 @@  static inline void mce_hygon_feature_init(struct cpuinfo_x86 *c)	{ return mce_am
 
 #ifdef CONFIG_CPU_SUP_ZHAOXIN
 void mce_zhaoxin_feature_init(struct cpuinfo_x86 *c);
+void mce_zhaoxin_feature_clear(struct cpuinfo_x86 *c);
 #else
 static inline void mce_zhaoxin_feature_init(struct cpuinfo_x86 *c) { }
+static inline void mce_zhaoxin_feature_clear(struct cpuinfo_x86 *c) { }
 #endif
 
 #endif /* _ASM_X86_MCE_H */
diff --git a/arch/x86/kernel/cpu/mce/core.c b/arch/x86/kernel/cpu/mce/core.c
index 8a36833..595d3af7ac 100644
--- a/arch/x86/kernel/cpu/mce/core.c
+++ b/arch/x86/kernel/cpu/mce/core.c
@@ -1129,6 +1129,17 @@  static bool __mc_check_crashing_cpu(int cpu)
 		u64 mcgstatus;
 
 		mcgstatus = mce_rdmsrl(MSR_IA32_MCG_STATUS);
+
+		if (boot_cpu_data.x86_vendor == X86_VENDOR_ZHAOXIN) {
+			if (mcgstatus & MCG_STATUS_LMCES) {
+				return false;
+			} else {
+				if (mcgstatus & MCG_STATUS_RIPV)
+					mce_wrmsrl(MSR_IA32_MCG_STATUS, 0);
+				return true;
+			}
+		}
+
 		if (mcgstatus & MCG_STATUS_RIPV) {
 			mce_wrmsrl(MSR_IA32_MCG_STATUS, 0);
 			return true;
@@ -1279,9 +1290,10 @@  void do_machine_check(struct pt_regs *regs, long error_code)
 
 	/*
 	 * Check if this MCE is signaled to only this logical processor,
-	 * on Intel only.
+	 * on Intel, Zhaoxin only.
 	 */
-	if (m.cpuvendor == X86_VENDOR_INTEL)
+	if (m.cpuvendor == X86_VENDOR_INTEL ||
+	    m.cpuvendor == X86_VENDOR_ZHAOXIN)
 		lmce = m.mcgstatus & MCG_STATUS_LMCES;
 
 	/*
@@ -1796,8 +1808,14 @@  void mce_zhaoxin_feature_init(struct cpuinfo_x86 *c)
 	}
 
 	intel_init_cmci();
+	intel_init_lmce();
 	mce_adjust_timer = cmci_intel_adjust_timer;
 }
+
+void mce_zhaoxin_feature_clear(struct cpuinfo_x86 *c)
+{
+	intel_clear_lmce();
+}
 #endif
 
 static void __mcheck_cpu_init_vendor(struct cpuinfo_x86 *c)
@@ -1836,6 +1854,9 @@  static void __mcheck_cpu_clear_vendor(struct cpuinfo_x86 *c)
 	case X86_VENDOR_INTEL:
 		mce_intel_feature_clear(c);
 		break;
+	case X86_VENDOR_ZHAOXIN:
+		mce_zhaoxin_feature_clear(c);
+		break;
 	default:
 		break;
 	}