[v4,02/11] xen/mce: allow mce_barrier_{enter,exit} to return without waiting

Message ID 20170626091625.19655-3-haozhong.zhang@intel.com (mailing list archive)
State New, archived

Commit Message

Haozhong Zhang June 26, 2017, 9:16 a.m. UTC
Add a 'nowait' argument to mce_barrier_{enter,exit}() to allow them to
return immediately without waiting for mce_barrier_{enter,exit}() on other
CPUs. This is useful when handling an LMCE, where mce_barrier_{enter,exit}()
are called on only one CPU.

Signed-off-by: Haozhong Zhang <haozhong.zhang@intel.com>
---
Cc: Jan Beulich <jbeulich@suse.com>
Cc: Andrew Cooper <andrew.cooper3@citrix.com>
---
 xen/arch/x86/cpu/mcheck/barrier.c | 12 ++++++------
 xen/arch/x86/cpu/mcheck/barrier.h | 12 ++++++++++--
 xen/arch/x86/cpu/mcheck/mce.c     | 20 ++++++++++----------
 3 files changed, 26 insertions(+), 18 deletions(-)
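
No caller passes true for 'nowait' in this patch itself; that happens later
in the series when the LMCE handling path is added. Purely as an illustrative
sketch (the 'lmce' flag and the function below are assumptions for
illustration, not part of this patch), such a path would use the new argument
roughly like this:

/*
 * Illustrative sketch only: how an LMCE-aware path might use 'nowait'.
 * The 'lmce' flag (whether the #MC was delivered to just this CPU) is
 * assumed here; it is not introduced by this patch.
 */
static void mce_softirq_lmce_sketch(bool lmce)
{
    /*
     * For an LMCE only this CPU took the #MC, so no other CPU will ever
     * reach the barrier and waiting would hang. Passing nowait = lmce
     * makes mce_barrier_enter/exit() return immediately in that case.
     */
    mce_barrier_enter(&mce_inside_bar, lmce);

    /* ... process this CPU's telemetry ... */

    mce_barrier_exit(&mce_inside_bar, lmce);
}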

Comments

Jan Beulich June 27, 2017, 6:38 a.m. UTC | #1
>>> Haozhong Zhang <haozhong.zhang@intel.com> 06/26/17 11:17 AM >>>
>Add a 'nowait' argument to mce_barrier_{enter,exit}() to allow them to
>return immediately without waiting for mce_barrier_{enter,exit}() on other
>CPUs. This is useful when handling an LMCE, where mce_barrier_{enter,exit}()
>are called on only one CPU.
>
>Signed-off-by: Haozhong Zhang <haozhong.zhang@intel.com>

The patch could have my ack in principle, but ...

>--- a/xen/arch/x86/cpu/mcheck/barrier.h
>+++ b/xen/arch/x86/cpu/mcheck/barrier.h
>@@ -32,6 +32,14 @@ void mce_barrier_init(struct mce_softirq_barrier *);
> void mce_barrier_dec(struct mce_softirq_barrier *);
>
> /*
>+ * If nowait is true, mce_barrier_enter/exit() will return immediately
>+ * without touching the barrier. It's used when handling a LMCE which
>+ * is received on only one CPU and thus does not invoke
>+ * mce_barrier_enter/exit() calls on all CPUs.
>+ *
>+ * If nowait is false, mce_barrier_enter/exit() will handle the given
>+ * barrier as below.
>+ *
>  * Increment the generation number and the value. The generation number
>  * is incremented when entering a barrier. This way, it can be checked
>  * on exit if a CPU is trying to re-enter the barrier. This can happen

... especially since the mce_broadcast check inside the functions is not
mentioned at all, I wonder whether it wouldn't be better for the callers
to now pass "!mce_broadcast" into the functions instead of "false". What
do you think? That then further makes me wonder whether the parameter
wouldn't be better inverted ("wait" instead of "nowait").

Jan
Haozhong Zhang June 29, 2017, 2 a.m. UTC | #2
On 06/27/17 00:38 -0600, Jan Beulich wrote:
> >>> Haozhong Zhang <haozhong.zhang@intel.com> 06/26/17 11:17 AM >>>
> >Add a 'nowait' argument to mce_barrier_{enter,exit}() to allow them to
> >return immediately without waiting for mce_barrier_{enter,exit}() on other
> >CPUs. This is useful when handling an LMCE, where mce_barrier_{enter,exit}()
> >are called on only one CPU.
> >
> >Signed-off-by: Haozhong Zhang <haozhong.zhang@intel.com>
> 
> The patch could have my ack in principle, but ...
> 
> >--- a/xen/arch/x86/cpu/mcheck/barrier.h
> >+++ b/xen/arch/x86/cpu/mcheck/barrier.h
> >@@ -32,6 +32,14 @@ void mce_barrier_init(struct mce_softirq_barrier *);
> > void mce_barrier_dec(struct mce_softirq_barrier *);
> >
> > /*
> >+ * If nowait is true, mce_barrier_enter/exit() will return immediately
> >+ * without touching the barrier. It's used when handling a LMCE which
> >+ * is received on only one CPU and thus does not invoke
> >+ * mce_barrier_enter/exit() calls on all CPUs.
> >+ *
> >+ * If nowait is false, mce_barrier_enter/exit() will handle the given
> >+ * barrier as below.
> >+ *
> >  * Increment the generation number and the value. The generation number
> >  * is incremented when entering a barrier. This way, it can be checked
> >  * on exit if a CPU is trying to re-enter the barrier. This can happen
> 
> ... especially since the mce_broadcast check inside the functions is not
> mentioned at all, I wonder whether it wouldn't be better for the callers
> to now pass "!mce_broadcast" into the functions instead of "false". What
> do you think? That then further makes me wonder whether the parameter
> wouldn't be better inverted ("wait" instead of "nowait").
> 

In that case, it's better to use "wait" and let the callers pass in
"mce_broadcast".

Haozhong
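
For reference, the direction agreed on above would change the interface
roughly as sketched below; this is only an illustration of the suggestion,
not the actual revised patch.

/*
 * Sketch only: the direction agreed above. The parameter is inverted
 * from 'nowait' to 'wait', and callers pass mce_broadcast explicitly,
 * so the broadcast check is visible at the call sites.
 */
void mce_barrier_enter(struct mce_softirq_barrier *bar, bool wait)
{
    if ( !wait )
        return;

    /* ... rendezvous logic exactly as in the current function ... */
}

/*
 * Broadcast #MC call sites would then read, e.g.:
 *     mce_barrier_enter(&mce_trap_bar, mce_broadcast);
 * while an LMCE-only path would pass false (or an inverted 'lmce' flag).
 */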

Patch

diff --git a/xen/arch/x86/cpu/mcheck/barrier.c b/xen/arch/x86/cpu/mcheck/barrier.c
index 5dce1fb9b9..0b3b09103d 100644
--- a/xen/arch/x86/cpu/mcheck/barrier.c
+++ b/xen/arch/x86/cpu/mcheck/barrier.c
@@ -16,11 +16,11 @@  void mce_barrier_dec(struct mce_softirq_barrier *bar)
     atomic_dec(&bar->val);
 }
 
-void mce_barrier_enter(struct mce_softirq_barrier *bar)
+void mce_barrier_enter(struct mce_softirq_barrier *bar, bool nowait)
 {
     int gen;
 
-    if (!mce_broadcast)
+    if ( !mce_broadcast || nowait )
         return;
     atomic_inc(&bar->ingen);
     gen = atomic_read(&bar->outgen);
@@ -34,11 +34,11 @@  void mce_barrier_enter(struct mce_softirq_barrier *bar)
     }
 }
 
-void mce_barrier_exit(struct mce_softirq_barrier *bar)
+void mce_barrier_exit(struct mce_softirq_barrier *bar, bool nowait)
 {
     int gen;
 
-    if ( !mce_broadcast )
+    if ( !mce_broadcast || nowait )
         return;
     atomic_inc(&bar->outgen);
     gen = atomic_read(&bar->ingen);
@@ -54,6 +54,6 @@  void mce_barrier_exit(struct mce_softirq_barrier *bar)
 
 void mce_barrier(struct mce_softirq_barrier *bar)
 {
-    mce_barrier_enter(bar);
-    mce_barrier_exit(bar);
+    mce_barrier_enter(bar, false);
+    mce_barrier_exit(bar, false);
 }
diff --git a/xen/arch/x86/cpu/mcheck/barrier.h b/xen/arch/x86/cpu/mcheck/barrier.h
index d3ccf8b15f..f6b4370945 100644
--- a/xen/arch/x86/cpu/mcheck/barrier.h
+++ b/xen/arch/x86/cpu/mcheck/barrier.h
@@ -32,6 +32,14 @@  void mce_barrier_init(struct mce_softirq_barrier *);
 void mce_barrier_dec(struct mce_softirq_barrier *);
 
 /*
+ * If nowait is true, mce_barrier_enter/exit() will return immediately
+ * without touching the barrier. It's used when handling a LMCE which
+ * is received on only one CPU and thus does not invoke
+ * mce_barrier_enter/exit() calls on all CPUs.
+ *
+ * If nowait is false, mce_barrier_enter/exit() will handle the given
+ * barrier as below.
+ *
  * Increment the generation number and the value. The generation number
  * is incremented when entering a barrier. This way, it can be checked
  * on exit if a CPU is trying to re-enter the barrier. This can happen
@@ -43,8 +51,8 @@  void mce_barrier_dec(struct mce_softirq_barrier *);
  * These barrier functions should always be paired, so that the
  * counter value will reach 0 again after all CPUs have exited.
  */
-void mce_barrier_enter(struct mce_softirq_barrier *);
-void mce_barrier_exit(struct mce_softirq_barrier *);
+void mce_barrier_enter(struct mce_softirq_barrier *, bool nowait);
+void mce_barrier_exit(struct mce_softirq_barrier *, bool nowait);
 
 void mce_barrier(struct mce_softirq_barrier *);
 
diff --git a/xen/arch/x86/cpu/mcheck/mce.c b/xen/arch/x86/cpu/mcheck/mce.c
index 54fd000aa0..1e0b03c38b 100644
--- a/xen/arch/x86/cpu/mcheck/mce.c
+++ b/xen/arch/x86/cpu/mcheck/mce.c
@@ -497,15 +497,15 @@  void mcheck_cmn_handler(const struct cpu_user_regs *regs)
     }
     mce_spin_unlock(&mce_logout_lock);
 
-    mce_barrier_enter(&mce_trap_bar);
+    mce_barrier_enter(&mce_trap_bar, false);
     if ( mctc != NULL && mce_urgent_action(regs, mctc))
         cpumask_set_cpu(smp_processor_id(), &mce_fatal_cpus);
-    mce_barrier_exit(&mce_trap_bar);
+    mce_barrier_exit(&mce_trap_bar, false);
 
     /*
      * Wait until everybody has processed the trap.
      */
-    mce_barrier_enter(&mce_trap_bar);
+    mce_barrier_enter(&mce_trap_bar, false);
     if (atomic_read(&severity_cpu) == smp_processor_id())
     {
         /* According to SDM, if no error bank found on any cpus,
@@ -524,16 +524,16 @@  void mcheck_cmn_handler(const struct cpu_user_regs *regs)
         atomic_set(&found_error, 0);
         atomic_set(&severity_cpu, -1);
     }
-    mce_barrier_exit(&mce_trap_bar);
+    mce_barrier_exit(&mce_trap_bar, false);
 
     /* Clear flags after above fatal check */
-    mce_barrier_enter(&mce_trap_bar);
+    mce_barrier_enter(&mce_trap_bar, false);
     gstatus = mca_rdmsr(MSR_IA32_MCG_STATUS);
     if ((gstatus & MCG_STATUS_MCIP) != 0) {
         mce_printk(MCE_CRITICAL, "MCE: Clear MCIP@ last step");
         mca_wrmsr(MSR_IA32_MCG_STATUS, 0);
     }
-    mce_barrier_exit(&mce_trap_bar);
+    mce_barrier_exit(&mce_trap_bar, false);
 
     raise_softirq(MACHINE_CHECK_SOFTIRQ);
 }
@@ -1703,7 +1703,7 @@  static void mce_softirq(void)
 
     mce_printk(MCE_VERBOSE, "CPU%d enter softirq\n", cpu);
 
-    mce_barrier_enter(&mce_inside_bar);
+    mce_barrier_enter(&mce_inside_bar, false);
 
     /*
      * Everybody is here. Now let's see who gets to do the
@@ -1716,10 +1716,10 @@  static void mce_softirq(void)
 
     atomic_set(&severity_cpu, cpu);
 
-    mce_barrier_enter(&mce_severity_bar);
+    mce_barrier_enter(&mce_severity_bar, false);
     if (!mctelem_has_deferred(cpu))
         atomic_set(&severity_cpu, cpu);
-    mce_barrier_exit(&mce_severity_bar);
+    mce_barrier_exit(&mce_severity_bar, false);
 
     /* We choose severity_cpu for further processing */
     if (atomic_read(&severity_cpu) == cpu) {
@@ -1740,7 +1740,7 @@  static void mce_softirq(void)
         }
     }
 
-    mce_barrier_exit(&mce_inside_bar);
+    mce_barrier_exit(&mce_inside_bar, false);
 }
 
 /* Machine Check owner judge algorithm: