diff mbox series

[RFC] PM / s2idle: Clear _TIF_POLLING_NRFLAG before suspend to idle

Message ID 20200615173611.15349-1-yu.c.chen@intel.com (mailing list archive)
State Superseded, archived
Headers show
Series [RFC] PM / s2idle: Clear _TIF_POLLING_NRFLAG before suspend to idle | expand

Commit Message

Chen Yu June 15, 2020, 5:36 p.m. UTC
Suspend to idle was found to not work on Goldmont CPU recently.
And the issue was triggered due to:

1. On Goldmont the CPU in idle can only be woken up via IPIs,
   not POLL mode:
   Commit 08e237fa56a1 ("x86/cpu: Add workaround for MONITOR
   instruction erratum on Goldmont based CPUs")
2. When the CPU is entering suspend to idle process, the
  _TIF_POLLING_NRFLAG is kept on.
3. Commit b2a02fc43a1f ("smp: Optimize send_call_function_single_ipi()")
   makes use of _TIF_POLLING_NRFLAG to avoid sending IPIs to
   idle CPUs.
4. As a result, some IPIs related functions might not work
   well during suspend to idle on Goldmont. For example, one
   suspected victim:
   tick_unfreeze() -> timekeeping_resume() -> hrtimers_resume()
   -> clock_was_set() -> on_each_cpu() might wait forever,
   because the IPIs will not be sent to the CPUs which are
   sleeping with _TIF_POLLING_NRFLAG set, and Goldmont CPU
   could not be woken up by only setting _TIF_NEED_RESCHED
   on the monitor address.

I don't find a way in Ubuntu to update the firmware of Goldmont
and check if the issue was gone, a fix patch would do no harm.
Clear the _TIF_POLLING_NRFLAG flag before entering suspend to idle,
and let the driver's enter_s2idle() to decide whether to set
_TIF_POLLING_NRFLAG or not. So that to avoid the scenario described
above and keep the context consistent with before.

Fixes: b2a02fc43a1f ("smp: Optimize send_call_function_single_ipi()")
Reported-by: kbuild test robot <lkp@intel.com>
Cc: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Chen Yu <yu.c.chen@intel.com>
---
 drivers/cpuidle/cpuidle.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

Comments

Peter Zijlstra June 15, 2020, 6:40 p.m. UTC | #1
On Tue, Jun 16, 2020 at 01:36:11AM +0800, Chen Yu wrote:
> Suspend to idle was found to not work on Goldmont CPU recently.
> And the issue was triggered due to:
> 
> 1. On Goldmont the CPU in idle can only be woken up via IPIs,
>    not POLL mode:
>    Commit 08e237fa56a1 ("x86/cpu: Add workaround for MONITOR
>    instruction erratum on Goldmont based CPUs")
> 2. When the CPU is entering suspend to idle process, the
>   _TIF_POLLING_NRFLAG is kept on.
> 3. Commit b2a02fc43a1f ("smp: Optimize send_call_function_single_ipi()")
>    makes use of _TIF_POLLING_NRFLAG to avoid sending IPIs to
>    idle CPUs.
> 4. As a result, some IPIs related functions might not work
>    well during suspend to idle on Goldmont. For example, one
>    suspected victim:
>    tick_unfreeze() -> timekeeping_resume() -> hrtimers_resume()
>    -> clock_was_set() -> on_each_cpu() might wait forever,
>    because the IPIs will not be sent to the CPUs which are
>    sleeping with _TIF_POLLING_NRFLAG set, and Goldmont CPU
>    could not be woken up by only setting _TIF_NEED_RESCHED
>    on the monitor address.

*sigh*... just what we need.


> @@ -186,8 +187,10 @@ int cpuidle_enter_s2idle(struct cpuidle_driver *drv, struct cpuidle_device *dev)
>  	 * be frozen safely.
>  	 */
>  	index = find_deepest_state(drv, dev, U64_MAX, 0, true);
> -	if (index > 0)
> +	if (index > 0) {
> +		__current_clr_polling();
>  		enter_s2idle_proper(drv, dev, index);
> +	}
>  
>  	return index;
>  }

So how is that commit 08e237fa56a1 not suffient? That makes
mwait_idle_with_hints() DTRT for this 'functionally challenged' piece of
hardware.

AFAICT intel_enter_s2idle() uses mwait_idle_with_hints().

What am I missing?
Peter Zijlstra June 15, 2020, 7:31 p.m. UTC | #2
On Mon, Jun 15, 2020 at 08:40:41PM +0200, Peter Zijlstra wrote:

> > @@ -186,8 +187,10 @@ int cpuidle_enter_s2idle(struct cpuidle_driver *drv, struct cpuidle_device *dev)
> >  	 * be frozen safely.
> >  	 */
> >  	index = find_deepest_state(drv, dev, U64_MAX, 0, true);
> > -	if (index > 0)
> > +	if (index > 0) {
> > +		__current_clr_polling();
> >  		enter_s2idle_proper(drv, dev, index);
> > +	}
> >  
> >  	return index;
> >  }
> 
> So how is that commit 08e237fa56a1 not suffient? That makes
> mwait_idle_with_hints() DTRT for this 'functionally challenged' piece of
> hardware.
> 
> AFAICT intel_enter_s2idle() uses mwait_idle_with_hints().
> 
> What am I missing?

What's missing is that cpuidle_enter_s2idle() doesn't properly match
call_cpuidle().

Something like so then. Your version is racy, if someone already set
TIF_NEED_RESCHED you just clear POLLING and go to sleep.

---

diff --git a/drivers/cpuidle/cpuidle.c b/drivers/cpuidle/cpuidle.c
index c149d9e20dfd..81bee8d03c6d 100644
--- a/drivers/cpuidle/cpuidle.c
+++ b/drivers/cpuidle/cpuidle.c
@@ -133,8 +133,8 @@ int cpuidle_find_deepest_state(struct cpuidle_driver *drv,
 }
 
 #ifdef CONFIG_SUSPEND
-static void enter_s2idle_proper(struct cpuidle_driver *drv,
-				struct cpuidle_device *dev, int index)
+static void s2idle_enter(struct cpuidle_driver *drv,
+			 struct cpuidle_device *dev, int index)
 {
 	ktime_t time_start, time_end;
 
@@ -168,6 +168,15 @@ static void enter_s2idle_proper(struct cpuidle_driver *drv,
 	dev->states_usage[index].s2idle_usage++;
 }
 
+static int call_s2idle(struct cpuidle_driver *drv, struct cpuidle_device *dev,
+		       int index)
+{
+	if (!current_clr_polling_and_test())
+		s2idle_enter(drv, dev, index);
+
+	return index;
+}
+
 /**
  * cpuidle_enter_s2idle - Enter an idle state suitable for suspend-to-idle.
  * @drv: cpuidle driver for the given CPU.
@@ -187,7 +196,7 @@ int cpuidle_enter_s2idle(struct cpuidle_driver *drv, struct cpuidle_device *dev)
 	 */
 	index = find_deepest_state(drv, dev, U64_MAX, 0, true);
 	if (index > 0)
-		enter_s2idle_proper(drv, dev, index);
+		call_s2idle(drv, dev, index);
 
 	return index;
 }
Chen Yu June 16, 2020, 3:36 a.m. UTC | #3
On Mon, Jun 15, 2020 at 09:31:54PM +0200, Peter Zijlstra wrote:
> On Mon, Jun 15, 2020 at 08:40:41PM +0200, Peter Zijlstra wrote:
> 
> > > @@ -186,8 +187,10 @@ int cpuidle_enter_s2idle(struct cpuidle_driver *drv, struct cpuidle_device *dev)
> > >  	 * be frozen safely.
> > >  	 */
> > >  	index = find_deepest_state(drv, dev, U64_MAX, 0, true);
> > > -	if (index > 0)
> > > +	if (index > 0) {
> > > +		__current_clr_polling();
> > >  		enter_s2idle_proper(drv, dev, index);
> > > +	}
> > >  
> > >  	return index;
> > >  }
> > 
> > So how is that commit 08e237fa56a1 not suffient? That makes
> > mwait_idle_with_hints() DTRT for this 'functionally challenged' piece of
> > hardware.
> > 
> > AFAICT intel_enter_s2idle() uses mwait_idle_with_hints().
> > 
> > What am I missing?
> 
> What's missing is that cpuidle_enter_s2idle() doesn't properly match
> call_cpuidle().
>
Right.
> Something like so then. Your version is racy, if someone already set
> TIF_NEED_RESCHED you just clear POLLING and go to sleep.
> 
Got it, I'll test the patch below.

Thanks,
Chenyu
> ---
> 
> diff --git a/drivers/cpuidle/cpuidle.c b/drivers/cpuidle/cpuidle.c
> index c149d9e20dfd..81bee8d03c6d 100644
> --- a/drivers/cpuidle/cpuidle.c
> +++ b/drivers/cpuidle/cpuidle.c
> @@ -133,8 +133,8 @@ int cpuidle_find_deepest_state(struct cpuidle_driver *drv,
>  }
>  
>  #ifdef CONFIG_SUSPEND
> -static void enter_s2idle_proper(struct cpuidle_driver *drv,
> -				struct cpuidle_device *dev, int index)
> +static void s2idle_enter(struct cpuidle_driver *drv,
> +			 struct cpuidle_device *dev, int index)
>  {
>  	ktime_t time_start, time_end;
>  
> @@ -168,6 +168,15 @@ static void enter_s2idle_proper(struct cpuidle_driver *drv,
>  	dev->states_usage[index].s2idle_usage++;
>  }
>  
> +static int call_s2idle(struct cpuidle_driver *drv, struct cpuidle_device *dev,
> +		       int index)
> +{
> +	if (!current_clr_polling_and_test())
> +		s2idle_enter(drv, dev, index);
> +
> +	return index;
> +}
> +
>  /**
>   * cpuidle_enter_s2idle - Enter an idle state suitable for suspend-to-idle.
>   * @drv: cpuidle driver for the given CPU.
> @@ -187,7 +196,7 @@ int cpuidle_enter_s2idle(struct cpuidle_driver *drv, struct cpuidle_device *dev)
>  	 */
>  	index = find_deepest_state(drv, dev, U64_MAX, 0, true);
>  	if (index > 0)
> -		enter_s2idle_proper(drv, dev, index);
> +		call_s2idle(drv, dev, index);
>  
>  	return index;
>  }
diff mbox series

Patch

diff --git a/drivers/cpuidle/cpuidle.c b/drivers/cpuidle/cpuidle.c
index c149d9e20dfd..d17dad362d34 100644
--- a/drivers/cpuidle/cpuidle.c
+++ b/drivers/cpuidle/cpuidle.c
@@ -13,6 +13,7 @@ 
 #include <linux/mutex.h>
 #include <linux/sched.h>
 #include <linux/sched/clock.h>
+#include <linux/sched/idle.h>
 #include <linux/notifier.h>
 #include <linux/pm_qos.h>
 #include <linux/cpu.h>
@@ -186,8 +187,10 @@  int cpuidle_enter_s2idle(struct cpuidle_driver *drv, struct cpuidle_device *dev)
 	 * be frozen safely.
 	 */
 	index = find_deepest_state(drv, dev, U64_MAX, 0, true);
-	if (index > 0)
+	if (index > 0) {
+		__current_clr_polling();
 		enter_s2idle_proper(drv, dev, index);
+	}
 
 	return index;
 }