diff mbox series

[v3,1/6] xen/sched: call cpu_disable_scheduler() via cpu notifier

Message ID 20190402053457.24912-2-jgross@suse.com (mailing list archive)
State Superseded
Headers show
Series xen: simplify suspend/resume handling | expand

Commit Message

Jürgen Groß April 2, 2019, 5:34 a.m. UTC
cpu_disable_scheduler() is being called from __cpu_disable() today.
There is no need to execute it on the cpu just being disabled, so use
the CPU_DEAD case of the cpu notifier chain. Moving the call out of
stop_machine() context is fine, as we just need to hold the domain RCU
lock and need the scheduler percpu data to be still allocated.

Add another hook for CPU_DOWN_PREPARE to bail out early in case
cpu_disable_scheduler() would fail. This will avoid crashes in rare
cases for cpu hotplug or suspend.

Signed-off-by: Juergen Gross <jgross@suse.com>
Reviewed-by: Dario Faggioli <dfaggioli@suse.com>
---
V3:
- restore smp_mb() (Julien Grall)

V2:
- add CPU_DOWN_PREPARE hook
- BUG() in case of cpu_disable_scheduler() failing in CPU_DEAD
  (Jan Beulich)
- modify ARM __cpu_disable(), too (Andrew Cooper)
---
 xen/arch/arm/smpboot.c |  2 --
 xen/arch/x86/smpboot.c |  3 ---
 xen/common/schedule.c  | 42 +++++++++++++++++++++++++++++++++++-------
 3 files changed, 35 insertions(+), 12 deletions(-)

Comments

Julien Grall April 2, 2019, 9:54 a.m. UTC | #1
Hi,

On 02/04/2019 06:34, Juergen Gross wrote:
> cpu_disable_scheduler() is being called from __cpu_disable() today.
> There is no need to execute it on the cpu just being disabled, so use
> the CPU_DEAD case of the cpu notifier chain. Moving the call out of
> stop_machine() context is fine, as we just need to hold the domain RCU
> lock and need the scheduler percpu data to be still allocated.
> 
> Add another hook for CPU_DOWN_PREPARE to bail out early in case
> cpu_disable_scheduler() would fail. This will avoid crashes in rare
> cases for cpu hotplug or suspend.
> 
> Signed-off-by: Juergen Gross <jgross@suse.com>
> Reviewed-by: Dario Faggioli <dfaggioli@suse.com>

For the Arm bits:

Acked-by: Julien Grall <julien.grall@arm.com>

Cheers,
Jan Beulich April 2, 2019, 11:54 a.m. UTC | #2
>>> On 02.04.19 at 07:34, <jgross@suse.com> wrote:
> cpu_disable_scheduler() is being called from __cpu_disable() today.
> There is no need to execute it on the cpu just being disabled, so use
> the CPU_DEAD case of the cpu notifier chain. Moving the call out of
> stop_machine() context is fine, as we just need to hold the domain RCU
> lock and need the scheduler percpu data to be still allocated.
> 
> Add another hook for CPU_DOWN_PREPARE to bail out early in case
> cpu_disable_scheduler() would fail. This will avoid crashes in rare
> cases for cpu hotplug or suspend.
> 
> Signed-off-by: Juergen Gross <jgross@suse.com>
> Reviewed-by: Dario Faggioli <dfaggioli@suse.com>

Acked-by: Jan Beulich <jbeulich@suse.com>
diff mbox series

Patch

diff --git a/xen/arch/arm/smpboot.c b/xen/arch/arm/smpboot.c
index 25cd44549c..f756444362 100644
--- a/xen/arch/arm/smpboot.c
+++ b/xen/arch/arm/smpboot.c
@@ -386,8 +386,6 @@  void __cpu_disable(void)
     /* It's now safe to remove this processor from the online map */
     cpumask_clear_cpu(cpu, &cpu_online_map);
 
-    if ( cpu_disable_scheduler(cpu) )
-        BUG();
     smp_mb();
 
     /* Return to caller; eventually the IPI mechanism will unwind and the 
diff --git a/xen/arch/x86/smpboot.c b/xen/arch/x86/smpboot.c
index 7d1226d7bc..b7a0a4a419 100644
--- a/xen/arch/x86/smpboot.c
+++ b/xen/arch/x86/smpboot.c
@@ -1221,9 +1221,6 @@  void __cpu_disable(void)
     cpumask_clear_cpu(cpu, &cpu_online_map);
     fixup_irqs(&cpu_online_map, 1);
     fixup_eoi();
-
-    if ( cpu_disable_scheduler(cpu) )
-        BUG();
 }
 
 void __cpu_die(unsigned int cpu)
diff --git a/xen/common/schedule.c b/xen/common/schedule.c
index 76d60785e2..7b93028170 100644
--- a/xen/common/schedule.c
+++ b/xen/common/schedule.c
@@ -773,8 +773,9 @@  void restore_vcpu_affinity(struct domain *d)
 }
 
 /*
- * This function is used by cpu_hotplug code from stop_machine context
+ * This function is used by cpu_hotplug code via cpu notifier chain
  * and from cpupools to switch schedulers on a cpu.
+ * Caller must get domlist_read_lock.
  */
 int cpu_disable_scheduler(unsigned int cpu)
 {
@@ -789,12 +790,6 @@  int cpu_disable_scheduler(unsigned int cpu)
     if ( c == NULL )
         return ret;
 
-    /*
-     * We'd need the domain RCU lock, but:
-     *  - when we are called from cpupool code, it's acquired there already;
-     *  - when we are called for CPU teardown, we're in stop-machine context,
-     *    so that's not be a problem.
-     */
     for_each_domain_in_cpupool ( d, c )
     {
         for_each_vcpu ( d, v )
@@ -893,6 +888,30 @@  int cpu_disable_scheduler(unsigned int cpu)
     return ret;
 }
 
+static int cpu_disable_scheduler_check(unsigned int cpu)
+{
+    struct domain *d;
+    struct vcpu *v;
+    struct cpupool *c;
+
+    c = per_cpu(cpupool, cpu);
+    if ( c == NULL )
+        return 0;
+
+    for_each_domain_in_cpupool ( d, c )
+    {
+        for_each_vcpu ( d, v )
+        {
+            if ( v->affinity_broken )
+                return -EADDRINUSE;
+            if ( system_state != SYS_STATE_suspend && v->processor == cpu )
+                return -EAGAIN;
+        }
+    }
+
+    return 0;
+}
+
 /*
  * In general, this must be called with the scheduler lock held, because the
  * adjust_affinity hook may want to modify the vCPU state. However, when the
@@ -1734,7 +1753,16 @@  static int cpu_schedule_callback(
     case CPU_UP_PREPARE:
         rc = cpu_schedule_up(cpu);
         break;
+    case CPU_DOWN_PREPARE:
+        rcu_read_lock(&domlist_read_lock);
+        rc = cpu_disable_scheduler_check(cpu);
+        rcu_read_unlock(&domlist_read_lock);
+        break;
     case CPU_DEAD:
+        rcu_read_lock(&domlist_read_lock);
+        rc = cpu_disable_scheduler(cpu);
+        BUG_ON(rc);
+        rcu_read_unlock(&domlist_read_lock);
         SCHED_OP(sched, deinit_pdata, sd->sched_priv, cpu);
         /* Fallthrough */
     case CPU_UP_CANCELED: