[1/5] xen: credit2: use the correct scratch cpumask.

Message ID 148467399874.27920.3501738903081546236.stgit@Solace.fritz.box (mailing list archive)
State New, archived

Commit Message

Dario Faggioli Jan. 17, 2017, 5:26 p.m. UTC
In fact, there is one scratch mask per CPU. When
using the scratch mask of a given CPU (the relevant
accessors are sketched right after this list), it
must be true that:
 - the CPU belongs to your cpupool and scheduler,
 - you hold the runqueue lock (the one taken via
   {v,p}cpu_schedule_lock()) for that CPU.
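
For reference, this is roughly how the two accessors involved
look in xen/include/xen/sched-if.h at this point (quoted from
memory, so take it as a sketch rather than the exact source):

  /* One scratch cpumask per CPU, for temporary mask fiddling. */
  DECLARE_PER_CPU(cpumask_t, cpumask_scratch);
  /* Scratch mask of the CPU we are currently running on... */
  #define cpumask_scratch        (&this_cpu(cpumask_scratch))
  /* ...and scratch mask of an explicitly identified CPU 'c'. */
  #define cpumask_scratch_cpu(c) (&per_cpu(cpumask_scratch, c))

So plain cpumask_scratch always means "the scratch mask of the
CPU this code is running on", which is only safe to use when
that CPU is also the one whose runqueue lock we are holding.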

This was not the case within the following functions:

get_fallback_cpu(), csched2_cpu_pick(): we can't be
sure that we are either running on, or holding the
lock of, the CPU stored in the vCPU's 'v->processor'.

migrate(): it is ok when called from balance_load(),
because that comes from csched2_schedule(), which takes
the runqueue lock of the CPU it executes on. But it is
not ok when we come from csched2_vcpu_migrate(), which
can be called from other places.

The fix is to explicitly use the scratch space of the
CPUs for which we know we hold the runqueue lock.
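
As a minimal sketch of the resulting pattern (the helper name
pick_in_runq and its runq_active argument are made up purely
for illustration, and the lock/unlock helpers are quoted from
memory; the point is only the pairing of "lock CPU x's
runqueue" with "use CPU x's scratch mask"):

  #include <xen/sched.h>     /* struct vcpu */
  #include <xen/sched-if.h>  /* pcpu_schedule_lock(), cpumask_scratch_cpu() */

  /* Hypothetical helper, for illustration only: not part of this patch. */
  static unsigned int pick_in_runq(struct vcpu *v, const cpumask_t *runq_active)
  {
      unsigned int cpu = v->processor;
      spinlock_t *lock = pcpu_schedule_lock(cpu); /* runqueue lock of 'cpu' */
      unsigned int new_cpu;

      /* Safe: we hold cpu's runqueue lock, so cpu's scratch mask is ours. */
      cpumask_and(cpumask_scratch_cpu(cpu), v->cpu_hard_affinity, runq_active);
      new_cpu = cpumask_any(cpumask_scratch_cpu(cpu));

      pcpu_schedule_unlock(lock, cpu);
      return new_cpu;
  }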

Signed-off-by: Dario Faggioli <dario.faggioli@citrix.com>
Reported-by: Jan Beulich <JBeulich@suse.com>
---
Cc: George Dunlap <george.dunlap@eu.citrix.com>
Cc: Jan Beulich <jbeulich@suse.com>
---
This is a bugfix, and should be backported to 4.8.
---
 xen/common/sched_credit2.c |   39 ++++++++++++++++++++-------------------
 1 file changed, 20 insertions(+), 19 deletions(-)

Comments

George Dunlap Jan. 19, 2017, 12:22 p.m. UTC | #1
On 17/01/17 17:26, Dario Faggioli wrote:
> In fact, there is one scratch mask per CPU. When
> using the scratch mask of a given CPU, it must be
> true that:
>  - the CPU belongs to your cpupool and scheduler,
>  - you hold the runqueue lock (the one taken via
>    {v,p}cpu_schedule_lock()) for that CPU.
> 
> This was not the case within the following functions:
> 
> get_fallback_cpu(), csched2_cpu_pick(): we can't be
> sure that we are either running on, or holding the
> lock of, the CPU stored in the vCPU's 'v->processor'.
> 
> migrate(): it is ok when called from balance_load(),
> because that comes from csched2_schedule(), which takes
> the runqueue lock of the CPU it executes on. But it is
> not ok when we come from csched2_vcpu_migrate(), which
> can be called from other places.
> 
> The fix is to explicitly use the scratch space of the
> CPUs for which we know we hold the runqueue lock.
> 
> Signed-off-by: Dario Faggioli <dario.faggioli@citrix.com>
> Reported-by: Jan Beulich <JBeulich@suse.com>

Reviewed-by: George Dunlap <george.dunlap@citrix.com>

And queued.

Patch

diff --git a/xen/common/sched_credit2.c b/xen/common/sched_credit2.c
index ef8e0d8..523922e 100644
--- a/xen/common/sched_credit2.c
+++ b/xen/common/sched_credit2.c
@@ -510,24 +510,23 @@  void smt_idle_mask_clear(unsigned int cpu, cpumask_t *mask)
  */
 static int get_fallback_cpu(struct csched2_vcpu *svc)
 {
-    int cpu;
+    int fallback_cpu, cpu = svc->vcpu->processor;
 
-    if ( likely(cpumask_test_cpu(svc->vcpu->processor,
-                                 svc->vcpu->cpu_hard_affinity)) )
-        return svc->vcpu->processor;
+    if ( likely(cpumask_test_cpu(cpu, svc->vcpu->cpu_hard_affinity)) )
+        return cpu;
 
-    cpumask_and(cpumask_scratch, svc->vcpu->cpu_hard_affinity,
+    cpumask_and(cpumask_scratch_cpu(cpu), svc->vcpu->cpu_hard_affinity,
                 &svc->rqd->active);
-    cpu = cpumask_first(cpumask_scratch);
-    if ( likely(cpu < nr_cpu_ids) )
-        return cpu;
+    fallback_cpu = cpumask_first(cpumask_scratch_cpu(cpu));
+    if ( likely(fallback_cpu < nr_cpu_ids) )
+        return fallback_cpu;
 
     cpumask_and(cpumask_scratch, svc->vcpu->cpu_hard_affinity,
                 cpupool_domain_cpumask(svc->vcpu->domain));
 
-    ASSERT(!cpumask_empty(cpumask_scratch));
+    ASSERT(!cpumask_empty(cpumask_scratch_cpu(cpu)));
 
-    return cpumask_first(cpumask_scratch);
+    return cpumask_first(cpumask_scratch_cpu(cpu));
 }
 
 /*
@@ -1492,7 +1491,7 @@  static int
 csched2_cpu_pick(const struct scheduler *ops, struct vcpu *vc)
 {
     struct csched2_private *prv = CSCHED2_PRIV(ops);
-    int i, min_rqi = -1, new_cpu;
+    int i, min_rqi = -1, new_cpu, cpu = vc->processor;
     struct csched2_vcpu *svc = CSCHED2_VCPU(vc);
     s_time_t min_avgload = MAX_LOAD;
 
@@ -1512,7 +1511,7 @@  csched2_cpu_pick(const struct scheduler *ops, struct vcpu *vc)
      * just grab the prv lock.  Instead, we'll have to trylock, and
      * do something else reasonable if we fail.
      */
-    ASSERT(spin_is_locked(per_cpu(schedule_data, vc->processor).schedule_lock));
+    ASSERT(spin_is_locked(per_cpu(schedule_data, cpu).schedule_lock));
 
     if ( !read_trylock(&prv->lock) )
     {
@@ -1539,9 +1538,9 @@  csched2_cpu_pick(const struct scheduler *ops, struct vcpu *vc)
         }
         else
         {
-            cpumask_and(cpumask_scratch, vc->cpu_hard_affinity,
+            cpumask_and(cpumask_scratch_cpu(cpu), vc->cpu_hard_affinity,
                         &svc->migrate_rqd->active);
-            new_cpu = cpumask_any(cpumask_scratch);
+            new_cpu = cpumask_any(cpumask_scratch_cpu(cpu));
             if ( new_cpu < nr_cpu_ids )
                 goto out_up;
         }
@@ -1598,9 +1597,9 @@  csched2_cpu_pick(const struct scheduler *ops, struct vcpu *vc)
         goto out_up;
     }
 
-    cpumask_and(cpumask_scratch, vc->cpu_hard_affinity,
+    cpumask_and(cpumask_scratch_cpu(cpu), vc->cpu_hard_affinity,
                 &prv->rqd[min_rqi].active);
-    new_cpu = cpumask_any(cpumask_scratch);
+    new_cpu = cpumask_any(cpumask_scratch_cpu(cpu));
     BUG_ON(new_cpu >= nr_cpu_ids);
 
  out_up:
@@ -1675,6 +1674,8 @@  static void migrate(const struct scheduler *ops,
                     struct csched2_runqueue_data *trqd, 
                     s_time_t now)
 {
+    int cpu = svc->vcpu->processor;
+
     if ( unlikely(tb_init_done) )
     {
         struct {
@@ -1696,7 +1697,7 @@  static void migrate(const struct scheduler *ops,
         svc->migrate_rqd = trqd;
         __set_bit(_VPF_migrating, &svc->vcpu->pause_flags);
         __set_bit(__CSFLAG_runq_migrate_request, &svc->flags);
-        cpu_raise_softirq(svc->vcpu->processor, SCHEDULE_SOFTIRQ);
+        cpu_raise_softirq(cpu, SCHEDULE_SOFTIRQ);
         SCHED_STAT_CRANK(migrate_requested);
     }
     else
@@ -1711,9 +1712,9 @@  static void migrate(const struct scheduler *ops,
         }
         __runq_deassign(svc);
 
-        cpumask_and(cpumask_scratch, svc->vcpu->cpu_hard_affinity,
+        cpumask_and(cpumask_scratch_cpu(cpu), svc->vcpu->cpu_hard_affinity,
                     &trqd->active);
-        svc->vcpu->processor = cpumask_any(cpumask_scratch);
+        svc->vcpu->processor = cpumask_any(cpumask_scratch_cpu(cpu));
         ASSERT(svc->vcpu->processor < nr_cpu_ids);
 
         __runq_assign(svc, trqd);