
[2/3] xen: sched_null: support for hard affinity

Message ID 148977618592.29510.6991110994080248461.stgit@Palanthas.fritz.box (mailing list archive)
State New, archived

Commit Message

Dario Faggioli March 17, 2017, 6:43 p.m. UTC
As a (rudimentary) way of directing and affecting the
placement logic implemented by the scheduler, support
vCPU hard affinity.

Basically, a vCPU will now be assigned only to a pCPU
that is part of its own hard affinity. If such pCPU(s)
is (are) busy, the vCPU will wait, as happens when
there are no free pCPUs.
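
For example, with something like "xl vcpu-pin <dom> <vcpu> 4-7", the
vCPU will only ever be assigned to one of pCPUs 4-7, and will be
parked in the waitqueue if they are all taken.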

Signed-off-by: Dario Faggioli <dario.faggioli@citrix.com>
---
Cc: George Dunlap <george.dunlap@citrix.com>
Cc: Stefano Stabellini <stefano@aporeto.com>
Cc: Julien Grall <julien.grall@arm.com>
Cc: Jonathan Davies <Jonathan.Davies@citrix.com>
Cc: Marcus Granado <marcus.granado@citrix.com>
---
 xen/common/sched_null.c |   43 ++++++++++++++++++++++++++++++++-----------
 1 file changed, 32 insertions(+), 11 deletions(-)

Comments

Stefano Stabellini March 20, 2017, 11:46 p.m. UTC | #1
On Fri, 17 Mar 2017, Dario Faggioli wrote:
> As a (rudimentary) way of directing and affecting the
> placement logic implemented by the scheduler, support
> vCPU hard affinity.
> 
> Basically, a vCPU will now be assigned only to a pCPU
> that is part of its own hard affinity. If such pCPU(s)
> is (are) busy, the vCPU will wait, as happens when
> there are no free pCPUs.
> 
> Signed-off-by: Dario Faggioli <dario.faggioli@citrix.com>
> ---
> Cc: George Dunlap <george.dunlap@citrix.com>
> Cc: Stefano Stabellini <stefano@aporeto.com>
> Cc: Julien Grall <julien.grall@arm.com>
> Cc: Jonathan Davies <Jonathan.Davies@citrix.com>
> Cc: Marcus Granado <marcus.granado@citrix.com>
> ---
>  xen/common/sched_null.c |   43 ++++++++++++++++++++++++++++++++-----------
>  1 file changed, 32 insertions(+), 11 deletions(-)
> 
> diff --git a/xen/common/sched_null.c b/xen/common/sched_null.c
> index 6a13308..ea055f1 100644
> --- a/xen/common/sched_null.c
> +++ b/xen/common/sched_null.c
> @@ -117,6 +117,14 @@ static inline struct null_dom *null_dom(const struct domain *d)
>      return d->sched_priv;
>  }
>  
> +static inline bool check_nvc_affinity(struct null_vcpu *nvc, unsigned int cpu)
> +{
> +    cpumask_and(cpumask_scratch_cpu(cpu), nvc->vcpu->cpu_hard_affinity,
> +                cpupool_domain_cpumask(nvc->vcpu->domain));
> +
> +    return cpumask_test_cpu(cpu, cpumask_scratch_cpu(cpu));
> +}

If you make it take a struct vcpu* as first argument, it will be more
generally usable.
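
E.g., just a sketch, keeping the current name:

    static inline bool check_nvc_affinity(struct vcpu *v, unsigned int cpu)
    {
        cpumask_and(cpumask_scratch_cpu(cpu), v->cpu_hard_affinity,
                    cpupool_domain_cpumask(v->domain));

        return cpumask_test_cpu(cpu, cpumask_scratch_cpu(cpu));
    }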


>  static int null_init(struct scheduler *ops)
>  {
>      struct null_private *prv;
> @@ -284,16 +292,20 @@ static unsigned int pick_cpu(struct null_private *prv, struct vcpu *v)
>  
>      ASSERT(spin_is_locked(per_cpu(schedule_data, cpu).schedule_lock));
>  
> +    cpumask_and(cpumask_scratch_cpu(cpu), v->cpu_hard_affinity, cpus);
> +
>      /*
>       * If our processor is free, or we are assigned to it, and it is
> -     * also still valid, just go for it.
> +     * also still valid and part of our affinity, just go for it.
>       */
>      if ( likely((per_cpu(npc, cpu).vcpu == NULL || per_cpu(npc, cpu).vcpu == v)
> -                && cpumask_test_cpu(cpu, cpus)) )
> +                && cpumask_test_cpu(cpu, cpumask_scratch_cpu(cpu))) )

Then you can use it here:
     check_nvc_affinity(v, cpu);
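
That is, the check would become something like:

    if ( likely((per_cpu(npc, cpu).vcpu == NULL || per_cpu(npc, cpu).vcpu == v)
                && check_nvc_affinity(v, cpu)) )
        return cpu;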


>          return cpu;
>  
> -    /* If not, just go for a valid free pCPU, if any */
> +    /* If not, just go for a free pCPU, within our affinity, if any */
>      cpumask_and(cpumask_scratch_cpu(cpu), &prv->cpus_free, cpus);
> +    cpumask_and(cpumask_scratch_cpu(cpu), cpumask_scratch_cpu(cpu),
> +                v->cpu_hard_affinity);

You can do this with one cpumask_and (in addition to the one above):

   cpumask_and(cpumask_scratch_cpu(cpu), cpumask_scratch_cpu(cpu),
               &prv->cpus_free);
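
That is, given that cpumask_scratch_cpu(cpu) already holds the
affinity & cpupool intersection computed at the top of the function,
the fallback path would become just (sketch):

    /* If not, just go for a free pCPU, within our affinity, if any */
    cpumask_and(cpumask_scratch_cpu(cpu), cpumask_scratch_cpu(cpu),
                &prv->cpus_free);
    cpu = cpumask_first(cpumask_scratch_cpu(cpu));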


>      cpu = cpumask_first(cpumask_scratch_cpu(cpu));
>  
>      /*
> @@ -308,7 +320,10 @@ static unsigned int pick_cpu(struct null_private *prv, struct vcpu *v)
>       * only if the pCPU is free.
>       */
>      if ( unlikely(cpu == nr_cpu_ids) )
> -        cpu = cpumask_any(cpus);
> +    {
> +        cpumask_and(cpumask_scratch_cpu(cpu), cpus, v->cpu_hard_affinity);

Could the intersection be 0?


> +        cpu = cpumask_any(cpumask_scratch_cpu(cpu));
> +    }
>  
>      return cpu;
>  }
> @@ -391,6 +406,9 @@ static void null_vcpu_insert(const struct scheduler *ops, struct vcpu *v)
>          lock = pcpu_schedule_lock(cpu);
>      }
>  
> +    cpumask_and(cpumask_scratch_cpu(cpu), v->cpu_hard_affinity,
> +		cpupool_domain_cpumask(v->domain));
> +

coding style (the second line is indented with a hard tab)


>      /*
>       * If the pCPU is free, we assign v to it.
>       *
> @@ -408,8 +426,7 @@ static void null_vcpu_insert(const struct scheduler *ops, struct vcpu *v)
>           */
>          vcpu_assign(prv, v, cpu);
>      }
> -    else if ( cpumask_intersects(&prv->cpus_free,
> -                                 cpupool_domain_cpumask(v->domain)) )
> +    else if ( cpumask_intersects(&prv->cpus_free, cpumask_scratch_cpu(cpu)) )
>      {
>          spin_unlock(lock);
>          goto retry;
> @@ -462,7 +479,7 @@ static void null_vcpu_remove(const struct scheduler *ops, struct vcpu *v)
>  
>      spin_lock(&prv->waitq_lock);
>      wvc = list_first_entry_or_null(&prv->waitq, struct null_vcpu, waitq_elem);
> -    if ( wvc )
> +    if ( wvc && cpumask_test_cpu(cpu, cpumask_scratch_cpu(cpu)) )

shouldn't this be
    
    check_nvc_affinity(wvc, cpu)

?
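
AFAICT, cpumask_scratch_cpu(cpu) is not even set up anywhere in this
function, so recomputing the mask from wvc's affinity, as the helper
does, looks necessary here.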


>      {
>          vcpu_assign(prv, wvc->vcpu, cpu);
>          list_del_init(&wvc->waitq_elem);
> @@ -550,7 +567,7 @@ static void null_vcpu_migrate(const struct scheduler *ops, struct vcpu *v,
>  
>          spin_lock(&prv->waitq_lock);
>          wvc = list_first_entry_or_null(&prv->waitq, struct null_vcpu, waitq_elem);
> -        if ( wvc && cpumask_test_cpu(cpu, cpupool_domain_cpumask(v->domain)) )
> +        if ( wvc && check_nvc_affinity(wvc, cpu) )
>          {
>              vcpu_assign(prv, wvc->vcpu, cpu);
>              list_del_init(&wvc->waitq_elem);
> @@ -573,11 +590,15 @@ static void null_vcpu_migrate(const struct scheduler *ops, struct vcpu *v,
>       * Let's now consider new_cpu, which is where v is being sent. It can be
>       * either free, or have a vCPU already assigned to it.
>       *
> -     * In the former case, we should assign v to it, and try to get it to run.
> +     * In the former case, we should assign v to it, and try to get it to run,
> +     * if possible, according to affinity.
>       *
>       * In latter, all we can do is to park v in the waitqueue.
>       */
> -    if ( per_cpu(npc, new_cpu).vcpu == NULL )
> +    cpumask_and(cpumask_scratch_cpu(cpu), cpupool_domain_cpumask(v->domain),
> +                nvc->vcpu->cpu_hard_affinity);
> +    if ( per_cpu(npc, new_cpu).vcpu == NULL &&
> +         cpumask_test_cpu(new_cpu, cpumask_scratch_cpu(cpu)) )

could you do instead:
            check_nvc_affinity(nvc, new_cpu)
?
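
i.e., something like:

    if ( per_cpu(npc, new_cpu).vcpu == NULL &&
         check_nvc_affinity(nvc, new_cpu) )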


>      {
>          /* We don't know whether v was in the waitqueue. If yes, remove it */
>          spin_lock(&prv->waitq_lock);
> @@ -666,7 +687,7 @@ static struct task_slice null_schedule(const struct scheduler *ops,
>      {
>          spin_lock(&prv->waitq_lock);
>          wvc = list_first_entry_or_null(&prv->waitq, struct null_vcpu, waitq_elem);
> -        if ( wvc )
> +        if ( wvc && check_nvc_affinity(wvc, cpu) )
>          {
>              vcpu_assign(prv, wvc->vcpu, cpu);
>              list_del_init(&wvc->waitq_elem);
>
Dario Faggioli March 21, 2017, 8:47 a.m. UTC | #2
On Mon, 2017-03-20 at 16:46 -0700, Stefano Stabellini wrote:
> On Fri, 17 Mar 2017, Dario Faggioli wrote:
> > 
> > --- a/xen/common/sched_null.c
> > +++ b/xen/common/sched_null.c
> > @@ -117,6 +117,14 @@ static inline struct null_dom *null_dom(const struct domain *d)
> >      return d->sched_priv;
> >  }
> >  
> > +static inline bool check_nvc_affinity(struct null_vcpu *nvc, unsigned int cpu)
> > +{
> > +    cpumask_and(cpumask_scratch_cpu(cpu), nvc->vcpu->cpu_hard_affinity,
> > +                cpupool_domain_cpumask(nvc->vcpu->domain));
> > +
> > +    return cpumask_test_cpu(cpu, cpumask_scratch_cpu(cpu));
> > +}
> 
> If you make it take a struct vcpu* as first argument, it will be more
> generally usable.
> 
Yes, that's probably a good idea. Thanks.

> >          return cpu;
> >  
> > -    /* If not, just go for a valid free pCPU, if any */
> > +    /* If not, just go for a free pCPU, within our affinity, if any */
> >      cpumask_and(cpumask_scratch_cpu(cpu), &prv->cpus_free, cpus);
> > +    cpumask_and(cpumask_scratch_cpu(cpu), cpumask_scratch_cpu(cpu),
> > +                v->cpu_hard_affinity);
> 
> You can do this with one cpumask_and (in addition to the one above):
> 
>    cpumask_and(cpumask_scratch_cpu(cpu), cpumask_scratch_cpu(cpu),
>                &prv->cpus_free);
> 
Mmm... right. Wow... Quite an oversight on my side! :-P

> > @@ -308,7 +320,10 @@ static unsigned int pick_cpu(struct null_private *prv, struct vcpu *v)
> >       * only if the pCPU is free.
> >       */
> >      if ( unlikely(cpu == nr_cpu_ids) )
> > -        cpu = cpumask_any(cpus);
> > +    {
> > +        cpumask_and(cpumask_scratch_cpu(cpu), cpus, v->cpu_hard_affinity);
> 
> Could the intersection be 0?
> 
Not really, because vcpu_set_hard_affinity() fails when trying to set
the affinity to something that has an empty intersection with the set
of cpus of the pool the domain is in.

Other schedulers rely on this too.
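
For reference, that check is roughly the following (quoting schedule.c
from memory, so the details may differ):

    int vcpu_set_hard_affinity(struct vcpu *v, const cpumask_t *affinity)
    {
        cpumask_t online_affinity;

        /* Refuse an affinity with no online pCPU in the domain's pool */
        cpumask_and(&online_affinity, affinity, VCPU2ONLINE(v));
        if ( cpumask_empty(&online_affinity) )
            return -EINVAL;

        return vcpu_set_affinity(v, affinity, v->cpu_hard_affinity);
    }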

However, I need to re-check what happens if everything is ok when the
affinity is changed, but then all the cpus in the affinity itself are
removed from the pool... I will do that, since, as I said, this is
something general (and this area is really a can of worms... I keep
finding weird corner cases! :-O)

> > @@ -408,8 +426,7 @@ static void null_vcpu_insert(const struct scheduler *ops, struct vcpu *v)
> >           */
> >          vcpu_assign(prv, v, cpu);
> >      }
> > -    else if ( cpumask_intersects(&prv->cpus_free,
> > -                                 cpupool_domain_cpumask(v->domain)) )
> > +    else if ( cpumask_intersects(&prv->cpus_free, cpumask_scratch_cpu(cpu)) )
> >      {
> >          spin_unlock(lock);
> >          goto retry;
> > @@ -462,7 +479,7 @@ static void null_vcpu_remove(const struct scheduler *ops, struct vcpu *v)
> >  
> >      spin_lock(&prv->waitq_lock);
> >      wvc = list_first_entry_or_null(&prv->waitq, struct null_vcpu, waitq_elem);
> > -    if ( wvc )
> > +    if ( wvc && cpumask_test_cpu(cpu, cpumask_scratch_cpu(cpu)) )
> 
> shouldn't this be
>     
>     check_nvc_affinity(wvc, cpu)
> 
> ?
> 
Mmm... well, considering that I never touch cpumask_scratch_cpu() in
this function, this is clearly a bug. Will fix. :-/
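
So, for v2, the check here will presumably become the same one already
used in null_vcpu_migrate() and null_schedule(), i.e.:

    if ( wvc && check_nvc_affinity(wvc, cpu) )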

Thanks and Regards,
Dario

Patch

diff --git a/xen/common/sched_null.c b/xen/common/sched_null.c
index 6a13308..ea055f1 100644
--- a/xen/common/sched_null.c
+++ b/xen/common/sched_null.c
@@ -117,6 +117,14 @@  static inline struct null_dom *null_dom(const struct domain *d)
     return d->sched_priv;
 }
 
+static inline bool check_nvc_affinity(struct null_vcpu *nvc, unsigned int cpu)
+{
+    cpumask_and(cpumask_scratch_cpu(cpu), nvc->vcpu->cpu_hard_affinity,
+                cpupool_domain_cpumask(nvc->vcpu->domain));
+
+    return cpumask_test_cpu(cpu, cpumask_scratch_cpu(cpu));
+}
+
 static int null_init(struct scheduler *ops)
 {
     struct null_private *prv;
@@ -284,16 +292,20 @@  static unsigned int pick_cpu(struct null_private *prv, struct vcpu *v)
 
     ASSERT(spin_is_locked(per_cpu(schedule_data, cpu).schedule_lock));
 
+    cpumask_and(cpumask_scratch_cpu(cpu), v->cpu_hard_affinity, cpus);
+
     /*
      * If our processor is free, or we are assigned to it, and it is
-     * also still valid, just go for it.
+     * also still valid and part of our affinity, just go for it.
      */
     if ( likely((per_cpu(npc, cpu).vcpu == NULL || per_cpu(npc, cpu).vcpu == v)
-                && cpumask_test_cpu(cpu, cpus)) )
+                && cpumask_test_cpu(cpu, cpumask_scratch_cpu(cpu))) )
         return cpu;
 
-    /* If not, just go for a valid free pCPU, if any */
+    /* If not, just go for a free pCPU, within our affinity, if any */
     cpumask_and(cpumask_scratch_cpu(cpu), &prv->cpus_free, cpus);
+    cpumask_and(cpumask_scratch_cpu(cpu), cpumask_scratch_cpu(cpu),
+                v->cpu_hard_affinity);
     cpu = cpumask_first(cpumask_scratch_cpu(cpu));
 
     /*
@@ -308,7 +320,10 @@  static unsigned int pick_cpu(struct null_private *prv, struct vcpu *v)
      * only if the pCPU is free.
      */
     if ( unlikely(cpu == nr_cpu_ids) )
-        cpu = cpumask_any(cpus);
+    {
+        cpumask_and(cpumask_scratch_cpu(cpu), cpus, v->cpu_hard_affinity);
+        cpu = cpumask_any(cpumask_scratch_cpu(cpu));
+    }
 
     return cpu;
 }
@@ -391,6 +406,9 @@  static void null_vcpu_insert(const struct scheduler *ops, struct vcpu *v)
         lock = pcpu_schedule_lock(cpu);
     }
 
+    cpumask_and(cpumask_scratch_cpu(cpu), v->cpu_hard_affinity,
+		cpupool_domain_cpumask(v->domain));
+
     /*
      * If the pCPU is free, we assign v to it.
      *
@@ -408,8 +426,7 @@  static void null_vcpu_insert(const struct scheduler *ops, struct vcpu *v)
          */
         vcpu_assign(prv, v, cpu);
     }
-    else if ( cpumask_intersects(&prv->cpus_free,
-                                 cpupool_domain_cpumask(v->domain)) )
+    else if ( cpumask_intersects(&prv->cpus_free, cpumask_scratch_cpu(cpu)) )
     {
         spin_unlock(lock);
         goto retry;
@@ -462,7 +479,7 @@  static void null_vcpu_remove(const struct scheduler *ops, struct vcpu *v)
 
     spin_lock(&prv->waitq_lock);
     wvc = list_first_entry_or_null(&prv->waitq, struct null_vcpu, waitq_elem);
-    if ( wvc )
+    if ( wvc && cpumask_test_cpu(cpu, cpumask_scratch_cpu(cpu)) )
     {
         vcpu_assign(prv, wvc->vcpu, cpu);
         list_del_init(&wvc->waitq_elem);
@@ -550,7 +567,7 @@  static void null_vcpu_migrate(const struct scheduler *ops, struct vcpu *v,
 
         spin_lock(&prv->waitq_lock);
         wvc = list_first_entry_or_null(&prv->waitq, struct null_vcpu, waitq_elem);
-        if ( wvc && cpumask_test_cpu(cpu, cpupool_domain_cpumask(v->domain)) )
+        if ( wvc && check_nvc_affinity(wvc, cpu) )
         {
             vcpu_assign(prv, wvc->vcpu, cpu);
             list_del_init(&wvc->waitq_elem);
@@ -573,11 +590,15 @@  static void null_vcpu_migrate(const struct scheduler *ops, struct vcpu *v,
      * Let's now consider new_cpu, which is where v is being sent. It can be
      * either free, or have a vCPU already assigned to it.
      *
-     * In the former case, we should assign v to it, and try to get it to run.
+     * In the former case, we should assign v to it, and try to get it to run,
+     * if possible, according to affinity.
      *
      * In latter, all we can do is to park v in the waitqueue.
      */
-    if ( per_cpu(npc, new_cpu).vcpu == NULL )
+    cpumask_and(cpumask_scratch_cpu(cpu), cpupool_domain_cpumask(v->domain),
+                nvc->vcpu->cpu_hard_affinity);
+    if ( per_cpu(npc, new_cpu).vcpu == NULL &&
+         cpumask_test_cpu(new_cpu, cpumask_scratch_cpu(cpu)) )
     {
         /* We don't know whether v was in the waitqueue. If yes, remove it */
         spin_lock(&prv->waitq_lock);
@@ -666,7 +687,7 @@  static struct task_slice null_schedule(const struct scheduler *ops,
     {
         spin_lock(&prv->waitq_lock);
         wvc = list_first_entry_or_null(&prv->waitq, struct null_vcpu, waitq_elem);
-        if ( wvc )
+        if ( wvc && check_nvc_affinity(wvc, cpu) )
         {
             vcpu_assign(prv, wvc->vcpu, cpu);
             list_del_init(&wvc->waitq_elem);