[v2,4/5] xen: sched_null: support for hard affinity

Message ID 149152525510.31941.6625917268878264420.stgit@Solace.fritz.box (mailing list archive)
State New, archived

Commit Message

Dario Faggioli April 7, 2017, 12:34 a.m. UTC
As a (rudimentary) way of directing and influencing the
placement logic implemented by the scheduler, add support
for vCPU hard affinity.

Basically, a vCPU will now be assigned only to a pCPU
that is part of its own hard affinity. If all such pCPUs
are busy, the vCPU will wait, just as it does when there
are no free pCPUs.
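
Concretely, the eligibility check that the patch below introduces,
as vcpu_check_affinity(), is just the intersection of the vCPU's
hard affinity with the cpumask of its cpupool:

    static inline bool vcpu_check_affinity(struct vcpu *v, unsigned int cpu)
    {
        /* A pCPU is eligible for v only if it is both in v's hard
         * affinity and in the cpumask of v's cpupool. */
        cpumask_and(cpumask_scratch_cpu(cpu), v->cpu_hard_affinity,
                    cpupool_domain_cpumask(v->domain));

        return cpumask_test_cpu(cpu, cpumask_scratch_cpu(cpu));
    }

The other placement paths (e.g., pick_cpu()) open-code the same
masking, so that they can keep reusing the scratch cpumask.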

Signed-off-by: Dario Faggioli <dario.faggioli@citrix.com>
---
Cc: George Dunlap <george.dunlap@citrix.com>
Cc: Stefano Stabellini <stefano@aporeto.com>
Cc: Julien Grall <julien.grall@arm.com>
Cc: Jonathan Davies <Jonathan.Davies@citrix.com>
Cc: Marcus Granado <marcus.granado@citrix.com>
---
Changes from v1:
- coding style fixes (removed some hard tabs);
- better signature for check_nvc_affinity() (which is also renamed to
  vcpu_check_affinity());
- fixed a bug in null_vcpu_remove() that used an uninitialized cpumask.
---
 xen/common/sched_null.c |   50 ++++++++++++++++++++++++++++++++---------------
 1 file changed, 34 insertions(+), 16 deletions(-)

Comments

George Dunlap April 7, 2017, 10:08 a.m. UTC | #1
On 07/04/17 01:34, Dario Faggioli wrote:
> As a (rudimentary) way of directing and influencing the
> placement logic implemented by the scheduler, add support
> for vCPU hard affinity.
> 
> Basically, a vCPU will now be assigned only to a pCPU
> that is part of its own hard affinity. If all such pCPUs
> are busy, the vCPU will wait, just as it does when there
> are no free pCPUs.
> 
> Signed-off-by: Dario Faggioli <dario.faggioli@citrix.com>

[snip]

> @@ -413,7 +431,6 @@ static void null_vcpu_insert(const struct scheduler *ops, struct vcpu *v)
>  static void _vcpu_remove(struct null_private *prv, struct vcpu *v)
>  {
>      unsigned int cpu = v->processor;
> -    struct domain *d = v->domain;
>      struct null_vcpu *wvc;
>  
>      ASSERT(list_empty(&null_vcpu(v)->waitq_elem));
> @@ -425,7 +442,7 @@ static void _vcpu_remove(struct null_private *prv, struct vcpu *v)
>      * If yes, we assign it to cpu, in place of v.
>       */
>      wvc = list_first_entry_or_null(&prv->waitq, struct null_vcpu, waitq_elem);
> -    if ( wvc && cpumask_test_cpu(cpu, cpupool_domain_cpumask(d)) )
> +    if ( wvc && vcpu_check_affinity(wvc->vcpu, cpu) )

Hmm, actually I just noticed that this only checks the first item on the
list.  If there are two vcpus on the list, and the first one doesn't
have affinity with the pCPU in question, the second one won't even be
considered.  This was probably OK before, when the only time the test
could fail was during suspend/resume, but I don't think it's really OK
anymore.

Everything else looks OK to me.

 -George
Dario Faggioli April 7, 2017, 10:11 a.m. UTC | #2
On Fri, 2017-04-07 at 11:08 +0100, George Dunlap wrote:
> On 07/04/17 01:34, Dario Faggioli wrote:
> > @@ -413,7 +431,6 @@ static void null_vcpu_insert(const struct
> > scheduler *ops, struct vcpu *v)
> >  static void _vcpu_remove(struct null_private *prv, struct vcpu *v)
> >  {
> >      unsigned int cpu = v->processor;
> > -    struct domain *d = v->domain;
> >      struct null_vcpu *wvc;
> >  
> >      ASSERT(list_empty(&null_vcpu(v)->waitq_elem));
> > @@ -425,7 +442,7 @@ static void _vcpu_remove(struct null_private
> > *prv, struct vcpu *v)
> >       * If yes, we assign it to cpu, in place of v.
> >       */
> >      wvc = list_first_entry_or_null(&prv->waitq, struct null_vcpu,
> > waitq_elem);
> > -    if ( wvc && cpumask_test_cpu(cpu, cpupool_domain_cpumask(d)) )
> > +    if ( wvc && vcpu_check_affinity(wvc->vcpu, cpu) )
> 
> Hmm, actually I just noticed that this only checks the first item on
> the list.  If there are two vcpus on the list, and the first one
> doesn't have affinity with the pCPU in question, the second one won't
> even be considered.  This was probably OK before, when the only time
> the test could fail was during suspend/resume, but I don't think it's
> really OK anymore.
> 
Good point. I need to scan the waitqueue. Will do.
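
Something like the following, I'd say (a totally untested sketch,
reusing the existing helpers, and applicable to both _vcpu_remove()
and null_schedule()):

    /*
     * Scan the whole waitqueue, rather than only its head, and
     * assign to cpu the first vCPU that has affinity with it.
     */
    list_for_each_entry ( wvc, &prv->waitq, waitq_elem )
    {
        if ( vcpu_check_affinity(wvc->vcpu, cpu) )
        {
            list_del_init(&wvc->waitq_elem);
            vcpu_assign(prv, wvc->vcpu, cpu);
            break;
        }
    }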

> Everything else looks OK to me.
> 
Good to hear. :-)

Thanks and Regards,
Dario

Patch

diff --git a/xen/common/sched_null.c b/xen/common/sched_null.c
index c2c4182..96652a0 100644
--- a/xen/common/sched_null.c
+++ b/xen/common/sched_null.c
@@ -115,6 +115,14 @@  static inline struct null_dom *null_dom(const struct domain *d)
     return d->sched_priv;
 }
 
+static inline bool vcpu_check_affinity(struct vcpu *v, unsigned int cpu)
+{
+    cpumask_and(cpumask_scratch_cpu(cpu), v->cpu_hard_affinity,
+                cpupool_domain_cpumask(v->domain));
+
+    return cpumask_test_cpu(cpu, cpumask_scratch_cpu(cpu));
+}
+
 static int null_init(struct scheduler *ops)
 {
     struct null_private *prv;
@@ -276,16 +284,22 @@  static unsigned int pick_cpu(struct null_private *prv, struct vcpu *v)
 
     ASSERT(spin_is_locked(per_cpu(schedule_data, cpu).schedule_lock));
 
+    cpumask_and(cpumask_scratch_cpu(cpu), v->cpu_hard_affinity, cpus);
+
     /*
-     * If our processor is free, or we are assigned to it, and it is
-     * also still valid, just go for it.
+     * If our processor is free, or we are assigned to it, and it is also
+     * still valid and part of our affinity, just go for it.
+     * (Note that we could use vcpu_check_affinity() here, but we
+     * deliberately don't, so that the scratch cpumask keeps what we
+     * have just put in it.)
      */
     if ( likely((per_cpu(npc, cpu).vcpu == NULL || per_cpu(npc, cpu).vcpu == v)
-                && cpumask_test_cpu(cpu, cpus)) )
+                && cpumask_test_cpu(cpu, cpumask_scratch_cpu(cpu))) )
         return cpu;
 
-    /* If not, just go for a valid free pCPU, if any */
-    cpumask_and(cpumask_scratch_cpu(cpu), &prv->cpus_free, cpus);
+    /* If not, just go for a free pCPU, within our affinity, if any */
+    cpumask_and(cpumask_scratch_cpu(cpu), cpumask_scratch_cpu(cpu),
+                &prv->cpus_free);
     new_cpu = cpumask_first(cpumask_scratch_cpu(cpu));
 
     if ( likely(new_cpu != nr_cpu_ids) )
@@ -302,7 +316,8 @@  static unsigned int pick_cpu(struct null_private *prv, struct vcpu *v)
      * as we will actually assign the vCPU to the pCPU we return from here,
      * only if the pCPU is free.
      */
-    return cpumask_any(cpus);
+    cpumask_and(cpumask_scratch_cpu(cpu), cpus, v->cpu_hard_affinity);
+    return cpumask_any(cpumask_scratch_cpu(cpu));
 }
 
 static void vcpu_assign(struct null_private *prv, struct vcpu *v,
@@ -361,6 +376,7 @@  static void null_vcpu_insert(const struct scheduler *ops, struct vcpu *v)
 {
     struct null_private *prv = null_priv(ops);
     struct null_vcpu *nvc = null_vcpu(v);
+    unsigned int cpu;
     spinlock_t *lock;
 
     ASSERT(!is_idle_vcpu(v));
@@ -368,23 +384,25 @@  static void null_vcpu_insert(const struct scheduler *ops, struct vcpu *v)
     lock = vcpu_schedule_lock_irq(v);
  retry:
 
-    v->processor = pick_cpu(prv, v);
+    cpu = v->processor = pick_cpu(prv, v);
 
     spin_unlock(lock);
 
     lock = vcpu_schedule_lock(v);
 
+    cpumask_and(cpumask_scratch_cpu(cpu), v->cpu_hard_affinity,
+                cpupool_domain_cpumask(v->domain));
+
     /* If the pCPU is free, we assign v to it */
-    if ( likely(per_cpu(npc, v->processor).vcpu == NULL) )
+    if ( likely(per_cpu(npc, cpu).vcpu == NULL) )
     {
         /*
          * Insert is followed by vcpu_wake(), so there's no need to poke
          * the pcpu with the SCHEDULE_SOFTIRQ, as wake will do that.
          */
-        vcpu_assign(prv, v, v->processor);
+        vcpu_assign(prv, v, cpu);
     }
-    else if ( cpumask_intersects(&prv->cpus_free,
-                                 cpupool_domain_cpumask(v->domain)) )
+    else if ( cpumask_intersects(&prv->cpus_free, cpumask_scratch_cpu(cpu)) )
     {
         /*
          * If the pCPU is not free (e.g., because we raced with another
@@ -413,7 +431,6 @@  static void null_vcpu_insert(const struct scheduler *ops, struct vcpu *v)
 static void _vcpu_remove(struct null_private *prv, struct vcpu *v)
 {
     unsigned int cpu = v->processor;
-    struct domain *d = v->domain;
     struct null_vcpu *wvc;
 
     ASSERT(list_empty(&null_vcpu(v)->waitq_elem));
@@ -425,7 +442,7 @@  static void _vcpu_remove(struct null_private *prv, struct vcpu *v)
     * If yes, we assign it to cpu, in place of v.
      */
     wvc = list_first_entry_or_null(&prv->waitq, struct null_vcpu, waitq_elem);
-    if ( wvc && cpumask_test_cpu(cpu, cpupool_domain_cpumask(d)) )
+    if ( wvc && vcpu_check_affinity(wvc->vcpu, cpu) )
     {
         list_del_init(&wvc->waitq_elem);
         vcpu_assign(prv, wvc->vcpu, cpu);
@@ -547,11 +564,12 @@  static void null_vcpu_migrate(const struct scheduler *ops, struct vcpu *v,
      * Let's now consider new_cpu, which is where v is being sent. It can be
      * either free, or have a vCPU already assigned to it.
      *
-     * In the former case, we should assign v to it, and try to get it to run.
+     * In the former case, we should assign v to it, and try to get it to run,
+     * if possible, according to affinity.
      *
     * In the latter case, all we can do is park v in the waitqueue.
      */
-    if ( per_cpu(npc, new_cpu).vcpu == NULL )
+    if ( per_cpu(npc, new_cpu).vcpu == NULL && vcpu_check_affinity(v, new_cpu) )
     {
         /* v might have been in the waitqueue, so remove it */
         spin_lock(&prv->waitq_lock);
@@ -635,7 +653,7 @@  static struct task_slice null_schedule(const struct scheduler *ops,
     {
         spin_lock(&prv->waitq_lock);
         wvc = list_first_entry_or_null(&prv->waitq, struct null_vcpu, waitq_elem);
-        if ( wvc )
+        if ( wvc && vcpu_check_affinity(wvc->vcpu, cpu) )
         {
             vcpu_assign(prv, wvc->vcpu, cpu);
             list_del_init(&wvc->waitq_elem);