
[3/5] xen: sched-null: support soft-affinity

Message ID 149874099124.524.12472246724522761078.stgit@Solace (mailing list archive)
State New, archived

Commit Message

Dario Faggioli June 29, 2017, 12:56 p.m. UTC
The null scheduler does not really use hard-affinity for
scheduling; it uses it for 'placement', i.e., for deciding
to which pCPU a vCPU should be statically assigned.

Let's use soft-affinity in the same way, with the
difference that, if there is no free pCPU within the
vCPU's soft-affinity, we fall back to checking the
hard-affinity, instead of putting the vCPU in the
waitqueue right away.

This has no impact on scheduling overhead, because
soft-affinity is only considered in cold paths (e.g.,
when a vCPU joins the scheduler for the first time, or
is manually moved between pCPUs by the user).

Signed-off-by: Dario Faggioli <dario.faggioli@citrix.com>
---
Cc: George Dunlap <george.dunlap@eu.citrix.com>
---
 xen/common/sched_null.c |  110 +++++++++++++++++++++++++++++++++--------------
 1 file changed, 77 insertions(+), 33 deletions(-)
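
For illustration only, here is a minimal standalone sketch of the placement
order this patch introduces (a free pCPU is looked for within the vCPU's
soft-affinity first, with hard-affinity as the fallback). Plain uint64_t
bitmasks stand in for Xen's cpumask_t, and all names below (toy_vcpu,
pick_cpu_for, first_free_cpu, NO_CPU) are hypothetical, not the ones used in
xen/common/sched_null.c; the real logic is the for_each_affinity_balance_step()
loop added to pick_cpu() in the patch further down.

/*
 * Hypothetical, self-contained model of the placement fallback described
 * above: look for a free pCPU within the vCPU's soft-affinity first, and
 * only if that fails fall back to its hard-affinity. Plain uint64_t
 * bitmasks stand in for Xen's cpumask_t; none of these names are the
 * ones used in xen/common/sched_null.c.
 */
#include <stdint.h>
#include <stdio.h>

#define NO_CPU (-1)

struct toy_vcpu {
    uint64_t hard_affinity; /* pCPUs the vCPU is allowed to run on */
    uint64_t soft_affinity; /* pCPUs the vCPU prefers to run on */
};

/* First free pCPU in 'mask', or NO_CPU if there is none. */
static int first_free_cpu(uint64_t mask, uint64_t free_cpus)
{
    uint64_t candidates = mask & free_cpus;
    int cpu;

    for ( cpu = 0; cpu < 64; cpu++ )
        if ( candidates & (UINT64_C(1) << cpu) )
            return cpu;

    return NO_CPU;
}

/*
 * Two-step placement, mirroring the idea of the
 * for_each_affinity_balance_step() loop the patch adds to pick_cpu():
 * soft-affinity first, then hard-affinity. NO_CPU means the vCPU
 * would be parked in the waitqueue.
 */
static int pick_cpu_for(const struct toy_vcpu *v, uint64_t free_cpus)
{
    int cpu = first_free_cpu(v->soft_affinity & v->hard_affinity, free_cpus);

    if ( cpu == NO_CPU )
        cpu = first_free_cpu(v->hard_affinity, free_cpus);

    return cpu;
}

int main(void)
{
    struct toy_vcpu v = {
        .hard_affinity = 0xf, /* pCPUs 0-3 */
        .soft_affinity = 0xc, /* prefers pCPUs 2-3 */
    };

    /* pCPUs 1 and 3 free: soft-affinity is honoured, pCPU 3 is picked. */
    printf("picked pCPU %d\n", pick_cpu_for(&v, 0xa));

    /* Only pCPU 1 free: fall back to hard-affinity, pCPU 1 is picked. */
    printf("picked pCPU %d\n", pick_cpu_for(&v, 0x2));

    return 0;
}

As the commit message notes, this fallback only affects where a vCPU is
placed on cold paths; the hot scheduling path of the null scheduler is
unchanged.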

Comments

George Dunlap July 25, 2017, 3:50 p.m. UTC | #1
On Thu, Jun 29, 2017 at 1:56 PM, Dario Faggioli
<dario.faggioli@citrix.com> wrote:
> The null scheduler does not really use hard-affinity for
> scheduling; it uses it for 'placement', i.e., for deciding
> to which pCPU a vCPU should be statically assigned.
>
> Let's use soft-affinity in the same way, with the
> difference that, if there is no free pCPU within the
> vCPU's soft-affinity, we fall back to checking the
> hard-affinity, instead of putting the vCPU in the
> waitqueue right away.
>
> This has no impact on scheduling overhead, because
> soft-affinity is only considered in cold paths (e.g.,
> when a vCPU joins the scheduler for the first time, or
> is manually moved between pCPUs by the user).
>
> Signed-off-by: Dario Faggioli <dario.faggioli@citrix.com>

Reviewed-by: George Dunlap <george.dunlap@citrix.com>

Patch

diff --git a/xen/common/sched_null.c b/xen/common/sched_null.c
index 610a150..19c7f0f 100644
--- a/xen/common/sched_null.c
+++ b/xen/common/sched_null.c
@@ -115,9 +115,11 @@  static inline struct null_dom *null_dom(const struct domain *d)
     return d->sched_priv;
 }
 
-static inline bool vcpu_check_affinity(struct vcpu *v, unsigned int cpu)
+static inline bool vcpu_check_affinity(struct vcpu *v, unsigned int cpu,
+                                       unsigned int balance_step)
 {
-    cpumask_and(cpumask_scratch_cpu(cpu), v->cpu_hard_affinity,
+    affinity_balance_cpumask(v, balance_step, cpumask_scratch_cpu(cpu));
+    cpumask_and(cpumask_scratch_cpu(cpu), cpumask_scratch_cpu(cpu),
                 cpupool_domain_cpumask(v->domain));
 
     return cpumask_test_cpu(cpu, cpumask_scratch_cpu(cpu));
@@ -279,31 +281,40 @@  static void null_dom_destroy(const struct scheduler *ops, struct domain *d)
  */
 static unsigned int pick_cpu(struct null_private *prv, struct vcpu *v)
 {
+    unsigned int bs;
     unsigned int cpu = v->processor, new_cpu;
     cpumask_t *cpus = cpupool_domain_cpumask(v->domain);
 
     ASSERT(spin_is_locked(per_cpu(schedule_data, cpu).schedule_lock));
 
-    cpumask_and(cpumask_scratch_cpu(cpu), v->cpu_hard_affinity, cpus);
+    for_each_affinity_balance_step( bs )
+    {
+        if ( bs == BALANCE_SOFT_AFFINITY &&
+             !has_soft_affinity(v, v->cpu_hard_affinity) )
+            continue;
 
-    /*
-     * If our processor is free, or we are assigned to it, and it is also
-     * still valid and part of our affinity, just go for it.
-     * (Note that we may call vcpu_check_affinity(), but we deliberately
-     * don't, so we get to keep in the scratch cpumask what we have just
-     * put in it.)
-     */
-    if ( likely((per_cpu(npc, cpu).vcpu == NULL || per_cpu(npc, cpu).vcpu == v)
-                && cpumask_test_cpu(cpu, cpumask_scratch_cpu(cpu))) )
-        return cpu;
+        affinity_balance_cpumask(v, bs, cpumask_scratch_cpu(cpu));
+        cpumask_and(cpumask_scratch_cpu(cpu), cpumask_scratch_cpu(cpu), cpus);
 
-    /* If not, just go for a free pCPU, within our affinity, if any */
-    cpumask_and(cpumask_scratch_cpu(cpu), cpumask_scratch_cpu(cpu),
-                &prv->cpus_free);
-    new_cpu = cpumask_first(cpumask_scratch_cpu(cpu));
+        /*
+         * If our processor is free, or we are assigned to it, and it is also
+         * still valid and part of our affinity, just go for it.
+         * (Note that we may call vcpu_check_affinity(), but we deliberately
+         * don't, so we get to keep in the scratch cpumask what we have just
+         * put in it.)
+         */
+        if ( likely((per_cpu(npc, cpu).vcpu == NULL || per_cpu(npc, cpu).vcpu == v)
+                    && cpumask_test_cpu(cpu, cpumask_scratch_cpu(cpu))) )
+            return cpu;
 
-    if ( likely(new_cpu != nr_cpu_ids) )
-        return new_cpu;
+        /* If not, just go for a free pCPU, within our affinity, if any */
+        cpumask_and(cpumask_scratch_cpu(cpu), cpumask_scratch_cpu(cpu),
+                    &prv->cpus_free);
+        new_cpu = cpumask_first(cpumask_scratch_cpu(cpu));
+
+        if ( likely(new_cpu != nr_cpu_ids) )
+            return new_cpu;
+    }
 
     /*
      * If we didn't find any free pCPU, just pick any valid pcpu, even if
@@ -430,6 +441,7 @@  static void null_vcpu_insert(const struct scheduler *ops, struct vcpu *v)
 
 static void _vcpu_remove(struct null_private *prv, struct vcpu *v)
 {
+    unsigned int bs;
     unsigned int cpu = v->processor;
     struct null_vcpu *wvc;
 
@@ -441,19 +453,27 @@  static void _vcpu_remove(struct null_private *prv, struct vcpu *v)
 
     /*
      * If v is assigned to a pCPU, let's see if there is someone waiting,
-     * suitable to be assigned to it.
+     * suitable to be assigned to it (prioritizing vcpus that have
+     * soft-affinity with cpu).
      */
-    list_for_each_entry( wvc, &prv->waitq, waitq_elem )
+    for_each_affinity_balance_step( bs )
     {
-        if ( vcpu_check_affinity(wvc->vcpu, cpu) )
+        list_for_each_entry( wvc, &prv->waitq, waitq_elem )
         {
-            list_del_init(&wvc->waitq_elem);
-            vcpu_assign(prv, wvc->vcpu, cpu);
-            cpu_raise_softirq(cpu, SCHEDULE_SOFTIRQ);
-            break;
+            if ( bs == BALANCE_SOFT_AFFINITY &&
+                 !has_soft_affinity(wvc->vcpu, wvc->vcpu->cpu_hard_affinity) )
+                continue;
+
+            if ( vcpu_check_affinity(wvc->vcpu, cpu, bs) )
+            {
+                list_del_init(&wvc->waitq_elem);
+                vcpu_assign(prv, wvc->vcpu, cpu);
+                cpu_raise_softirq(cpu, SCHEDULE_SOFTIRQ);
+                spin_unlock(&prv->waitq_lock);
+                return;
+            }
         }
     }
-
     spin_unlock(&prv->waitq_lock);
 }
 
@@ -570,7 +590,8 @@  static void null_vcpu_migrate(const struct scheduler *ops, struct vcpu *v,
      *
      * In latter, all we can do is to park v in the waitqueue.
      */
-    if ( per_cpu(npc, new_cpu).vcpu == NULL && vcpu_check_affinity(v, new_cpu) )
+    if ( per_cpu(npc, new_cpu).vcpu == NULL &&
+         vcpu_check_affinity(v, new_cpu, BALANCE_HARD_AFFINITY) )
     {
         /* v might have been in the waitqueue, so remove it */
         spin_lock(&prv->waitq_lock);
@@ -633,6 +654,7 @@  static struct task_slice null_schedule(const struct scheduler *ops,
                                        s_time_t now,
                                        bool_t tasklet_work_scheduled)
 {
+    unsigned int bs;
     const unsigned int cpu = smp_processor_id();
     struct null_private *prv = null_priv(ops);
     struct null_vcpu *wvc;
@@ -656,13 +678,35 @@  static struct task_slice null_schedule(const struct scheduler *ops,
     if ( unlikely(ret.task == NULL) )
     {
         spin_lock(&prv->waitq_lock);
-        wvc = list_first_entry_or_null(&prv->waitq, struct null_vcpu, waitq_elem);
-        if ( wvc && vcpu_check_affinity(wvc->vcpu, cpu) )
+
+        if ( list_empty(&prv->waitq) )
+            goto unlock;
+
+        /*
+         * We scan the waitqueue twice, for prioritizing vcpus that have
+         * soft-affinity with cpu. This may look like something expensive to
+         * do here in null_schedule(), but it's actually fine, because we do
+         * it only in cases where a pcpu has no vcpu associated (e.g., as
+         * said above, the cpu has just joined a cpupool).
+         */
+        for_each_affinity_balance_step( bs )
         {
-            vcpu_assign(prv, wvc->vcpu, cpu);
-            list_del_init(&wvc->waitq_elem);
-            ret.task = wvc->vcpu;
+            list_for_each_entry( wvc, &prv->waitq, waitq_elem )
+            {
+                if ( bs == BALANCE_SOFT_AFFINITY &&
+                     !has_soft_affinity(wvc->vcpu, wvc->vcpu->cpu_hard_affinity) )
+                    continue;
+
+                if ( vcpu_check_affinity(wvc->vcpu, cpu, bs) )
+                {
+                    vcpu_assign(prv, wvc->vcpu, cpu);
+                    list_del_init(&wvc->waitq_elem);
+                    ret.task = wvc->vcpu;
+                    goto unlock;
+                }
+            }
         }
+ unlock:
         spin_unlock(&prv->waitq_lock);
     }