@@ -115,9 +115,11 @@ static inline struct null_dom *null_dom(const struct domain *d)
return d->sched_priv;
}
-static inline bool vcpu_check_affinity(struct vcpu *v, unsigned int cpu)
+static inline bool vcpu_check_affinity(struct vcpu *v, unsigned int cpu,
+ unsigned int balance_step)
{
- cpumask_and(cpumask_scratch_cpu(cpu), v->cpu_hard_affinity,
+ affinity_balance_cpumask(v, balance_step, cpumask_scratch_cpu(cpu));
+ cpumask_and(cpumask_scratch_cpu(cpu), cpumask_scratch_cpu(cpu),
cpupool_domain_cpumask(v->domain));
return cpumask_test_cpu(cpu, cpumask_scratch_cpu(cpu));
@@ -279,31 +281,40 @@ static void null_dom_destroy(const struct scheduler *ops, struct domain *d)
*/
static unsigned int pick_cpu(struct null_private *prv, struct vcpu *v)
{
+ unsigned int bs;
unsigned int cpu = v->processor, new_cpu;
cpumask_t *cpus = cpupool_domain_cpumask(v->domain);
ASSERT(spin_is_locked(per_cpu(schedule_data, cpu).schedule_lock));
- cpumask_and(cpumask_scratch_cpu(cpu), v->cpu_hard_affinity, cpus);
+ for_each_affinity_balance_step( bs )
+ {
+ if ( bs == BALANCE_SOFT_AFFINITY &&
+ !has_soft_affinity(v, v->cpu_hard_affinity) )
+ continue;
- /*
- * If our processor is free, or we are assigned to it, and it is also
- * still valid and part of our affinity, just go for it.
- * (Note that we may call vcpu_check_affinity(), but we deliberately
- * don't, so we get to keep in the scratch cpumask what we have just
- * put in it.)
- */
- if ( likely((per_cpu(npc, cpu).vcpu == NULL || per_cpu(npc, cpu).vcpu == v)
- && cpumask_test_cpu(cpu, cpumask_scratch_cpu(cpu))) )
- return cpu;
+ affinity_balance_cpumask(v, bs, cpumask_scratch_cpu(cpu));
+ cpumask_and(cpumask_scratch_cpu(cpu), cpumask_scratch_cpu(cpu), cpus);
- /* If not, just go for a free pCPU, within our affinity, if any */
- cpumask_and(cpumask_scratch_cpu(cpu), cpumask_scratch_cpu(cpu),
- &prv->cpus_free);
- new_cpu = cpumask_first(cpumask_scratch_cpu(cpu));
+ /*
+ * If our processor is free, or we are assigned to it, and it is also
+ * still valid and part of our affinity, just go for it.
+ * (Note that we may call vcpu_check_affinity(), but we deliberately
+ * don't, so we get to keep in the scratch cpumask what we have just
+ * put in it.)
+ */
+ if ( likely((per_cpu(npc, cpu).vcpu == NULL || per_cpu(npc, cpu).vcpu == v)
+ && cpumask_test_cpu(cpu, cpumask_scratch_cpu(cpu))) )
+ return cpu;
- if ( likely(new_cpu != nr_cpu_ids) )
- return new_cpu;
+ /* If not, just go for a free pCPU, within our affinity, if any */
+ cpumask_and(cpumask_scratch_cpu(cpu), cpumask_scratch_cpu(cpu),
+ &prv->cpus_free);
+ new_cpu = cpumask_first(cpumask_scratch_cpu(cpu));
+
+ if ( likely(new_cpu != nr_cpu_ids) )
+ return new_cpu;
+ }
/*
* If we didn't find any free pCPU, just pick any valid pcpu, even if
@@ -430,6 +441,7 @@ static void null_vcpu_insert(const struct scheduler *ops, struct vcpu *v)
static void _vcpu_remove(struct null_private *prv, struct vcpu *v)
{
+ unsigned int bs;
unsigned int cpu = v->processor;
struct null_vcpu *wvc;
@@ -441,19 +453,27 @@ static void _vcpu_remove(struct null_private *prv, struct vcpu *v)
/*
* If v is assigned to a pCPU, let's see if there is someone waiting,
- * suitable to be assigned to it.
+ * suitable to be assigned to it (prioritizing vcpus that have
+ * soft-affinity with cpu).
*/
- list_for_each_entry( wvc, &prv->waitq, waitq_elem )
+ for_each_affinity_balance_step( bs )
{
- if ( vcpu_check_affinity(wvc->vcpu, cpu) )
+ list_for_each_entry( wvc, &prv->waitq, waitq_elem )
{
- list_del_init(&wvc->waitq_elem);
- vcpu_assign(prv, wvc->vcpu, cpu);
- cpu_raise_softirq(cpu, SCHEDULE_SOFTIRQ);
- break;
+ if ( bs == BALANCE_SOFT_AFFINITY &&
+ !has_soft_affinity(wvc->vcpu, wvc->vcpu->cpu_hard_affinity) )
+ continue;
+
+ if ( vcpu_check_affinity(wvc->vcpu, cpu, bs) )
+ {
+ list_del_init(&wvc->waitq_elem);
+ vcpu_assign(prv, wvc->vcpu, cpu);
+ cpu_raise_softirq(cpu, SCHEDULE_SOFTIRQ);
+ spin_unlock(&prv->waitq_lock);
+ return;
+ }
}
}
-
spin_unlock(&prv->waitq_lock);
}
@@ -570,7 +590,8 @@ static void null_vcpu_migrate(const struct scheduler *ops, struct vcpu *v,
*
* In latter, all we can do is to park v in the waitqueue.
*/
- if ( per_cpu(npc, new_cpu).vcpu == NULL && vcpu_check_affinity(v, new_cpu) )
+ if ( per_cpu(npc, new_cpu).vcpu == NULL &&
+ vcpu_check_affinity(v, new_cpu, BALANCE_HARD_AFFINITY) )
{
/* v might have been in the waitqueue, so remove it */
spin_lock(&prv->waitq_lock);
@@ -633,6 +654,7 @@ static struct task_slice null_schedule(const struct scheduler *ops,
s_time_t now,
bool_t tasklet_work_scheduled)
{
+ unsigned int bs;
const unsigned int cpu = smp_processor_id();
struct null_private *prv = null_priv(ops);
struct null_vcpu *wvc;
@@ -656,13 +678,35 @@ static struct task_slice null_schedule(const struct scheduler *ops,
if ( unlikely(ret.task == NULL) )
{
spin_lock(&prv->waitq_lock);
- wvc = list_first_entry_or_null(&prv->waitq, struct null_vcpu, waitq_elem);
- if ( wvc && vcpu_check_affinity(wvc->vcpu, cpu) )
+
+ if ( list_empty(&prv->waitq) )
+ goto unlock;
+
+ /*
+ * We scan the waitqueue twice, for prioritizing vcpus that have
+ * soft-affinity with cpu. This may look like something expensive to
+     * do here in null_schedule(), but it's actually fine, because we do
+ * it only in cases where a pcpu has no vcpu associated (e.g., as
+ * said above, the cpu has just joined a cpupool).
+ */
+ for_each_affinity_balance_step( bs )
{
- vcpu_assign(prv, wvc->vcpu, cpu);
- list_del_init(&wvc->waitq_elem);
- ret.task = wvc->vcpu;
+ list_for_each_entry( wvc, &prv->waitq, waitq_elem )
+ {
+ if ( bs == BALANCE_SOFT_AFFINITY &&
+ !has_soft_affinity(wvc->vcpu, wvc->vcpu->cpu_hard_affinity) )
+ continue;
+
+ if ( vcpu_check_affinity(wvc->vcpu, cpu, bs) )
+ {
+ vcpu_assign(prv, wvc->vcpu, cpu);
+ list_del_init(&wvc->waitq_elem);
+ ret.task = wvc->vcpu;
+ goto unlock;
+ }
+ }
}
+ unlock:
spin_unlock(&prv->waitq_lock);
}
The null scheduler does not really use hard-affinity for scheduling, it
uses it for 'placement', i.e., for deciding to which pCPU to statically
assign a vCPU.

Let's use soft-affinity in the same way, of course with the difference
that, if there's no free pCPU within the vCPU's soft-affinity, we fall
back to checking the hard-affinity, instead of putting the vCPU in the
waitqueue.

This has no impact on the scheduling overhead, because soft-affinity is
only considered in cold paths (like when a vCPU joins the scheduler for
the first time, or is manually moved between pCPUs by the user).

Signed-off-by: Dario Faggioli <dario.faggioli@citrix.com>
---
Cc: George Dunlap <george.dunlap@eu.citrix.com>
---
 xen/common/sched_null.c | 110 +++++++++++++++++++++++++++++++++--------------
 1 file changed, 77 insertions(+), 33 deletions(-)
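For reference, a minimal sketch of the placement logic this patch introduces:
look for a free pCPU within the vCPU's soft-affinity first, and only fall back
to the hard-affinity if that fails. The balance-step helpers
(for_each_affinity_balance_step(), affinity_balance_cpumask(),
has_soft_affinity()) are the ones used in the hunks above; the
pick_free_cpu_sketch() function and its 'scratch' parameter are made up
purely for illustration and are not part of the patch.

/*
 * Sketch only, not part of the patch: find a free pCPU for v, trying
 * soft-affinity first and hard-affinity as fallback. The function name
 * and the 'scratch' parameter are hypothetical; the other identifiers
 * are the ones used in the hunks above.
 */
static unsigned int pick_free_cpu_sketch(struct vcpu *v,
                                         const cpumask_t *free,
                                         cpumask_t *scratch)
{
    unsigned int bs, new_cpu;

    for_each_affinity_balance_step( bs )
    {
        /* Skip the soft-affinity step if v has no effective soft-affinity */
        if ( bs == BALANCE_SOFT_AFFINITY &&
             !has_soft_affinity(v, v->cpu_hard_affinity) )
            continue;

        /* Candidates are the free pCPUs within this step's affinity */
        affinity_balance_cpumask(v, bs, scratch);
        cpumask_and(scratch, scratch, free);

        new_cpu = cpumask_first(scratch);
        if ( new_cpu != nr_cpu_ids )
            return new_cpu;
    }

    /* No free pCPU in either affinity: the caller parks v in the waitqueue */
    return nr_cpu_ids;
}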