@@ -568,7 +568,7 @@ void domain_update_node_affinity(struct domain *d)
cpumask_var_t dom_cpumask, dom_cpumask_soft;
cpumask_t *dom_affinity;
const cpumask_t *online;
- struct vcpu *v;
+ struct sched_unit *unit;
unsigned int cpu;
/* Do we have vcpus already? If not, no need to update node-affinity. */
@@ -601,12 +601,11 @@ void domain_update_node_affinity(struct domain *d)
* and the full mask of where it would prefer to run (the union of
* the soft affinity of all its various vcpus). Let's build them.
*/
- for_each_vcpu ( d, v )
+ for_each_sched_unit ( d, unit )
{
- cpumask_or(dom_cpumask, dom_cpumask,
- v->sched_unit->cpu_hard_affinity);
+ cpumask_or(dom_cpumask, dom_cpumask, unit->cpu_hard_affinity);
cpumask_or(dom_cpumask_soft, dom_cpumask_soft,
- v->sched_unit->cpu_soft_affinity);
+ unit->cpu_soft_affinity);
}
/* Filter out non-online cpus */
cpumask_and(dom_cpumask, dom_cpumask, online);
--- a/xen/common/schedule.c
+++ b/xen/common/schedule.c
@@ -157,26 +157,32 @@ static inline struct scheduler *dom_scheduler(const struct domain *d)
return &ops;
}
-static inline struct scheduler *vcpu_scheduler(const struct vcpu *v)
+static inline struct scheduler *unit_scheduler(const struct sched_unit *unit)
{
- struct domain *d = v->domain;
+ struct domain *d = unit->domain;
if ( likely(d->cpupool != NULL) )
return d->cpupool->sched;
/*
- * If d->cpupool is NULL, this is a vCPU of the idle domain. And this
+ * If d->cpupool is NULL, this is a unit of the idle domain. And this
* case is special because the idle domain does not really belong to
* a cpupool and, hence, doesn't really have a scheduler). In fact, its
- * vCPUs (may) run on pCPUs which are in different pools, with different
+ * units (may) run on pCPUs which are in different pools, with different
* schedulers.
*
* What we want, in this case, is the scheduler of the pCPU where this
- * particular idle vCPU is running. And, since v->processor never changes
- * for idle vCPUs, it is safe to use it, with no locks, to figure that out.
+ * particular idle unit is running. And, since unit->res never changes
+ * for idle units, it is safe to use it, with no locks, to figure that out.
*/
+
ASSERT(is_idle_domain(d));
- return per_cpu(scheduler, v->processor);
+ return per_cpu(scheduler, unit->res->master_cpu);
+}
+
+static inline struct scheduler *vcpu_scheduler(const struct vcpu *v)
+{
+ return unit_scheduler(v->sched_unit);
}
#define VCPU2ONLINE(_v) cpupool_domain_cpumask((_v)->domain)
@@ -502,10 +508,11 @@ static void sched_move_irqs(const struct sched_unit *unit)
int sched_move_domain(struct domain *d, struct cpupool *c)
{
struct vcpu *v;
- unsigned int new_p;
- void **vcpu_priv;
+ struct sched_unit *unit;
+ unsigned int new_p, unit_idx;
+ void **unit_priv;
void *domdata;
- void *vcpudata;
+ void *unitdata;
struct scheduler *old_ops;
void *old_domdata;
@@ -519,25 +526,27 @@ int sched_move_domain(struct domain *d, struct cpupool *c)
if ( IS_ERR(domdata) )
return PTR_ERR(domdata);
- vcpu_priv = xzalloc_array(void *, d->max_vcpus);
- if ( vcpu_priv == NULL )
+ /* TODO: fix array size with multiple vcpus per unit. */
+ unit_priv = xzalloc_array(void *, d->max_vcpus);
+ if ( unit_priv == NULL )
{
sched_free_domdata(c->sched, domdata);
return -ENOMEM;
}
- for_each_vcpu ( d, v )
+ unit_idx = 0;
+ for_each_sched_unit ( d, unit )
{
- vcpu_priv[v->vcpu_id] = sched_alloc_udata(c->sched, v->sched_unit,
- domdata);
- if ( vcpu_priv[v->vcpu_id] == NULL )
+ unit_priv[unit_idx] = sched_alloc_udata(c->sched, unit, domdata);
+ if ( unit_priv[unit_idx] == NULL )
{
- for_each_vcpu ( d, v )
- sched_free_udata(c->sched, vcpu_priv[v->vcpu_id]);
- xfree(vcpu_priv);
+ for ( unit_idx = 0; unit_priv[unit_idx]; unit_idx++ )
+ sched_free_udata(c->sched, unit_priv[unit_idx]);
+ xfree(unit_priv);
sched_free_domdata(c->sched, domdata);
return -ENOMEM;
}
+ unit_idx++;
}
domain_pause(d);
@@ -545,30 +554,36 @@ int sched_move_domain(struct domain *d, struct cpupool *c)
old_ops = dom_scheduler(d);
old_domdata = d->sched_priv;
- for_each_vcpu ( d, v )
+ for_each_sched_unit ( d, unit )
{
- sched_remove_unit(old_ops, v->sched_unit);
+ sched_remove_unit(old_ops, unit);
}
d->cpupool = c;
d->sched_priv = domdata;
new_p = cpumask_first(c->cpu_valid);
- for_each_vcpu ( d, v )
+ unit_idx = 0;
+ for_each_sched_unit ( d, unit )
{
spinlock_t *lock;
+ unsigned int unit_p = new_p;
- vcpudata = v->sched_unit->priv;
+ unitdata = unit->priv;
- migrate_timer(&v->periodic_timer, new_p);
- migrate_timer(&v->singleshot_timer, new_p);
- migrate_timer(&v->poll_timer, new_p);
+ for_each_sched_unit_vcpu ( unit, v )
+ {
+ migrate_timer(&v->periodic_timer, new_p);
+ migrate_timer(&v->singleshot_timer, new_p);
+ migrate_timer(&v->poll_timer, new_p);
+ new_p = cpumask_cycle(new_p, c->cpu_valid);
+ }
- lock = unit_schedule_lock_irq(v->sched_unit);
+ lock = unit_schedule_lock_irq(unit);
- sched_set_affinity(v->sched_unit, &cpumask_all, &cpumask_all);
+ sched_set_affinity(unit, &cpumask_all, &cpumask_all);
- sched_set_res(v->sched_unit, get_sched_res(new_p));
+ sched_set_res(unit, get_sched_res(unit_p));
/*
* With v->processor modified we must not
* - make any further changes assuming we hold the scheduler lock,
@@ -576,15 +591,15 @@ int sched_move_domain(struct domain *d, struct cpupool *c)
*/
spin_unlock_irq(lock);
- v->sched_unit->priv = vcpu_priv[v->vcpu_id];
+ unit->priv = unit_priv[unit_idx];
if ( !d->is_dying )
- sched_move_irqs(v->sched_unit);
+ sched_move_irqs(unit);
- new_p = cpumask_cycle(new_p, c->cpu_valid);
+ sched_insert_unit(c->sched, unit);
- sched_insert_unit(c->sched, v->sched_unit);
+ sched_free_udata(old_ops, unitdata);
- sched_free_udata(old_ops, vcpudata);
+ unit_idx++;
}
domain_update_node_affinity(d);
@@ -593,7 +608,7 @@ int sched_move_domain(struct domain *d, struct cpupool *c)
sched_free_domdata(old_ops, old_domdata);
- xfree(vcpu_priv);
+ xfree(unit_priv);
return 0;
}
@@ -877,18 +892,36 @@ static void vcpu_migrate_finish(struct vcpu *v)
vcpu_wake(v);
}
+static bool sched_check_affinity_broken(const struct sched_unit *unit)
+{
+ const struct vcpu *v;
+
+ for_each_sched_unit_vcpu ( unit, v )
+ if ( v->affinity_broken )
+ return true;
+
+ return false;
+}
+
+static void sched_reset_affinity_broken(struct sched_unit *unit)
+{
+ struct vcpu *v;
+
+ for_each_sched_unit_vcpu ( unit, v )
+ v->affinity_broken = false;
+}
+
void restore_vcpu_affinity(struct domain *d)
{
unsigned int cpu = smp_processor_id();
- struct vcpu *v;
+ struct sched_unit *unit;
ASSERT(system_state == SYS_STATE_resume);
- for_each_vcpu ( d, v )
+ for_each_sched_unit ( d, unit )
{
spinlock_t *lock;
- unsigned int old_cpu = v->processor;
- struct sched_unit *unit = v->sched_unit;
+ unsigned int old_cpu = sched_unit_master(unit);
struct sched_resource *res;
ASSERT(!unit_runnable(unit));
@@ -907,17 +940,19 @@ void restore_vcpu_affinity(struct domain *d)
cpupool_domain_cpumask(d));
if ( cpumask_empty(cpumask_scratch_cpu(cpu)) )
{
- if ( v->affinity_broken )
+ if ( sched_check_affinity_broken(unit) )
{
sched_set_affinity(unit, unit->cpu_hard_affinity_saved, NULL);
- v->affinity_broken = 0;
+ sched_reset_affinity_broken(unit);
cpumask_and(cpumask_scratch_cpu(cpu), unit->cpu_hard_affinity,
cpupool_domain_cpumask(d));
}
if ( cpumask_empty(cpumask_scratch_cpu(cpu)) )
{
- printk(XENLOG_DEBUG "Breaking affinity for %pv\n", v);
+ /* Affinity settings of one vcpu are for the complete unit. */
+ printk(XENLOG_DEBUG "Breaking affinity for %pv\n",
+ unit->vcpu_list);
sched_set_affinity(unit, &cpumask_all, NULL);
cpumask_and(cpumask_scratch_cpu(cpu), unit->cpu_hard_affinity,
cpupool_domain_cpumask(d));
@@ -931,12 +966,12 @@ void restore_vcpu_affinity(struct domain *d)
/* v->processor might have changed, so reacquire the lock. */
lock = unit_schedule_lock_irq(unit);
- res = sched_pick_resource(vcpu_scheduler(v), unit);
+ res = sched_pick_resource(unit_scheduler(unit), unit);
sched_set_res(unit, res);
spin_unlock_irq(lock);
- if ( old_cpu != v->processor )
- sched_move_irqs(v->sched_unit);
+ if ( old_cpu != sched_unit_master(unit) )
+ sched_move_irqs(unit);
}
domain_update_node_affinity(d);
@@ -950,7 +985,6 @@ void restore_vcpu_affinity(struct domain *d)
int cpu_disable_scheduler(unsigned int cpu)
{
struct domain *d;
- struct vcpu *v;
struct cpupool *c;
cpumask_t online_affinity;
int ret = 0;
@@ -961,32 +995,34 @@ int cpu_disable_scheduler(unsigned int cpu)
for_each_domain_in_cpupool ( d, c )
{
- for_each_vcpu ( d, v )
+ struct sched_unit *unit;
+
+ for_each_sched_unit ( d, unit )
{
unsigned long flags;
- struct sched_unit *unit = v->sched_unit;
spinlock_t *lock = unit_schedule_lock_irqsave(unit, &flags);
cpumask_and(&online_affinity, unit->cpu_hard_affinity, c->cpu_valid);
if ( cpumask_empty(&online_affinity) &&
cpumask_test_cpu(cpu, unit->cpu_hard_affinity) )
{
- if ( v->affinity_broken )
+ if ( sched_check_affinity_broken(unit) )
{
- /* The vcpu is temporarily pinned, can't move it. */
+ /* The unit is temporarily pinned, can't move it. */
unit_schedule_unlock_irqrestore(lock, flags, unit);
ret = -EADDRINUSE;
break;
}
- printk(XENLOG_DEBUG "Breaking affinity for %pv\n", v);
+ printk(XENLOG_DEBUG "Breaking affinity for %pv\n",
+ unit->vcpu_list);
sched_set_affinity(unit, &cpumask_all, NULL);
}
- if ( v->processor != cpu )
+ if ( unit->res != get_sched_res(cpu) )
{
- /* The vcpu is not on this cpu, so we can move on. */
+ /* The unit is not on this cpu, so we can move on. */
unit_schedule_unlock_irqrestore(lock, flags, unit);
continue;
}
@@ -999,17 +1035,18 @@ int cpu_disable_scheduler(unsigned int cpu)
* * the scheduler will always find a suitable solution, or
* things would have failed before getting in here.
*/
- vcpu_migrate_start(v);
+ /* TODO: multiple vcpus per unit. */
+ vcpu_migrate_start(unit->vcpu_list);
unit_schedule_unlock_irqrestore(lock, flags, unit);
- vcpu_migrate_finish(v);
+ vcpu_migrate_finish(unit->vcpu_list);
/*
* The only caveat, in this case, is that if a vcpu active in
* the hypervisor isn't migratable. In this case, the caller
* should try again after releasing and reaquiring all locks.
*/
- if ( v->processor == cpu )
+ if ( unit->res == get_sched_res(cpu) )
ret = -EAGAIN;
}
}
@@ -1329,17 +1366,17 @@ int vcpu_temporary_affinity(struct vcpu *v, unsigned int cpu, uint8_t reason)
ret = 0;
v->affinity_broken &= ~reason;
}
- if ( !ret && !v->affinity_broken )
+ if ( !ret && !sched_check_affinity_broken(unit) )
sched_set_affinity(unit, unit->cpu_hard_affinity_saved, NULL);
}
else if ( cpu < nr_cpu_ids )
{
if ( (v->affinity_broken & reason) ||
- (v->affinity_broken && v->processor != cpu) )
+ (sched_check_affinity_broken(unit) && v->processor != cpu) )
ret = -EBUSY;
else if ( cpumask_test_cpu(cpu, VCPU2ONLINE(v)) )
{
- if ( !v->affinity_broken )
+ if ( !sched_check_affinity_broken(unit) )
{
cpumask_copy(unit->cpu_hard_affinity_saved,
unit->cpu_hard_affinity);

Where appropriate switch from for_each_vcpu() to for_each_sched_unit()
in order to prepare core scheduling.

As it is beneficial here already, and will be even more so in future,
add a unit_scheduler() helper and let vcpu_scheduler() use it.

Signed-off-by: Juergen Gross <jgross@suse.com>
---
V2:
- handle affinity_broken correctly (Jan Beulich)
- add unit_scheduler() (Jan Beulich)

V3:
- add const (Jan Beulich)
- add TODOs for missing multiple vcpu per unit support (Jan Beulich)

V4:
- simplify test for correct cpu (Jan Beulich)
- remove stale change (Jan Beulich)
- use sched_check_affinity_broken(unit) (Jan Beulich)
---
 xen/common/domain.c   |   9 ++-
 xen/common/schedule.c | 155 +++++++++++++++++++++++++++++++-------------------
 2 files changed, 100 insertions(+), 64 deletions(-)
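
For reference, the unit iterators used above are not defined by this
patch. In simplified form they behave as sketched below; this is only
an illustration, with field names such as sched_unit_list, next_in_list
and unit_id assumed from the rest of the core scheduling series, so the
real definitions may differ in detail:

    /* Sketch: walk all scheduling units of domain d. */
    #define for_each_sched_unit(d, u)                                   \
        for ( (u) = (d)->sched_unit_list; (u) != NULL;                  \
              (u) = (u)->next_in_list )

    /*
     * Sketch: walk all vcpus of unit u, starting at the unit's first
     * vcpu and stopping before a vcpu belonging to the next unit.
     */
    #define for_each_sched_unit_vcpu(u, v)                              \
        for ( (v) = (u)->vcpu_list;                                     \
              (v) != NULL &&                                            \
              (!(u)->next_in_list ||                                    \
               (v)->vcpu_id < (u)->next_in_list->unit_id);              \
              (v) = (v)->next_in_list )

With one vcpu per unit, as assumed by the TODOs in this patch,
for_each_sched_unit_vcpu() visits exactly unit->vcpu_list, which is why
cpu_disable_scheduler() can still hand unit->vcpu_list directly to
vcpu_migrate_start()/vcpu_migrate_finish().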