@@ -504,9 +504,11 @@ struct csched2_private {
* Physical CPU
*
* The only per-pCPU information we need to maintain is of which runqueue
- * each CPU is part of.
+ * each CPU is part of (-1 meaning 'none').
*/
-static DEFINE_PER_CPU(int, runq_map);
+struct csched2_pcpu {
+ int runq_id;
+};
/*
* Virtual CPU
@@ -565,6 +567,11 @@ static inline struct csched2_private *csched2_priv(const struct scheduler *ops)
return ops->sched_data;
}
+static inline struct csched2_pcpu *csched2_pcpu(unsigned int cpu)
+{
+ return per_cpu(schedule_data, cpu).sched_priv;
+}
+
static inline struct csched2_vcpu *csched2_vcpu(const struct vcpu *v)
{
return v->sched_priv;
@@ -578,7 +585,7 @@ static inline struct csched2_dom *csched2_dom(const struct domain *d)
/* CPU to runq_id macro */
static inline int c2r(unsigned int cpu)
{
- return per_cpu(runq_map, cpu);
+ return csched2_pcpu(cpu)->runq_id;
}
/* CPU to runqueue struct macro */
@@ -3769,31 +3776,33 @@ csched2_dump(const struct scheduler *ops)
#undef cpustr
}
-/* Returns the ID of the runqueue the cpu is assigned to. */
-static unsigned
-init_pdata(struct csched2_private *prv, unsigned int cpu)
+static void
+init_pdata(struct csched2_private *prv,
+ struct csched2_pcpu *spc,
+ unsigned int cpu)
{
- unsigned rqi;
struct csched2_runqueue_data *rqd;
ASSERT(rw_is_write_locked(&prv->lock));
+ /* The CPU must not already have been initialized in this scheduler. */
ASSERT(!cpumask_test_cpu(cpu, &prv->initialized));
+ /* The per-CPU data must have been allocated, but not yet initialized. */
+ ASSERT(spc && spc->runq_id == -1);
+ /* alloc_pdata must have been called already. */
+ ASSERT(per_cpu(schedule_data, cpu).sched_priv == spc);
/* Figure out which runqueue to put it in */
- rqi = cpu_to_runqueue(prv, cpu);
+ spc->runq_id = cpu_to_runqueue(prv, cpu);
- rqd = prv->rqd + rqi;
+ rqd = prv->rqd + spc->runq_id;
- printk(XENLOG_INFO "Adding cpu %d to runqueue %d\n", cpu, rqi);
- if ( ! cpumask_test_cpu(rqi, &prv->active_queues) )
+ printk(XENLOG_INFO "Adding cpu %d to runqueue %d\n", cpu, spc->runq_id);
+ if ( !cpumask_test_cpu(spc->runq_id, &prv->active_queues) )
{
printk(XENLOG_INFO " First cpu on runqueue, activating\n");
- activate_runqueue(prv, rqi);
+ activate_runqueue(prv, spc->runq_id);
}
- /* Set the runqueue map */
- per_cpu(runq_map, cpu) = rqi;
-
__cpumask_set_cpu(cpu, &rqd->idle);
__cpumask_set_cpu(cpu, &rqd->active);
__cpumask_set_cpu(cpu, &prv->initialized);
@@ -3801,8 +3810,21 @@ init_pdata(struct csched2_private *prv, unsigned int cpu)
if ( cpumask_weight(&rqd->active) == 1 )
rqd->pick_bias = cpu;
+}
- return rqi;
+static void *
+csched2_alloc_pdata(const struct scheduler *ops, int cpu)
+{
+ struct csched2_pcpu *spc;
+
+ spc = xzalloc(struct csched2_pcpu);
+ if ( spc == NULL )
+ return ERR_PTR(-ENOMEM);
+
+ /* Initial situation: not part of any runqueue. */
+ spc->runq_id = -1;
+
+ return spc;
}
static void
@@ -3811,20 +3833,14 @@ csched2_init_pdata(const struct scheduler *ops, void *pdata, int cpu)
struct csched2_private *prv = csched2_priv(ops);
spinlock_t *old_lock;
unsigned long flags;
- unsigned rqi;
-
- /*
- * pdata contains what alloc_pdata returned. But since we don't (need to)
- * implement alloc_pdata, either that's NULL, or something is very wrong!
- */
- ASSERT(!pdata);
write_lock_irqsave(&prv->lock, flags);
old_lock = pcpu_schedule_lock(cpu);
- rqi = init_pdata(prv, cpu);
+ init_pdata(prv, pdata, cpu);
+
/* Move the scheduler lock to the new runq lock. */
- per_cpu(schedule_data, cpu).schedule_lock = &prv->rqd[rqi].lock;
+ per_cpu(schedule_data, cpu).schedule_lock = &c2rqd(ops, cpu)->lock;
/* _Not_ pcpu_schedule_unlock(): schedule_lock may have changed! */
spin_unlock(old_lock);
@@ -3838,9 +3854,8 @@ csched2_switch_sched(struct scheduler *new_ops, unsigned int cpu,
{
struct csched2_private *prv = csched2_priv(new_ops);
struct csched2_vcpu *svc = vdata;
- unsigned rqi;
- ASSERT(!pdata && svc && is_idle_vcpu(svc->vcpu));
+ ASSERT(pdata && svc && is_idle_vcpu(svc->vcpu));
/*
* We own one runqueue lock already (from schedule_cpu_switch()). This
@@ -3855,7 +3870,10 @@ csched2_switch_sched(struct scheduler *new_ops, unsigned int cpu,
idle_vcpu[cpu]->sched_priv = vdata;
- rqi = init_pdata(prv, cpu);
+ per_cpu(scheduler, cpu) = new_ops;
+ per_cpu(schedule_data, cpu).sched_priv = pdata;
+
+ init_pdata(prv, pdata, cpu);
/*
* Now that we know what runqueue we'll go in, double check what's said
@@ -3863,10 +3881,7 @@ csched2_switch_sched(struct scheduler *new_ops, unsigned int cpu,
* this scheduler, and so it's safe to have taken it /before/ our
* private global lock.
*/
- ASSERT(per_cpu(schedule_data, cpu).schedule_lock != &prv->rqd[rqi].lock);
-
- per_cpu(scheduler, cpu) = new_ops;
- per_cpu(schedule_data, cpu).sched_priv = NULL; /* no pdata */
+ ASSERT(per_cpu(schedule_data, cpu).schedule_lock != &c2rqd(new_ops, cpu)->lock);
/*
* (Re?)route the lock to the per pCPU lock as /last/ thing. In fact,
@@ -3874,7 +3889,7 @@ csched2_switch_sched(struct scheduler *new_ops, unsigned int cpu,
* taking it, find all the initializations we've done above in place.
*/
smp_mb();
- per_cpu(schedule_data, cpu).schedule_lock = &prv->rqd[rqi].lock;
+ per_cpu(schedule_data, cpu).schedule_lock = &c2rqd(new_ops, cpu)->lock;
write_unlock(&prv->lock);
}
@@ -3885,25 +3900,27 @@ csched2_deinit_pdata(const struct scheduler *ops, void *pcpu, int cpu)
unsigned long flags;
struct csched2_private *prv = csched2_priv(ops);
struct csched2_runqueue_data *rqd;
- int rqi;
+ struct csched2_pcpu *spc = pcpu;
write_lock_irqsave(&prv->lock, flags);
+ /* Both alloc_pdata and init_pdata must have been called on this CPU. */
+ ASSERT(spc && spc->runq_id != -1 &&
+ cpumask_test_cpu(cpu, &prv->initialized));
+
/*
- * alloc_pdata is not implemented, so pcpu must be NULL. On the other
- * hand, init_pdata must have been called for this pCPU.
+ * NB: since per_cpu(schedule_data, cpu).sched_priv may already have
+ * been set to something different from pcpu, we CANNOT use the
+ * csched2_pcpu(), c2r() and c2rqd() helpers here!
*/
- ASSERT(!pcpu && cpumask_test_cpu(cpu, &prv->initialized));
-
- /* Find the old runqueue and remove this cpu from it */
- rqi = per_cpu(runq_map, cpu);
- rqd = prv->rqd + rqi;
+ /* Find the old runqueue and remove this cpu from it */
+ rqd = prv->rqd + spc->runq_id;
/* No need to save IRQs here, they're already disabled */
spin_lock(&rqd->lock);
- printk(XENLOG_INFO "Removing cpu %d from runqueue %d\n", cpu, rqi);
+ printk(XENLOG_INFO "Removing cpu %d from runqueue %d\n", cpu, spc->runq_id);
__cpumask_clear_cpu(cpu, &rqd->idle);
__cpumask_clear_cpu(cpu, &rqd->smt_idle);
@@ -3912,12 +3929,12 @@ csched2_deinit_pdata(const struct scheduler *ops, void *pcpu, int cpu)
if ( cpumask_empty(&rqd->active) )
{
printk(XENLOG_INFO " No cpus left on runqueue, disabling\n");
- deactivate_runqueue(prv, rqi);
+ deactivate_runqueue(prv, spc->runq_id);
}
else if ( rqd->pick_bias == cpu )
rqd->pick_bias = cpumask_first(&rqd->active);
- per_cpu(runq_map, cpu) = -1;
+ spc->runq_id = -1;
spin_unlock(&rqd->lock);
@@ -3928,6 +3945,32 @@ csched2_deinit_pdata(const struct scheduler *ops, void *pcpu, int cpu)
return;
}
+static void
+csched2_free_pdata(const struct scheduler *ops, void *pcpu, int cpu)
+{
+ struct csched2_private *prv = csched2_priv(ops);
+
+ /*
+ * We want to be sure that either init_pdata has never been called for
+ * this CPU in this scheduler, or that deinit_pdata has already been
+ * called on it.
+ */
+ ASSERT(!cpumask_test_cpu(cpu, &prv->initialized));
+
+ /*
+ * pcpu can be NULL, in case we are coming from CPU_UP_CANCELLED
+ * (because bringing up the CPU failed). If it is not NULL, then, for
+ * the reasons explained above, its runq_id must be invalid (-1).
+ */
+ if ( pcpu != NULL )
+ {
+ struct csched2_pcpu *spc = pcpu;
+
+ ASSERT(spc->runq_id == -1);
+ xfree(spc);
+ }
+}
+
static int
csched2_init(struct scheduler *ops)
{
@@ -4045,8 +4088,10 @@ static const struct scheduler sched_credit2_def = {
.deinit = csched2_deinit,
.alloc_vdata = csched2_alloc_vdata,
.free_vdata = csched2_free_vdata,
+ .alloc_pdata = csched2_alloc_pdata,
.init_pdata = csched2_init_pdata,
.deinit_pdata = csched2_deinit_pdata,
+ .free_pdata = csched2_free_pdata,
.switch_sched = csched2_switch_sched,
.alloc_domdata = csched2_alloc_domdata,
.free_domdata = csched2_free_domdata,
This fixes hitting this BUG_ON():

  xen/common/sched_credit2.c:3452
    rqd = c2rqd(ops, cpu);
    BUG_ON(!cpumask_test_cpu(cpu, &rqd->active));

when Credit2 is used as the default scheduler, and a cpupool, also
using Credit2, is created.

The BUG_ON() is hit because c2rqd() accesses
rqd[per_cpu(runq_map,cpu)]->active when the value of runq_map for this
CPU is -1 (and so we basically access random memory).

The bug was introduced in a2c4e5ab ("xen: credit2: make the cpu to
runqueue map per-cpu"). In that commit, the CPU-to-runqueue map became
a global data structure (instead of a per-scheduler array, as it was
before). So, when a CPU is deassigned from a scheduler, its
corresponding value in the runq map is set to -1, and this makes things
explode in case the CPU was also being assigned to another scheduler.
Basically, when moving a CPU from one scheduler (cpupool) to another,
we call:

 - csched2_init_pdata(new_ops, cpu);
     |--> per_cpu(runq_map, cpu) = _new_runq_id_;
 - csched2_deinit_pdata(old_ops, cpu);
     |--> per_cpu(runq_map, cpu) = -1;

The solution is to store the information about which runqueue a CPU
belongs to in a 'csched2_pcpu' data structure (as, e.g., Credit does
for all the per-pCPU information it needs to keep), and to use the
sched_priv field of the per_cpu(schedule_data) structure to reach it.

This solution has the following properties:
 - it prevents Xen from crashing; :-)
 - it's more in line with how the schedule.c<-->sched_foo.c interface
   has been designed and is supposed to be used (the sched_priv field
   is specifically meant to be used for this kind of data);
 - the improvement brought by a2c4e5ab is retained: the large array of
   ints that each Credit2 instance was (needlessly) keeping for the
   CPU-to-runqueue mapping, which that commit made unnecessary, is
   *not* reintroduced. Actually, we use even less memory: instead of a
   per-cpu set of ints (which meant there was one even for CPUs that
   don't belong to any Credit2 instance), we allocate a struct with
   just one int field, and only for the CPUs that do belong to a
   Credit2 instance.

Note that this also:
 * allows the init_pdata() internal helper function to return void,
   simplifying its callers (and making it more similar to its
   counterparts in other schedulers);
 * enables more extensive use of the c2rqd() helper, making the code
   more uniform and easier to read;
 * requires the csched2_{alloc,free}_pdata hooks to be defined for
   Credit2;
 * requires the assignment of new values to the 'scheduler' and
   'schedule_data.sched_priv' per-cpu variables to be moved up a bit
   in csched2_switch_sched().

Signed-off-by: Dario Faggioli <dario.faggioli@citrix.com>
---
Cc: George Dunlap <george.dunlap@citrix.com>
Cc: Anshul Makkar <anshulmakkar@gmail.com>
Cc: Julien Grall <julien.grall@arm.com>
---
Release-wise, Julien, considering that:
 - we're not even frozen yet,
 - IAC, this is a bugfix,
I'd say it should go in.
---
 xen/common/sched_credit2.c | 133 +++++++++++++++++++++++++++++---------------
 1 file changed, 89 insertions(+), 44 deletions(-)
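For illustration, here is a minimal, self-contained C sketch of the
failure mode, and of why the per-scheduler structure fixes it. This is
not Xen code: all the names in it (runq_map, struct pcpu,
old_/new_init_pdata, ...) are simplified stand-ins for
per_cpu(runq_map), csched2_pcpu and csched2_{init,deinit}_pdata.

  #include <assert.h>
  #include <stdio.h>

  #define NR_CPUS 4

  /* Old design: ONE global CPU-to-runqueue map, shared by every
   * scheduler instance (stand-in for per_cpu(runq_map, cpu)). */
  static int runq_map[NR_CPUS];

  /* New design: each scheduler instance has its own per-CPU struct
   * (stand-in for csched2_pcpu, reached via sched_priv). */
  struct pcpu { int runq_id; };

  static void old_init_pdata(int cpu, int id) { runq_map[cpu] = id; }
  static void old_deinit_pdata(int cpu)       { runq_map[cpu] = -1; }

  static void new_init_pdata(struct pcpu *spc, int id) { spc->runq_id = id; }
  static void new_deinit_pdata(struct pcpu *spc)       { spc->runq_id = -1; }

  int main(void)
  {
      int cpu = 2;

      /*
       * Old design: move CPU 2 from cpupool A to cpupool B. Note the
       * ordering: the new scheduler's init_pdata runs BEFORE the old
       * scheduler's deinit_pdata (see the call sequence above).
       */
      runq_map[cpu] = 0;       /* CPU 2 is in runqueue 0 of pool A      */
      old_init_pdata(cpu, 1);  /* pool B claims it: runq_map[2] = 1     */
      old_deinit_pdata(cpu);   /* pool A clobbers it: runq_map[2] = -1! */

      /* Pool B would now index rqd[-1], i.e. random memory: BUG_ON(). */
      printf("old design: runq_map[%d] = %d\n", cpu, runq_map[cpu]);

      /*
       * New design: each pool owns a distinct pcpu struct for CPU 2,
       * so the very same call ordering cannot clobber anything.
       */
      struct pcpu pool_a = { .runq_id = 0 };
      struct pcpu pool_b = { .runq_id = -1 };
      new_init_pdata(&pool_b, 1);
      new_deinit_pdata(&pool_a);
      assert(pool_b.runq_id == 1);  /* holds: pool B's view is intact */
      printf("new design: pool B's runq_id = %d\n", pool_b.runq_id);
      return 0;
  }

The init-before-deinit ordering in the sketch mirrors the call sequence
quoted in the commit message, which is exactly why a shared map gets
clobbered while per-instance data survives.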