@@ -723,7 +723,7 @@ __csched_vcpu_is_migrateable(const struct csched_private *prv, struct vcpu *vc,
* The caller is supposed to have already checked that vc is also
* not running.
*/
- ASSERT(!vc->is_running);
+ ASSERT(!vc->sched_item->is_running);
return !__csched_vcpu_is_cache_hot(prv, vc) &&
cpumask_test_cpu(dest_cpu, mask);
@@ -1047,7 +1047,8 @@ csched_item_insert(const struct scheduler *ops, struct sched_item *item)
lock = item_schedule_lock_irq(item);
- if ( !__vcpu_on_runq(svc) && vcpu_runnable(vc) && !vc->is_running )
+ if ( !__vcpu_on_runq(svc) && vcpu_runnable(vc) &&
+ !vc->sched_item->is_running )
runq_insert(svc);
item_schedule_unlock_irq(lock, item);
@@ -1659,8 +1660,9 @@ csched_runq_steal(int peer_cpu, int cpu, int pri, int balance_step)
* vCPUs with useful soft affinities in some sort of bitmap
* or counter.
*/
- if ( vc->is_running || (balance_step == BALANCE_SOFT_AFFINITY &&
- !has_soft_affinity(vc->sched_item)) )
+ if ( vc->sched_item->is_running ||
+ (balance_step == BALANCE_SOFT_AFFINITY &&
+ !has_soft_affinity(vc->sched_item)) )
continue;
affinity_balance_cpumask(vc->sched_item, balance_step, cpumask_scratch);
@@ -1868,7 +1870,7 @@ csched_schedule(
(unsigned char *)&d);
}
- runtime = now - current->runstate.state_entry_time;
+ runtime = now - current->sched_item->state_entry_time;
if ( runtime < 0 ) /* Does this ever happen? */
runtime = 0;
@@ -1283,7 +1283,7 @@ runq_insert(const struct scheduler *ops, struct csched2_item *svc)
ASSERT(&svc->rqd->runq == runq);
ASSERT(!is_idle_vcpu(svc->vcpu));
- ASSERT(!svc->vcpu->is_running);
+ ASSERT(!svc->vcpu->sched_item->is_running);
ASSERT(!(svc->flags & CSFLAG_scheduled));
list_for_each( iter, runq )
@@ -1340,8 +1340,8 @@ static inline bool is_preemptable(const struct csched2_item *svc,
if ( ratelimit <= CSCHED2_RATELIMIT_TICKLE_TOLERANCE )
return true;
- ASSERT(svc->vcpu->is_running);
- return now - svc->vcpu->runstate.state_entry_time >
+ ASSERT(svc->vcpu->sched_item->is_running);
+ return now - svc->vcpu->sched_item->state_entry_time >
ratelimit - CSCHED2_RATELIMIT_TICKLE_TOLERANCE;
}
@@ -2931,7 +2931,7 @@ csched2_dom_cntl(
{
svc = csched2_item(v->sched_item);
lock = item_schedule_lock(svc->vcpu->sched_item);
- if ( v->is_running )
+ if ( v->sched_item->is_running )
{
unsigned int cpu = v->processor;
struct csched2_runqueue_data *rqd = c2rqd(ops, cpu);
@@ -3204,8 +3204,8 @@ csched2_runtime(const struct scheduler *ops, int cpu,
if ( prv->ratelimit_us )
{
s_time_t ratelimit_min = MICROSECS(prv->ratelimit_us);
- if ( snext->vcpu->is_running )
- ratelimit_min = snext->vcpu->runstate.state_entry_time +
+ if ( snext->vcpu->sched_item->is_running )
+ ratelimit_min = snext->vcpu->sched_item->state_entry_time +
MICROSECS(prv->ratelimit_us) - now;
if ( ratelimit_min > min_time )
min_time = ratelimit_min;
@@ -3302,7 +3302,7 @@ runq_candidate(struct csched2_runqueue_data *rqd,
* no point forcing it to do so until rate limiting expires.
*/
if ( !yield && prv->ratelimit_us && vcpu_runnable(scurr->vcpu) &&
- (now - scurr->vcpu->runstate.state_entry_time) <
+ (now - scurr->vcpu->sched_item->state_entry_time) <
MICROSECS(prv->ratelimit_us) )
{
if ( unlikely(tb_init_done) )
@@ -3313,7 +3313,7 @@ runq_candidate(struct csched2_runqueue_data *rqd,
} d;
d.dom = scurr->vcpu->domain->domain_id;
d.vcpu = scurr->vcpu->vcpu_id;
- d.runtime = now - scurr->vcpu->runstate.state_entry_time;
+ d.runtime = now - scurr->vcpu->sched_item->state_entry_time;
__trace_var(TRC_CSCHED2_RATELIMIT, 1,
sizeof(d),
(unsigned char *)&d);
@@ -3561,7 +3561,7 @@ csched2_schedule(
if ( snext != scurr )
{
ASSERT(snext->rqd == rqd);
- ASSERT(!snext->vcpu->is_running);
+ ASSERT(!snext->vcpu->sched_item->is_running);
runq_remove(snext);
__set_bit(__CSFLAG_scheduled, &snext->flags);
@@ -914,7 +914,7 @@ rt_item_insert(const struct scheduler *ops, struct sched_item *item)
{
replq_insert(ops, svc);
- if ( !vc->is_running )
+ if ( !item->is_running )
runq_insert(ops, svc);
}
item_schedule_unlock_irq(lock, item);
@@ -353,6 +353,8 @@ int sched_init_vcpu(struct vcpu *v, unsigned int processor)
{
per_cpu(sched_res, v->processor)->curr = item;
v->is_running = 1;
+ item->is_running = 1;
+ item->state_entry_time = NOW();
}
else
{
@@ -673,7 +675,8 @@ static void vcpu_migrate_finish(struct vcpu *v)
* context_saved(); and in any case, if the bit is cleared, then
* someone else has already done the work so we don't need to.
*/
- if ( v->is_running || !test_bit(_VPF_migrating, &v->pause_flags) )
+ if ( v->sched_item->is_running ||
+ !test_bit(_VPF_migrating, &v->pause_flags) )
return;
old_cpu = new_cpu = v->processor;
@@ -727,7 +730,7 @@ static void vcpu_migrate_finish(struct vcpu *v)
* because they both happen in (different) spinlock regions, and those
* regions are strictly serialised.
*/
- if ( v->is_running ||
+ if ( v->sched_item->is_running ||
!test_and_clear_bit(_VPF_migrating, &v->pause_flags) )
{
sched_spin_unlock_double(old_lock, new_lock, flags);
@@ -755,7 +758,7 @@ void vcpu_force_reschedule(struct vcpu *v)
{
spinlock_t *lock = item_schedule_lock_irq(v->sched_item);
- if ( v->is_running )
+ if ( v->sched_item->is_running )
vcpu_migrate_start(v);
item_schedule_unlock_irq(lock, v->sched_item);
@@ -1582,8 +1585,10 @@ static void schedule(void)
* switch, else lost_records resume will not work properly.
*/
- ASSERT(!next->is_running);
+ ASSERT(!next->sched_item->is_running);
next->is_running = 1;
+ next->sched_item->is_running = 1;
+ next->sched_item->state_entry_time = now;
pcpu_schedule_unlock_irq(lock, cpu);
@@ -1605,6 +1610,8 @@ void context_saved(struct vcpu *prev)
smp_wmb();
prev->is_running = 0;
+ prev->sched_item->is_running = 0;
+ prev->sched_item->state_entry_time = NOW();
/* Check for migration request /after/ clearing running flag. */
smp_mb();
@@ -270,7 +270,11 @@ struct sched_item {
/* Last time when item has been scheduled out. */
uint64_t last_run_time;
+ /* Last time item got (de-)scheduled. */
+ uint64_t state_entry_time;
+ /* Currently running on a CPU? */
+ bool is_running;
/* Item needs affinity restored. */
bool affinity_broken;
/* Does soft affinity actually play a role (given hard affinity)? */
Add an is_running indicator to struct sched_item which will be set
whenever the item is being scheduled. Switch scheduler code to use
item->is_running instead of vcpu->is_running for scheduling decisions.

At the same time introduce a state_entry_time field in struct
sched_item being updated whenever the is_running indicator is changed.
Use that new field in the schedulers instead of the similar vcpu field.

Signed-off-by: Juergen Gross <jgross@suse.com>
---
RFC V2: fix arm build, don't drop v->is_running
---
 xen/common/sched_credit.c  | 12 +++++++-----
 xen/common/sched_credit2.c | 18 +++++++++---------
 xen/common/sched_rt.c      |  2 +-
 xen/common/schedule.c      | 15 +++++++++++----
 xen/include/xen/sched.h    |  4 ++++
 5 files changed, 32 insertions(+), 19 deletions(-)
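For readers skimming the diff, the following is a minimal, self-contained
sketch (not Xen code; the struct and helper names are simplified stand-ins
for the fields introduced by this patch) of the intended pattern: the core
scheduler flips the item's is_running flag and stamps state_entry_time at
every (de-)schedule, and individual schedulers base decisions such as rate
limiting on the item-level fields instead of the per-vcpu ones.

#include <stdbool.h>
#include <stdint.h>

/* Trimmed to the two fields this patch adds to struct sched_item. */
struct sched_item_example {
    bool is_running;            /* currently running on a CPU? */
    uint64_t state_entry_time;  /* last time the item got (de-)scheduled */
};

/* On scheduling the item in (cf. the schedule() hunk above). */
static void example_schedule_in(struct sched_item_example *item, uint64_t now)
{
    item->is_running = true;
    item->state_entry_time = now;
}

/* On scheduling the item out (cf. the context_saved() hunk above). */
static void example_schedule_out(struct sched_item_example *item, uint64_t now)
{
    item->is_running = false;
    item->state_entry_time = now;
}

/*
 * A scheduler's rate-limit style check can then be expressed purely in
 * terms of the item: keep the current item running if it is running and
 * has not yet consumed its minimum timeslice.
 */
static bool example_ratelimited(const struct sched_item_example *item,
                                uint64_t now, uint64_t ratelimit)
{
    return item->is_running && (now - item->state_entry_time) < ratelimit;
}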