Message ID | 20190914085251.18816-36-jgross@suse.com
---|---
State | Superseded
Series | xen: add core scheduling support
On 14.09.2019 10:52, Juergen Gross wrote:
> --- a/xen/common/schedule.c
> +++ b/xen/common/schedule.c
> @@ -724,8 +724,10 @@ void sched_destroy_domain(struct domain *d)
>          }
>      }
>  
> -void vcpu_sleep_nosync_locked(struct vcpu *v)
> +static void vcpu_sleep_nosync_locked(struct vcpu *v)
>  {
> +    struct sched_unit *unit = v->sched_unit;
> +
>      ASSERT(spin_is_locked(get_sched_res(v->processor)->schedule_lock));
>  
>      if ( likely(!vcpu_runnable(v)) )
> @@ -733,7 +735,14 @@ void vcpu_sleep_nosync_locked(struct vcpu *v)
>          if ( v->runstate.state == RUNSTATE_runnable )
>              vcpu_runstate_change(v, RUNSTATE_offline, NOW());
>  
> -        sched_sleep(vcpu_scheduler(v), v->sched_unit);
> +        if ( likely(!unit_runnable(unit)) )
> +            sched_sleep(vcpu_scheduler(v), unit);

unit_scheduler(unit) (also elsewhere)?

> @@ -765,16 +774,22 @@ void vcpu_wake(struct vcpu *v)
>  {
>      unsigned long flags;
>      spinlock_t *lock;
> +    struct sched_unit *unit = v->sched_unit;
>  
>      TRACE_2D(TRC_SCHED_WAKE, v->domain->domain_id, v->vcpu_id);
>  
> -    lock = unit_schedule_lock_irqsave(v->sched_unit, &flags);
> +    lock = unit_schedule_lock_irqsave(unit, &flags);
>  
>      if ( likely(vcpu_runnable(v)) )
>      {
>          if ( v->runstate.state >= RUNSTATE_blocked )
>              vcpu_runstate_change(v, RUNSTATE_runnable, NOW());
> -        sched_wake(vcpu_scheduler(v), v->sched_unit);
> +        sched_wake(vcpu_scheduler(v), unit);

Is this correct / necessary when the unit is not asleep as a whole?
After all the corresponding sched_sleep() further up is called
conditionally only.

> @@ -1998,6 +2013,62 @@ static void sched_context_switch(struct vcpu *vprev, struct vcpu *vnext,
>      context_switch(vprev, vnext);
>  }
>  
> +/*
> + * Force a context switch of a single vcpu of an unit.
> + * Might be called either if a vcpu of an already running unit is woken up
> + * or if a vcpu of a running unit is put asleep with other vcpus of the same
> + * unit still running.
> + */
> +static struct vcpu *sched_force_context_switch(struct vcpu *vprev,
> +                                               struct vcpu *v,
> +                                               int cpu, s_time_t now)

unsigned int cpu? (Aiui it's suppose to equal smp_processor_id()
anyway.)

> +{
> +    v->force_context_switch = false;
> +
> +    if ( vcpu_runnable(v) == v->is_running )
> +        return NULL;

This and other NULL returns suggest that the comment ahead of the
function might better state what the return value here is / means.

> +    if ( vcpu_runnable(v) )
> +    {
> +        if ( is_idle_vcpu(vprev) )
> +        {
> +            vcpu_runstate_change(vprev, RUNSTATE_runnable, now);
> +            vprev->sched_unit = get_sched_res(cpu)->sched_unit_idle;
> +        }
> +        vcpu_runstate_change(v, RUNSTATE_running, now);
> +    }
> +    else
> +    {
> +        /* Make sure not to switch last vcpu of an unit away. */
> +        if ( unit_running(v->sched_unit) == 1 )
> +            return NULL;
> +
> +        v->new_state = vcpu_runstate_blocked(v);
> +        vcpu_runstate_change(v, v->new_state, now);
> +        v = sched_unit2vcpu_cpu(vprev->sched_unit, cpu);
> +        if ( v != vprev )
> +        {
> +            if ( is_idle_vcpu(vprev) )
> +            {
> +                vcpu_runstate_change(vprev, RUNSTATE_runnable, now);
> +                vprev->sched_unit = get_sched_res(cpu)->sched_unit_idle;
> +            }
> +            else
> +            {
> +                v->sched_unit = vprev->sched_unit;
> +                vcpu_runstate_change(v, RUNSTATE_running, now);
> +            }
> +        }
> +    }
> +
> +    v->is_running = 1;

Besides this wanting to use "true", how come this is unconditional
despite the function here being used for both waking and putting to
sleep of a vCPU?

> @@ -2067,9 +2160,29 @@ static void sched_slave(void)
>  
>      now = NOW();
>  
> +    v = unit2vcpu_cpu(prev, cpu);
> +    if ( v && v->force_context_switch )
> +    {
> +        v = sched_force_context_switch(vprev, v, cpu, now);
> +
> +        if ( v )
> +        {
> +            pcpu_schedule_unlock_irq(lock, cpu);

I can't figure what it is that guarantees that this unlock isn't
going to be followed ...

> +            sched_context_switch(vprev, v, false, now);
> +        }
> +
> +        do_softirq = true;
> +    }
> +
>      if ( !prev->rendezvous_in_cnt )
>      {
>          pcpu_schedule_unlock_irq(lock, cpu);

... by another unlock here. Or wait - is sched_context_switch()
(and perhaps other functions involved there) lacking a "noreturn"
annotation?

> --- a/xen/include/xen/sched-if.h
> +++ b/xen/include/xen/sched-if.h
> @@ -100,6 +100,11 @@ static inline bool unit_runnable(const struct sched_unit *unit)
>      return false;
>  }
>  
> +static inline int vcpu_runstate_blocked(struct vcpu *v)

const?

Jan
On 24.09.19 13:55, Jan Beulich wrote:
> On 14.09.2019 10:52, Juergen Gross wrote:
>> --- a/xen/common/schedule.c
>> +++ b/xen/common/schedule.c
>> @@ -724,8 +724,10 @@ void sched_destroy_domain(struct domain *d)
>>          }
>>      }
>>  
>> -void vcpu_sleep_nosync_locked(struct vcpu *v)
>> +static void vcpu_sleep_nosync_locked(struct vcpu *v)
>>  {
>> +    struct sched_unit *unit = v->sched_unit;
>> +
>>      ASSERT(spin_is_locked(get_sched_res(v->processor)->schedule_lock));
>>  
>>      if ( likely(!vcpu_runnable(v)) )
>> @@ -733,7 +735,14 @@ void vcpu_sleep_nosync_locked(struct vcpu *v)
>>          if ( v->runstate.state == RUNSTATE_runnable )
>>              vcpu_runstate_change(v, RUNSTATE_offline, NOW());
>>  
>> -        sched_sleep(vcpu_scheduler(v), v->sched_unit);
>> +        if ( likely(!unit_runnable(unit)) )
>> +            sched_sleep(vcpu_scheduler(v), unit);
>
> unit_scheduler(unit) (also elsewhere)?

Yes.

>> @@ -765,16 +774,22 @@ void vcpu_wake(struct vcpu *v)
>>  {
>>      unsigned long flags;
>>      spinlock_t *lock;
>> +    struct sched_unit *unit = v->sched_unit;
>>  
>>      TRACE_2D(TRC_SCHED_WAKE, v->domain->domain_id, v->vcpu_id);
>>  
>> -    lock = unit_schedule_lock_irqsave(v->sched_unit, &flags);
>> +    lock = unit_schedule_lock_irqsave(unit, &flags);
>>  
>>      if ( likely(vcpu_runnable(v)) )
>>      {
>>          if ( v->runstate.state >= RUNSTATE_blocked )
>>              vcpu_runstate_change(v, RUNSTATE_runnable, NOW());
>> -        sched_wake(vcpu_scheduler(v), v->sched_unit);
>> +        sched_wake(vcpu_scheduler(v), unit);
>
> Is this correct / necessary when the unit is not asleep as a whole?
> After all the corresponding sched_sleep() further up is called
> conditionally only.

Oh, indeed. Will change that.

>> @@ -1998,6 +2013,62 @@ static void sched_context_switch(struct vcpu *vprev, struct vcpu *vnext,
>>      context_switch(vprev, vnext);
>>  }
>>  
>> +/*
>> + * Force a context switch of a single vcpu of an unit.
>> + * Might be called either if a vcpu of an already running unit is woken up
>> + * or if a vcpu of a running unit is put asleep with other vcpus of the same
>> + * unit still running.
>> + */
>> +static struct vcpu *sched_force_context_switch(struct vcpu *vprev,
>> +                                               struct vcpu *v,
>> +                                               int cpu, s_time_t now)
>
> unsigned int cpu? (Aiui it's suppose to equal smp_processor_id()
> anyway.)

Yes and yes.

>> +{
>> +    v->force_context_switch = false;
>> +
>> +    if ( vcpu_runnable(v) == v->is_running )
>> +        return NULL;
>
> This and other NULL returns suggest that the comment ahead of the
> function might better state what the return value here is / means.

Okay.

>> +    if ( vcpu_runnable(v) )
>> +    {
>> +        if ( is_idle_vcpu(vprev) )
>> +        {
>> +            vcpu_runstate_change(vprev, RUNSTATE_runnable, now);
>> +            vprev->sched_unit = get_sched_res(cpu)->sched_unit_idle;
>> +        }
>> +        vcpu_runstate_change(v, RUNSTATE_running, now);
>> +    }
>> +    else
>> +    {
>> +        /* Make sure not to switch last vcpu of an unit away. */
>> +        if ( unit_running(v->sched_unit) == 1 )
>> +            return NULL;
>> +
>> +        v->new_state = vcpu_runstate_blocked(v);
>> +        vcpu_runstate_change(v, v->new_state, now);
>> +        v = sched_unit2vcpu_cpu(vprev->sched_unit, cpu);
>> +        if ( v != vprev )
>> +        {
>> +            if ( is_idle_vcpu(vprev) )
>> +            {
>> +                vcpu_runstate_change(vprev, RUNSTATE_runnable, now);
>> +                vprev->sched_unit = get_sched_res(cpu)->sched_unit_idle;
>> +            }
>> +            else
>> +            {
>> +                v->sched_unit = vprev->sched_unit;
>> +                vcpu_runstate_change(v, RUNSTATE_running, now);
>> +            }
>> +        }
>> +    }
>> +
>> +    v->is_running = 1;
>
> Besides this wanting to use "true", how come this is unconditional
> despite the function here being used for both waking and putting to
> sleep of a vCPU?

At that time v is the vcpu which will be running next, so either the
just woken up one, or the idle vcpu. I can add a comment.

>> @@ -2067,9 +2160,29 @@ static void sched_slave(void)
>>  
>>      now = NOW();
>>  
>> +    v = unit2vcpu_cpu(prev, cpu);
>> +    if ( v && v->force_context_switch )
>> +    {
>> +        v = sched_force_context_switch(vprev, v, cpu, now);
>> +
>> +        if ( v )
>> +        {
>> +            pcpu_schedule_unlock_irq(lock, cpu);
>
> I can't figure what it is that guarantees that this unlock isn't
> going to be followed ...
>
>> +            sched_context_switch(vprev, v, false, now);
>> +        }
>> +
>> +        do_softirq = true;
>> +    }
>> +
>>      if ( !prev->rendezvous_in_cnt )
>>      {
>>          pcpu_schedule_unlock_irq(lock, cpu);
>
> ... by another unlock here. Or wait - is sched_context_switch()
> (and perhaps other functions involved there) lacking a "noreturn"
> annotation?

Indeed it is. Like context_switch() today. :-)

I'll annotate the functions.

>> --- a/xen/include/xen/sched-if.h
>> +++ b/xen/include/xen/sched-if.h
>> @@ -100,6 +100,11 @@ static inline bool unit_runnable(const struct sched_unit *unit)
>>      return false;
>>  }
>>  
>> +static inline int vcpu_runstate_blocked(struct vcpu *v)
>
> const?

Yes.


Juergen
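
A minimal sketch of how the two points agreed above for sched_force_context_switch()
(using a bool literal, plus a comment documenting why the assignment is
unconditional) might look on top of the quoted hunk; the comment wording is an
assumption based on the explanation given in this reply, not text taken from a
later revision of the patch:

    /*
     * At this point v is the vcpu that will run next: either the vcpu
     * that was just woken up, or the vcpu (possibly the idle vcpu) taking
     * over from vprev, so marking it as running is correct in both cases.
     */
    v->is_running = true;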
On 25.09.19 15:07, Jürgen Groß wrote:
> On 24.09.19 13:55, Jan Beulich wrote:
>> On 14.09.2019 10:52, Juergen Gross wrote:
>>> @@ -765,16 +774,22 @@ void vcpu_wake(struct vcpu *v)
>>>  {
>>>      unsigned long flags;
>>>      spinlock_t *lock;
>>> +    struct sched_unit *unit = v->sched_unit;
>>>  
>>>      TRACE_2D(TRC_SCHED_WAKE, v->domain->domain_id, v->vcpu_id);
>>>  
>>> -    lock = unit_schedule_lock_irqsave(v->sched_unit, &flags);
>>> +    lock = unit_schedule_lock_irqsave(unit, &flags);
>>>  
>>>      if ( likely(vcpu_runnable(v)) )
>>>      {
>>>          if ( v->runstate.state >= RUNSTATE_blocked )
>>>              vcpu_runstate_change(v, RUNSTATE_runnable, NOW());
>>> -        sched_wake(vcpu_scheduler(v), v->sched_unit);
>>> +        sched_wake(vcpu_scheduler(v), unit);
>>
>> Is this correct / necessary when the unit is not asleep as a whole?
>> After all the corresponding sched_sleep() further up is called
>> conditionally only.
>
> Oh, indeed. Will change that.

It turned out this is not so easy at it seemed.

I encountered dom0 boot hangs with making the call conditional, even
when running in cpu scheduling mode. I guess the reason is that a vcpu
can call do_poll() which will try to put itself to sleep and in some
cases call vcpu_wake() in case the condition already changed. In that
case we need the sched_wake() call even if the unit is still running.

>>> @@ -2067,9 +2160,29 @@ static void sched_slave(void)
>>>  
>>>      now = NOW();
>>>  
>>> +    v = unit2vcpu_cpu(prev, cpu);
>>> +    if ( v && v->force_context_switch )
>>> +    {
>>> +        v = sched_force_context_switch(vprev, v, cpu, now);
>>> +
>>> +        if ( v )
>>> +        {
>>> +            pcpu_schedule_unlock_irq(lock, cpu);
>>
>> I can't figure what it is that guarantees that this unlock isn't
>> going to be followed ...
>>
>>> +            sched_context_switch(vprev, v, false, now);
>>> +        }
>>> +
>>> +        do_softirq = true;
>>> +    }
>>> +
>>>      if ( !prev->rendezvous_in_cnt )
>>>      {
>>>          pcpu_schedule_unlock_irq(lock, cpu);
>>
>> ... by another unlock here. Or wait - is sched_context_switch()
>> (and perhaps other functions involved there) lacking a "noreturn"
>> annotation?
>
> Indeed it is. Like context_switch() today. :-)
>
> I'll annotate the functions.

And now I discovered that on ARM context_switch is _not_ "noreturn".
So thanks for noticing that problem. I have fixed it in order to avoid
a latent problem in case we want to support core scheduling on ARM
some day (and yes: that would only have been a problem in core mode).


Juergen
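
As a rough illustration of the "noreturn" point discussed above, this is what
the common declaration of context_switch() could look like with Xen's noreturn
attribute macro applied; the exact declaration site and final wording are
assumptions here, not part of the posted series:

    /* Common prototype (sketch): the function never returns to its caller. */
    void noreturn context_switch(struct vcpu *prev, struct vcpu *next);

With such an annotation in place it is clear to both readers and the compiler
that the pcpu_schedule_unlock_irq() preceding the sched_context_switch() call
can never be followed by the second unlock further down in sched_slave().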
On Fri, 2019-09-27 at 06:42 +0200, Jürgen Groß wrote:
> On 25.09.19 15:07, Jürgen Groß wrote:
> > On 24.09.19 13:55, Jan Beulich wrote:
> > > On 14.09.2019 10:52, Juergen Gross wrote:
> > > > @@ -765,16 +774,22 @@ void vcpu_wake(struct vcpu *v)
> > > >  {
> > > >      unsigned long flags;
> > > >      spinlock_t *lock;
> > > > +    struct sched_unit *unit = v->sched_unit;
> > > >  
> > > >      TRACE_2D(TRC_SCHED_WAKE, v->domain->domain_id, v->vcpu_id);
> > > >  
> > > > -    lock = unit_schedule_lock_irqsave(v->sched_unit, &flags);
> > > > +    lock = unit_schedule_lock_irqsave(unit, &flags);
> > > >  
> > > >      if ( likely(vcpu_runnable(v)) )
> > > >      {
> > > >          if ( v->runstate.state >= RUNSTATE_blocked )
> > > >              vcpu_runstate_change(v, RUNSTATE_runnable, NOW());
> > > > -        sched_wake(vcpu_scheduler(v), v->sched_unit);
> > > > +        sched_wake(vcpu_scheduler(v), unit);
> > > 
> > > Is this correct / necessary when the unit is not asleep as a whole?
> > > After all the corresponding sched_sleep() further up is called
> > > conditionally only.
> > 
> > Oh, indeed. Will change that.
> 
> It turned out this is not so easy at it seemed.
> 
> I encountered dom0 boot hangs with making the call conditional, even
> when running in cpu scheduling mode. I guess the reason is that a vcpu
> can call do_poll() which will try to put itself to sleep and in some
> cases call vcpu_wake() in case the condition already changed. In that
> case we need the sched_wake() call even if the unit is still running.
> 
TBH, I think it is ok for this call to be unconditional. Indeed it
looks a bit weird when you compare this to the sched_sleep() calls in
vcpu_sleep_nosync_locked(), as they are conditional, but I think a
comment explaining why this has to be the case would be enough.

E.g., something like what the changelog already say, in
vcpu_sleep_nosync_locked(), and maybe something like what you said
here, in vcpu_wake().

Regards
On 27.09.19 09:32, Dario Faggioli wrote:
> On Fri, 2019-09-27 at 06:42 +0200, Jürgen Groß wrote:
>> On 25.09.19 15:07, Jürgen Groß wrote:
>>> On 24.09.19 13:55, Jan Beulich wrote:
>>>> On 14.09.2019 10:52, Juergen Gross wrote:
>>>>> @@ -765,16 +774,22 @@ void vcpu_wake(struct vcpu *v)
>>>>>  {
>>>>>      unsigned long flags;
>>>>>      spinlock_t *lock;
>>>>> +    struct sched_unit *unit = v->sched_unit;
>>>>>  
>>>>>      TRACE_2D(TRC_SCHED_WAKE, v->domain->domain_id, v->vcpu_id);
>>>>>  
>>>>> -    lock = unit_schedule_lock_irqsave(v->sched_unit, &flags);
>>>>> +    lock = unit_schedule_lock_irqsave(unit, &flags);
>>>>>  
>>>>>      if ( likely(vcpu_runnable(v)) )
>>>>>      {
>>>>>          if ( v->runstate.state >= RUNSTATE_blocked )
>>>>>              vcpu_runstate_change(v, RUNSTATE_runnable, NOW());
>>>>> -        sched_wake(vcpu_scheduler(v), v->sched_unit);
>>>>> +        sched_wake(vcpu_scheduler(v), unit);
>>>>
>>>> Is this correct / necessary when the unit is not asleep as a whole?
>>>> After all the corresponding sched_sleep() further up is called
>>>> conditionally only.
>>>
>>> Oh, indeed. Will change that.
>>
>> It turned out this is not so easy at it seemed.
>>
>> I encountered dom0 boot hangs with making the call conditional, even
>> when running in cpu scheduling mode. I guess the reason is that a vcpu
>> can call do_poll() which will try to put itself to sleep and in some
>> cases call vcpu_wake() in case the condition already changed. In that
>> case we need the sched_wake() call even if the unit is still running.
>>
> TBH, I think it is ok for this call to be unconditional. Indeed it
> looks a bit weird when you compare this to the sched_sleep() calls in
> vcpu_sleep_nosync_locked(), as they are conditional, but I think a
> comment explaining why this has to be the case would be enough.
>
> E.g., something like what the changelog already say, in
> vcpu_sleep_nosync_locked(), and maybe something like what you said
> here, in vcpu_wake().

Okay, will add comments.


Juergen
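
A rough idea of what such a comment in vcpu_wake() could say; the wording below
only restates the reasoning from this thread and is not the text of a later
revision of the patch:

        /*
         * Call sched_wake() unconditionally: even if the unit as a whole
         * never went to sleep (so no sched_sleep() was issued for it),
         * e.g. do_poll() may have tried to put this vcpu to sleep and is
         * waking it again now, and the scheduler still needs to see the
         * wakeup of the individual vcpu.
         */
        sched_wake(vcpu_scheduler(v), unit);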
diff --git a/xen/common/schedule.c b/xen/common/schedule.c
index d53c60b966..1e793617ec 100644
--- a/xen/common/schedule.c
+++ b/xen/common/schedule.c
@@ -724,8 +724,10 @@ void sched_destroy_domain(struct domain *d)
         }
     }
 
-void vcpu_sleep_nosync_locked(struct vcpu *v)
+static void vcpu_sleep_nosync_locked(struct vcpu *v)
 {
+    struct sched_unit *unit = v->sched_unit;
+
     ASSERT(spin_is_locked(get_sched_res(v->processor)->schedule_lock));
 
     if ( likely(!vcpu_runnable(v)) )
@@ -733,7 +735,14 @@ void vcpu_sleep_nosync_locked(struct vcpu *v)
         if ( v->runstate.state == RUNSTATE_runnable )
             vcpu_runstate_change(v, RUNSTATE_offline, NOW());
 
-        sched_sleep(vcpu_scheduler(v), v->sched_unit);
+        if ( likely(!unit_runnable(unit)) )
+            sched_sleep(vcpu_scheduler(v), unit);
+        else if ( unit_running(unit) > 1 && v->is_running &&
+                  !v->force_context_switch )
+        {
+            v->force_context_switch = true;
+            cpu_raise_softirq(v->processor, SCHED_SLAVE_SOFTIRQ);
+        }
     }
 }
 
@@ -765,16 +774,22 @@ void vcpu_wake(struct vcpu *v)
 {
     unsigned long flags;
     spinlock_t *lock;
+    struct sched_unit *unit = v->sched_unit;
 
     TRACE_2D(TRC_SCHED_WAKE, v->domain->domain_id, v->vcpu_id);
 
-    lock = unit_schedule_lock_irqsave(v->sched_unit, &flags);
+    lock = unit_schedule_lock_irqsave(unit, &flags);
 
     if ( likely(vcpu_runnable(v)) )
     {
         if ( v->runstate.state >= RUNSTATE_blocked )
             vcpu_runstate_change(v, RUNSTATE_runnable, NOW());
-        sched_wake(vcpu_scheduler(v), v->sched_unit);
+        sched_wake(vcpu_scheduler(v), unit);
+        if ( unit->is_running && !v->is_running && !v->force_context_switch )
+        {
+            v->force_context_switch = true;
+            cpu_raise_softirq(v->processor, SCHED_SLAVE_SOFTIRQ);
+        }
     }
     else if ( !(v->pause_flags & VPF_blocked) )
     {
@@ -782,7 +797,7 @@ void vcpu_wake(struct vcpu *v)
             vcpu_runstate_change(v, RUNSTATE_offline, NOW());
     }
 
-    unit_schedule_unlock_irqrestore(lock, flags, v->sched_unit);
+    unit_schedule_unlock_irqrestore(lock, flags, unit);
 }
 
 void vcpu_unblock(struct vcpu *v)
@@ -1998,6 +2013,62 @@ static void sched_context_switch(struct vcpu *vprev, struct vcpu *vnext,
     context_switch(vprev, vnext);
 }
 
+/*
+ * Force a context switch of a single vcpu of an unit.
+ * Might be called either if a vcpu of an already running unit is woken up
+ * or if a vcpu of a running unit is put asleep with other vcpus of the same
+ * unit still running.
+ */
+static struct vcpu *sched_force_context_switch(struct vcpu *vprev,
+                                               struct vcpu *v,
+                                               int cpu, s_time_t now)
+{
+    v->force_context_switch = false;
+
+    if ( vcpu_runnable(v) == v->is_running )
+        return NULL;
+
+    if ( vcpu_runnable(v) )
+    {
+        if ( is_idle_vcpu(vprev) )
+        {
+            vcpu_runstate_change(vprev, RUNSTATE_runnable, now);
+            vprev->sched_unit = get_sched_res(cpu)->sched_unit_idle;
+        }
+        vcpu_runstate_change(v, RUNSTATE_running, now);
+    }
+    else
+    {
+        /* Make sure not to switch last vcpu of an unit away. */
+        if ( unit_running(v->sched_unit) == 1 )
+            return NULL;
+
+        v->new_state = vcpu_runstate_blocked(v);
+        vcpu_runstate_change(v, v->new_state, now);
+        v = sched_unit2vcpu_cpu(vprev->sched_unit, cpu);
+        if ( v != vprev )
+        {
+            if ( is_idle_vcpu(vprev) )
+            {
+                vcpu_runstate_change(vprev, RUNSTATE_runnable, now);
+                vprev->sched_unit = get_sched_res(cpu)->sched_unit_idle;
+            }
+            else
+            {
+                v->sched_unit = vprev->sched_unit;
+                vcpu_runstate_change(v, RUNSTATE_running, now);
+            }
+        }
+    }
+
+    v->is_running = 1;
+
+    /* Make sure not to loose another slave call. */
+    raise_softirq(SCHED_SLAVE_SOFTIRQ);
+
+    return v;
+}
+
 /*
  * Rendezvous before taking a scheduling decision.
  * Called with schedule lock held, so all accesses to the rendezvous counter
@@ -2013,6 +2084,7 @@ static struct sched_unit *sched_wait_rendezvous_in(struct sched_unit *prev,
                                                    s_time_t now)
 {
     struct sched_unit *next;
+    struct vcpu *v;
 
     if ( !--prev->rendezvous_in_cnt )
     {
@@ -2021,8 +2093,28 @@ static struct sched_unit *sched_wait_rendezvous_in(struct sched_unit *prev,
         return next;
     }
 
+    v = unit2vcpu_cpu(prev, cpu);
     while ( prev->rendezvous_in_cnt )
     {
+        if ( v && v->force_context_switch )
+        {
+            struct vcpu *vprev = current;
+
+            v = sched_force_context_switch(vprev, v, cpu, now);
+
+            if ( v )
+            {
+                /* We'll come back another time, so adjust rendezvous_in_cnt. */
+                prev->rendezvous_in_cnt++;
+                atomic_set(&prev->rendezvous_out_cnt, 0);
+
+                pcpu_schedule_unlock_irq(*lock, cpu);
+
+                sched_context_switch(vprev, v, false, now);
+            }
+
+            v = unit2vcpu_cpu(prev, cpu);
+        }
         /*
          * Coming from idle might need to do tasklet work.
          * In order to avoid deadlocks we can't do that here, but have to
@@ -2055,10 +2147,11 @@ static struct sched_unit *sched_wait_rendezvous_in(struct sched_unit *prev,
 
 static void sched_slave(void)
 {
-    struct vcpu *vprev = current;
+    struct vcpu *v, *vprev = current;
     struct sched_unit *prev = vprev->sched_unit, *next;
     s_time_t now;
     spinlock_t *lock;
+    bool do_softirq = false;
     unsigned int cpu = smp_processor_id();
 
     ASSERT_NOT_IN_ATOMIC();
@@ -2067,9 +2160,29 @@ static void sched_slave(void)
 
     now = NOW();
 
+    v = unit2vcpu_cpu(prev, cpu);
+    if ( v && v->force_context_switch )
+    {
+        v = sched_force_context_switch(vprev, v, cpu, now);
+
+        if ( v )
+        {
+            pcpu_schedule_unlock_irq(lock, cpu);
+
+            sched_context_switch(vprev, v, false, now);
+        }
+
+        do_softirq = true;
+    }
+
     if ( !prev->rendezvous_in_cnt )
     {
         pcpu_schedule_unlock_irq(lock, cpu);
+
+        /* Check for failed forced context switch. */
+        if ( do_softirq )
+            raise_softirq(SCHEDULE_SOFTIRQ);
+
         return;
     }
diff --git a/xen/include/xen/sched-if.h b/xen/include/xen/sched-if.h
index 2929154d35..655eb3af32 100644
--- a/xen/include/xen/sched-if.h
+++ b/xen/include/xen/sched-if.h
@@ -100,6 +100,11 @@ static inline bool unit_runnable(const struct sched_unit *unit)
     return false;
 }
 
+static inline int vcpu_runstate_blocked(struct vcpu *v)
+{
+    return (v->pause_flags & VPF_blocked) ? RUNSTATE_blocked : RUNSTATE_offline;
+}
+
 static inline bool unit_runnable_state(const struct sched_unit *unit)
 {
     struct vcpu *v;
@@ -112,9 +117,7 @@ static inline bool unit_runnable_state(const struct sched_unit *unit)
     {
         runnable = vcpu_runnable(v);
 
-        v->new_state = runnable ? RUNSTATE_running
-                                : (v->pause_flags & VPF_blocked)
-                                  ? RUNSTATE_blocked : RUNSTATE_offline;
+        v->new_state = runnable ? RUNSTATE_running : vcpu_runstate_blocked(v);
 
         if ( runnable )
             ret = true;
diff --git a/xen/include/xen/sched.h b/xen/include/xen/sched.h
index 144d353447..f276ec9398 100644
--- a/xen/include/xen/sched.h
+++ b/xen/include/xen/sched.h
@@ -186,6 +186,8 @@ struct vcpu
     bool is_running;
     /* VCPU should wake fast (do not deep sleep the CPU). */
     bool is_urgent;
+    /* VCPU must context_switch without scheduling unit. */
+    bool force_context_switch;
 
 #ifdef VCPU_TRAP_LAST
 #define VCPU_TRAP_NONE 0
vcpu_wake() and vcpu_sleep() need to be made core scheduling aware:
they might need to switch a single vcpu of an already scheduled unit
between running and not running.

Especially when vcpu_sleep() for a vcpu is being called by a vcpu of
the same scheduling unit special care must be taken in order to avoid
a deadlock: the vcpu to be put asleep must be forced through a context
switch without doing so for the calling vcpu.

For this purpose add a vcpu flag handled in sched_slave() and in
sched_wait_rendezvous_in() allowing a vcpu of the currently running
unit to switch state at a higher priority than a normal schedule
event.

Use the same mechanism when waking up a vcpu of a currently active
unit.

While at it make vcpu_sleep_nosync_locked() static as it is used in
schedule.c only.

Signed-off-by: Juergen Gross <jgross@suse.com>
---
RFC V2: add vcpu_sleep() handling and force_context_switch flag
V2: fix runstate change in sched_force_context_switch()
---
 xen/common/schedule.c      | 125 ++++++++++++++++++++++++++++++++++++++++++---
 xen/include/xen/sched-if.h |   9 ++--
 xen/include/xen/sched.h    |   2 +
 3 files changed, 127 insertions(+), 9 deletions(-)