Message ID | 20210223023428.757694-2-volodymyr_babchuk@epam.com
---|---
State | New, archived
Series | Preemption in hypervisor (ARM only)
On 23.02.21 03:34, Volodymyr Babchuk wrote:
> With XEN preemption enabled, scheduler functions can be called with
> IRQs disabled (for example, at the end of an IRQ handler), so we
> should save and restore IRQ state in the scheduler code.

This breaks core scheduling. Waiting for another sibling with
interrupts disabled is an absolute no go, as deadlocks are the
consequence.

You could (in theory) make preemption and core scheduling mutually
exclusive, but this would break the forward path to mutexes etc.


Juergen

> Signed-off-by: Volodymyr Babchuk <volodymyr_babchuk@epam.com>
> ---
>  xen/common/sched/core.c | 33 ++++++++++++++++++---------------
>  1 file changed, 18 insertions(+), 15 deletions(-)
> [...]
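For illustration of the deadlock Juergen describes, here is a stand-alone C model; it is not part of the patch or of Xen, and every name, the two-thread setup and the one-second timeout are invented. cpu0 spins in the core-scheduling rendezvous with its (modelled) interrupts masked, while cpu1 can only join after cpu0 services an IPI, so neither side makes progress.

/*
 * Stand-alone model (not Xen code): build with "cc -pthread".  The two
 * threads stand for two hyperthreads of one core.  cpu0 enters the
 * core-scheduling rendezvous with its (modelled) interrupts masked;
 * cpu1 first wants cpu0 to acknowledge an IPI before it joins.
 */
#include <pthread.h>
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>
#include <unistd.h>

static atomic_int rendezvous_in_cnt = 2;      /* both siblings must arrive */
static atomic_bool cpu0_irqs_masked = true;   /* preempted with IRQs off   */
static atomic_bool ipi_pending_on_cpu0 = false;
static atomic_bool ipi_acked = false;
static atomic_bool give_up = false;           /* lets the demo terminate   */

static void *cpu0(void *arg)
{
    (void)arg;
    /* Spin until the sibling has joined the rendezvous. */
    while ( atomic_load(&rendezvous_in_cnt) > 1 && !atomic_load(&give_up) )
    {
        /* The IPI is only serviced while interrupts are enabled. */
        if ( !atomic_load(&cpu0_irqs_masked) &&
             atomic_load(&ipi_pending_on_cpu0) )
            atomic_store(&ipi_acked, true);
    }
    return NULL;
}

static void *cpu1(void *arg)
{
    (void)arg;
    /* cpu1 first waits for cpu0 to acknowledge its IPI ... */
    atomic_store(&ipi_pending_on_cpu0, true);
    while ( !atomic_load(&ipi_acked) && !atomic_load(&give_up) )
        ;
    /* ... and only then joins the rendezvous. */
    if ( atomic_load(&ipi_acked) )
        atomic_fetch_sub(&rendezvous_in_cnt, 1);
    return NULL;
}

int main(void)
{
    pthread_t t0, t1;

    pthread_create(&t0, NULL, cpu0, NULL);
    pthread_create(&t1, NULL, cpu1, NULL);

    sleep(1);                      /* give the "deadlock" time to settle */
    atomic_store(&give_up, true);  /* break out so the demo can report   */
    pthread_join(t0, NULL);
    pthread_join(t1, NULL);

    printf(atomic_load(&rendezvous_in_cnt) > 1
           ? "stuck: rendezvous never completed (deadlock)\n"
           : "rendezvous completed\n");
    return 0;
}

With cpu0_irqs_masked set to false the demo completes, which corresponds to the behaviour the original _irq() locking guaranteed by re-enabling interrupts around the rendezvous spin.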
Hi Juergen,

Jürgen Groß writes:

> On 23.02.21 03:34, Volodymyr Babchuk wrote:
>> With XEN preemption enabled, scheduler functions can be called with
>> IRQs disabled (for example, at the end of an IRQ handler), so we
>> should save and restore IRQ state in the scheduler code.
>
> This breaks core scheduling.

Yes, thank you. I forgot to mention that this PoC is not compatible
with core scheduling. Core scheduling is not used on ARM, so I could
not test it anyway.

> Waiting for another sibling with interrupts disabled is an absolute
> no go, as deadlocks are the consequence.
>
> You could (in theory) make preemption and core scheduling mutually
> exclusive, but this would break the forward path to mutexes etc.

Well, I implemented the most naive way to enable hypervisor
preemption. I'm sure that with a more careful approach I can make it
compatible with core scheduling. There is no strict requirement to run
the scheduler with IRQs disabled.

> Juergen
>
>> Signed-off-by: Volodymyr Babchuk <volodymyr_babchuk@epam.com>
>> ---
>>  xen/common/sched/core.c | 33 ++++++++++++++++++---------------
>>  1 file changed, 18 insertions(+), 15 deletions(-)
>> [...]
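Purely as a hedged sketch of what such a "more careful approach" could look like, and not something this series implements: keep the actual call into the scheduler out of IRQ context, so the rendezvous still runs with interrupts enabled. preempt_point() is an invented name; local_irq_enable(), do_softirq(), softirq_pending(), in_irq() and SCHEDULE_SOFTIRQ are existing Xen primitives.

/* Hypothetical preemption point, for illustration only. */
static void preempt_point(void)
{
    unsigned int cpu = smp_processor_id();

    /* Only act outside of IRQ handlers, where enabling IRQs is safe. */
    if ( in_irq() )
        return;

    if ( softirq_pending(cpu) & (1ul << SCHEDULE_SOFTIRQ) )
    {
        local_irq_enable();
        do_softirq();        /* schedule() runs with IRQs enabled again */
    }
}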
On 23/02/2021 02:34, Volodymyr Babchuk wrote:
> With XEN preemption enabled, scheduler functions can be called with
> IRQs disabled (for example, at the end of an IRQ handler), so we
> should save and restore IRQ state in the scheduler code.
>
> Signed-off-by: Volodymyr Babchuk <volodymyr_babchuk@epam.com>

These functions need to be fixed not to be _irq() variants in the first
place. The only reason they're like that is so the schedule softirq/IRQ
can edit the runqueues.

I seem to recall Dario having a plan to switch the runqueues to using a
lockless update mechanism, which IIRC removes any need for any of the
scheduler locks to be irqs-off.

~Andrew
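As a stand-alone illustration of the kind of lockless update mechanism Andrew refers to (not from the thread; the types and names below are invented): wakeups produced in IRQ context are pushed onto a lock-free list, and the scheduler drains them later under its ordinary lock, so that lock no longer has to disable interrupts.

/*
 * Stand-alone sketch: producers (e.g. an IRQ handler waking a vCPU)
 * push onto a lock-free MPSC list; the scheduler drains it under its
 * ordinary, non-IRQ-disabling lock.
 */
#include <stdatomic.h>
#include <stddef.h>
#include <stdio.h>

struct wake_req {
    struct wake_req *next;
    int vcpu_id;
};

static _Atomic(struct wake_req *) wake_list;   /* lock-free LIFO head */

/* Safe to call from IRQ context: one atomic CAS loop, no lock. */
static void push_wakeup(struct wake_req *req)
{
    struct wake_req *old = atomic_load(&wake_list);

    do {
        req->next = old;
    } while ( !atomic_compare_exchange_weak(&wake_list, &old, req) );
}

/* Called on the scheduler path with IRQs enabled. */
static void drain_wakeups(void)
{
    struct wake_req *req = atomic_exchange(&wake_list, NULL);

    for ( ; req; req = req->next )
        printf("enqueue vCPU %d on the runqueue\n", req->vcpu_id);
}

int main(void)
{
    struct wake_req a = { .vcpu_id = 1 }, b = { .vcpu_id = 2 };

    push_wakeup(&a);      /* imagine these happening in IRQ context */
    push_wakeup(&b);
    drain_wakeups();      /* ...and this on the scheduler path */
    return 0;
}

A real implementation would still need to kick the scheduler after pushing (e.g. via a softirq); the sketch only shows that the producer side needs no IRQ-disabling lock.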
diff --git a/xen/common/sched/core.c b/xen/common/sched/core.c
index 9745a77eee..7e075613d5 100644
--- a/xen/common/sched/core.c
+++ b/xen/common/sched/core.c
@@ -2470,7 +2470,8 @@ static struct vcpu *sched_force_context_switch(struct vcpu *vprev,
  * sched_res_rculock has been dropped.
  */
 static struct sched_unit *sched_wait_rendezvous_in(struct sched_unit *prev,
-                                                   spinlock_t **lock, int cpu,
+                                                   spinlock_t **lock,
+                                                   unsigned long *flags, int cpu,
                                                    s_time_t now)
 {
     struct sched_unit *next;
@@ -2500,7 +2501,7 @@ static struct sched_unit *sched_wait_rendezvous_in(struct sched_unit *prev,
                 prev->rendezvous_in_cnt++;
                 atomic_set(&prev->rendezvous_out_cnt, 0);

-                pcpu_schedule_unlock_irq(*lock, cpu);
+                pcpu_schedule_unlock_irqrestore(*lock, *flags, cpu);

                 sched_context_switch(vprev, v, false, now);

@@ -2530,7 +2531,7 @@ static struct sched_unit *sched_wait_rendezvous_in(struct sched_unit *prev,
             prev->rendezvous_in_cnt++;
             atomic_set(&prev->rendezvous_out_cnt, 0);

-            pcpu_schedule_unlock_irq(*lock, cpu);
+            pcpu_schedule_unlock_irqrestore(*lock, *flags, cpu);

             raise_softirq(SCHED_SLAVE_SOFTIRQ);
             sched_context_switch(vprev, vprev, false, now);

@@ -2538,11 +2539,11 @@ static struct sched_unit *sched_wait_rendezvous_in(struct sched_unit *prev,
             return NULL;         /* ARM only. */
         }

-        pcpu_schedule_unlock_irq(*lock, cpu);
+        pcpu_schedule_unlock_irqrestore(*lock, *flags, cpu);

         cpu_relax();

-        *lock = pcpu_schedule_lock_irq(cpu);
+        *lock = pcpu_schedule_lock_irqsave(cpu, flags);

         /*
          * Check for scheduling resource switched. This happens when we are
@@ -2557,7 +2558,7 @@ static struct sched_unit *sched_wait_rendezvous_in(struct sched_unit *prev,
             ASSERT(is_idle_unit(prev));
             atomic_set(&prev->next_task->rendezvous_out_cnt, 0);
             prev->rendezvous_in_cnt = 0;
-            pcpu_schedule_unlock_irq(*lock, cpu);
+            pcpu_schedule_unlock_irqrestore(*lock, *flags, cpu);
             rcu_read_unlock(&sched_res_rculock);
             return NULL;
         }
@@ -2574,12 +2575,13 @@ static void sched_slave(void)
     spinlock_t *lock;
     bool do_softirq = false;
     unsigned int cpu = smp_processor_id();
+    unsigned long flags;

     ASSERT_NOT_IN_ATOMIC();

     rcu_read_lock(&sched_res_rculock);

-    lock = pcpu_schedule_lock_irq(cpu);
+    lock = pcpu_schedule_lock_irqsave(cpu, &flags);

     now = NOW();

@@ -2590,7 +2592,7 @@ static void sched_slave(void)

         if ( v )
         {
-            pcpu_schedule_unlock_irq(lock, cpu);
+            pcpu_schedule_unlock_irqrestore(lock, flags, cpu);

             sched_context_switch(vprev, v, false, now);

@@ -2602,7 +2604,7 @@ static void sched_slave(void)

     if ( !prev->rendezvous_in_cnt )
     {
-        pcpu_schedule_unlock_irq(lock, cpu);
+        pcpu_schedule_unlock_irqrestore(lock, flags, cpu);

         rcu_read_unlock(&sched_res_rculock);

@@ -2615,11 +2617,11 @@ static void sched_slave(void)

     stop_timer(&get_sched_res(cpu)->s_timer);

-    next = sched_wait_rendezvous_in(prev, &lock, cpu, now);
+    next = sched_wait_rendezvous_in(prev, &lock, &flags, cpu, now);
     if ( !next )
         return;

-    pcpu_schedule_unlock_irq(lock, cpu);
+    pcpu_schedule_unlock_irqrestore(lock, flags, cpu);

     sched_context_switch(vprev, sched_unit2vcpu_cpu(next, cpu),
                          is_idle_unit(next) && !is_idle_unit(prev), now);
@@ -2637,6 +2639,7 @@ static void schedule(void)
     s_time_t now;
     struct sched_resource *sr;
     spinlock_t *lock;
+    unsigned long flags;
     int cpu = smp_processor_id();
     unsigned int gran;

@@ -2646,7 +2649,7 @@ static void schedule(void)

     rcu_read_lock(&sched_res_rculock);

-    lock = pcpu_schedule_lock_irq(cpu);
+    lock = pcpu_schedule_lock_irqsave(cpu, &flags);

     sr = get_sched_res(cpu);
     gran = sr->granularity;
@@ -2657,7 +2660,7 @@ static void schedule(void)
          * We have a race: sched_slave() should be called, so raise a softirq
          * in order to re-enter schedule() later and call sched_slave() now.
          */
-        pcpu_schedule_unlock_irq(lock, cpu);
+        pcpu_schedule_unlock_irqrestore(lock, flags, cpu);

         rcu_read_unlock(&sched_res_rculock);

@@ -2676,7 +2679,7 @@ static void schedule(void)
         prev->rendezvous_in_cnt = gran;
         cpumask_andnot(mask, sr->cpus, cpumask_of(cpu));
         cpumask_raise_softirq(mask, SCHED_SLAVE_SOFTIRQ);
-        next = sched_wait_rendezvous_in(prev, &lock, cpu, now);
+        next = sched_wait_rendezvous_in(prev, &lock, &flags, cpu, now);
         if ( !next )
             return;
     }
@@ -2687,7 +2690,7 @@ static void schedule(void)
         atomic_set(&next->rendezvous_out_cnt, 0);
     }

-    pcpu_schedule_unlock_irq(lock, cpu);
+    pcpu_schedule_unlock_irqrestore(lock, flags, cpu);

     vnext = sched_unit2vcpu_cpu(next, cpu);
     sched_context_switch(vprev, vnext,
With XEN preemption enabled, scheduler functions can be called with
IRQs disabled (for example, at the end of an IRQ handler), so we should
save and restore IRQ state in the scheduler code.

Signed-off-by: Volodymyr Babchuk <volodymyr_babchuk@epam.com>
---
 xen/common/sched/core.c | 33 ++++++++++++++++++---------------
 1 file changed, 18 insertions(+), 15 deletions(-)
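For readers less familiar with the two locking flavours the patch switches between, here is a stand-alone model of the difference. It is not Xen code: the real helpers are pcpu_schedule_lock_irq()/pcpu_schedule_unlock_irq() and pcpu_schedule_lock_irqsave()/pcpu_schedule_unlock_irqrestore(), and the boolean below merely models the CPU's interrupt-enable flag.

/* Stand-alone model of _irq vs. _irqsave lock semantics. */
#include <stdbool.h>
#include <stdio.h>

static bool irqs_enabled;        /* models the CPU's interrupt flag */

/* _irq variant: disables IRQs on lock, unconditionally enables on unlock. */
static void lock_irq(void)      { irqs_enabled = false; }
static void unlock_irq(void)    { irqs_enabled = true;  }

/* _irqsave variant: remembers and later restores the caller's state. */
static void lock_irqsave(unsigned long *flags)
{
    *flags = irqs_enabled;
    irqs_enabled = false;
}
static void unlock_irqrestore(unsigned long flags)
{
    irqs_enabled = flags;
}

int main(void)
{
    unsigned long flags;

    irqs_enabled = false;   /* caller is e.g. at the end of an IRQ handler */
    lock_irq();
    unlock_irq();
    printf("_irq variant:     IRQs enabled = %d (state lost)\n", irqs_enabled);

    irqs_enabled = false;
    lock_irqsave(&flags);
    unlock_irqrestore(flags);
    printf("_irqsave variant: IRQs enabled = %d (state preserved)\n", irqs_enabled);
    return 0;
}

The _irq() pair unconditionally re-enables interrupts on unlock, which is wrong once the scheduler can be entered with interrupts already disabled; the irqsave/irqrestore pair hands the caller's state back unchanged, which is what the diff above introduces.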