[2/2] xen/rcu: don't use stop_machine_run() for rcu_barrier()

Message ID: 20200217072006.20211-3-jgross@suse.com (mailing list archive)
State: Superseded
Series: xen/rcu: let rcu work better with core scheduling

Commit Message

Jürgen Groß Feb. 17, 2020, 7:20 a.m. UTC
Today rcu_barrier() calls stop_machine_run() to synchronize all
physical cpus in order to ensure that all pending rcu calls have
finished when it returns.

As stop_machine_run() is using tasklets, this requires scheduling of
idle vcpus on all cpus. With core scheduling being active this would
require rcu_barrier() to be called on idle cpus only, as otherwise a
scheduling deadlock would occur.

There is no need at all to do the syncing of the cpus in tasklets, as
rcu activity is started in __do_softirq(), which is called whenever
softirq activity is allowed. So rcu_barrier() can easily be modified
to use softirqs for synchronizing the cpus, no longer requiring any
scheduling activity.

As there already is an rcu softirq, reuse it for the synchronization.

Finally, switch rcu_barrier() to return void, as it can now never fail.

Signed-off-by: Juergen Gross <jgross@suse.com>
---
 xen/common/rcupdate.c      | 49 ++++++++++++++++++++++++++--------------------
 xen/include/xen/rcupdate.h |  2 +-
 2 files changed, 29 insertions(+), 22 deletions(-)

Comments

Julien Grall Feb. 17, 2020, 11:49 a.m. UTC | #1
Hi Juergen,

On 17/02/2020 07:20, Juergen Gross wrote:
> Today rcu_barrier() is calling stop_machine_run() to synchronize all
> physical cpus in order to ensure all pending rcu calls have finished
> when returning.
> 
> As stop_machine_run() is using tasklets this requires scheduling of
> idle vcpus on all cpus imposing the need to call rcu_barrier() on idle
> cpus only in case of core scheduling being active, as otherwise a
> scheduling deadlock would occur.
> 
> There is no need at all to do the syncing of the cpus in tasklets, as
> rcu activity is started in __do_softirq() called whenever softirq
> activity is allowed. So rcu_barrier() can easily be modified to use
> softirq for synchronization of the cpus no longer requiring any
> scheduling activity.
> 
> As there already is a rcu softirq reuse that for the synchronization.
> 
> Finally switch rcu_barrier() to return void as it now can never fail.
> 
> Signed-off-by: Juergen Gross <jgross@suse.com>
> ---
>   xen/common/rcupdate.c      | 49 ++++++++++++++++++++++++++--------------------
>   xen/include/xen/rcupdate.h |  2 +-
>   2 files changed, 29 insertions(+), 22 deletions(-)
> 
> diff --git a/xen/common/rcupdate.c b/xen/common/rcupdate.c
> index 079ea9d8a1..1f02a804e3 100644
> --- a/xen/common/rcupdate.c
> +++ b/xen/common/rcupdate.c
> @@ -143,47 +143,51 @@ static int qhimark = 10000;
>   static int qlowmark = 100;
>   static int rsinterval = 1000;
>   
> -struct rcu_barrier_data {
> -    struct rcu_head head;
> -    atomic_t *cpu_count;
> -};
> +/*
> + * rcu_barrier() handling:
> + * cpu_count holds the number of cpu required to finish barrier handling.
> + * Cpus are synchronized via softirq mechanism. rcu_barrier() is regarded to
> + * be active if cpu_count is not zero. In case rcu_barrier() is called on
> + * multiple cpus it is enough to check for cpu_count being not zero on entry
> + * and to call process_pending_softirqs() in a loop until cpu_count drops to
> + * zero, as syncing has been requested already and we don't need to sync
> + * multiple times.
> + */
> +static atomic_t cpu_count = ATOMIC_INIT(0);
>   
>   static void rcu_barrier_callback(struct rcu_head *head)
>   {
> -    struct rcu_barrier_data *data = container_of(
> -        head, struct rcu_barrier_data, head);
> -    atomic_inc(data->cpu_count);
> +    atomic_dec(&cpu_count);
>   }
>   
> -static int rcu_barrier_action(void *_cpu_count)
> +static void rcu_barrier_action(void)
>   {
> -    struct rcu_barrier_data data = { .cpu_count = _cpu_count };
> -
> -    ASSERT(!local_irq_is_enabled());
> -    local_irq_enable();
> +    struct rcu_head head;
>   
>       /*
>        * When callback is executed, all previously-queued RCU work on this CPU
>        * is completed. When all CPUs have executed their callback, data.cpu_count
>        * will have been incremented to include every online CPU.
>        */
> -    call_rcu(&data.head, rcu_barrier_callback);
> +    call_rcu(&head, rcu_barrier_callback);
>   
> -    while ( atomic_read(data.cpu_count) != num_online_cpus() )
> +    while ( atomic_read(&cpu_count) )
>       {
>           process_pending_softirqs();
>           cpu_relax();
>       }
> -
> -    local_irq_disable();
> -
> -    return 0;
>   }
>   
> -int rcu_barrier(void)
> +void rcu_barrier(void)
>   {
> -    atomic_t cpu_count = ATOMIC_INIT(0);
> -    return stop_machine_run(rcu_barrier_action, &cpu_count, NR_CPUS);
> +    if ( !atomic_cmpxchg(&cpu_count, 0, num_online_cpus()) )

What prevents the cpu_online_map from changing under your feet?
Shouldn't you grab the lock via get_cpu_maps()?
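
A possible shape for this, shown as a sketch only (it relies on the existing
get_cpu_maps()/put_cpu_maps() try-lock helpers; the retry loop is merely one
way to handle a failed acquisition and is not part of the posted patch):

/*
 * Sketch: keep cpu_online_map stable across the cmpxchg and the softirq
 * raise.  get_cpu_maps() has try-lock semantics, so a failed acquisition
 * has to be handled somehow; here it is simply retried while keeping
 * softirqs moving.
 */
void rcu_barrier(void)
{
    while ( !get_cpu_maps() )
        process_pending_softirqs();

    if ( !atomic_cmpxchg(&cpu_count, 0, num_online_cpus()) )
        cpumask_raise_softirq(&cpu_online_map, RCU_SOFTIRQ);

    while ( atomic_read(&cpu_count) )
    {
        process_pending_softirqs();
        cpu_relax();
    }

    put_cpu_maps();
}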

> +        cpumask_raise_softirq(&cpu_online_map, RCU_SOFTIRQ);
> +
> +    while ( atomic_read(&cpu_count) )
> +    {
> +        process_pending_softirqs();
> +        cpu_relax();
> +    }
>   }
>   
>   /* Is batch a before batch b ? */
> @@ -422,6 +426,9 @@ static void rcu_process_callbacks(void)
>           rdp->process_callbacks = false;
>           __rcu_process_callbacks(&rcu_ctrlblk, rdp);
>       }
> +
> +    if ( atomic_read(&cpu_count) )
> +        rcu_barrier_action();
>   }
>   
>   static int __rcu_pending(struct rcu_ctrlblk *rcp, struct rcu_data *rdp)
> diff --git a/xen/include/xen/rcupdate.h b/xen/include/xen/rcupdate.h
> index 174d058113..87f35b7704 100644
> --- a/xen/include/xen/rcupdate.h
> +++ b/xen/include/xen/rcupdate.h
> @@ -143,7 +143,7 @@ void rcu_check_callbacks(int cpu);
>   void call_rcu(struct rcu_head *head,
>                 void (*func)(struct rcu_head *head));
>   
> -int rcu_barrier(void);
> +void rcu_barrier(void);
>   
>   void rcu_idle_enter(unsigned int cpu);
>   void rcu_idle_exit(unsigned int cpu);
> 

Cheers,
Jürgen Groß Feb. 17, 2020, 12:11 p.m. UTC | #2
On 17.02.20 12:49, Julien Grall wrote:
> Hi Juergen,
> 
> On 17/02/2020 07:20, Juergen Gross wrote:
>> +void rcu_barrier(void)
>>   {
>> -    atomic_t cpu_count = ATOMIC_INIT(0);
>> -    return stop_machine_run(rcu_barrier_action, &cpu_count, NR_CPUS);
>> +    if ( !atomic_cmpxchg(&cpu_count, 0, num_online_cpus()) )
> 
> What does prevent the cpu_online_map to change under your feet? 
> Shouldn't you grab the lock via get_cpu_maps()?

Oh, indeed.

This in turn will require a modification of the logic to detect parallel
calls on multiple cpus.


Juergen
Roger Pau Monné Feb. 17, 2020, 12:17 p.m. UTC | #3
On Mon, Feb 17, 2020 at 01:11:59PM +0100, Jürgen Groß wrote:
> On 17.02.20 12:49, Julien Grall wrote:
> > Hi Juergen,
> > 
> > On 17/02/2020 07:20, Juergen Gross wrote:
> > > +void rcu_barrier(void)
> > >   {
> > > -    atomic_t cpu_count = ATOMIC_INIT(0);
> > > -    return stop_machine_run(rcu_barrier_action, &cpu_count, NR_CPUS);
> > > +    if ( !atomic_cmpxchg(&cpu_count, 0, num_online_cpus()) )
> > 
> > What does prevent the cpu_online_map to change under your feet?
> > Shouldn't you grab the lock via get_cpu_maps()?
> 
> Oh, indeed.
> 
> This in turn will require a modification of the logic to detect parallel
> calls on multiple cpus.

If you pick my patch to turn that into a rw lock you shouldn't worry
about parallel calls I think, but the lock acquisition can still fail
if there's a CPU plug/unplug going on:

https://lists.xenproject.org/archives/html/xen-devel/2020-02/msg00940.html

Roger.
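
For reference, the rough shape of such a conversion for the cpu maps helpers
(a sketch under assumptions, not the code in the linked patch):

/*
 * Sketch: get_cpu_maps() becomes a read trylock, while the CPU plug/unplug
 * path takes the lock for writing.  Parallel get_cpu_maps() users no longer
 * exclude each other, but the trylock still fails while a plug/unplug
 * operation is in flight.
 */
static DEFINE_RWLOCK(cpu_add_remove_lock);

bool get_cpu_maps(void)
{
    return read_trylock(&cpu_add_remove_lock);
}

void put_cpu_maps(void)
{
    read_unlock(&cpu_add_remove_lock);
}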
Igor Druzhinin Feb. 17, 2020, 12:26 p.m. UTC | #4
On 17/02/2020 07:20, Juergen Gross wrote:
> Today rcu_barrier() is calling stop_machine_run() to synchronize all
> physical cpus in order to ensure all pending rcu calls have finished
> when returning.
> 
> As stop_machine_run() is using tasklets this requires scheduling of
> idle vcpus on all cpus imposing the need to call rcu_barrier() on idle
> cpus only in case of core scheduling being active, as otherwise a
> scheduling deadlock would occur.
> 
> There is no need at all to do the syncing of the cpus in tasklets, as
> rcu activity is started in __do_softirq() called whenever softirq
> activity is allowed. So rcu_barrier() can easily be modified to use
> softirq for synchronization of the cpus no longer requiring any
> scheduling activity.
> 
> As there already is a rcu softirq reuse that for the synchronization.
> 
> Finally switch rcu_barrier() to return void as it now can never fail.
> 

Would this implementation guarantee progress in the same way the previous
implementation did?

Igor
Jürgen Groß Feb. 17, 2020, 12:28 p.m. UTC | #5
On 17.02.20 13:26, Igor Druzhinin wrote:
> On 17/02/2020 07:20, Juergen Gross wrote:
>> Today rcu_barrier() is calling stop_machine_run() to synchronize all
>> physical cpus in order to ensure all pending rcu calls have finished
>> when returning.
>>
>> As stop_machine_run() is using tasklets this requires scheduling of
>> idle vcpus on all cpus imposing the need to call rcu_barrier() on idle
>> cpus only in case of core scheduling being active, as otherwise a
>> scheduling deadlock would occur.
>>
>> There is no need at all to do the syncing of the cpus in tasklets, as
>> rcu activity is started in __do_softirq() called whenever softirq
>> activity is allowed. So rcu_barrier() can easily be modified to use
>> softirq for synchronization of the cpus no longer requiring any
>> scheduling activity.
>>
>> As there already is a rcu softirq reuse that for the synchronization.
>>
>> Finally switch rcu_barrier() to return void as it now can never fail.
>>
> 
> Would this implementation guarantee progress as previous implementation
> guaranteed?

Yes.


Juergen
Igor Druzhinin Feb. 17, 2020, 12:30 p.m. UTC | #6
On 17/02/2020 12:28, Jürgen Groß wrote:
> On 17.02.20 13:26, Igor Druzhinin wrote:
>> On 17/02/2020 07:20, Juergen Gross wrote:
>>> Today rcu_barrier() is calling stop_machine_run() to synchronize all
>>> physical cpus in order to ensure all pending rcu calls have finished
>>> when returning.
>>>
>>> As stop_machine_run() is using tasklets this requires scheduling of
>>> idle vcpus on all cpus imposing the need to call rcu_barrier() on idle
>>> cpus only in case of core scheduling being active, as otherwise a
>>> scheduling deadlock would occur.
>>>
>>> There is no need at all to do the syncing of the cpus in tasklets, as
>>> rcu activity is started in __do_softirq() called whenever softirq
>>> activity is allowed. So rcu_barrier() can easily be modified to use
>>> softirq for synchronization of the cpus no longer requiring any
>>> scheduling activity.
>>>
>>> As there already is a rcu softirq reuse that for the synchronization.
>>>
>>> Finally switch rcu_barrier() to return void as it now can never fail.
>>>
>>
>> Would this implementation guarantee progress as previous implementation
>> guaranteed?
> 
> Yes.

Thanks, I'll put it to test today to see if it solves our use case.

Igor
Jürgen Groß Feb. 17, 2020, 12:32 p.m. UTC | #7
On 17.02.20 13:17, Roger Pau Monné wrote:
> On Mon, Feb 17, 2020 at 01:11:59PM +0100, Jürgen Groß wrote:
>> On 17.02.20 12:49, Julien Grall wrote:
>>> Hi Juergen,
>>>
>>> On 17/02/2020 07:20, Juergen Gross wrote:
>>>> +void rcu_barrier(void)
>>>>    {
>>>> -    atomic_t cpu_count = ATOMIC_INIT(0);
>>>> -    return stop_machine_run(rcu_barrier_action, &cpu_count, NR_CPUS);
>>>> +    if ( !atomic_cmpxchg(&cpu_count, 0, num_online_cpus()) )
>>>
>>> What does prevent the cpu_online_map to change under your feet?
>>> Shouldn't you grab the lock via get_cpu_maps()?
>>
>> Oh, indeed.
>>
>> This in turn will require a modification of the logic to detect parallel
>> calls on multiple cpus.
> 
> If you pick my patch to turn that into a rw lock you shouldn't worry
> about parallel calls I think, but the lock acquisition can still fail
> if there's a CPU plug/unplug going on:
> 
> https://lists.xenproject.org/archives/html/xen-devel/2020-02/msg00940.html

Thanks, but letting rcu_barrier() fail is a no-go, so I still need to
handle that case (I mean the case of failing to get the lock). And
handling of parallel calls is not needed for functional correctness, but
to avoid unnecessary cpu synchronization (each parallel call detected can
just wait until the master has finished and then return).

BTW - The recursive spinlock today would allow for e.g. rcu_barrier() to
be called inside a CPU plug/unplug section. Your rwlock is removing that
possibility. Any chance that could be handled?


Juergen
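
The "parallel callers just wait for the master" idea could look roughly like
this (a minimal sketch with a made-up in_progress flag, leaving aside the cpu
maps locking discussed above):

static atomic_t cpu_count   = ATOMIC_INIT(0);
static atomic_t in_progress = ATOMIC_INIT(0);

void rcu_barrier(void)
{
    /* Only the first caller becomes the master and raises the softirq. */
    if ( atomic_cmpxchg(&in_progress, 0, 1) )
    {
        /* A barrier is already in flight: just wait for it to finish. */
        while ( atomic_read(&in_progress) )
        {
            process_pending_softirqs();
            cpu_relax();
        }
        return;
    }

    atomic_set(&cpu_count, num_online_cpus());
    cpumask_raise_softirq(&cpu_online_map, RCU_SOFTIRQ);

    while ( atomic_read(&cpu_count) )
    {
        process_pending_softirqs();
        cpu_relax();
    }

    atomic_set(&in_progress, 0);
}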
Roger Pau Monné Feb. 17, 2020, 12:49 p.m. UTC | #8
On Mon, Feb 17, 2020 at 01:32:59PM +0100, Jürgen Groß wrote:
> On 17.02.20 13:17, Roger Pau Monné wrote:
> > On Mon, Feb 17, 2020 at 01:11:59PM +0100, Jürgen Groß wrote:
> > > On 17.02.20 12:49, Julien Grall wrote:
> > > > Hi Juergen,
> > > > 
> > > > On 17/02/2020 07:20, Juergen Gross wrote:
> > > > > +void rcu_barrier(void)
> > > > >    {
> > > > > -    atomic_t cpu_count = ATOMIC_INIT(0);
> > > > > -    return stop_machine_run(rcu_barrier_action, &cpu_count, NR_CPUS);
> > > > > +    if ( !atomic_cmpxchg(&cpu_count, 0, num_online_cpus()) )
> > > > 
> > > > What does prevent the cpu_online_map to change under your feet?
> > > > Shouldn't you grab the lock via get_cpu_maps()?
> > > 
> > > Oh, indeed.
> > > 
> > > This in turn will require a modification of the logic to detect parallel
> > > calls on multiple cpus.
> > 
> > If you pick my patch to turn that into a rw lock you shouldn't worry
> > about parallel calls I think, but the lock acquisition can still fail
> > if there's a CPU plug/unplug going on:
> > 
> > https://lists.xenproject.org/archives/html/xen-devel/2020-02/msg00940.html
> 
> Thanks, but letting rcu_barrier() fail is a no go, so I still need to
> handle that case (I mean the case failing to get the lock). And handling
> of parallel calls is not needed to be functional correct, but to avoid
> not necessary cpu synchronization (each parallel call detected can just
> wait until the master has finished and then return).
>
> BTW - The recursive spinlock today would allow for e.g. rcu_barrier() to
> be called inside a CPU plug/unplug section. Your rwlock is removing that
> possibility. Any chance that could be handled?

While this might be interesting for the rcu stuff, it certainly isn't
for other pieces also relying on the cpu maps lock.

Ie: get_cpu_maps must fail when called by send_IPI_mask if there's a
CPU plug/unplug operation going on, even if it's on the same pCPU
that's holding the lock in write mode.

I guess you could add a pCPU variable to record whether the current
pCPU is in the middle of a CPU plug/unplug operation (and hence has
the maps locked in write mode) and avoid taking the lock in that case?

Roger.
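
Roger's per-pCPU idea, sketched with hypothetical names (the flag would be
set and cleared by the plug/unplug path, e.g. around cpu_hotplug_begin() and
cpu_hotplug_done()):

/*
 * Sketch: the pCPU driving a plug/unplug already holds the maps lock for
 * writing, so a read_trylock() on that pCPU would keep failing for as long
 * as the operation runs; skip taking the lock on that one CPU instead.
 */
static DEFINE_PER_CPU(bool, in_cpu_hotplug);

static bool rcu_get_cpu_maps(void)
{
    return this_cpu(in_cpu_hotplug) || get_cpu_maps();
}

static void rcu_put_cpu_maps(void)
{
    if ( !this_cpu(in_cpu_hotplug) )
        put_cpu_maps();
}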
Jürgen Groß Feb. 17, 2020, 1:17 p.m. UTC | #9
On 17.02.20 13:49, Roger Pau Monné wrote:
> On Mon, Feb 17, 2020 at 01:32:59PM +0100, Jürgen Groß wrote:
>> On 17.02.20 13:17, Roger Pau Monné wrote:
>>> On Mon, Feb 17, 2020 at 01:11:59PM +0100, Jürgen Groß wrote:
>>>> On 17.02.20 12:49, Julien Grall wrote:
>>>>> Hi Juergen,
>>>>>
>>>>> On 17/02/2020 07:20, Juergen Gross wrote:
>>>>>> +void rcu_barrier(void)
>>>>>>     {
>>>>>> -    atomic_t cpu_count = ATOMIC_INIT(0);
>>>>>> -    return stop_machine_run(rcu_barrier_action, &cpu_count, NR_CPUS);
>>>>>> +    if ( !atomic_cmpxchg(&cpu_count, 0, num_online_cpus()) )
>>>>>
>>>>> What does prevent the cpu_online_map to change under your feet?
>>>>> Shouldn't you grab the lock via get_cpu_maps()?
>>>>
>>>> Oh, indeed.
>>>>
>>>> This in turn will require a modification of the logic to detect parallel
>>>> calls on multiple cpus.
>>>
>>> If you pick my patch to turn that into a rw lock you shouldn't worry
>>> about parallel calls I think, but the lock acquisition can still fail
>>> if there's a CPU plug/unplug going on:
>>>
>>> https://lists.xenproject.org/archives/html/xen-devel/2020-02/msg00940.html
>>
>> Thanks, but letting rcu_barrier() fail is a no go, so I still need to
>> handle that case (I mean the case failing to get the lock). And handling
>> of parallel calls is not needed to be functional correct, but to avoid
>> not necessary cpu synchronization (each parallel call detected can just
>> wait until the master has finished and then return).
>>
>> BTW - The recursive spinlock today would allow for e.g. rcu_barrier() to
>> be called inside a CPU plug/unplug section. Your rwlock is removing that
>> possibility. Any chance that could be handled?
> 
> While this might be interesting for the rcu stuff, it certainly isn't
> for other pieces also relying on the cpu maps lock.
> 
> Ie: get_cpu_maps must fail when called by send_IPI_mask if there's a
> CPU plug/unplug operation going on, even if it's on the same pCPU
> that's holding the lock in write mode.

Sure? How is cpu_down() working then? It is calling stop_machine_run()
which is using send_IPI_mask()...


Juergen
Roger Pau Monné Feb. 17, 2020, 1:47 p.m. UTC | #10
On Mon, Feb 17, 2020 at 02:17:23PM +0100, Jürgen Groß wrote:
> On 17.02.20 13:49, Roger Pau Monné wrote:
> > On Mon, Feb 17, 2020 at 01:32:59PM +0100, Jürgen Groß wrote:
> > > On 17.02.20 13:17, Roger Pau Monné wrote:
> > > > On Mon, Feb 17, 2020 at 01:11:59PM +0100, Jürgen Groß wrote:
> > > > > On 17.02.20 12:49, Julien Grall wrote:
> > > > > > Hi Juergen,
> > > > > > 
> > > > > > On 17/02/2020 07:20, Juergen Gross wrote:
> > > > > > > +void rcu_barrier(void)
> > > > > > >     {
> > > > > > > -    atomic_t cpu_count = ATOMIC_INIT(0);
> > > > > > > -    return stop_machine_run(rcu_barrier_action, &cpu_count, NR_CPUS);
> > > > > > > +    if ( !atomic_cmpxchg(&cpu_count, 0, num_online_cpus()) )
> > > > > > 
> > > > > > What does prevent the cpu_online_map to change under your feet?
> > > > > > Shouldn't you grab the lock via get_cpu_maps()?
> > > > > 
> > > > > Oh, indeed.
> > > > > 
> > > > > This in turn will require a modification of the logic to detect parallel
> > > > > calls on multiple cpus.
> > > > 
> > > > If you pick my patch to turn that into a rw lock you shouldn't worry
> > > > about parallel calls I think, but the lock acquisition can still fail
> > > > if there's a CPU plug/unplug going on:
> > > > 
> > > > https://lists.xenproject.org/archives/html/xen-devel/2020-02/msg00940.html
> > > 
> > > Thanks, but letting rcu_barrier() fail is a no go, so I still need to
> > > handle that case (I mean the case failing to get the lock). And handling
> > > of parallel calls is not needed to be functional correct, but to avoid
> > > not necessary cpu synchronization (each parallel call detected can just
> > > wait until the master has finished and then return).
> > > 
> > > BTW - The recursive spinlock today would allow for e.g. rcu_barrier() to
> > > be called inside a CPU plug/unplug section. Your rwlock is removing that
> > > possibility. Any chance that could be handled?
> > 
> > While this might be interesting for the rcu stuff, it certainly isn't
> > for other pieces also relying on the cpu maps lock.
> > 
> > Ie: get_cpu_maps must fail when called by send_IPI_mask if there's a
> > CPU plug/unplug operation going on, even if it's on the same pCPU
> > that's holding the lock in write mode.
> 
> Sure? How is cpu_down() working then?

send_IPI_mask failing to acquire the cpu maps lock prevents it from
using the APIC shorthand, which is what we want in that case.

> It is calling stop_machine_run()
> which is using send_IPI_mask()...

Xen should avoid using the APIC shorthand in that case, which I don't
think is happening now, as the lock is recursive.

Thanks, Roger.
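
The pattern being described, in rough form (a sketch, not the actual x86
implementation; send_IPI_shorthand_allbut() and send_IPI_mask_percpu() are
stand-in names for the two delivery paths):

void send_IPI_mask(const cpumask_t *mask, int vector)
{
    /* Try-lock: fails while a CPU plug/unplug operation is in progress. */
    bool cpus_locked = get_cpu_maps();

    /*
     * Use the APIC "all but self" shorthand only while the online map is
     * known to be stable.  With a recursive lock this check would wrongly
     * succeed on the pCPU that is itself in the middle of a plug/unplug.
     * The real condition also excludes the local CPU from the comparison.
     */
    if ( cpus_locked && cpumask_equal(mask, &cpu_online_map) )
        send_IPI_shorthand_allbut(vector);
    else
        send_IPI_mask_percpu(mask, vector);

    if ( cpus_locked )
        put_cpu_maps();
}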
Jürgen Groß Feb. 17, 2020, 1:56 p.m. UTC | #11
On 17.02.20 14:47, Roger Pau Monné wrote:
> On Mon, Feb 17, 2020 at 02:17:23PM +0100, Jürgen Groß wrote:
>> On 17.02.20 13:49, Roger Pau Monné wrote:
>>> On Mon, Feb 17, 2020 at 01:32:59PM +0100, Jürgen Groß wrote:
>>>> On 17.02.20 13:17, Roger Pau Monné wrote:
>>>>> On Mon, Feb 17, 2020 at 01:11:59PM +0100, Jürgen Groß wrote:
>>>>>> On 17.02.20 12:49, Julien Grall wrote:
>>>>>>> Hi Juergen,
>>>>>>>
>>>>>>> On 17/02/2020 07:20, Juergen Gross wrote:
>>>>>>>> +void rcu_barrier(void)
>>>>>>>>      {
>>>>>>>> -    atomic_t cpu_count = ATOMIC_INIT(0);
>>>>>>>> -    return stop_machine_run(rcu_barrier_action, &cpu_count, NR_CPUS);
>>>>>>>> +    if ( !atomic_cmpxchg(&cpu_count, 0, num_online_cpus()) )
>>>>>>>
>>>>>>> What does prevent the cpu_online_map to change under your feet?
>>>>>>> Shouldn't you grab the lock via get_cpu_maps()?
>>>>>>
>>>>>> Oh, indeed.
>>>>>>
>>>>>> This in turn will require a modification of the logic to detect parallel
>>>>>> calls on multiple cpus.
>>>>>
>>>>> If you pick my patch to turn that into a rw lock you shouldn't worry
>>>>> about parallel calls I think, but the lock acquisition can still fail
>>>>> if there's a CPU plug/unplug going on:
>>>>>
>>>>> https://lists.xenproject.org/archives/html/xen-devel/2020-02/msg00940.html
>>>>
>>>> Thanks, but letting rcu_barrier() fail is a no go, so I still need to
>>>> handle that case (I mean the case failing to get the lock). And handling
>>>> of parallel calls is not needed to be functional correct, but to avoid
>>>> not necessary cpu synchronization (each parallel call detected can just
>>>> wait until the master has finished and then return).
>>>>
>>>> BTW - The recursive spinlock today would allow for e.g. rcu_barrier() to
>>>> be called inside a CPU plug/unplug section. Your rwlock is removing that
>>>> possibility. Any chance that could be handled?
>>>
>>> While this might be interesting for the rcu stuff, it certainly isn't
>>> for other pieces also relying on the cpu maps lock.
>>>
>>> Ie: get_cpu_maps must fail when called by send_IPI_mask if there's a
>>> CPU plug/unplug operation going on, even if it's on the same pCPU
>>> that's holding the lock in write mode.
>>
>> Sure? How is cpu_down() working then?
> 
> send_IPI_mask failing to acquire the cpu maps lock prevents it from
> using the APIC shorthand, which is what we want in that case.
> 
>> It is calling stop_machine_run()
>> which is using send_IPI_mask()...
> 
> Xen should avoid using the APIC shorthand in that case, which I don't
> think it's happening now, as the lock is recursive.

In fact the code area where this is true is much smaller than that
protected by the lock.

Basically only __cpu_disable() and __cpu_up() (on x86) are critical in
this regard.


Juergen
Igor Druzhinin Feb. 17, 2020, 2:23 p.m. UTC | #12
On 17/02/2020 12:30, Igor Druzhinin wrote:
> On 17/02/2020 12:28, Jürgen Groß wrote:
>> On 17.02.20 13:26, Igor Druzhinin wrote:
>>> On 17/02/2020 07:20, Juergen Gross wrote:
>>>> Today rcu_barrier() is calling stop_machine_run() to synchronize all
>>>> physical cpus in order to ensure all pending rcu calls have finished
>>>> when returning.
>>>>
>>>> As stop_machine_run() is using tasklets this requires scheduling of
>>>> idle vcpus on all cpus imposing the need to call rcu_barrier() on idle
>>>> cpus only in case of core scheduling being active, as otherwise a
>>>> scheduling deadlock would occur.
>>>>
>>>> There is no need at all to do the syncing of the cpus in tasklets, as
>>>> rcu activity is started in __do_softirq() called whenever softirq
>>>> activity is allowed. So rcu_barrier() can easily be modified to use
>>>> softirq for synchronization of the cpus no longer requiring any
>>>> scheduling activity.
>>>>
>>>> As there already is a rcu softirq reuse that for the synchronization.
>>>>
>>>> Finally switch rcu_barrier() to return void as it now can never fail.
>>>>
>>>
>>> Would this implementation guarantee progress as previous implementation
>>> guaranteed?
>>
>> Yes.
> 
> Thanks, I'll put it to test today to see if it solves our use case.

Just tried it manually - it gives an infinite (up to stack size) trace like:

(XEN) [    1.496520]    [<ffff82d08022e435>] F softirq.c#__do_softirq+0x85/0x90
(XEN) [    1.496561]    [<ffff82d08022e475>] F process_pending_softirqs+0x35/0x37
(XEN) [    1.496600]    [<ffff82d080221101>] F rcupdate.c#rcu_process_callbacks+0x1df/0x1f6
(XEN) [    1.496643]    [<ffff82d08022e435>] F softirq.c#__do_softirq+0x85/0x90
(XEN) [    1.496685]    [<ffff82d08022e475>] F process_pending_softirqs+0x35/0x37
(XEN) [    1.496726]    [<ffff82d080221101>] F rcupdate.c#rcu_process_callbacks+0x1df/0x1f6
(XEN) [    1.496766]    [<ffff82d08022e435>] F softirq.c#__do_softirq+0x85/0x90
(XEN) [    1.496806]    [<ffff82d08022e475>] F process_pending_softirqs+0x35/0x37
(XEN) [    1.496847]    [<ffff82d080221101>] F rcupdate.c#rcu_process_callbacks+0x1df/0x1f6
(XEN) [    1.496887]    [<ffff82d08022e435>] F softirq.c#__do_softirq+0x85/0x90
(XEN) [    1.496927]    [<ffff82d08022e475>] F process_pending_softirqs+0x35/0x37

Igor
Jürgen Groß Feb. 17, 2020, 3:07 p.m. UTC | #13
On 17.02.20 15:23, Igor Druzhinin wrote:
> On 17/02/2020 12:30, Igor Druzhinin wrote:
>> On 17/02/2020 12:28, Jürgen Groß wrote:
>>> On 17.02.20 13:26, Igor Druzhinin wrote:
>>>> On 17/02/2020 07:20, Juergen Gross wrote:
>>>>> Today rcu_barrier() is calling stop_machine_run() to synchronize all
>>>>> physical cpus in order to ensure all pending rcu calls have finished
>>>>> when returning.
>>>>>
>>>>> As stop_machine_run() is using tasklets this requires scheduling of
>>>>> idle vcpus on all cpus imposing the need to call rcu_barrier() on idle
>>>>> cpus only in case of core scheduling being active, as otherwise a
>>>>> scheduling deadlock would occur.
>>>>>
>>>>> There is no need at all to do the syncing of the cpus in tasklets, as
>>>>> rcu activity is started in __do_softirq() called whenever softirq
>>>>> activity is allowed. So rcu_barrier() can easily be modified to use
>>>>> softirq for synchronization of the cpus no longer requiring any
>>>>> scheduling activity.
>>>>>
>>>>> As there already is a rcu softirq reuse that for the synchronization.
>>>>>
>>>>> Finally switch rcu_barrier() to return void as it now can never fail.
>>>>>
>>>>
>>>> Would this implementation guarantee progress as previous implementation
>>>> guaranteed?
>>>
>>> Yes.
>>
>> Thanks, I'll put it to test today to see if it solves our use case.
> 
> Just manually tried it - gives infinite (up to stack size) trace like:
> 
> (XEN) [    1.496520]    [<ffff82d08022e435>] F softirq.c#__do_softirq+0x85/0x90
> (XEN) [    1.496561]    [<ffff82d08022e475>] F process_pending_softirqs+0x35/0x37
> (XEN) [    1.496600]    [<ffff82d080221101>] F rcupdate.c#rcu_process_callbacks+0x1df/0x1f6
> (XEN) [    1.496643]    [<ffff82d08022e435>] F softirq.c#__do_softirq+0x85/0x90
> (XEN) [    1.496685]    [<ffff82d08022e475>] F process_pending_softirqs+0x35/0x37
> (XEN) [    1.496726]    [<ffff82d080221101>] F rcupdate.c#rcu_process_callbacks+0x1df/0x1f6
> (XEN) [    1.496766]    [<ffff82d08022e435>] F softirq.c#__do_softirq+0x85/0x90
> (XEN) [    1.496806]    [<ffff82d08022e475>] F process_pending_softirqs+0x35/0x37
> (XEN) [    1.496847]    [<ffff82d080221101>] F rcupdate.c#rcu_process_callbacks+0x1df/0x1f6
> (XEN) [    1.496887]    [<ffff82d08022e435>] F softirq.c#__do_softirq+0x85/0x90
> (XEN) [    1.496927]    [<ffff82d08022e475>] F process_pending_softirqs+0x35/0x37

Interesting, I didn't run into this problem. Obviously I managed to
forget handling the case of recursion.


Juergen
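
One way the recursion could be cut off, mirroring the rcu_process_callbacks()
hunk above (a sketch; the per-CPU flag name is made up):

static DEFINE_PER_CPU(bool, rcu_barrier_active);

static void rcu_process_callbacks(void)
{
    /* ... callback processing as in the hunk above ... */

    /*
     * The wait loop in rcu_barrier_action() calls
     * process_pending_softirqs(), which re-enters this function; without
     * a guard the calls nest until the stack overflows.
     */
    if ( atomic_read(&cpu_count) && !this_cpu(rcu_barrier_active) )
    {
        this_cpu(rcu_barrier_active) = true;
        rcu_barrier_action();
        this_cpu(rcu_barrier_active) = false;
    }
}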

Patch

diff --git a/xen/common/rcupdate.c b/xen/common/rcupdate.c
index 079ea9d8a1..1f02a804e3 100644
--- a/xen/common/rcupdate.c
+++ b/xen/common/rcupdate.c
@@ -143,47 +143,51 @@  static int qhimark = 10000;
 static int qlowmark = 100;
 static int rsinterval = 1000;
 
-struct rcu_barrier_data {
-    struct rcu_head head;
-    atomic_t *cpu_count;
-};
+/*
+ * rcu_barrier() handling:
+ * cpu_count holds the number of cpu required to finish barrier handling.
+ * Cpus are synchronized via softirq mechanism. rcu_barrier() is regarded to
+ * be active if cpu_count is not zero. In case rcu_barrier() is called on
+ * multiple cpus it is enough to check for cpu_count being not zero on entry
+ * and to call process_pending_softirqs() in a loop until cpu_count drops to
+ * zero, as syncing has been requested already and we don't need to sync
+ * multiple times.
+ */
+static atomic_t cpu_count = ATOMIC_INIT(0);
 
 static void rcu_barrier_callback(struct rcu_head *head)
 {
-    struct rcu_barrier_data *data = container_of(
-        head, struct rcu_barrier_data, head);
-    atomic_inc(data->cpu_count);
+    atomic_dec(&cpu_count);
 }
 
-static int rcu_barrier_action(void *_cpu_count)
+static void rcu_barrier_action(void)
 {
-    struct rcu_barrier_data data = { .cpu_count = _cpu_count };
-
-    ASSERT(!local_irq_is_enabled());
-    local_irq_enable();
+    struct rcu_head head;
 
     /*
      * When callback is executed, all previously-queued RCU work on this CPU
      * is completed. When all CPUs have executed their callback, data.cpu_count
      * will have been incremented to include every online CPU.
      */
-    call_rcu(&data.head, rcu_barrier_callback);
+    call_rcu(&head, rcu_barrier_callback);
 
-    while ( atomic_read(data.cpu_count) != num_online_cpus() )
+    while ( atomic_read(&cpu_count) )
     {
         process_pending_softirqs();
         cpu_relax();
     }
-
-    local_irq_disable();
-
-    return 0;
 }
 
-int rcu_barrier(void)
+void rcu_barrier(void)
 {
-    atomic_t cpu_count = ATOMIC_INIT(0);
-    return stop_machine_run(rcu_barrier_action, &cpu_count, NR_CPUS);
+    if ( !atomic_cmpxchg(&cpu_count, 0, num_online_cpus()) )
+        cpumask_raise_softirq(&cpu_online_map, RCU_SOFTIRQ);
+
+    while ( atomic_read(&cpu_count) )
+    {
+        process_pending_softirqs();
+        cpu_relax();
+    }
 }
 
 /* Is batch a before batch b ? */
@@ -422,6 +426,9 @@  static void rcu_process_callbacks(void)
         rdp->process_callbacks = false;
         __rcu_process_callbacks(&rcu_ctrlblk, rdp);
     }
+
+    if ( atomic_read(&cpu_count) )
+        rcu_barrier_action();
 }
 
 static int __rcu_pending(struct rcu_ctrlblk *rcp, struct rcu_data *rdp)
diff --git a/xen/include/xen/rcupdate.h b/xen/include/xen/rcupdate.h
index 174d058113..87f35b7704 100644
--- a/xen/include/xen/rcupdate.h
+++ b/xen/include/xen/rcupdate.h
@@ -143,7 +143,7 @@  void rcu_check_callbacks(int cpu);
 void call_rcu(struct rcu_head *head, 
               void (*func)(struct rcu_head *head));
 
-int rcu_barrier(void);
+void rcu_barrier(void);
 
 void rcu_idle_enter(unsigned int cpu);
 void rcu_idle_exit(unsigned int cpu);