
[V7,4/5] asm-generic: spinlock: Add combo spinlock (ticket & queued)

Message ID 20220628081707.1997728-5-guoren@kernel.org (mailing list archive)
State New, archived
Series riscv: Add qspinlock support with combo style

Commit Message

Guo Ren June 28, 2022, 8:17 a.m. UTC
From: Guo Ren <guoren@linux.alibaba.com>

Some architectures have flexible requirements on the type of spinlock.
Some LL/SC ISAs don't force the micro-architecture to provide a strong
forward-progress guarantee, so micro-architectures with different memory
models can appear within one ISA. The ticket lock suits LL/SC
micro-architectures built around an exclusive monitor, with a limited
number of cores and !NUMA. The queued spinlock handles NUMA/large-scale
scenarios on LL/SC micro-architectures designed with a strong
forward-progress guarantee.

So, make the spinlock a combo of both implementations.

Signed-off-by: Guo Ren <guoren@linux.alibaba.com>
Signed-off-by: Guo Ren <guoren@kernel.org>
Cc: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Palmer Dabbelt <palmer@rivosinc.com>
---
 include/asm-generic/spinlock.h | 43 ++++++++++++++++++++++++++++++++--
 kernel/locking/qspinlock.c     |  2 ++
 2 files changed, 43 insertions(+), 2 deletions(-)

Comments

Waiman Long June 28, 2022, 6:13 p.m. UTC | #1
On 6/28/22 04:17, guoren@kernel.org wrote:
> From: Guo Ren <guoren@linux.alibaba.com>
>
> Some architecture has a flexible requirement on the type of spinlock.
> Some LL/SC architectures of ISA don't force micro-arch to give a strong
> forward guarantee. Thus different kinds of memory model micro-arch would
> come out in one ISA. The ticket lock is suitable for exclusive monitor
> designed LL/SC micro-arch with limited cores and "!NUMA". The
> queue-spinlock could deal with NUMA/large-scale scenarios with a strong
> forward guarantee designed LL/SC micro-arch.
>
> So, make the spinlock a combo with feature.
>
> Signed-off-by: Guo Ren <guoren@linux.alibaba.com>
> Signed-off-by: Guo Ren <guoren@kernel.org>
> Cc: Peter Zijlstra (Intel) <peterz@infradead.org>
> Cc: Arnd Bergmann <arnd@arndb.de>
> Cc: Palmer Dabbelt <palmer@rivosinc.com>
> ---
>   include/asm-generic/spinlock.h | 43 ++++++++++++++++++++++++++++++++--
>   kernel/locking/qspinlock.c     |  2 ++
>   2 files changed, 43 insertions(+), 2 deletions(-)
>
> diff --git a/include/asm-generic/spinlock.h b/include/asm-generic/spinlock.h
> index f41dc7c2b900..a9b43089bf99 100644
> --- a/include/asm-generic/spinlock.h
> +++ b/include/asm-generic/spinlock.h
> @@ -28,34 +28,73 @@
>   #define __ASM_GENERIC_SPINLOCK_H
>   
>   #include <asm-generic/ticket_spinlock.h>
> +#ifdef CONFIG_ARCH_USE_QUEUED_SPINLOCKS
> +#include <linux/jump_label.h>
> +#include <asm-generic/qspinlock.h>
> +
> +DECLARE_STATIC_KEY_TRUE(use_qspinlock_key);
> +#endif
> +
> +#undef arch_spin_is_locked
> +#undef arch_spin_is_contended
> +#undef arch_spin_value_unlocked
> +#undef arch_spin_lock
> +#undef arch_spin_trylock
> +#undef arch_spin_unlock
>   
>   static __always_inline void arch_spin_lock(arch_spinlock_t *lock)
>   {
> -	ticket_spin_lock(lock);
> +#ifdef CONFIG_ARCH_USE_QUEUED_SPINLOCKS
> +	if (static_branch_likely(&use_qspinlock_key))
> +		queued_spin_lock(lock);
> +	else
> +#endif
> +		ticket_spin_lock(lock);
>   }

Why do you use a static key to control whether to use qspinlock or 
ticket lock? In the next patch, you have

+#if !defined(CONFIG_NUMA) && defined(CONFIG_QUEUED_SPINLOCKS)
+	static_branch_disable(&use_qspinlock_key);
+#endif

So the current config setting determines whether qspinlock will be used, not 
some boot-time parameter that the user needs to specify. This patch will 
just add useless code to the lock/unlock sites. I don't see any benefit in 
doing that.

Cheers,
Longman
Guo Ren June 29, 2022, 1:17 a.m. UTC | #2
On Wed, Jun 29, 2022 at 2:13 AM Waiman Long <longman@redhat.com> wrote:
>
> On 6/28/22 04:17, guoren@kernel.org wrote:
> > From: Guo Ren <guoren@linux.alibaba.com>
> >
> > Some architecture has a flexible requirement on the type of spinlock.
> > Some LL/SC architectures of ISA don't force micro-arch to give a strong
> > forward guarantee. Thus different kinds of memory model micro-arch would
> > come out in one ISA. The ticket lock is suitable for exclusive monitor
> > designed LL/SC micro-arch with limited cores and "!NUMA". The
> > queue-spinlock could deal with NUMA/large-scale scenarios with a strong
> > forward guarantee designed LL/SC micro-arch.
> >
> > So, make the spinlock a combo with feature.
> >
> > Signed-off-by: Guo Ren <guoren@linux.alibaba.com>
> > Signed-off-by: Guo Ren <guoren@kernel.org>
> > Cc: Peter Zijlstra (Intel) <peterz@infradead.org>
> > Cc: Arnd Bergmann <arnd@arndb.de>
> > Cc: Palmer Dabbelt <palmer@rivosinc.com>
> > ---
> >   include/asm-generic/spinlock.h | 43 ++++++++++++++++++++++++++++++++--
> >   kernel/locking/qspinlock.c     |  2 ++
> >   2 files changed, 43 insertions(+), 2 deletions(-)
> >
> > diff --git a/include/asm-generic/spinlock.h b/include/asm-generic/spinlock.h
> > index f41dc7c2b900..a9b43089bf99 100644
> > --- a/include/asm-generic/spinlock.h
> > +++ b/include/asm-generic/spinlock.h
> > @@ -28,34 +28,73 @@
> >   #define __ASM_GENERIC_SPINLOCK_H
> >
> >   #include <asm-generic/ticket_spinlock.h>
> > +#ifdef CONFIG_ARCH_USE_QUEUED_SPINLOCKS
> > +#include <linux/jump_label.h>
> > +#include <asm-generic/qspinlock.h>
> > +
> > +DECLARE_STATIC_KEY_TRUE(use_qspinlock_key);
> > +#endif
> > +
> > +#undef arch_spin_is_locked
> > +#undef arch_spin_is_contended
> > +#undef arch_spin_value_unlocked
> > +#undef arch_spin_lock
> > +#undef arch_spin_trylock
> > +#undef arch_spin_unlock
> >
> >   static __always_inline void arch_spin_lock(arch_spinlock_t *lock)
> >   {
> > -     ticket_spin_lock(lock);
> > +#ifdef CONFIG_ARCH_USE_QUEUED_SPINLOCKS
> > +     if (static_branch_likely(&use_qspinlock_key))
> > +             queued_spin_lock(lock);
> > +     else
> > +#endif
> > +             ticket_spin_lock(lock);
> >   }
>
> Why do you use a static key to control whether to use qspinlock or
> ticket lock? In the next patch, you have
>
> +#if !defined(CONFIG_NUMA) && defined(CONFIG_QUEUED_SPINLOCKS)
> +       static_branch_disable(&use_qspinlock_key);
> +#endif
>
> So the current config setting determines if qspinlock will be used, not
> some boot time parameter that user needs to specify. This patch will
> just add useless code to lock/unlock sites. I don't see any benefit of
> doing that.
This is a starting patch for riscv; next, we could let vendors make the choice.
I'm not sure whether they would prefer a cmdline option or a vendor-specific
errata style.

Eventually, we want one riscv Image to support all machines: some using the
ticket lock and some using qspinlock.

>
> Cheers,
> Longman
>



--
Best Regards
 Guo Ren

ML: https://lore.kernel.org/linux-csky/
Waiman Long June 29, 2022, 1:34 a.m. UTC | #3
On 6/28/22 21:17, Guo Ren wrote:
> On Wed, Jun 29, 2022 at 2:13 AM Waiman Long <longman@redhat.com> wrote:
>> On 6/28/22 04:17, guoren@kernel.org wrote:
>>> From: Guo Ren <guoren@linux.alibaba.com>
>>>
>>> Some architecture has a flexible requirement on the type of spinlock.
>>> Some LL/SC architectures of ISA don't force micro-arch to give a strong
>>> forward guarantee. Thus different kinds of memory model micro-arch would
>>> come out in one ISA. The ticket lock is suitable for exclusive monitor
>>> designed LL/SC micro-arch with limited cores and "!NUMA". The
>>> queue-spinlock could deal with NUMA/large-scale scenarios with a strong
>>> forward guarantee designed LL/SC micro-arch.
>>>
>>> So, make the spinlock a combo with feature.
>>>
>>> Signed-off-by: Guo Ren <guoren@linux.alibaba.com>
>>> Signed-off-by: Guo Ren <guoren@kernel.org>
>>> Cc: Peter Zijlstra (Intel) <peterz@infradead.org>
>>> Cc: Arnd Bergmann <arnd@arndb.de>
>>> Cc: Palmer Dabbelt <palmer@rivosinc.com>
>>> ---
>>>    include/asm-generic/spinlock.h | 43 ++++++++++++++++++++++++++++++++--
>>>    kernel/locking/qspinlock.c     |  2 ++
>>>    2 files changed, 43 insertions(+), 2 deletions(-)
>>>
>>> diff --git a/include/asm-generic/spinlock.h b/include/asm-generic/spinlock.h
>>> index f41dc7c2b900..a9b43089bf99 100644
>>> --- a/include/asm-generic/spinlock.h
>>> +++ b/include/asm-generic/spinlock.h
>>> @@ -28,34 +28,73 @@
>>>    #define __ASM_GENERIC_SPINLOCK_H
>>>
>>>    #include <asm-generic/ticket_spinlock.h>
>>> +#ifdef CONFIG_ARCH_USE_QUEUED_SPINLOCKS
>>> +#include <linux/jump_label.h>
>>> +#include <asm-generic/qspinlock.h>
>>> +
>>> +DECLARE_STATIC_KEY_TRUE(use_qspinlock_key);
>>> +#endif
>>> +
>>> +#undef arch_spin_is_locked
>>> +#undef arch_spin_is_contended
>>> +#undef arch_spin_value_unlocked
>>> +#undef arch_spin_lock
>>> +#undef arch_spin_trylock
>>> +#undef arch_spin_unlock
>>>
>>>    static __always_inline void arch_spin_lock(arch_spinlock_t *lock)
>>>    {
>>> -     ticket_spin_lock(lock);
>>> +#ifdef CONFIG_ARCH_USE_QUEUED_SPINLOCKS
>>> +     if (static_branch_likely(&use_qspinlock_key))
>>> +             queued_spin_lock(lock);
>>> +     else
>>> +#endif
>>> +             ticket_spin_lock(lock);
>>>    }
>> Why do you use a static key to control whether to use qspinlock or
>> ticket lock? In the next patch, you have
>>
>> +#if !defined(CONFIG_NUMA) && defined(CONFIG_QUEUED_SPINLOCKS)
>> +       static_branch_disable(&use_qspinlock_key);
>> +#endif
>>
>> So the current config setting determines if qspinlock will be used, not
>> some boot time parameter that user needs to specify. This patch will
>> just add useless code to lock/unlock sites. I don't see any benefit of
>> doing that.
> This is a startup patch for riscv. next, we could let vendors make choices.
> I'm not sure they like cmdline or vendor-specific errata style.
>
> Eventually, we would let one riscv Image support all machines, some
> use ticket-lock, and some use qspinlock.

OK. Maybe you can postpone this combo spinlock until there is a good use 
case for it. Upstream usually doesn't accept patches that have no good use 
case yet.

Cheers,
Longman
Guo Ren June 29, 2022, 2:29 a.m. UTC | #4
On Wed, Jun 29, 2022 at 9:34 AM Waiman Long <longman@redhat.com> wrote:
>
> On 6/28/22 21:17, Guo Ren wrote:
> > On Wed, Jun 29, 2022 at 2:13 AM Waiman Long <longman@redhat.com> wrote:
> >> On 6/28/22 04:17, guoren@kernel.org wrote:
> >>> From: Guo Ren <guoren@linux.alibaba.com>
> >>>
> >>> Some architecture has a flexible requirement on the type of spinlock.
> >>> Some LL/SC architectures of ISA don't force micro-arch to give a strong
> >>> forward guarantee. Thus different kinds of memory model micro-arch would
> >>> come out in one ISA. The ticket lock is suitable for exclusive monitor
> >>> designed LL/SC micro-arch with limited cores and "!NUMA". The
> >>> queue-spinlock could deal with NUMA/large-scale scenarios with a strong
> >>> forward guarantee designed LL/SC micro-arch.
> >>>
> >>> So, make the spinlock a combo with feature.
> >>>
> >>> Signed-off-by: Guo Ren <guoren@linux.alibaba.com>
> >>> Signed-off-by: Guo Ren <guoren@kernel.org>
> >>> Cc: Peter Zijlstra (Intel) <peterz@infradead.org>
> >>> Cc: Arnd Bergmann <arnd@arndb.de>
> >>> Cc: Palmer Dabbelt <palmer@rivosinc.com>
> >>> ---
> >>>    include/asm-generic/spinlock.h | 43 ++++++++++++++++++++++++++++++++--
> >>>    kernel/locking/qspinlock.c     |  2 ++
> >>>    2 files changed, 43 insertions(+), 2 deletions(-)
> >>>
> >>> diff --git a/include/asm-generic/spinlock.h b/include/asm-generic/spinlock.h
> >>> index f41dc7c2b900..a9b43089bf99 100644
> >>> --- a/include/asm-generic/spinlock.h
> >>> +++ b/include/asm-generic/spinlock.h
> >>> @@ -28,34 +28,73 @@
> >>>    #define __ASM_GENERIC_SPINLOCK_H
> >>>
> >>>    #include <asm-generic/ticket_spinlock.h>
> >>> +#ifdef CONFIG_ARCH_USE_QUEUED_SPINLOCKS
> >>> +#include <linux/jump_label.h>
> >>> +#include <asm-generic/qspinlock.h>
> >>> +
> >>> +DECLARE_STATIC_KEY_TRUE(use_qspinlock_key);
> >>> +#endif
> >>> +
> >>> +#undef arch_spin_is_locked
> >>> +#undef arch_spin_is_contended
> >>> +#undef arch_spin_value_unlocked
> >>> +#undef arch_spin_lock
> >>> +#undef arch_spin_trylock
> >>> +#undef arch_spin_unlock
> >>>
> >>>    static __always_inline void arch_spin_lock(arch_spinlock_t *lock)
> >>>    {
> >>> -     ticket_spin_lock(lock);
> >>> +#ifdef CONFIG_ARCH_USE_QUEUED_SPINLOCKS
> >>> +     if (static_branch_likely(&use_qspinlock_key))
> >>> +             queued_spin_lock(lock);
> >>> +     else
> >>> +#endif
> >>> +             ticket_spin_lock(lock);
> >>>    }
> >> Why do you use a static key to control whether to use qspinlock or
> >> ticket lock? In the next patch, you have
> >>
> >> +#if !defined(CONFIG_NUMA) && defined(CONFIG_QUEUED_SPINLOCKS)
> >> +       static_branch_disable(&use_qspinlock_key);
> >> +#endif
> >>
> >> So the current config setting determines if qspinlock will be used, not
> >> some boot time parameter that user needs to specify. This patch will
> >> just add useless code to lock/unlock sites. I don't see any benefit of
> >> doing that.
> > This is a startup patch for riscv. next, we could let vendors make choices.
> > I'm not sure they like cmdline or vendor-specific errata style.
> >
> > Eventually, we would let one riscv Image support all machines, some
> > use ticket-lock, and some use qspinlock.
>
> OK. Maybe you can postpone this combo spinlock until there is a good use
> case for it. Upstream usually don't accept patches that have no good use
> case yet.
>
> Cheers,
> Longman
>

I will add a cmdline parameter to control the choice between qspinlock and
ticket-lock in the next version.

diff --git a/arch/riscv/kernel/setup.c b/arch/riscv/kernel/setup.c
index b9b234157a66..5ade490c2f27 100644
--- a/arch/riscv/kernel/setup.c
+++ b/arch/riscv/kernel/setup.c
@@ -270,6 +270,10 @@ void __init setup_arch(char **cmdline_p)

        early_ioremap_setup();
        jump_label_init();
+
+#if !defined(CONFIG_NUMA) && defined(CONFIG_QUEUED_SPINLOCKS)
+       static_branch_disable(&use_qspinlock_key);
+#endif
        parse_early_param();

        efi_init();
@@ -295,10 +299,6 @@ void __init setup_arch(char **cmdline_p)
        setup_smp();
 #endif

-#if !defined(CONFIG_NUMA) && defined(CONFIG_QUEUED_SPINLOCKS)
-       static_branch_disable(&use_qspinlock_key);
-#endif
-
        riscv_fill_hwcap();
        apply_boot_alternatives();
 }
@@ -330,3 +330,13 @@ void free_initmem(void)

        free_initmem_default(POISON_FREE_INITMEM);
 }
+
+#ifdef CONFIG_QUEUED_SPINLOCKS
+static int __init disable_qspinlock(char *p)
+{
+       static_branch_disable(&use_qspinlock_key);
+       return 0;
+}
+
+early_param("disable_qspinlock", disable_qspinlock);
+#endif
Arnd Bergmann June 29, 2022, 7:08 a.m. UTC | #5
On Wed, Jun 29, 2022 at 3:34 AM Waiman Long <longman@redhat.com> wrote:
> On 6/28/22 21:17, Guo Ren wrote:
> > On Wed, Jun 29, 2022 at 2:13 AM Waiman Long <longman@redhat.com> wrote:
> >> On 6/28/22 04:17, guoren@kernel.org wrote:
> >>
> >> So the current config setting determines if qspinlock will be used, not
> >> some boot time parameter that user needs to specify. This patch will
> >> just add useless code to lock/unlock sites. I don't see any benefit of
> >> doing that.
> > This is a startup patch for riscv. next, we could let vendors make choices.
> > I'm not sure they like cmdline or vendor-specific errata style.
> >
> > Eventually, we would let one riscv Image support all machines, some
> > use ticket-lock, and some use qspinlock.
>
> OK. Maybe you can postpone this combo spinlock until there is a good use
> case for it. Upstream usually don't accept patches that have no good use
> case yet.

I think the use case on risc-v is this: there are cases where the qspinlock
is preferred for performance reasons, but there are also CPU cores on
which it is not safe to use. risc-v, like most modern architectures, has a
strict rule about being able to build kernels that work on all machines,
so without something like this, it would not be able to use qspinlock at all.

On the other hand, I don't really like the idea of putting the static-key
wrapper into the asm-generic header. Especially the ticket spinlock
implementation should be simple and not depend on jump labels.

From looking at the header file dependencies on arm64, I know that
putting jump labels into core infrastructure like the arch_spin_lock()
makes a big mess of indirect includes and measurably slows down
the kernel build.

I think this can still be done in the riscv asm/spinlock.h header with
minimal impact on the asm-generic file if the riscv maintainers see
a significant enough advantage, but I don't want it in the common code.

        Arnd
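
A minimal sketch of what that riscv-private header could look like, assuming
the existing ticket/queued helpers and the use_qspinlock_key static key from
this series; the file layout and the exact Kconfig guard are illustrative,
not part of the posted patches:

/* arch/riscv/include/asm/spinlock.h -- illustrative sketch only */
#ifndef __ASM_RISCV_SPINLOCK_H
#define __ASM_RISCV_SPINLOCK_H

#ifdef CONFIG_ARCH_USE_QUEUED_SPINLOCKS
#include <linux/jump_label.h>
#include <asm-generic/qspinlock.h>
#include <asm-generic/ticket_spinlock.h>

/* Both generic headers map arch_spin_* to their own helpers; drop that. */
#undef arch_spin_is_locked
#undef arch_spin_is_contended
#undef arch_spin_value_unlocked
#undef arch_spin_lock
#undef arch_spin_trylock
#undef arch_spin_unlock

DECLARE_STATIC_KEY_TRUE(use_qspinlock_key);

static __always_inline void arch_spin_lock(arch_spinlock_t *lock)
{
	if (static_branch_likely(&use_qspinlock_key))
		queued_spin_lock(lock);
	else
		ticket_spin_lock(lock);
}
/* ... same pattern for trylock/unlock/is_locked/is_contended ... */

#else /* !CONFIG_ARCH_USE_QUEUED_SPINLOCKS */
/* Ticket-lock-only kernels keep the plain generic header, no jump labels. */
#include <asm-generic/spinlock.h>
#endif

#endif /* __ASM_RISCV_SPINLOCK_H */

This keeps asm-generic/spinlock.h untouched and confines the jump-label
dependency to the architecture that actually wants the run-time choice.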
Guo Ren June 29, 2022, 8:24 a.m. UTC | #6
On Wed, Jun 29, 2022 at 3:09 PM Arnd Bergmann <arnd@arndb.de> wrote:
>
> On Wed, Jun 29, 2022 at 3:34 AM Waiman Long <longman@redhat.com> wrote:
> > On 6/28/22 21:17, Guo Ren wrote:
> > > On Wed, Jun 29, 2022 at 2:13 AM Waiman Long <longman@redhat.com> wrote:
> > >> On 6/28/22 04:17, guoren@kernel.org wrote:
> > >>
> > >> So the current config setting determines if qspinlock will be used, not
> > >> some boot time parameter that user needs to specify. This patch will
> > >> just add useless code to lock/unlock sites. I don't see any benefit of
> > >> doing that.
> > > This is a startup patch for riscv. next, we could let vendors make choices.
> > > I'm not sure they like cmdline or vendor-specific errata style.
> > >
> > > Eventually, we would let one riscv Image support all machines, some
> > > use ticket-lock, and some use qspinlock.
> >
> > OK. Maybe you can postpone this combo spinlock until there is a good use
> > case for it. Upstream usually don't accept patches that have no good use
> > case yet.
>
> I think the usecase on risc-v is this: there are cases where the qspinlock
> is preferred for performance reasons, but there are also CPU cores on
> which it is not safe to use. risc-v like most modern architectures has a
> strict rule about being able to build kernels that work on all machines,
> so without something like this, it would not be able to use qspinlock at all.
>
> On the other hand, I don't really like the idea of putting the static-key
> wrapper into the asm-generic header. Especially the ticket spinlock
> implementation should be simple and not depend on jump labels.
If CONFIG_ARCH_USE_QUEUED_SPINLOCKS is not enabled, the patch still keeps
the ticket lock simple, without jump labels.

>
> From looking at the header file dependencies on arm64, I know that
> putting jump labels into core infrastructure like the arch_spin_lock()
> makes a big mess of indirect includes and measurably slows down
> the kernel build.
arm64 doesn't need the combo spinlock; it could use pure qspinlock while
keeping the current header files included.

>
> I think this can still be done in the riscv asm/spinlock.h header with
> minimal impact on the asm-generic file if the riscv maintainers see
> a significant enough advantage, but I don't want it in the common code.
Yes, it could. I agree with using the combo spinlock only for riscv.

>
>         Arnd
Arnd Bergmann June 29, 2022, 8:29 a.m. UTC | #7
On Wed, Jun 29, 2022 at 10:24 AM Guo Ren <guoren@kernel.org> wrote:
> On Wed, Jun 29, 2022 at 3:09 PM Arnd Bergmann <arnd@arndb.de> wrote:
> > On Wed, Jun 29, 2022 at 3:34 AM Waiman Long <longman@redhat.com> wrote:
> >
> > From looking at the header file dependencies on arm64, I know that
> > putting jump labels into core infrastructure like the arch_spin_lock()
> > makes a big mess of indirect includes and measurably slows down
> > the kernel build.
> arm64 needn't combo spinlock, it could use pure qspinlock with keeping
> current header files included.

arm64 has a different problem: there are two separate sets of atomic
instructions, and the decision between those is similarly done using
jump labels. I definitely like the ability to choose between qspinlock
and ticket spinlock on arm64 as well. This can be done as a
compile-time choice, but both of them still depend on jump labels.

        Arnd
Waiman Long June 29, 2022, 12:53 p.m. UTC | #8
On 6/29/22 03:08, Arnd Bergmann wrote:
> On Wed, Jun 29, 2022 at 3:34 AM Waiman Long <longman@redhat.com> wrote:
>> On 6/28/22 21:17, Guo Ren wrote:
>>> On Wed, Jun 29, 2022 at 2:13 AM Waiman Long <longman@redhat.com> wrote:
>>>> On 6/28/22 04:17, guoren@kernel.org wrote:
>>>>
>>>> So the current config setting determines if qspinlock will be used, not
>>>> some boot time parameter that user needs to specify. This patch will
>>>> just add useless code to lock/unlock sites. I don't see any benefit of
>>>> doing that.
>>> This is a startup patch for riscv. next, we could let vendors make choices.
>>> I'm not sure they like cmdline or vendor-specific errata style.
>>>
>>> Eventually, we would let one riscv Image support all machines, some
>>> use ticket-lock, and some use qspinlock.
>> OK. Maybe you can postpone this combo spinlock until there is a good use
>> case for it. Upstream usually don't accept patches that have no good use
>> case yet.
> I think the usecase on risc-v is this: there are cases where the qspinlock
> is preferred for performance reasons, but there are also CPU cores on
> which it is not safe to use. risc-v like most modern architectures has a
> strict rule about being able to build kernels that work on all machines,
> so without something like this, it would not be able to use qspinlock at all.

My objection to the current patch is really that everything is determined 
at compile time, so there is no point in using a static key if it cannot be 
changed at boot time. Adding a boot-time switch does make the use of a 
static key more reasonable.


>
> On the other hand, I don't really like the idea of putting the static-key
> wrapper into the asm-generic header. Especially the ticket spinlock
> implementation should be simple and not depend on jump labels.
>
>  From looking at the header file dependencies on arm64, I know that
> putting jump labels into core infrastructure like the arch_spin_lock()
> makes a big mess of indirect includes and measurably slows down
> the kernel build.
>
> I think this can still be done in the riscv asm/spinlock.h header with
> minimal impact on the asm-generic file if the riscv maintainers see
> a significant enough advantage, but I don't want it in the common code.

I have a similar feeling. In addition, I don't like the idea of adding a 
static key to qspinlock.c that has nothing to do with the qspinlock 
logic. I would like to see it put elsewhere.

Cheers,
Longman
Guo Ren July 1, 2022, 12:18 p.m. UTC | #9
On Wed, Jun 29, 2022 at 4:30 PM Arnd Bergmann <arnd@arndb.de> wrote:
>
> On Wed, Jun 29, 2022 at 10:24 AM Guo Ren <guoren@kernel.org> wrote:
> > On Wed, Jun 29, 2022 at 3:09 PM Arnd Bergmann <arnd@arndb.de> wrote:
> > > On Wed, Jun 29, 2022 at 3:34 AM Waiman Long <longman@redhat.com> wrote:
> > >
> > > From looking at the header file dependencies on arm64, I know that
> > > putting jump labels into core infrastructure like the arch_spin_lock()
> > > makes a big mess of indirect includes and measurably slows down
> > > the kernel build.
> > arm64 needn't combo spinlock, it could use pure qspinlock with keeping
> > current header files included.
>
> arm64 has a different problem: there are two separate sets of atomic
> instructions, and the decision between those is similarly done using
> jump labels. I definitely like the ability to choose between qspinlock
> and ticket spinlock on arm64 as well. This can be done as a
> compile-time choice, but both of them still depend on jump labels.
1. xchg uses ALTERNATIVE, but cmpxchg uses jump labels.
2. arm64 still uses qspinlock with LL/SC, and I think it gets a strong
enough forward-progress guarantee with "prfm pstl1strm".
But another question: if LL/SC could give a strong enough forward-progress
guarantee, why did arm64 introduce LSE? Only for code size reduction? Why
isn't instruction fusion enough?


>
>         Arnd



--
Best Regards
 Guo Ren

ML: https://lore.kernel.org/linux-csky/
Peter Zijlstra July 4, 2022, 9:57 a.m. UTC | #10
On Tue, Jun 28, 2022 at 04:17:06AM -0400, guoren@kernel.org wrote:
> From: Guo Ren <guoren@linux.alibaba.com>
> 
> Some architecture has a flexible requirement on the type of spinlock.
> Some LL/SC architectures of ISA don't force micro-arch to give a strong
> forward guarantee. Thus different kinds of memory model micro-arch would
> come out in one ISA. The ticket lock is suitable for exclusive monitor
> designed LL/SC micro-arch with limited cores and "!NUMA". The
> queue-spinlock could deal with NUMA/large-scale scenarios with a strong
> forward guarantee designed LL/SC micro-arch.
> 
> So, make the spinlock a combo with feature.
> 
> Signed-off-by: Guo Ren <guoren@linux.alibaba.com>
> Signed-off-by: Guo Ren <guoren@kernel.org>
> Cc: Peter Zijlstra (Intel) <peterz@infradead.org>
> Cc: Arnd Bergmann <arnd@arndb.de>
> Cc: Palmer Dabbelt <palmer@rivosinc.com>
> ---
>  include/asm-generic/spinlock.h | 43 ++++++++++++++++++++++++++++++++--
>  kernel/locking/qspinlock.c     |  2 ++
>  2 files changed, 43 insertions(+), 2 deletions(-)
> 
> diff --git a/include/asm-generic/spinlock.h b/include/asm-generic/spinlock.h
> index f41dc7c2b900..a9b43089bf99 100644
> --- a/include/asm-generic/spinlock.h
> +++ b/include/asm-generic/spinlock.h
> @@ -28,34 +28,73 @@
>  #define __ASM_GENERIC_SPINLOCK_H
>  
>  #include <asm-generic/ticket_spinlock.h>
> +#ifdef CONFIG_ARCH_USE_QUEUED_SPINLOCKS
> +#include <linux/jump_label.h>
> +#include <asm-generic/qspinlock.h>
> +
> +DECLARE_STATIC_KEY_TRUE(use_qspinlock_key);
> +#endif
> +
> +#undef arch_spin_is_locked
> +#undef arch_spin_is_contended
> +#undef arch_spin_value_unlocked
> +#undef arch_spin_lock
> +#undef arch_spin_trylock
> +#undef arch_spin_unlock
>  
>  static __always_inline void arch_spin_lock(arch_spinlock_t *lock)
>  {
> -	ticket_spin_lock(lock);
> +#ifdef CONFIG_ARCH_USE_QUEUED_SPINLOCKS
> +	if (static_branch_likely(&use_qspinlock_key))
> +		queued_spin_lock(lock);
> +	else
> +#endif
> +		ticket_spin_lock(lock);
>  }
>  
>  static __always_inline bool arch_spin_trylock(arch_spinlock_t *lock)
>  {
> +#ifdef CONFIG_ARCH_USE_QUEUED_SPINLOCKS
> +	if (static_branch_likely(&use_qspinlock_key))
> +		return queued_spin_trylock(lock);
> +#endif
>  	return ticket_spin_trylock(lock);
>  }
>  
>  static __always_inline void arch_spin_unlock(arch_spinlock_t *lock)
>  {
> -	ticket_spin_unlock(lock);
> +#ifdef CONFIG_ARCH_USE_QUEUED_SPINLOCKS
> +	if (static_branch_likely(&use_qspinlock_key))
> +		queued_spin_unlock(lock);
> +	else
> +#endif
> +		ticket_spin_unlock(lock);
>  }
>  
>  static __always_inline int arch_spin_is_locked(arch_spinlock_t *lock)
>  {
> +#ifdef CONFIG_ARCH_USE_QUEUED_SPINLOCKS
> +	if (static_branch_likely(&use_qspinlock_key))
> +		return queued_spin_is_locked(lock);
> +#endif
>  	return ticket_spin_is_locked(lock);
>  }
>  
>  static __always_inline int arch_spin_is_contended(arch_spinlock_t *lock)
>  {
> +#ifdef CONFIG_ARCH_USE_QUEUED_SPINLOCKS
> +	if (static_branch_likely(&use_qspinlock_key))
> +		return queued_spin_is_contended(lock);
> +#endif
>  	return ticket_spin_is_contended(lock);
>  }
>  
>  static __always_inline int arch_spin_value_unlocked(arch_spinlock_t lock)
>  {
> +#ifdef CONFIG_ARCH_USE_QUEUED_SPINLOCKS
> +	if (static_branch_likely(&use_qspinlock_key))
> +		return queued_spin_value_unlocked(lock);
> +#endif
>  	return ticket_spin_value_unlocked(lock);
>  }

Urggghhhh....

I really don't think you want this in generic code. Also, I'm thinking
any arch that does this wants to make sure it doesn't inline any of this
stuff. That is, said arch must not have ARCH_INLINE_SPIN_*

And if you're going to force things out of line, then I think you can
get better code using static_call().

*shudder*...
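
A minimal sketch of the static_call() variant hinted at above, assuming the
arch_spin_* wrappers are moved out of line; the call name and the small
wrapper functions are illustrative, not from this series. On architectures
without HAVE_STATIC_CALL, static_call() falls back to an ordinary indirect
call through the generic implementation.

/* illustrative sketch: pick the lock implementation once, at boot */
#include <linux/static_call.h>

/* The generic helpers are inline, so give static_call real symbols. */
static void combo_ticket_lock(arch_spinlock_t *lock)
{
	ticket_spin_lock(lock);
}

static void combo_queued_lock(arch_spinlock_t *lock)
{
	queued_spin_lock(lock);
}

DEFINE_STATIC_CALL(combo_spin_lock, combo_ticket_lock);

/* out-of-line arch_spin_lock(): the call site is patched, not branched */
void arch_spin_lock(arch_spinlock_t *lock)
{
	static_call(combo_spin_lock)(lock);
}

/* called early in setup_arch(), once the platform is known to be safe */
static void __init combo_use_qspinlock(void)
{
	static_call_update(combo_spin_lock, combo_queued_lock);
}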
Guo Ren July 4, 2022, 1:13 p.m. UTC | #11
On Mon, Jul 4, 2022 at 5:58 PM Peter Zijlstra <peterz@infradead.org> wrote:
>
> On Tue, Jun 28, 2022 at 04:17:06AM -0400, guoren@kernel.org wrote:
> > From: Guo Ren <guoren@linux.alibaba.com>
> >
> > Some architecture has a flexible requirement on the type of spinlock.
> > Some LL/SC architectures of ISA don't force micro-arch to give a strong
> > forward guarantee. Thus different kinds of memory model micro-arch would
> > come out in one ISA. The ticket lock is suitable for exclusive monitor
> > designed LL/SC micro-arch with limited cores and "!NUMA". The
> > queue-spinlock could deal with NUMA/large-scale scenarios with a strong
> > forward guarantee designed LL/SC micro-arch.
> >
> > So, make the spinlock a combo with feature.
> >
> > Signed-off-by: Guo Ren <guoren@linux.alibaba.com>
> > Signed-off-by: Guo Ren <guoren@kernel.org>
> > Cc: Peter Zijlstra (Intel) <peterz@infradead.org>
> > Cc: Arnd Bergmann <arnd@arndb.de>
> > Cc: Palmer Dabbelt <palmer@rivosinc.com>
> > ---
> >  include/asm-generic/spinlock.h | 43 ++++++++++++++++++++++++++++++++--
> >  kernel/locking/qspinlock.c     |  2 ++
> >  2 files changed, 43 insertions(+), 2 deletions(-)
> >
> > diff --git a/include/asm-generic/spinlock.h b/include/asm-generic/spinlock.h
> > index f41dc7c2b900..a9b43089bf99 100644
> > --- a/include/asm-generic/spinlock.h
> > +++ b/include/asm-generic/spinlock.h
> > @@ -28,34 +28,73 @@
> >  #define __ASM_GENERIC_SPINLOCK_H
> >
> >  #include <asm-generic/ticket_spinlock.h>
> > +#ifdef CONFIG_ARCH_USE_QUEUED_SPINLOCKS
> > +#include <linux/jump_label.h>
> > +#include <asm-generic/qspinlock.h>
> > +
> > +DECLARE_STATIC_KEY_TRUE(use_qspinlock_key);
> > +#endif
> > +
> > +#undef arch_spin_is_locked
> > +#undef arch_spin_is_contended
> > +#undef arch_spin_value_unlocked
> > +#undef arch_spin_lock
> > +#undef arch_spin_trylock
> > +#undef arch_spin_unlock
> >
> >  static __always_inline void arch_spin_lock(arch_spinlock_t *lock)
> >  {
> > -     ticket_spin_lock(lock);
> > +#ifdef CONFIG_ARCH_USE_QUEUED_SPINLOCKS
> > +     if (static_branch_likely(&use_qspinlock_key))
> > +             queued_spin_lock(lock);
> > +     else
> > +#endif
> > +             ticket_spin_lock(lock);
> >  }
> >
> >  static __always_inline bool arch_spin_trylock(arch_spinlock_t *lock)
> >  {
> > +#ifdef CONFIG_ARCH_USE_QUEUED_SPINLOCKS
> > +     if (static_branch_likely(&use_qspinlock_key))
> > +             return queued_spin_trylock(lock);
> > +#endif
> >       return ticket_spin_trylock(lock);
> >  }
> >
> >  static __always_inline void arch_spin_unlock(arch_spinlock_t *lock)
> >  {
> > -     ticket_spin_unlock(lock);
> > +#ifdef CONFIG_ARCH_USE_QUEUED_SPINLOCKS
> > +     if (static_branch_likely(&use_qspinlock_key))
> > +             queued_spin_unlock(lock);
> > +     else
> > +#endif
> > +             ticket_spin_unlock(lock);
> >  }
> >
> >  static __always_inline int arch_spin_is_locked(arch_spinlock_t *lock)
> >  {
> > +#ifdef CONFIG_ARCH_USE_QUEUED_SPINLOCKS
> > +     if (static_branch_likely(&use_qspinlock_key))
> > +             return queued_spin_is_locked(lock);
> > +#endif
> >       return ticket_spin_is_locked(lock);
> >  }
> >
> >  static __always_inline int arch_spin_is_contended(arch_spinlock_t *lock)
> >  {
> > +#ifdef CONFIG_ARCH_USE_QUEUED_SPINLOCKS
> > +     if (static_branch_likely(&use_qspinlock_key))
> > +             return queued_spin_is_contended(lock);
> > +#endif
> >       return ticket_spin_is_contended(lock);
> >  }
> >
> >  static __always_inline int arch_spin_value_unlocked(arch_spinlock_t lock)
> >  {
> > +#ifdef CONFIG_ARCH_USE_QUEUED_SPINLOCKS
> > +     if (static_branch_likely(&use_qspinlock_key))
> > +             return queued_spin_value_unlocked(lock);
> > +#endif
> >       return ticket_spin_value_unlocked(lock);
> >  }
>
> Urggghhhh....
>
> I really don't think you want this in generic code. Also, I'm thinking
> any arch that does this wants to make sure it doesn't inline any of this
Your advice is the same as Arnd's; I will move the static_branch out of the generic code.

> stuff. That is, said arch must not have ARCH_INLINE_SPIN_*
What do you mean? I've tested with ARCH_INLINE_SPIN_* and it's okay
with EXPORT_SYMBOL(use_qspinlock_key).

diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
index 47e12ab9c822..4587fb544326 100644
--- a/arch/riscv/Kconfig
+++ b/arch/riscv/Kconfig
@@ -32,6 +32,32 @@ config RISCV
        select ARCH_HAS_STRICT_MODULE_RWX if MMU && !XIP_KERNEL
        select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST
        select ARCH_HAS_UBSAN_SANITIZE_ALL
+       select ARCH_INLINE_READ_LOCK if !PREEMPTION
+       select ARCH_INLINE_READ_LOCK_BH if !PREEMPTION
+       select ARCH_INLINE_READ_LOCK_IRQ if !PREEMPTION
+       select ARCH_INLINE_READ_LOCK_IRQSAVE if !PREEMPTION
+       select ARCH_INLINE_READ_UNLOCK if !PREEMPTION
+       select ARCH_INLINE_READ_UNLOCK_BH if !PREEMPTION
+       select ARCH_INLINE_READ_UNLOCK_IRQ if !PREEMPTION
+       select ARCH_INLINE_READ_UNLOCK_IRQRESTORE if !PREEMPTION
+       select ARCH_INLINE_WRITE_LOCK if !PREEMPTION
+       select ARCH_INLINE_WRITE_LOCK_BH if !PREEMPTION
+       select ARCH_INLINE_WRITE_LOCK_IRQ if !PREEMPTION
+       select ARCH_INLINE_WRITE_LOCK_IRQSAVE if !PREEMPTION
+       select ARCH_INLINE_WRITE_UNLOCK if !PREEMPTION
+       select ARCH_INLINE_WRITE_UNLOCK_BH if !PREEMPTION
+       select ARCH_INLINE_WRITE_UNLOCK_IRQ if !PREEMPTION
+       select ARCH_INLINE_WRITE_UNLOCK_IRQRESTORE if !PREEMPTION
+       select ARCH_INLINE_SPIN_TRYLOCK if !PREEMPTION
+       select ARCH_INLINE_SPIN_TRYLOCK_BH if !PREEMPTION
+       select ARCH_INLINE_SPIN_LOCK if !PREEMPTION
+       select ARCH_INLINE_SPIN_LOCK_BH if !PREEMPTION
+       select ARCH_INLINE_SPIN_LOCK_IRQ if !PREEMPTION
+       select ARCH_INLINE_SPIN_LOCK_IRQSAVE if !PREEMPTION
+       select ARCH_INLINE_SPIN_UNLOCK if !PREEMPTION
+       select ARCH_INLINE_SPIN_UNLOCK_BH if !PREEMPTION
+       select ARCH_INLINE_SPIN_UNLOCK_IRQ if !PREEMPTION
+       select ARCH_INLINE_SPIN_UNLOCK_IRQRESTORE if !PREEMPTION
        select ARCH_OPTIONAL_KERNEL_RWX if ARCH_HAS_STRICT_KERNEL_RWX
        select ARCH_OPTIONAL_KERNEL_RWX_DEFAULT
        select ARCH_STACKWALK

Shall I add the above diff to the next version of the patch series?

>
> And if you're going to force things out of line, then I think you can
> get better code using static_call().
Good point, thx.

>
> *shudder*...



--
Best Regards
 Guo Ren

ML: https://lore.kernel.org/linux-csky/
Peter Zijlstra July 4, 2022, 1:45 p.m. UTC | #12
On Mon, Jul 04, 2022 at 09:13:40PM +0800, Guo Ren wrote:

> > Urggghhhh....
> >
> > I really don't think you want this in generic code. Also, I'm thinking
> > any arch that does this wants to make sure it doesn't inline any of this
> Your advice is the same with Arnd, I would move static_branch out of generic.
> 
> > stuff. That is, said arch must not have ARCH_INLINE_SPIN_*
> What do you mean? I've tested with ARCH_INLINE_SPIN_* and it's okay
> with EXPORT_SYMBOL(use_qspinlock_key).

Well, with the static_branch and the two paths I just don't see the code
being sane/small enough to inline. I mean, sure, you can force it to
inline the thing, but I'm not sure that's wise.

Patch

diff --git a/include/asm-generic/spinlock.h b/include/asm-generic/spinlock.h
index f41dc7c2b900..a9b43089bf99 100644
--- a/include/asm-generic/spinlock.h
+++ b/include/asm-generic/spinlock.h
@@ -28,34 +28,73 @@ 
 #define __ASM_GENERIC_SPINLOCK_H
 
 #include <asm-generic/ticket_spinlock.h>
+#ifdef CONFIG_ARCH_USE_QUEUED_SPINLOCKS
+#include <linux/jump_label.h>
+#include <asm-generic/qspinlock.h>
+
+DECLARE_STATIC_KEY_TRUE(use_qspinlock_key);
+#endif
+
+#undef arch_spin_is_locked
+#undef arch_spin_is_contended
+#undef arch_spin_value_unlocked
+#undef arch_spin_lock
+#undef arch_spin_trylock
+#undef arch_spin_unlock
 
 static __always_inline void arch_spin_lock(arch_spinlock_t *lock)
 {
-	ticket_spin_lock(lock);
+#ifdef CONFIG_ARCH_USE_QUEUED_SPINLOCKS
+	if (static_branch_likely(&use_qspinlock_key))
+		queued_spin_lock(lock);
+	else
+#endif
+		ticket_spin_lock(lock);
 }
 
 static __always_inline bool arch_spin_trylock(arch_spinlock_t *lock)
 {
+#ifdef CONFIG_ARCH_USE_QUEUED_SPINLOCKS
+	if (static_branch_likely(&use_qspinlock_key))
+		return queued_spin_trylock(lock);
+#endif
 	return ticket_spin_trylock(lock);
 }
 
 static __always_inline void arch_spin_unlock(arch_spinlock_t *lock)
 {
-	ticket_spin_unlock(lock);
+#ifdef CONFIG_ARCH_USE_QUEUED_SPINLOCKS
+	if (static_branch_likely(&use_qspinlock_key))
+		queued_spin_unlock(lock);
+	else
+#endif
+		ticket_spin_unlock(lock);
 }
 
 static __always_inline int arch_spin_is_locked(arch_spinlock_t *lock)
 {
+#ifdef CONFIG_ARCH_USE_QUEUED_SPINLOCKS
+	if (static_branch_likely(&use_qspinlock_key))
+		return queued_spin_is_locked(lock);
+#endif
 	return ticket_spin_is_locked(lock);
 }
 
 static __always_inline int arch_spin_is_contended(arch_spinlock_t *lock)
 {
+#ifdef CONFIG_ARCH_USE_QUEUED_SPINLOCKS
+	if (static_branch_likely(&use_qspinlock_key))
+		return queued_spin_is_contended(lock);
+#endif
 	return ticket_spin_is_contended(lock);
 }
 
 static __always_inline int arch_spin_value_unlocked(arch_spinlock_t lock)
 {
+#ifdef CONFIG_ARCH_USE_QUEUED_SPINLOCKS
+	if (static_branch_likely(&use_qspinlock_key))
+		return queued_spin_value_unlocked(lock);
+#endif
 	return ticket_spin_value_unlocked(lock);
 }
 
diff --git a/kernel/locking/qspinlock.c b/kernel/locking/qspinlock.c
index 65a9a10caa6f..b7f7436f42f6 100644
--- a/kernel/locking/qspinlock.c
+++ b/kernel/locking/qspinlock.c
@@ -566,6 +566,8 @@  void queued_spin_lock_slowpath(struct qspinlock *lock, u32 val)
 }
 EXPORT_SYMBOL(queued_spin_lock_slowpath);
 
+DEFINE_STATIC_KEY_TRUE_RO(use_qspinlock_key);
+
 /*
  * Generate the paravirt code for queued_spin_unlock_slowpath().
  */