Message ID | 20220316232600.20419-3-palmer@rivosinc.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | Generic Ticket Spinlocks | expand |
On Wed, Mar 16, 2022 at 04:25:57PM -0700, Palmer Dabbelt wrote: > From: Peter Zijlstra <peterz@infradead.org> > > This is a simple, fair spinlock. Specifically it doesn't have all the > subtle memory model dependencies that qspinlock has, which makes it more > suitable for simple systems as it is more likely to be correct. > > [Palmer: commit text] > Signed-off-by: Palmer Dabbelt <palmer@rivosinc.com> Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> > > -- > > I have specifically not included Peter's SOB on this, as he sent his > original patch > <https://lore.kernel.org/lkml/YHbBBuVFNnI4kjj3@hirez.programming.kicks-ass.net/> > without one. Fixed ;-)
On Wed, Mar 16, 2022 at 04:25:57PM -0700, Palmer Dabbelt wrote: > From: Peter Zijlstra <peterz@infradead.org> > > This is a simple, fair spinlock. Specifically it doesn't have all the > subtle memory model dependencies that qspinlock has, which makes it more > suitable for simple systems as it is more likely to be correct. > > [Palmer: commit text] > Signed-off-by: Palmer Dabbelt <palmer@rivosinc.com> > > -- > > I have specifically not included Peter's SOB on this, as he sent his > original patch > <https://lore.kernel.org/lkml/YHbBBuVFNnI4kjj3@hirez.programming.kicks-ass.net/> > without one. > --- > include/asm-generic/ticket-lock-types.h | 11 ++++ > include/asm-generic/ticket-lock.h | 86 +++++++++++++++++++++++++ > 2 files changed, 97 insertions(+) > create mode 100644 include/asm-generic/ticket-lock-types.h > create mode 100644 include/asm-generic/ticket-lock.h > > diff --git a/include/asm-generic/ticket-lock-types.h b/include/asm-generic/ticket-lock-types.h > new file mode 100644 > index 000000000000..829759aedda8 > --- /dev/null > +++ b/include/asm-generic/ticket-lock-types.h > @@ -0,0 +1,11 @@ > +/* SPDX-License-Identifier: GPL-2.0 */ > + > +#ifndef __ASM_GENERIC_TICKET_LOCK_TYPES_H > +#define __ASM_GENERIC_TICKET_LOCK_TYPES_H > + > +#include <linux/types.h> > +typedef atomic_t arch_spinlock_t; > + > +#define __ARCH_SPIN_LOCK_UNLOCKED ATOMIC_INIT(0) > + > +#endif /* __ASM_GENERIC_TICKET_LOCK_TYPES_H */ > diff --git a/include/asm-generic/ticket-lock.h b/include/asm-generic/ticket-lock.h > new file mode 100644 > index 000000000000..3f0d53e21a37 > --- /dev/null > +++ b/include/asm-generic/ticket-lock.h > @@ -0,0 +1,86 @@ > +/* SPDX-License-Identifier: GPL-2.0 */ > + > +/* > + * 'Generic' ticket-lock implementation. > + * > + * It relies on atomic_fetch_add() having well defined forward progress > + * guarantees under contention. If your architecture cannot provide this, stick > + * to a test-and-set lock. > + * > + * It also relies on atomic_fetch_add() being safe vs smp_store_release() on a > + * sub-word of the value. This is generally true for anything LL/SC although > + * you'd be hard pressed to find anything useful in architecture specifications > + * about this. If your architecture cannot do this you might be better off with > + * a test-and-set. > + * > + * It further assumes atomic_*_release() + atomic_*_acquire() is RCpc and hence > + * uses atomic_fetch_add() which is SC to create an RCsc lock. > + * Probably it's better to use "fully-ordered" instead of "SC", because our atomic documents never use "SC" or "Sequential Consisteny" to describe the semantics, further I'm not sure our "fully-ordered" is equivalent to SC, better not cause misunderstanding in the future here. > + * The implementation uses smp_cond_load_acquire() to spin, so if the > + * architecture has WFE like instructions to sleep instead of poll for word > + * modifications be sure to implement that (see ARM64 for example). > + * > + */ > + > +#ifndef __ASM_GENERIC_TICKET_LOCK_H > +#define __ASM_GENERIC_TICKET_LOCK_H > + > +#include <linux/atomic.h> > +#include <asm/ticket-lock-types.h> > + > +static __always_inline void ticket_lock(arch_spinlock_t *lock) > +{ > + u32 val = atomic_fetch_add(1<<16, lock); /* SC, gives us RCsc */ > + u16 ticket = val >> 16; > + > + if (ticket == (u16)val) > + return; > + > + atomic_cond_read_acquire(lock, ticket == (u16)VAL); If you want to make the lock RCsc, you will also need to make the above atomic_cond_read_acquire() a RCsc acquire, now it's only RCpc. Regards, Boqun > +} > + > +static __always_inline bool ticket_trylock(arch_spinlock_t *lock) > +{ > + u32 old = atomic_read(lock); > + > + if ((old >> 16) != (old & 0xffff)) > + return false; > + > + return atomic_try_cmpxchg(lock, &old, old + (1<<16)); /* SC, for RCsc */ > +} > + > +static __always_inline void ticket_unlock(arch_spinlock_t *lock) > +{ > + u16 *ptr = (u16 *)lock + __is_defined(__BIG_ENDIAN); > + u32 val = atomic_read(lock); > + > + smp_store_release(ptr, (u16)val + 1); > +} > + > +static __always_inline int ticket_is_locked(arch_spinlock_t *lock) > +{ > + u32 val = atomic_read(lock); > + > + return ((val >> 16) != (val & 0xffff)); > +} > + > +static __always_inline int ticket_is_contended(arch_spinlock_t *lock) > +{ > + u32 val = atomic_read(lock); > + > + return (s16)((val >> 16) - (val & 0xffff)) > 1; > +} > + > +static __always_inline int ticket_value_unlocked(arch_spinlock_t lock) > +{ > + return !ticket_is_locked(&lock); > +} > + > +#define arch_spin_lock(l) ticket_lock(l) > +#define arch_spin_trylock(l) ticket_trylock(l) > +#define arch_spin_unlock(l) ticket_unlock(l) > +#define arch_spin_is_locked(l) ticket_is_locked(l) > +#define arch_spin_is_contended(l) ticket_is_contended(l) > +#define arch_spin_value_unlocked(l) ticket_value_unlocked(l) > + > +#endif /* __ASM_GENERIC_TICKET_LOCK_H */ > -- > 2.34.1 >
On 3/17/22 09:57, Boqun Feng wrote: > On Wed, Mar 16, 2022 at 04:25:57PM -0700, Palmer Dabbelt wrote: >> From: Peter Zijlstra <peterz@infradead.org> >> >> This is a simple, fair spinlock. Specifically it doesn't have all the >> subtle memory model dependencies that qspinlock has, which makes it more >> suitable for simple systems as it is more likely to be correct. >> >> [Palmer: commit text] >> Signed-off-by: Palmer Dabbelt <palmer@rivosinc.com> >> >> -- >> >> I have specifically not included Peter's SOB on this, as he sent his >> original patch >> <https://lore.kernel.org/lkml/YHbBBuVFNnI4kjj3@hirez.programming.kicks-ass.net/> >> without one. >> --- >> include/asm-generic/ticket-lock-types.h | 11 ++++ >> include/asm-generic/ticket-lock.h | 86 +++++++++++++++++++++++++ >> 2 files changed, 97 insertions(+) >> create mode 100644 include/asm-generic/ticket-lock-types.h >> create mode 100644 include/asm-generic/ticket-lock.h >> >> diff --git a/include/asm-generic/ticket-lock-types.h b/include/asm-generic/ticket-lock-types.h >> new file mode 100644 >> index 000000000000..829759aedda8 >> --- /dev/null >> +++ b/include/asm-generic/ticket-lock-types.h >> @@ -0,0 +1,11 @@ >> +/* SPDX-License-Identifier: GPL-2.0 */ >> + >> +#ifndef __ASM_GENERIC_TICKET_LOCK_TYPES_H >> +#define __ASM_GENERIC_TICKET_LOCK_TYPES_H >> + >> +#include <linux/types.h> >> +typedef atomic_t arch_spinlock_t; >> + >> +#define __ARCH_SPIN_LOCK_UNLOCKED ATOMIC_INIT(0) >> + >> +#endif /* __ASM_GENERIC_TICKET_LOCK_TYPES_H */ >> diff --git a/include/asm-generic/ticket-lock.h b/include/asm-generic/ticket-lock.h >> new file mode 100644 >> index 000000000000..3f0d53e21a37 >> --- /dev/null >> +++ b/include/asm-generic/ticket-lock.h >> @@ -0,0 +1,86 @@ >> +/* SPDX-License-Identifier: GPL-2.0 */ >> + >> +/* >> + * 'Generic' ticket-lock implementation. >> + * >> + * It relies on atomic_fetch_add() having well defined forward progress >> + * guarantees under contention. If your architecture cannot provide this, stick >> + * to a test-and-set lock. >> + * >> + * It also relies on atomic_fetch_add() being safe vs smp_store_release() on a >> + * sub-word of the value. This is generally true for anything LL/SC although >> + * you'd be hard pressed to find anything useful in architecture specifications >> + * about this. If your architecture cannot do this you might be better off with >> + * a test-and-set. >> + * >> + * It further assumes atomic_*_release() + atomic_*_acquire() is RCpc and hence >> + * uses atomic_fetch_add() which is SC to create an RCsc lock. >> + * > Probably it's better to use "fully-ordered" instead of "SC", because our > atomic documents never use "SC" or "Sequential Consisteny" to describe > the semantics, further I'm not sure our "fully-ordered" is equivalent to > SC, better not cause misunderstanding in the future here. The terms RCpc, RCsc comes from academia. I believe we can keep this but add more comment to elaborate what they are and what do they mean for the average kernel engineer. Cheers, Longman
On Thu, Mar 17, 2022 at 11:03:40AM -0400, Waiman Long wrote: [...] > > > + * It also relies on atomic_fetch_add() being safe vs smp_store_release() on a > > > + * sub-word of the value. This is generally true for anything LL/SC although > > > + * you'd be hard pressed to find anything useful in architecture specifications > > > + * about this. If your architecture cannot do this you might be better off with > > > + * a test-and-set. > > > + * > > > + * It further assumes atomic_*_release() + atomic_*_acquire() is RCpc and hence > > > + * uses atomic_fetch_add() which is SC to create an RCsc lock. > > > + * > > Probably it's better to use "fully-ordered" instead of "SC", because our > > atomic documents never use "SC" or "Sequential Consisteny" to describe > > the semantics, further I'm not sure our "fully-ordered" is equivalent to > > SC, better not cause misunderstanding in the future here. > > The terms RCpc, RCsc comes from academia. I believe we can keep this but add I'm not saying we cannot keep "RCpc" and "RCsc", and we actually use them to describe the memory ordering attributes of our lock or atomic primitives. These terms are well defined. The thing is that instead of "SC" we use "fully-ordered" to describe the memory ordering semantics of atomics like cmpxchg(), and IIUC the definition of "SC" isn't equivalent to "fully-ordered", in other words, there is no "SC" atomic in Linux kernel right now. So using "SC" here is not quite right. Just say "...which is fully-ordered to create an RCsc lock." But yes, maybe I'm wrong, and "SC" can be used exchangably with "fully-ordered", but at least some reasoning is needed. Regards, Boqun > more comment to elaborate what they are and what do they mean for the > average kernel engineer. > > Cheers, > Longman >
On 3/16/22 19:25, Palmer Dabbelt wrote: > From: Peter Zijlstra <peterz@infradead.org> > > This is a simple, fair spinlock. Specifically it doesn't have all the > subtle memory model dependencies that qspinlock has, which makes it more > suitable for simple systems as it is more likely to be correct. > > [Palmer: commit text] > Signed-off-by: Palmer Dabbelt <palmer@rivosinc.com> > > -- > > I have specifically not included Peter's SOB on this, as he sent his > original patch > <https://lore.kernel.org/lkml/YHbBBuVFNnI4kjj3@hirez.programming.kicks-ass.net/> > without one. > --- > include/asm-generic/ticket-lock-types.h | 11 ++++ > include/asm-generic/ticket-lock.h | 86 +++++++++++++++++++++++++ > 2 files changed, 97 insertions(+) > create mode 100644 include/asm-generic/ticket-lock-types.h > create mode 100644 include/asm-generic/ticket-lock.h > > diff --git a/include/asm-generic/ticket-lock-types.h b/include/asm-generic/ticket-lock-types.h > new file mode 100644 > index 000000000000..829759aedda8 > --- /dev/null > +++ b/include/asm-generic/ticket-lock-types.h > @@ -0,0 +1,11 @@ > +/* SPDX-License-Identifier: GPL-2.0 */ > + > +#ifndef __ASM_GENERIC_TICKET_LOCK_TYPES_H > +#define __ASM_GENERIC_TICKET_LOCK_TYPES_H > + > +#include <linux/types.h> > +typedef atomic_t arch_spinlock_t; > + > +#define __ARCH_SPIN_LOCK_UNLOCKED ATOMIC_INIT(0) > + > +#endif /* __ASM_GENERIC_TICKET_LOCK_TYPES_H */ > diff --git a/include/asm-generic/ticket-lock.h b/include/asm-generic/ticket-lock.h > new file mode 100644 > index 000000000000..3f0d53e21a37 > --- /dev/null > +++ b/include/asm-generic/ticket-lock.h > @@ -0,0 +1,86 @@ > +/* SPDX-License-Identifier: GPL-2.0 */ > + > +/* > + * 'Generic' ticket-lock implementation. > + * > + * It relies on atomic_fetch_add() having well defined forward progress > + * guarantees under contention. If your architecture cannot provide this, stick > + * to a test-and-set lock. > + * > + * It also relies on atomic_fetch_add() being safe vs smp_store_release() on a > + * sub-word of the value. This is generally true for anything LL/SC although > + * you'd be hard pressed to find anything useful in architecture specifications > + * about this. If your architecture cannot do this you might be better off with > + * a test-and-set. > + * > + * It further assumes atomic_*_release() + atomic_*_acquire() is RCpc and hence > + * uses atomic_fetch_add() which is SC to create an RCsc lock. > + * > + * The implementation uses smp_cond_load_acquire() to spin, so if the > + * architecture has WFE like instructions to sleep instead of poll for word > + * modifications be sure to implement that (see ARM64 for example). > + * > + */ > + > +#ifndef __ASM_GENERIC_TICKET_LOCK_H > +#define __ASM_GENERIC_TICKET_LOCK_H > + > +#include <linux/atomic.h> > +#include <asm/ticket-lock-types.h> > + > +static __always_inline void ticket_lock(arch_spinlock_t *lock) > +{ > + u32 val = atomic_fetch_add(1<<16, lock); /* SC, gives us RCsc */ > + u16 ticket = val >> 16; > + > + if (ticket == (u16)val) > + return; > + > + atomic_cond_read_acquire(lock, ticket == (u16)VAL); > +} > + > +static __always_inline bool ticket_trylock(arch_spinlock_t *lock) > +{ > + u32 old = atomic_read(lock); > + > + if ((old >> 16) != (old & 0xffff)) > + return false; > + > + return atomic_try_cmpxchg(lock, &old, old + (1<<16)); /* SC, for RCsc */ > +} > + > +static __always_inline void ticket_unlock(arch_spinlock_t *lock) > +{ > + u16 *ptr = (u16 *)lock + __is_defined(__BIG_ENDIAN); > + u32 val = atomic_read(lock); > + > + smp_store_release(ptr, (u16)val + 1); > +} > + > +static __always_inline int ticket_is_locked(arch_spinlock_t *lock) > +{ > + u32 val = atomic_read(lock); > + > + return ((val >> 16) != (val & 0xffff)); > +} > + > +static __always_inline int ticket_is_contended(arch_spinlock_t *lock) > +{ > + u32 val = atomic_read(lock); > + > + return (s16)((val >> 16) - (val & 0xffff)) > 1; > +} > + > +static __always_inline int ticket_value_unlocked(arch_spinlock_t lock) > +{ > + return !ticket_is_locked(&lock); > +} > + > +#define arch_spin_lock(l) ticket_lock(l) > +#define arch_spin_trylock(l) ticket_trylock(l) > +#define arch_spin_unlock(l) ticket_unlock(l) > +#define arch_spin_is_locked(l) ticket_is_locked(l) > +#define arch_spin_is_contended(l) ticket_is_contended(l) > +#define arch_spin_value_unlocked(l) ticket_value_unlocked(l) > + > +#endif /* __ASM_GENERIC_TICKET_LOCK_H */ Acked-by: Waiman Long <longman@redhat.com>
diff --git a/include/asm-generic/ticket-lock-types.h b/include/asm-generic/ticket-lock-types.h new file mode 100644 index 000000000000..829759aedda8 --- /dev/null +++ b/include/asm-generic/ticket-lock-types.h @@ -0,0 +1,11 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#ifndef __ASM_GENERIC_TICKET_LOCK_TYPES_H +#define __ASM_GENERIC_TICKET_LOCK_TYPES_H + +#include <linux/types.h> +typedef atomic_t arch_spinlock_t; + +#define __ARCH_SPIN_LOCK_UNLOCKED ATOMIC_INIT(0) + +#endif /* __ASM_GENERIC_TICKET_LOCK_TYPES_H */ diff --git a/include/asm-generic/ticket-lock.h b/include/asm-generic/ticket-lock.h new file mode 100644 index 000000000000..3f0d53e21a37 --- /dev/null +++ b/include/asm-generic/ticket-lock.h @@ -0,0 +1,86 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +/* + * 'Generic' ticket-lock implementation. + * + * It relies on atomic_fetch_add() having well defined forward progress + * guarantees under contention. If your architecture cannot provide this, stick + * to a test-and-set lock. + * + * It also relies on atomic_fetch_add() being safe vs smp_store_release() on a + * sub-word of the value. This is generally true for anything LL/SC although + * you'd be hard pressed to find anything useful in architecture specifications + * about this. If your architecture cannot do this you might be better off with + * a test-and-set. + * + * It further assumes atomic_*_release() + atomic_*_acquire() is RCpc and hence + * uses atomic_fetch_add() which is SC to create an RCsc lock. + * + * The implementation uses smp_cond_load_acquire() to spin, so if the + * architecture has WFE like instructions to sleep instead of poll for word + * modifications be sure to implement that (see ARM64 for example). + * + */ + +#ifndef __ASM_GENERIC_TICKET_LOCK_H +#define __ASM_GENERIC_TICKET_LOCK_H + +#include <linux/atomic.h> +#include <asm/ticket-lock-types.h> + +static __always_inline void ticket_lock(arch_spinlock_t *lock) +{ + u32 val = atomic_fetch_add(1<<16, lock); /* SC, gives us RCsc */ + u16 ticket = val >> 16; + + if (ticket == (u16)val) + return; + + atomic_cond_read_acquire(lock, ticket == (u16)VAL); +} + +static __always_inline bool ticket_trylock(arch_spinlock_t *lock) +{ + u32 old = atomic_read(lock); + + if ((old >> 16) != (old & 0xffff)) + return false; + + return atomic_try_cmpxchg(lock, &old, old + (1<<16)); /* SC, for RCsc */ +} + +static __always_inline void ticket_unlock(arch_spinlock_t *lock) +{ + u16 *ptr = (u16 *)lock + __is_defined(__BIG_ENDIAN); + u32 val = atomic_read(lock); + + smp_store_release(ptr, (u16)val + 1); +} + +static __always_inline int ticket_is_locked(arch_spinlock_t *lock) +{ + u32 val = atomic_read(lock); + + return ((val >> 16) != (val & 0xffff)); +} + +static __always_inline int ticket_is_contended(arch_spinlock_t *lock) +{ + u32 val = atomic_read(lock); + + return (s16)((val >> 16) - (val & 0xffff)) > 1; +} + +static __always_inline int ticket_value_unlocked(arch_spinlock_t lock) +{ + return !ticket_is_locked(&lock); +} + +#define arch_spin_lock(l) ticket_lock(l) +#define arch_spin_trylock(l) ticket_trylock(l) +#define arch_spin_unlock(l) ticket_unlock(l) +#define arch_spin_is_locked(l) ticket_is_locked(l) +#define arch_spin_is_contended(l) ticket_is_contended(l) +#define arch_spin_value_unlocked(l) ticket_value_unlocked(l) + +#endif /* __ASM_GENERIC_TICKET_LOCK_H */