Message ID | 20190909143134.15379-2-jgross@suse.com (mailing list archive) |
---|---|
State | Superseded |
Headers | show |
Series | enhance lock debugging | expand |
On 09.09.2019 16:31, Juergen Gross wrote: > --- a/xen/include/xen/spinlock.h > +++ b/xen/include/xen/spinlock.h > @@ -5,15 +5,24 @@ > #include <asm/spinlock.h> > #include <asm/types.h> > > +#define SPINLOCK_CPU_BITS 12 > + > #ifndef NDEBUG > -struct lock_debug { > - s16 irq_safe; /* +1: IRQ-safe; 0: not IRQ-safe; -1: don't know yet */ > +union lock_debug { > + uint16_t val; > +#define LOCK_DEBUG_INITVAL 0xffff > + struct { > + uint16_t cpu:SPINLOCK_CPU_BITS; > + uint16_t :(14 - SPINLOCK_CPU_BITS); I'm sorry that I realize this only now that I see this and ... > + bool irq_safe:1; > + bool unseen:1; > + }; > }; > -#define _LOCK_DEBUG { -1 } > +#define _LOCK_DEBUG { LOCK_DEBUG_INITVAL } > void spin_debug_enable(void); > void spin_debug_disable(void); > #else > -struct lock_debug { }; > +union lock_debug { }; > #define _LOCK_DEBUG { } > #define spin_debug_enable() ((void)0) > #define spin_debug_disable() ((void)0) > @@ -138,11 +147,12 @@ typedef union { > > typedef struct spinlock { > spinlock_tickets_t tickets; > - u16 recurse_cpu:12; > -#define SPINLOCK_NO_CPU 0xfffu > - u16 recurse_cnt:4; > -#define SPINLOCK_MAX_RECURSE 0xfu > - struct lock_debug debug; > + u16 recurse_cpu:SPINLOCK_CPU_BITS; > +#define SPINLOCK_NO_CPU ((1u << SPINLOCK_CPU_BITS) - 1) > +#define SPINLOCK_RECURSE_BITS (16 - SPINLOCK_CPU_BITS) > + u16 recurse_cnt:SPINLOCK_RECURSE_BITS; > +#define SPINLOCK_MAX_RECURSE ((1u << SPINLOCK_RECURSE_BITS) - 1) ... this: These subtractions are prone to yield de-generate bitfields when the difference ends up zero (leading to presumably very strange breakage, albeit one would hope that it also would be very obvious that _something_ is broken). I think we need BUILD_BUG_ON()s checking that neither difference actually is zero. Jan
On 09.09.19 16:50, Jan Beulich wrote: > On 09.09.2019 16:31, Juergen Gross wrote: >> --- a/xen/include/xen/spinlock.h >> +++ b/xen/include/xen/spinlock.h >> @@ -5,15 +5,24 @@ >> #include <asm/spinlock.h> >> #include <asm/types.h> >> >> +#define SPINLOCK_CPU_BITS 12 >> + >> #ifndef NDEBUG >> -struct lock_debug { >> - s16 irq_safe; /* +1: IRQ-safe; 0: not IRQ-safe; -1: don't know yet */ >> +union lock_debug { >> + uint16_t val; >> +#define LOCK_DEBUG_INITVAL 0xffff >> + struct { >> + uint16_t cpu:SPINLOCK_CPU_BITS; >> + uint16_t :(14 - SPINLOCK_CPU_BITS); > > I'm sorry that I realize this only now that I see this and ... > >> + bool irq_safe:1; >> + bool unseen:1; >> + }; >> }; >> -#define _LOCK_DEBUG { -1 } >> +#define _LOCK_DEBUG { LOCK_DEBUG_INITVAL } >> void spin_debug_enable(void); >> void spin_debug_disable(void); >> #else >> -struct lock_debug { }; >> +union lock_debug { }; >> #define _LOCK_DEBUG { } >> #define spin_debug_enable() ((void)0) >> #define spin_debug_disable() ((void)0) >> @@ -138,11 +147,12 @@ typedef union { >> >> typedef struct spinlock { >> spinlock_tickets_t tickets; >> - u16 recurse_cpu:12; >> -#define SPINLOCK_NO_CPU 0xfffu >> - u16 recurse_cnt:4; >> -#define SPINLOCK_MAX_RECURSE 0xfu >> - struct lock_debug debug; >> + u16 recurse_cpu:SPINLOCK_CPU_BITS; >> +#define SPINLOCK_NO_CPU ((1u << SPINLOCK_CPU_BITS) - 1) >> +#define SPINLOCK_RECURSE_BITS (16 - SPINLOCK_CPU_BITS) >> + u16 recurse_cnt:SPINLOCK_RECURSE_BITS; >> +#define SPINLOCK_MAX_RECURSE ((1u << SPINLOCK_RECURSE_BITS) - 1) > > ... this: These subtractions are prone to yield de-generate > bitfields when the difference ends up zero (leading to > presumably very strange breakage, albeit one would hope that > it also would be very obvious that _something_ is broken). I > think we need BUILD_BUG_ON()s checking that neither > difference actually is zero. Okay, will add them. Juergen
diff --git a/xen/common/spinlock.c b/xen/common/spinlock.c index 6bc52d70c0..1be1b5ebe6 100644 --- a/xen/common/spinlock.c +++ b/xen/common/spinlock.c @@ -13,9 +13,9 @@ static atomic_t spin_debug __read_mostly = ATOMIC_INIT(0); -static void check_lock(struct lock_debug *debug) +static void check_lock(union lock_debug *debug) { - int irq_safe = !local_irq_is_enabled(); + bool irq_safe = !local_irq_is_enabled(); if ( unlikely(atomic_read(&spin_debug) <= 0) ) return; @@ -43,18 +43,21 @@ static void check_lock(struct lock_debug *debug) */ if ( unlikely(debug->irq_safe != irq_safe) ) { - int seen = cmpxchg(&debug->irq_safe, -1, irq_safe); + union lock_debug seen, new = { 0 }; - if ( seen == !irq_safe ) + new.irq_safe = irq_safe; + seen.val = cmpxchg(&debug->val, LOCK_DEBUG_INITVAL, new.val); + + if ( !seen.unseen && seen.irq_safe == !irq_safe ) { printk("CHECKLOCK FAILURE: prev irqsafe: %d, curr irqsafe %d\n", - seen, irq_safe); + seen.irq_safe, irq_safe); BUG(); } } } -static void check_barrier(struct lock_debug *debug) +static void check_barrier(union lock_debug *debug) { if ( unlikely(atomic_read(&spin_debug) <= 0) ) return; @@ -70,7 +73,18 @@ static void check_barrier(struct lock_debug *debug) * However, if we spin on an IRQ-unsafe lock with IRQs disabled then that * is clearly wrong, for the same reason outlined in check_lock() above. */ - BUG_ON(!local_irq_is_enabled() && (debug->irq_safe == 0)); + BUG_ON(!local_irq_is_enabled() && !debug->irq_safe); +} + +static void got_lock(union lock_debug *debug) +{ + debug->cpu = smp_processor_id(); +} + +static void rel_lock(union lock_debug *debug) +{ + ASSERT(debug->cpu == smp_processor_id()); + debug->cpu = SPINLOCK_NO_CPU; } void spin_debug_enable(void) @@ -87,6 +101,8 @@ void spin_debug_disable(void) #define check_lock(l) ((void)0) #define check_barrier(l) ((void)0) +#define got_lock(l) ((void)0) +#define rel_lock(l) ((void)0) #endif @@ -150,6 +166,7 @@ void inline _spin_lock_cb(spinlock_t *lock, void (*cb)(void *), void *data) cb(data); arch_lock_relax(); } + got_lock(&lock->debug); LOCK_PROFILE_GOT; preempt_disable(); arch_lock_acquire_barrier(); @@ -181,6 +198,7 @@ void _spin_unlock(spinlock_t *lock) arch_lock_release_barrier(); preempt_enable(); LOCK_PROFILE_REL; + rel_lock(&lock->debug); add_sized(&lock->tickets.head, 1); arch_lock_signal(); } @@ -224,6 +242,7 @@ int _spin_trylock(spinlock_t *lock) if ( cmpxchg(&lock->tickets.head_tail, old.head_tail, new.head_tail) != old.head_tail ) return 0; + got_lock(&lock->debug); #ifdef CONFIG_LOCK_PROFILE if (lock->profile) lock->profile->time_locked = NOW(); diff --git a/xen/include/xen/spinlock.h b/xen/include/xen/spinlock.h index 2c7415e23a..24405386a7 100644 --- a/xen/include/xen/spinlock.h +++ b/xen/include/xen/spinlock.h @@ -5,15 +5,24 @@ #include <asm/spinlock.h> #include <asm/types.h> +#define SPINLOCK_CPU_BITS 12 + #ifndef NDEBUG -struct lock_debug { - s16 irq_safe; /* +1: IRQ-safe; 0: not IRQ-safe; -1: don't know yet */ +union lock_debug { + uint16_t val; +#define LOCK_DEBUG_INITVAL 0xffff + struct { + uint16_t cpu:SPINLOCK_CPU_BITS; + uint16_t :(14 - SPINLOCK_CPU_BITS); + bool irq_safe:1; + bool unseen:1; + }; }; -#define _LOCK_DEBUG { -1 } +#define _LOCK_DEBUG { LOCK_DEBUG_INITVAL } void spin_debug_enable(void); void spin_debug_disable(void); #else -struct lock_debug { }; +union lock_debug { }; #define _LOCK_DEBUG { } #define spin_debug_enable() ((void)0) #define spin_debug_disable() ((void)0) @@ -138,11 +147,12 @@ typedef union { typedef struct spinlock { spinlock_tickets_t tickets; - u16 recurse_cpu:12; -#define SPINLOCK_NO_CPU 0xfffu - u16 recurse_cnt:4; -#define SPINLOCK_MAX_RECURSE 0xfu - struct lock_debug debug; + u16 recurse_cpu:SPINLOCK_CPU_BITS; +#define SPINLOCK_NO_CPU ((1u << SPINLOCK_CPU_BITS) - 1) +#define SPINLOCK_RECURSE_BITS (16 - SPINLOCK_CPU_BITS) + u16 recurse_cnt:SPINLOCK_RECURSE_BITS; +#define SPINLOCK_MAX_RECURSE ((1u << SPINLOCK_RECURSE_BITS) - 1) + union lock_debug debug; #ifdef CONFIG_LOCK_PROFILE struct lock_profile *profile; #endif
Add the cpu currently holding the lock to struct lock_debug. This makes analysis of locking errors easier and it can be tested whether the correct cpu is releasing a lock again. Signed-off-by: Juergen Gross <jgross@suse.com> --- V2: - adjust types (Jan Beulich) V4: - add define for bitfield size to store cpu number (Jan Beulich) - make padding field unnamed (Jan Beulich) --- xen/common/spinlock.c | 33 ++++++++++++++++++++++++++------- xen/include/xen/spinlock.h | 28 +++++++++++++++++++--------- 2 files changed, 45 insertions(+), 16 deletions(-)