Message ID | 20190829101846.21369-2-jgross@suse.com (mailing list archive) |
---|---|
State | Superseded |
Headers | show |
Series | enhance lock debugging | expand |
On 29.08.2019 12:18, Juergen Gross wrote: > --- a/xen/include/xen/spinlock.h > +++ b/xen/include/xen/spinlock.h > @@ -6,14 +6,21 @@ > #include <asm/types.h> > > #ifndef NDEBUG > -struct lock_debug { > - s16 irq_safe; /* +1: IRQ-safe; 0: not IRQ-safe; -1: don't know yet */ > +union lock_debug { > + uint16_t val; > +#define LOCK_DEBUG_INITVAL 0xffff > + struct { > + uint16_t cpu:12; I'm afraid I have one more request: The literal 12 in struct spinlock is sitting right next to the SPINLOCK_NO_CPU definition, making it pretty unlikely that both author and reviewer of a change wouldn't notice one getting changed without adjustment to the other. The literal 12 here, though, is sufficiently remote to that other place, so there needs to be a connection established. Best I can think of right away is to have a #define for the 12, move SPINLOCK_NO_CPU's definition next to it, and use the new constant in both places. > + uint16_t pad:2; While at it, would you mind making this an unnamed field? Jan
On 03.09.19 16:11, Jan Beulich wrote: > On 29.08.2019 12:18, Juergen Gross wrote: >> --- a/xen/include/xen/spinlock.h >> +++ b/xen/include/xen/spinlock.h >> @@ -6,14 +6,21 @@ >> #include <asm/types.h> >> >> #ifndef NDEBUG >> -struct lock_debug { >> - s16 irq_safe; /* +1: IRQ-safe; 0: not IRQ-safe; -1: don't know yet */ >> +union lock_debug { >> + uint16_t val; >> +#define LOCK_DEBUG_INITVAL 0xffff >> + struct { >> + uint16_t cpu:12; > > I'm afraid I have one more request: The literal 12 in struct spinlock is > sitting right next to the SPINLOCK_NO_CPU definition, making it pretty > unlikely that both author and reviewer of a change wouldn't notice one > getting changed without adjustment to the other. The literal 12 here, > though, is sufficiently remote to that other place, so there needs to be > a connection established. Best I can think of right away is to have a > #define for the 12, move SPINLOCK_NO_CPU's definition next to it, and > use the new constant in both places. Can do. > >> + uint16_t pad:2; > > While at it, would you mind making this an unnamed field? NP. I guess the "2" should then be replaced to depend on the added #define above. Juergen
On 03.09.2019 16:31, Juergen Gross wrote: > On 03.09.19 16:11, Jan Beulich wrote: >> On 29.08.2019 12:18, Juergen Gross wrote: >>> --- a/xen/include/xen/spinlock.h >>> +++ b/xen/include/xen/spinlock.h >>> @@ -6,14 +6,21 @@ >>> #include <asm/types.h> >>> >>> #ifndef NDEBUG >>> -struct lock_debug { >>> - s16 irq_safe; /* +1: IRQ-safe; 0: not IRQ-safe; -1: don't know yet */ >>> +union lock_debug { >>> + uint16_t val; >>> +#define LOCK_DEBUG_INITVAL 0xffff >>> + struct { >>> + uint16_t cpu:12; >> >> I'm afraid I have one more request: The literal 12 in struct spinlock is >> sitting right next to the SPINLOCK_NO_CPU definition, making it pretty >> unlikely that both author and reviewer of a change wouldn't notice one >> getting changed without adjustment to the other. The literal 12 here, >> though, is sufficiently remote to that other place, so there needs to be >> a connection established. Best I can think of right away is to have a >> #define for the 12, move SPINLOCK_NO_CPU's definition next to it, and >> use the new constant in both places. > > Can do. > >> >>> + uint16_t pad:2; >> >> While at it, would you mind making this an unnamed field? > > NP. I guess the "2" should then be replaced to depend on the added > #define above. Ah yes, I didn't even notice this yet. Jan
diff --git a/xen/common/spinlock.c b/xen/common/spinlock.c index 6bc52d70c0..1be1b5ebe6 100644 --- a/xen/common/spinlock.c +++ b/xen/common/spinlock.c @@ -13,9 +13,9 @@ static atomic_t spin_debug __read_mostly = ATOMIC_INIT(0); -static void check_lock(struct lock_debug *debug) +static void check_lock(union lock_debug *debug) { - int irq_safe = !local_irq_is_enabled(); + bool irq_safe = !local_irq_is_enabled(); if ( unlikely(atomic_read(&spin_debug) <= 0) ) return; @@ -43,18 +43,21 @@ static void check_lock(struct lock_debug *debug) */ if ( unlikely(debug->irq_safe != irq_safe) ) { - int seen = cmpxchg(&debug->irq_safe, -1, irq_safe); + union lock_debug seen, new = { 0 }; - if ( seen == !irq_safe ) + new.irq_safe = irq_safe; + seen.val = cmpxchg(&debug->val, LOCK_DEBUG_INITVAL, new.val); + + if ( !seen.unseen && seen.irq_safe == !irq_safe ) { printk("CHECKLOCK FAILURE: prev irqsafe: %d, curr irqsafe %d\n", - seen, irq_safe); + seen.irq_safe, irq_safe); BUG(); } } } -static void check_barrier(struct lock_debug *debug) +static void check_barrier(union lock_debug *debug) { if ( unlikely(atomic_read(&spin_debug) <= 0) ) return; @@ -70,7 +73,18 @@ static void check_barrier(struct lock_debug *debug) * However, if we spin on an IRQ-unsafe lock with IRQs disabled then that * is clearly wrong, for the same reason outlined in check_lock() above. */ - BUG_ON(!local_irq_is_enabled() && (debug->irq_safe == 0)); + BUG_ON(!local_irq_is_enabled() && !debug->irq_safe); +} + +static void got_lock(union lock_debug *debug) +{ + debug->cpu = smp_processor_id(); +} + +static void rel_lock(union lock_debug *debug) +{ + ASSERT(debug->cpu == smp_processor_id()); + debug->cpu = SPINLOCK_NO_CPU; } void spin_debug_enable(void) @@ -87,6 +101,8 @@ void spin_debug_disable(void) #define check_lock(l) ((void)0) #define check_barrier(l) ((void)0) +#define got_lock(l) ((void)0) +#define rel_lock(l) ((void)0) #endif @@ -150,6 +166,7 @@ void inline _spin_lock_cb(spinlock_t *lock, void (*cb)(void *), void *data) cb(data); arch_lock_relax(); } + got_lock(&lock->debug); LOCK_PROFILE_GOT; preempt_disable(); arch_lock_acquire_barrier(); @@ -181,6 +198,7 @@ void _spin_unlock(spinlock_t *lock) arch_lock_release_barrier(); preempt_enable(); LOCK_PROFILE_REL; + rel_lock(&lock->debug); add_sized(&lock->tickets.head, 1); arch_lock_signal(); } @@ -224,6 +242,7 @@ int _spin_trylock(spinlock_t *lock) if ( cmpxchg(&lock->tickets.head_tail, old.head_tail, new.head_tail) != old.head_tail ) return 0; + got_lock(&lock->debug); #ifdef CONFIG_LOCK_PROFILE if (lock->profile) lock->profile->time_locked = NOW(); diff --git a/xen/include/xen/spinlock.h b/xen/include/xen/spinlock.h index 2c7415e23a..88017e0a89 100644 --- a/xen/include/xen/spinlock.h +++ b/xen/include/xen/spinlock.h @@ -6,14 +6,21 @@ #include <asm/types.h> #ifndef NDEBUG -struct lock_debug { - s16 irq_safe; /* +1: IRQ-safe; 0: not IRQ-safe; -1: don't know yet */ +union lock_debug { + uint16_t val; +#define LOCK_DEBUG_INITVAL 0xffff + struct { + uint16_t cpu:12; + uint16_t pad:2; + bool irq_safe:1; + bool unseen:1; + }; }; -#define _LOCK_DEBUG { -1 } +#define _LOCK_DEBUG { LOCK_DEBUG_INITVAL } void spin_debug_enable(void); void spin_debug_disable(void); #else -struct lock_debug { }; +union lock_debug { }; #define _LOCK_DEBUG { } #define spin_debug_enable() ((void)0) #define spin_debug_disable() ((void)0) @@ -142,7 +149,7 @@ typedef struct spinlock { #define SPINLOCK_NO_CPU 0xfffu u16 recurse_cnt:4; #define SPINLOCK_MAX_RECURSE 0xfu - struct lock_debug debug; + union lock_debug debug; #ifdef CONFIG_LOCK_PROFILE struct lock_profile *profile; #endif
Add the cpu currently holding the lock to struct lock_debug. This makes analysis of locking errors easier and it can be tested whether the correct cpu is releasing a lock again. Signed-off-by: Juergen Gross <jgross@suse.com> --- V2: adjust types (Jan Beulich) --- xen/common/spinlock.c | 33 ++++++++++++++++++++++++++------- xen/include/xen/spinlock.h | 17 ++++++++++++----- 2 files changed, 38 insertions(+), 12 deletions(-)