diff mbox series

[v4,1/5] xen/spinlocks: in debug builds store cpu holding the lock

Message ID 20190909143134.15379-2-jgross@suse.com (mailing list archive)
State Superseded
Headers show
Series enhance lock debugging | expand

Commit Message

Jürgen Groß Sept. 9, 2019, 2:31 p.m. UTC
Add the cpu currently holding the lock to struct lock_debug. This makes
analysis of locking errors easier and it can be tested whether the
correct cpu is releasing a lock again.

Signed-off-by: Juergen Gross <jgross@suse.com>
---
V2:
- adjust types (Jan Beulich)
V4:
- add define for bitfield size to store cpu number (Jan Beulich)
- make padding field unnamed (Jan Beulich)
---
 xen/common/spinlock.c      | 33 ++++++++++++++++++++++++++-------
 xen/include/xen/spinlock.h | 28 +++++++++++++++++++---------
 2 files changed, 45 insertions(+), 16 deletions(-)

Comments

Jan Beulich Sept. 9, 2019, 2:50 p.m. UTC | #1
On 09.09.2019 16:31, Juergen Gross wrote:
> --- a/xen/include/xen/spinlock.h
> +++ b/xen/include/xen/spinlock.h
> @@ -5,15 +5,24 @@
>  #include <asm/spinlock.h>
>  #include <asm/types.h>
>  
> +#define SPINLOCK_CPU_BITS  12
> +
>  #ifndef NDEBUG
> -struct lock_debug {
> -    s16 irq_safe; /* +1: IRQ-safe; 0: not IRQ-safe; -1: don't know yet */
> +union lock_debug {
> +    uint16_t val;
> +#define LOCK_DEBUG_INITVAL 0xffff
> +    struct {
> +        uint16_t cpu:SPINLOCK_CPU_BITS;
> +        uint16_t :(14 - SPINLOCK_CPU_BITS);

I'm sorry that I realize this only now that I see this and ...

> +        bool irq_safe:1;
> +        bool unseen:1;
> +    };
>  };
> -#define _LOCK_DEBUG { -1 }
> +#define _LOCK_DEBUG { LOCK_DEBUG_INITVAL }
>  void spin_debug_enable(void);
>  void spin_debug_disable(void);
>  #else
> -struct lock_debug { };
> +union lock_debug { };
>  #define _LOCK_DEBUG { }
>  #define spin_debug_enable() ((void)0)
>  #define spin_debug_disable() ((void)0)
> @@ -138,11 +147,12 @@ typedef union {
>  
>  typedef struct spinlock {
>      spinlock_tickets_t tickets;
> -    u16 recurse_cpu:12;
> -#define SPINLOCK_NO_CPU 0xfffu
> -    u16 recurse_cnt:4;
> -#define SPINLOCK_MAX_RECURSE 0xfu
> -    struct lock_debug debug;
> +    u16 recurse_cpu:SPINLOCK_CPU_BITS;
> +#define SPINLOCK_NO_CPU        ((1u << SPINLOCK_CPU_BITS) - 1)
> +#define SPINLOCK_RECURSE_BITS  (16 - SPINLOCK_CPU_BITS)
> +    u16 recurse_cnt:SPINLOCK_RECURSE_BITS;
> +#define SPINLOCK_MAX_RECURSE   ((1u << SPINLOCK_RECURSE_BITS) - 1)

... this: These subtractions are prone to yield de-generate
bitfields when the difference ends up zero (leading to
presumably very strange breakage, albeit one would hope that
it also would be very obvious that _something_ is broken). I
think we need BUILD_BUG_ON()s checking that neither
difference actually is zero.

Jan
Jürgen Groß Sept. 9, 2019, 2:56 p.m. UTC | #2
On 09.09.19 16:50, Jan Beulich wrote:
> On 09.09.2019 16:31, Juergen Gross wrote:
>> --- a/xen/include/xen/spinlock.h
>> +++ b/xen/include/xen/spinlock.h
>> @@ -5,15 +5,24 @@
>>   #include <asm/spinlock.h>
>>   #include <asm/types.h>
>>   
>> +#define SPINLOCK_CPU_BITS  12
>> +
>>   #ifndef NDEBUG
>> -struct lock_debug {
>> -    s16 irq_safe; /* +1: IRQ-safe; 0: not IRQ-safe; -1: don't know yet */
>> +union lock_debug {
>> +    uint16_t val;
>> +#define LOCK_DEBUG_INITVAL 0xffff
>> +    struct {
>> +        uint16_t cpu:SPINLOCK_CPU_BITS;
>> +        uint16_t :(14 - SPINLOCK_CPU_BITS);
> 
> I'm sorry that I realize this only now that I see this and ...
> 
>> +        bool irq_safe:1;
>> +        bool unseen:1;
>> +    };
>>   };
>> -#define _LOCK_DEBUG { -1 }
>> +#define _LOCK_DEBUG { LOCK_DEBUG_INITVAL }
>>   void spin_debug_enable(void);
>>   void spin_debug_disable(void);
>>   #else
>> -struct lock_debug { };
>> +union lock_debug { };
>>   #define _LOCK_DEBUG { }
>>   #define spin_debug_enable() ((void)0)
>>   #define spin_debug_disable() ((void)0)
>> @@ -138,11 +147,12 @@ typedef union {
>>   
>>   typedef struct spinlock {
>>       spinlock_tickets_t tickets;
>> -    u16 recurse_cpu:12;
>> -#define SPINLOCK_NO_CPU 0xfffu
>> -    u16 recurse_cnt:4;
>> -#define SPINLOCK_MAX_RECURSE 0xfu
>> -    struct lock_debug debug;
>> +    u16 recurse_cpu:SPINLOCK_CPU_BITS;
>> +#define SPINLOCK_NO_CPU        ((1u << SPINLOCK_CPU_BITS) - 1)
>> +#define SPINLOCK_RECURSE_BITS  (16 - SPINLOCK_CPU_BITS)
>> +    u16 recurse_cnt:SPINLOCK_RECURSE_BITS;
>> +#define SPINLOCK_MAX_RECURSE   ((1u << SPINLOCK_RECURSE_BITS) - 1)
> 
> ... this: These subtractions are prone to yield de-generate
> bitfields when the difference ends up zero (leading to
> presumably very strange breakage, albeit one would hope that
> it also would be very obvious that _something_ is broken). I
> think we need BUILD_BUG_ON()s checking that neither
> difference actually is zero.

Okay, will add them.


Juergen
diff mbox series

Patch

diff --git a/xen/common/spinlock.c b/xen/common/spinlock.c
index 6bc52d70c0..1be1b5ebe6 100644
--- a/xen/common/spinlock.c
+++ b/xen/common/spinlock.c
@@ -13,9 +13,9 @@ 
 
 static atomic_t spin_debug __read_mostly = ATOMIC_INIT(0);
 
-static void check_lock(struct lock_debug *debug)
+static void check_lock(union lock_debug *debug)
 {
-    int irq_safe = !local_irq_is_enabled();
+    bool irq_safe = !local_irq_is_enabled();
 
     if ( unlikely(atomic_read(&spin_debug) <= 0) )
         return;
@@ -43,18 +43,21 @@  static void check_lock(struct lock_debug *debug)
      */
     if ( unlikely(debug->irq_safe != irq_safe) )
     {
-        int seen = cmpxchg(&debug->irq_safe, -1, irq_safe);
+        union lock_debug seen, new = { 0 };
 
-        if ( seen == !irq_safe )
+        new.irq_safe = irq_safe;
+        seen.val = cmpxchg(&debug->val, LOCK_DEBUG_INITVAL, new.val);
+
+        if ( !seen.unseen && seen.irq_safe == !irq_safe )
         {
             printk("CHECKLOCK FAILURE: prev irqsafe: %d, curr irqsafe %d\n",
-                   seen, irq_safe);
+                   seen.irq_safe, irq_safe);
             BUG();
         }
     }
 }
 
-static void check_barrier(struct lock_debug *debug)
+static void check_barrier(union lock_debug *debug)
 {
     if ( unlikely(atomic_read(&spin_debug) <= 0) )
         return;
@@ -70,7 +73,18 @@  static void check_barrier(struct lock_debug *debug)
      * However, if we spin on an IRQ-unsafe lock with IRQs disabled then that
      * is clearly wrong, for the same reason outlined in check_lock() above.
      */
-    BUG_ON(!local_irq_is_enabled() && (debug->irq_safe == 0));
+    BUG_ON(!local_irq_is_enabled() && !debug->irq_safe);
+}
+
+static void got_lock(union lock_debug *debug)
+{
+    debug->cpu = smp_processor_id();
+}
+
+static void rel_lock(union lock_debug *debug)
+{
+    ASSERT(debug->cpu == smp_processor_id());
+    debug->cpu = SPINLOCK_NO_CPU;
 }
 
 void spin_debug_enable(void)
@@ -87,6 +101,8 @@  void spin_debug_disable(void)
 
 #define check_lock(l) ((void)0)
 #define check_barrier(l) ((void)0)
+#define got_lock(l) ((void)0)
+#define rel_lock(l) ((void)0)
 
 #endif
 
@@ -150,6 +166,7 @@  void inline _spin_lock_cb(spinlock_t *lock, void (*cb)(void *), void *data)
             cb(data);
         arch_lock_relax();
     }
+    got_lock(&lock->debug);
     LOCK_PROFILE_GOT;
     preempt_disable();
     arch_lock_acquire_barrier();
@@ -181,6 +198,7 @@  void _spin_unlock(spinlock_t *lock)
     arch_lock_release_barrier();
     preempt_enable();
     LOCK_PROFILE_REL;
+    rel_lock(&lock->debug);
     add_sized(&lock->tickets.head, 1);
     arch_lock_signal();
 }
@@ -224,6 +242,7 @@  int _spin_trylock(spinlock_t *lock)
     if ( cmpxchg(&lock->tickets.head_tail,
                  old.head_tail, new.head_tail) != old.head_tail )
         return 0;
+    got_lock(&lock->debug);
 #ifdef CONFIG_LOCK_PROFILE
     if (lock->profile)
         lock->profile->time_locked = NOW();
diff --git a/xen/include/xen/spinlock.h b/xen/include/xen/spinlock.h
index 2c7415e23a..24405386a7 100644
--- a/xen/include/xen/spinlock.h
+++ b/xen/include/xen/spinlock.h
@@ -5,15 +5,24 @@ 
 #include <asm/spinlock.h>
 #include <asm/types.h>
 
+#define SPINLOCK_CPU_BITS  12
+
 #ifndef NDEBUG
-struct lock_debug {
-    s16 irq_safe; /* +1: IRQ-safe; 0: not IRQ-safe; -1: don't know yet */
+union lock_debug {
+    uint16_t val;
+#define LOCK_DEBUG_INITVAL 0xffff
+    struct {
+        uint16_t cpu:SPINLOCK_CPU_BITS;
+        uint16_t :(14 - SPINLOCK_CPU_BITS);
+        bool irq_safe:1;
+        bool unseen:1;
+    };
 };
-#define _LOCK_DEBUG { -1 }
+#define _LOCK_DEBUG { LOCK_DEBUG_INITVAL }
 void spin_debug_enable(void);
 void spin_debug_disable(void);
 #else
-struct lock_debug { };
+union lock_debug { };
 #define _LOCK_DEBUG { }
 #define spin_debug_enable() ((void)0)
 #define spin_debug_disable() ((void)0)
@@ -138,11 +147,12 @@  typedef union {
 
 typedef struct spinlock {
     spinlock_tickets_t tickets;
-    u16 recurse_cpu:12;
-#define SPINLOCK_NO_CPU 0xfffu
-    u16 recurse_cnt:4;
-#define SPINLOCK_MAX_RECURSE 0xfu
-    struct lock_debug debug;
+    u16 recurse_cpu:SPINLOCK_CPU_BITS;
+#define SPINLOCK_NO_CPU        ((1u << SPINLOCK_CPU_BITS) - 1)
+#define SPINLOCK_RECURSE_BITS  (16 - SPINLOCK_CPU_BITS)
+    u16 recurse_cnt:SPINLOCK_RECURSE_BITS;
+#define SPINLOCK_MAX_RECURSE   ((1u << SPINLOCK_RECURSE_BITS) - 1)
+    union lock_debug debug;
 #ifdef CONFIG_LOCK_PROFILE
     struct lock_profile *profile;
 #endif