diff mbox series

[RESEND,V3,07/23] lockdep: Prepare for noinstr sections

Message ID 20200320180033.092520097@linutronix.de (mailing list archive)
State New, archived
Headers show
Series x86/entry: Consolidation part II (syscalls) | expand

Commit Message

Thomas Gleixner March 20, 2020, 6 p.m. UTC
From: Peter Zijlstra <peterz@infradead.org>

Lockdep is invoked after RCU stopped watching or before it restarted
watching from the low level entry/exit code.

lockdep_hardirqs_on() is part of the irq-state tracking; it is the
callback that indicates we're about to enable IRQs. But because of
this, lockdep has co-opted this callback to do lock state updates. All
(still) held locks will get marked with ENABLED_HARDIRQ, which then
also looks for cycles connecting to USED_IN_HARDIRQ for IRQ recursion
deadlocks.

This results in quite a lot of lockdep code getting ran, but worse, it
will want to do stack-traces for the lock state changes. Stack traces
require RCU.

Because code that requires RCU must not run after we've shut down RCU,
and shutting down RCU itself requires locks in some circumstances,
split this into two parts:

  - lockdep_hardirqs_on_prepare() -- updates the held lock state
  - lockdep_hardirqs_on() -- does the irq state tracking

This allows running the lock state changes and stack-traces with RCU
enaabled, while doing the IRQ state change later. Of course, this
opens a window where the lock stack can change. Therefore
lockdep_hardirqs_on_prepare() will snapshot the chain_key and
lockdep_hardirqs_on() will validate it still matches. This ensures
that, even when interleaved code uses locks, the actual lock state
didn't change between these two calls.

Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/linux/irqflags.h        |    2 +
 include/linux/sched.h           |    1 
 kernel/locking/lockdep.c        |   66 +++++++++++++++++++++++++++++-----------
 kernel/trace/trace_preemptirq.c |    2 +
 lib/debug_locks.c               |    2 -
 5 files changed, 55 insertions(+), 18 deletions(-)
diff mbox series

Patch

--- a/include/linux/irqflags.h
+++ b/include/linux/irqflags.h
@@ -19,11 +19,13 @@ 
 #ifdef CONFIG_PROVE_LOCKING
   extern void trace_softirqs_on(unsigned long ip);
   extern void trace_softirqs_off(unsigned long ip);
+  extern void lockdep_hardirqs_on_prepare(unsigned long ip);
   extern void lockdep_hardirqs_on(unsigned long ip);
   extern void lockdep_hardirqs_off(unsigned long ip);
 #else
   static inline void trace_softirqs_on(unsigned long ip) { }
   static inline void trace_softirqs_off(unsigned long ip) { }
+  static inline void lockdep_hardirqs_on_prepare(unsigned long ip) { }
   static inline void lockdep_hardirqs_on(unsigned long ip) { }
   static inline void lockdep_hardirqs_off(unsigned long ip) { }
 #endif
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -976,6 +976,7 @@  struct task_struct {
 	unsigned int			hardirq_disable_event;
 	int				hardirqs_enabled;
 	int				hardirq_context;
+	u64				hardirq_chain_key;
 	unsigned long			softirq_disable_ip;
 	unsigned long			softirq_enable_ip;
 	unsigned int			softirq_disable_event;
--- a/kernel/locking/lockdep.c
+++ b/kernel/locking/lockdep.c
@@ -3370,9 +3370,6 @@  static void __trace_hardirqs_on_caller(u
 {
 	struct task_struct *curr = current;
 
-	/* we'll do an OFF -> ON transition: */
-	curr->hardirqs_enabled = 1;
-
 	/*
 	 * We are going to turn hardirqs on, so set the
 	 * usage bit for all held locks:
@@ -3384,16 +3381,13 @@  static void __trace_hardirqs_on_caller(u
 	 * bit for all held locks. (disabled hardirqs prevented
 	 * this bit from being set before)
 	 */
-	if (curr->softirqs_enabled)
+	if (curr->softirqs_enabled) {
 		if (!mark_held_locks(curr, LOCK_ENABLED_SOFTIRQ))
 			return;
-
-	curr->hardirq_enable_ip = ip;
-	curr->hardirq_enable_event = ++curr->irq_events;
-	debug_atomic_inc(hardirqs_on_events);
+	}
 }
 
-void lockdep_hardirqs_on(unsigned long ip)
+void lockdep_hardirqs_on_prepare(unsigned long ip)
 {
 	if (unlikely(!debug_locks || current->lockdep_recursion))
 		return;
@@ -3429,20 +3423,59 @@  void lockdep_hardirqs_on(unsigned long i
 	if (DEBUG_LOCKS_WARN_ON(current->hardirq_context))
 		return;
 
+	current->hardirq_chain_key = current->curr_chain_key;
+
 	current->lockdep_recursion = 1;
 	__trace_hardirqs_on_caller(ip);
 	current->lockdep_recursion = 0;
 }
-NOKPROBE_SYMBOL(lockdep_hardirqs_on);
 
+void noinstr lockdep_hardirqs_on(unsigned long ip)
+{
+	struct task_struct *curr = current;
+
+	if (unlikely(!debug_locks || curr->lockdep_recursion))
+		return;
+
+	if (curr->hardirqs_enabled) {
+		/*
+		 * Neither irq nor preemption are disabled here
+		 * so this is racy by nature but losing one hit
+		 * in a stat is not a big deal.
+		 */
+		__debug_atomic_inc(redundant_hardirqs_on);
+		return;
+	}
+
+	/*
+	 * We're enabling irqs and according to our state above irqs weren't
+	 * already enabled, yet we find the hardware thinks they are in fact
+	 * enabled.. someone messed up their IRQ state tracing.
+	 */
+	if (DEBUG_LOCKS_WARN_ON(!irqs_disabled()))
+		return;
+
+	/*
+	 * Ensure the lock stack remained unchanged between
+	 * lockdep_hardirqs_on_prepare() and lockdep_hardirqs_on().
+	 */
+	DEBUG_LOCKS_WARN_ON(current->hardirq_chain_key !=
+			    current->curr_chain_key);
+
+	/* we'll do an OFF -> ON transition: */
+	curr->hardirqs_enabled = 1;
+	curr->hardirq_enable_ip = ip;
+	curr->hardirq_enable_event = ++curr->irq_events;
+	debug_atomic_inc(hardirqs_on_events);
+}
 /*
  * Hardirqs were disabled:
  */
-void lockdep_hardirqs_off(unsigned long ip)
+void noinstr lockdep_hardirqs_off(unsigned long ip)
 {
 	struct task_struct *curr = current;
 
-	if (unlikely(!debug_locks || current->lockdep_recursion))
+	if (unlikely(!debug_locks || curr->lockdep_recursion))
 		return;
 
 	/*
@@ -3463,7 +3496,6 @@  void lockdep_hardirqs_off(unsigned long
 	} else
 		debug_atomic_inc(redundant_hardirqs_off);
 }
-NOKPROBE_SYMBOL(lockdep_hardirqs_off);
 
 /*
  * Softirqs will be enabled:
@@ -4007,8 +4039,8 @@  static void print_unlock_imbalance_bug(s
 	dump_stack();
 }
 
-static int match_held_lock(const struct held_lock *hlock,
-					const struct lockdep_map *lock)
+static noinstr int match_held_lock(const struct held_lock *hlock,
+				   const struct lockdep_map *lock)
 {
 	if (hlock->instance == lock)
 		return 1;
@@ -4293,7 +4325,7 @@  static int
 	return 0;
 }
 
-static nokprobe_inline
+static __always_inline
 int __lock_is_held(const struct lockdep_map *lock, int read)
 {
 	struct task_struct *curr = current;
@@ -4506,7 +4538,7 @@  void lock_release(struct lockdep_map *lo
 }
 EXPORT_SYMBOL_GPL(lock_release);
 
-int lock_is_held_type(const struct lockdep_map *lock, int read)
+noinstr int lock_is_held_type(const struct lockdep_map *lock, int read)
 {
 	unsigned long flags;
 	int ret = 0;
--- a/kernel/trace/trace_preemptirq.c
+++ b/kernel/trace/trace_preemptirq.c
@@ -39,6 +39,7 @@  void trace_hardirqs_on(void)
 		this_cpu_write(tracing_irq_cpu, 0);
 	}
 
+	lockdep_hardirqs_on_prepare(CALLER_ADDR0);
 	lockdep_hardirqs_on(CALLER_ADDR0);
 }
 EXPORT_SYMBOL(trace_hardirqs_on);
@@ -79,6 +80,7 @@  NOKPROBE_SYMBOL(trace_hardirqs_off);
 		this_cpu_write(tracing_irq_cpu, 0);
 	}
 
+	lockdep_hardirqs_on_prepare(CALLER_ADDR0);
 	lockdep_hardirqs_on(CALLER_ADDR0);
 }
 EXPORT_SYMBOL(trace_hardirqs_on_caller);
--- a/lib/debug_locks.c
+++ b/lib/debug_locks.c
@@ -36,7 +36,7 @@  EXPORT_SYMBOL_GPL(debug_locks_silent);
 /*
  * Generic 'turn off all lock debugging' function:
  */
-int debug_locks_off(void)
+noinstr int debug_locks_off(void)
 {
 	if (debug_locks && __debug_locks_off()) {
 		if (!debug_locks_silent) {