diff mbox series

[2/5] kprobes: Use guard() for external locks

Message ID 173371208663.480397.7535769878667655223.stgit@devnote2 (mailing list archive)
State Superseded
Commit 557c36519b1a99b3a090703b76d77e034ad02d65
Headers show
Series kprobes: jump label: Cleanup with guard and __free | expand

Commit Message

Masami Hiramatsu (Google) Dec. 9, 2024, 2:41 a.m. UTC
From: Masami Hiramatsu (Google) <mhiramat@kernel.org>

Use guard() for text_mutex, cpu_read_lock, and jump_label_lock in
the kprobes.

Signed-off-by: Masami Hiramatsu (Google) <mhiramat@kernel.org>
---
 kernel/kprobes.c |  209 +++++++++++++++++++++++-------------------------------
 1 file changed, 90 insertions(+), 119 deletions(-)

Comments

Peter Zijlstra Dec. 9, 2024, 11:04 a.m. UTC | #1
On Mon, Dec 09, 2024 at 11:41:26AM +0900, Masami Hiramatsu (Google) wrote:
> From: Masami Hiramatsu (Google) <mhiramat@kernel.org>
> 
> Use guard() for text_mutex, cpu_read_lock, and jump_label_lock in
> the kprobes.

> @@ -853,29 +850,24 @@ static void try_to_optimize_kprobe(struct kprobe *p)
>  		return;
>  
>  	/* For preparing optimization, jump_label_text_reserved() is called. */
> -	cpus_read_lock();
> -	jump_label_lock();
> -	mutex_lock(&text_mutex);
> +	guard(cpus_read_lock)();
> +	guard(jump_label_lock)();
> +	guard(mutex)(&text_mutex);
>  

> @@ -1294,62 +1280,55 @@ static int register_aggr_kprobe(struct kprobe *orig_p, struct kprobe *p)
>  	int ret = 0;
>  	struct kprobe *ap = orig_p;
>  
> -	cpus_read_lock();
> -
> -	/* For preparing optimization, jump_label_text_reserved() is called */
> -	jump_label_lock();
> -	mutex_lock(&text_mutex);

Why does kprobe need jump_label_lock and how does it then not also need
static_call_lock ?
Masami Hiramatsu (Google) Dec. 10, 2024, 2:04 a.m. UTC | #2
On Mon, 9 Dec 2024 12:04:11 +0100
Peter Zijlstra <peterz@infradead.org> wrote:

> On Mon, Dec 09, 2024 at 11:41:26AM +0900, Masami Hiramatsu (Google) wrote:
> > From: Masami Hiramatsu (Google) <mhiramat@kernel.org>
> > 
> > Use guard() for text_mutex, cpu_read_lock, and jump_label_lock in
> > the kprobes.
> 
> > @@ -853,29 +850,24 @@ static void try_to_optimize_kprobe(struct kprobe *p)
> >  		return;
> >  
> >  	/* For preparing optimization, jump_label_text_reserved() is called. */
> > -	cpus_read_lock();
> > -	jump_label_lock();
> > -	mutex_lock(&text_mutex);
> > +	guard(cpus_read_lock)();
> > +	guard(jump_label_lock)();
> > +	guard(mutex)(&text_mutex);
> >  
> 
> > @@ -1294,62 +1280,55 @@ static int register_aggr_kprobe(struct kprobe *orig_p, struct kprobe *p)
> >  	int ret = 0;
> >  	struct kprobe *ap = orig_p;
> >  
> > -	cpus_read_lock();
> > -
> > -	/* For preparing optimization, jump_label_text_reserved() is called */
> > -	jump_label_lock();
> > -	mutex_lock(&text_mutex);
> 
> Why does kprobe need jump_label_lock and how does it then not also need
> static_call_lock ?

Good catch! It has not been updated for static_call_text_reserved().
We need static_call_lock() here too.

Thanks!
Masami Hiramatsu (Google) Dec. 10, 2024, 2:15 a.m. UTC | #3
On Tue, 10 Dec 2024 11:04:28 +0900
Masami Hiramatsu (Google) <mhiramat@kernel.org> wrote:

> On Mon, 9 Dec 2024 12:04:11 +0100
> Peter Zijlstra <peterz@infradead.org> wrote:
> 
> > On Mon, Dec 09, 2024 at 11:41:26AM +0900, Masami Hiramatsu (Google) wrote:
> > > From: Masami Hiramatsu (Google) <mhiramat@kernel.org>
> > > 
> > > Use guard() for text_mutex, cpu_read_lock, and jump_label_lock in
> > > the kprobes.
> > 
> > > @@ -853,29 +850,24 @@ static void try_to_optimize_kprobe(struct kprobe *p)
> > >  		return;
> > >  
> > >  	/* For preparing optimization, jump_label_text_reserved() is called. */
> > > -	cpus_read_lock();
> > > -	jump_label_lock();
> > > -	mutex_lock(&text_mutex);
> > > +	guard(cpus_read_lock)();
> > > +	guard(jump_label_lock)();
> > > +	guard(mutex)(&text_mutex);
> > >  
> > 
> > > @@ -1294,62 +1280,55 @@ static int register_aggr_kprobe(struct kprobe *orig_p, struct kprobe *p)
> > >  	int ret = 0;
> > >  	struct kprobe *ap = orig_p;
> > >  
> > > -	cpus_read_lock();
> > > -
> > > -	/* For preparing optimization, jump_label_text_reserved() is called */
> > > -	jump_label_lock();
> > > -	mutex_lock(&text_mutex);
> > 
> > Why does kprobe need jump_label_lock and how does it then not also need
> > static_call_lock ?
> 
> Good catch! It has not been updated for static_call_text_reserved().
> We need static_call_lock() here too.

Wait, this is for checking the jump_label_text_reserved(), but as far as
I know, the text reserved area of jump_label will be updated when the
module is loaded or removed. And the static call too, right?

In that case, what we need is to lock the modules (for the short term,
can we use rcu_read_lock?) for using both jump_label_text_reserved()
and static_call_text_reserved()?

Thank you,
Peter Zijlstra Dec. 10, 2024, 12:10 p.m. UTC | #4
On Tue, Dec 10, 2024 at 11:15:28AM +0900, Masami Hiramatsu wrote:
> On Tue, 10 Dec 2024 11:04:28 +0900
> Masami Hiramatsu (Google) <mhiramat@kernel.org> wrote:
> 
> > On Mon, 9 Dec 2024 12:04:11 +0100
> > Peter Zijlstra <peterz@infradead.org> wrote:
> > 
> > > On Mon, Dec 09, 2024 at 11:41:26AM +0900, Masami Hiramatsu (Google) wrote:
> > > > From: Masami Hiramatsu (Google) <mhiramat@kernel.org>
> > > > 
> > > > Use guard() for text_mutex, cpu_read_lock, and jump_label_lock in
> > > > the kprobes.
> > > 
> > > > @@ -853,29 +850,24 @@ static void try_to_optimize_kprobe(struct kprobe *p)
> > > >  		return;
> > > >  
> > > >  	/* For preparing optimization, jump_label_text_reserved() is called. */
> > > > -	cpus_read_lock();
> > > > -	jump_label_lock();
> > > > -	mutex_lock(&text_mutex);
> > > > +	guard(cpus_read_lock)();
> > > > +	guard(jump_label_lock)();
> > > > +	guard(mutex)(&text_mutex);
> > > >  
> > > 
> > > > @@ -1294,62 +1280,55 @@ static int register_aggr_kprobe(struct kprobe *orig_p, struct kprobe *p)
> > > >  	int ret = 0;
> > > >  	struct kprobe *ap = orig_p;
> > > >  
> > > > -	cpus_read_lock();
> > > > -
> > > > -	/* For preparing optimization, jump_label_text_reserved() is called */
> > > > -	jump_label_lock();
> > > > -	mutex_lock(&text_mutex);
> > > 
> > > Why does kprobe need jump_label_lock and how does it then not also need
> > > static_call_lock ?
> > 
> > Good catch! It has not been updated for static_call_text_reserved().
> > We need static_call_lock() here too.
> 
> Wait, this is for checking the jump_label_text_reserved(), but as far as
> I know, the text reserved area of jump_label will be updated when the
> module is loaded or removed. And the static call too, right?

Correct.

> In that case, what we need is to lock the modules (for the short term,
> can we use rcu_read_lock?) for using both jump_label_text_reserved()
> and static_call_text_reserved()?

Yes, rcu_read_lock() is sufficient to observe fully loaded modules. I
don't think you care about placing kprobes on modules that are still
loading (that doesn't really make sense).

Also see:

  https://lkml.kernel.org/r/20241205215102.hRywUW2A@linutronix.de
Masami Hiramatsu (Google) Dec. 10, 2024, 2:12 p.m. UTC | #5
On Tue, 10 Dec 2024 13:10:27 +0100
Peter Zijlstra <peterz@infradead.org> wrote:

> 
> > Wait, this is for checking the jump_label_text_reserved(), but as far as
> > I know, the text reserved area of jump_label will be updated when the
> > module is loaded or removed. And the static call too, right?
> 
> Correct.
> 
> > In that case, what we need is to lock the modules (for the short term,
> > can we use rcu_read_lock?) for using both jump_label_text_reserved()
> > and static_call_text_reserved()?
> 
> Yes, rcu_read_lock() is sufficient to observe fully loaded modules. I
> don't think you care about placing kprobes on modules that are still
> loading (that doesn't really make sense).

Actually, to probe module's __init function, it may put a probe during
loading modules (by trace_kprobe.c) which has been done by module
notification callback.

trace_kprobe_module_callback()
 -> register_module_trace_kprobe()
   -> __register_trace_kprobe()
      -> register_kprobe()
         -> check_kprobe_address_safe()

Anyway, unless we run the module notifier callbacks in parallel,
it should be safe.

Hmm, however, it seems that trace_probe's module notifier priority
is not correct. It must be lower than jump_label but it is the same.

OK, let me remove jump_label_lock() from kprobes (if it gets
module reference), and give a lower priority to the trace_probe's
module notifier to ensure it is called after jump_label is updated.

> 
> Also see:
> 
>   https://lkml.kernel.org/r/20241205215102.hRywUW2A@linutronix.de

Thank you,
Masami Hiramatsu (Google) Dec. 10, 2024, 11:17 p.m. UTC | #6
On Tue, 10 Dec 2024 23:12:41 +0900
Masami Hiramatsu (Google) <mhiramat@kernel.org> wrote:

> On Tue, 10 Dec 2024 13:10:27 +0100
> Peter Zijlstra <peterz@infradead.org> wrote:
> 
> > 
> > > Wait, this is for checking the jump_label_text_reserved(), but as far as
> > > I know, the text reserved area of jump_label will be updated when the
> > > module is loaded or removed. And the static call too, right?
> > 
> > Correct.
> > 
> > > In that case, what we need is to lock the modules (for the short term,
> > > can we use rcu_read_lock?) for using both jump_label_text_reserved()
> > > and static_call_text_reserved()?
> > 
> > Yes, rcu_read_lock() is sufficient to observe fully loaded modules. I
> > don't think you care about placing kprobes on modules that are still
> > loading (that doesn't really make sense).
> 
> Actually, to probe module's __init function, it may put a probe during
> loading modules (by trace_kprobe.c) which has been done by module
> notification callback.
> 
> trace_kprobe_module_callback()
>  -> register_module_trace_kprobe()
>    -> __register_trace_kprobe()
>       -> register_kprobe()
>          -> check_kprobe_address_safe()
> 
> Anyway, unless we run the module notifier callbacks in parallel,
> it should be safe.

Hmm, this is still a problem. We need to acquire jump_label_lock()
even for the jump_label_text_reserved().

If user runs module load and register_kprobe() in parallel, 
jump_label_module_notify() and check_kprobe_address_safe() may run
in parallel.

jump_label_module_notify()
  -> jump_label_add_module()
     -> jump_label_sort_entries() <- update mod->jump_entries

check_kprobe_address_safe()
  -> jump_label_text_reserved()
      -> __jump_label_mod_text_reserved() <- read mod->jump_entries

So there is a race on mod->jump_entries. jump_label_lock() avoids
this race.
(IIRC, module can get the reference in the MODULE_STATE_COMING state.)

On the other hand, static_call_text_reserved() does not need a lock
because it does not sort the list, nor update the static_call_site::addr.

In conclusion, we need jump_label_lock() as it is, and don't need
static_call_lock().

Thank you,
diff mbox series

Patch

diff --git a/kernel/kprobes.c b/kernel/kprobes.c
index 62b5b08d809d..004eb8326520 100644
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -596,41 +596,38 @@  static void kick_kprobe_optimizer(void)
 /* Kprobe jump optimizer */
 static void kprobe_optimizer(struct work_struct *work)
 {
-	mutex_lock(&kprobe_mutex);
-	cpus_read_lock();
-	mutex_lock(&text_mutex);
+	guard(mutex)(&kprobe_mutex);
 
-	/*
-	 * Step 1: Unoptimize kprobes and collect cleaned (unused and disarmed)
-	 * kprobes before waiting for quiesence period.
-	 */
-	do_unoptimize_kprobes();
+	scoped_guard(cpus_read_lock) {
+		guard(mutex)(&text_mutex);
 
-	/*
-	 * Step 2: Wait for quiesence period to ensure all potentially
-	 * preempted tasks to have normally scheduled. Because optprobe
-	 * may modify multiple instructions, there is a chance that Nth
-	 * instruction is preempted. In that case, such tasks can return
-	 * to 2nd-Nth byte of jump instruction. This wait is for avoiding it.
-	 * Note that on non-preemptive kernel, this is transparently converted
-	 * to synchronoze_sched() to wait for all interrupts to have completed.
-	 */
-	synchronize_rcu_tasks();
+		/*
+		 * Step 1: Unoptimize kprobes and collect cleaned (unused and disarmed)
+		 * kprobes before waiting for quiesence period.
+		 */
+		do_unoptimize_kprobes();
 
-	/* Step 3: Optimize kprobes after quiesence period */
-	do_optimize_kprobes();
+		/*
+		 * Step 2: Wait for quiesence period to ensure all potentially
+		 * preempted tasks to have normally scheduled. Because optprobe
+		 * may modify multiple instructions, there is a chance that Nth
+		 * instruction is preempted. In that case, such tasks can return
+		 * to 2nd-Nth byte of jump instruction. This wait is for avoiding it.
+		 * Note that on non-preemptive kernel, this is transparently converted
+		 * to synchronoze_sched() to wait for all interrupts to have completed.
+		 */
+		synchronize_rcu_tasks();
 
-	/* Step 4: Free cleaned kprobes after quiesence period */
-	do_free_cleaned_kprobes();
+		/* Step 3: Optimize kprobes after quiesence period */
+		do_optimize_kprobes();
 
-	mutex_unlock(&text_mutex);
-	cpus_read_unlock();
+		/* Step 4: Free cleaned kprobes after quiesence period */
+		do_free_cleaned_kprobes();
+	}
 
 	/* Step 5: Kick optimizer again if needed */
 	if (!list_empty(&optimizing_list) || !list_empty(&unoptimizing_list))
 		kick_kprobe_optimizer();
-
-	mutex_unlock(&kprobe_mutex);
 }
 
 static void wait_for_kprobe_optimizer_locked(void)
@@ -853,29 +850,24 @@  static void try_to_optimize_kprobe(struct kprobe *p)
 		return;
 
 	/* For preparing optimization, jump_label_text_reserved() is called. */
-	cpus_read_lock();
-	jump_label_lock();
-	mutex_lock(&text_mutex);
+	guard(cpus_read_lock)();
+	guard(jump_label_lock)();
+	guard(mutex)(&text_mutex);
 
 	ap = alloc_aggr_kprobe(p);
 	if (!ap)
-		goto out;
+		return;
 
 	op = container_of(ap, struct optimized_kprobe, kp);
 	if (!arch_prepared_optinsn(&op->optinsn)) {
 		/* If failed to setup optimizing, fallback to kprobe. */
 		arch_remove_optimized_kprobe(op);
 		kfree(op);
-		goto out;
+		return;
 	}
 
 	init_aggr_kprobe(ap, p);
 	optimize_kprobe(ap);	/* This just kicks optimizer thread. */
-
-out:
-	mutex_unlock(&text_mutex);
-	jump_label_unlock();
-	cpus_read_unlock();
 }
 
 static void optimize_all_kprobes(void)
@@ -1158,12 +1150,9 @@  static int arm_kprobe(struct kprobe *kp)
 	if (unlikely(kprobe_ftrace(kp)))
 		return arm_kprobe_ftrace(kp);
 
-	cpus_read_lock();
-	mutex_lock(&text_mutex);
+	guard(cpus_read_lock)();
+	guard(mutex)(&text_mutex);
 	__arm_kprobe(kp);
-	mutex_unlock(&text_mutex);
-	cpus_read_unlock();
-
 	return 0;
 }
 
@@ -1172,12 +1161,9 @@  static int disarm_kprobe(struct kprobe *kp, bool reopt)
 	if (unlikely(kprobe_ftrace(kp)))
 		return disarm_kprobe_ftrace(kp);
 
-	cpus_read_lock();
-	mutex_lock(&text_mutex);
+	guard(cpus_read_lock)();
+	guard(mutex)(&text_mutex);
 	__disarm_kprobe(kp, reopt);
-	mutex_unlock(&text_mutex);
-	cpus_read_unlock();
-
 	return 0;
 }
 
@@ -1294,62 +1280,55 @@  static int register_aggr_kprobe(struct kprobe *orig_p, struct kprobe *p)
 	int ret = 0;
 	struct kprobe *ap = orig_p;
 
-	cpus_read_lock();
-
-	/* For preparing optimization, jump_label_text_reserved() is called */
-	jump_label_lock();
-	mutex_lock(&text_mutex);
-
-	if (!kprobe_aggrprobe(orig_p)) {
-		/* If 'orig_p' is not an 'aggr_kprobe', create new one. */
-		ap = alloc_aggr_kprobe(orig_p);
-		if (!ap) {
-			ret = -ENOMEM;
-			goto out;
+	scoped_guard(cpus_read_lock) {
+		/* For preparing optimization, jump_label_text_reserved() is called */
+		guard(jump_label_lock)();
+		guard(mutex)(&text_mutex);
+
+		if (!kprobe_aggrprobe(orig_p)) {
+			/* If 'orig_p' is not an 'aggr_kprobe', create new one. */
+			ap = alloc_aggr_kprobe(orig_p);
+			if (!ap)
+				return -ENOMEM;
+			init_aggr_kprobe(ap, orig_p);
+		} else if (kprobe_unused(ap)) {
+			/* This probe is going to die. Rescue it */
+			ret = reuse_unused_kprobe(ap);
+			if (ret)
+				return ret;
 		}
-		init_aggr_kprobe(ap, orig_p);
-	} else if (kprobe_unused(ap)) {
-		/* This probe is going to die. Rescue it */
-		ret = reuse_unused_kprobe(ap);
-		if (ret)
-			goto out;
-	}
 
-	if (kprobe_gone(ap)) {
-		/*
-		 * Attempting to insert new probe at the same location that
-		 * had a probe in the module vaddr area which already
-		 * freed. So, the instruction slot has already been
-		 * released. We need a new slot for the new probe.
-		 */
-		ret = arch_prepare_kprobe(ap);
-		if (ret)
+		if (kprobe_gone(ap)) {
 			/*
-			 * Even if fail to allocate new slot, don't need to
-			 * free the 'ap'. It will be used next time, or
-			 * freed by unregister_kprobe().
+			 * Attempting to insert new probe at the same location that
+			 * had a probe in the module vaddr area which already
+			 * freed. So, the instruction slot has already been
+			 * released. We need a new slot for the new probe.
 			 */
-			goto out;
+			ret = arch_prepare_kprobe(ap);
+			if (ret)
+				/*
+				 * Even if fail to allocate new slot, don't need to
+				 * free the 'ap'. It will be used next time, or
+				 * freed by unregister_kprobe().
+				 */
+				return ret;
 
-		/* Prepare optimized instructions if possible. */
-		prepare_optimized_kprobe(ap);
+			/* Prepare optimized instructions if possible. */
+			prepare_optimized_kprobe(ap);
 
-		/*
-		 * Clear gone flag to prevent allocating new slot again, and
-		 * set disabled flag because it is not armed yet.
-		 */
-		ap->flags = (ap->flags & ~KPROBE_FLAG_GONE)
-			    | KPROBE_FLAG_DISABLED;
-	}
-
-	/* Copy the insn slot of 'p' to 'ap'. */
-	copy_kprobe(ap, p);
-	ret = add_new_kprobe(ap, p);
+			/*
+			 * Clear gone flag to prevent allocating new slot again, and
+			 * set disabled flag because it is not armed yet.
+			 */
+			ap->flags = (ap->flags & ~KPROBE_FLAG_GONE)
+					| KPROBE_FLAG_DISABLED;
+		}
 
-out:
-	mutex_unlock(&text_mutex);
-	jump_label_unlock();
-	cpus_read_unlock();
+		/* Copy the insn slot of 'p' to 'ap'. */
+		copy_kprobe(ap, p);
+		ret = add_new_kprobe(ap, p);
+	}
 
 	if (ret == 0 && kprobe_disabled(ap) && !kprobe_disabled(p)) {
 		ap->flags &= ~KPROBE_FLAG_DISABLED;
@@ -1559,26 +1538,23 @@  static int check_kprobe_address_safe(struct kprobe *p,
 	ret = check_ftrace_location(p);
 	if (ret)
 		return ret;
-	jump_label_lock();
+
+	guard(jump_label_lock)();
 
 	/* Ensure the address is in a text area, and find a module if exists. */
 	*probed_mod = NULL;
 	if (!core_kernel_text((unsigned long) p->addr)) {
 		guard(preempt)();
 		*probed_mod = __module_text_address((unsigned long) p->addr);
-		if (!(*probed_mod)) {
-			ret = -EINVAL;
-			goto out;
-		}
+		if (!(*probed_mod))
+			return -EINVAL;
 
 		/*
 		 * We must hold a refcount of the probed module while updating
 		 * its code to prohibit unexpected unloading.
 		 */
-		if (unlikely(!try_module_get(*probed_mod))) {
-			ret = -ENOENT;
-			goto out;
-		}
+		if (unlikely(!try_module_get(*probed_mod)))
+			return -ENOENT;
 	}
 	/* Ensure it is not in reserved area. */
 	if (in_gate_area_no_mm((unsigned long) p->addr) ||
@@ -1588,8 +1564,7 @@  static int check_kprobe_address_safe(struct kprobe *p,
 	    find_bug((unsigned long)p->addr) ||
 	    is_cfi_preamble_symbol((unsigned long)p->addr)) {
 		module_put(*probed_mod);
-		ret = -EINVAL;
-		goto out;
+		return -EINVAL;
 	}
 
 	/* Get module refcount and reject __init functions for loaded modules. */
@@ -1601,14 +1576,11 @@  static int check_kprobe_address_safe(struct kprobe *p,
 		if (within_module_init((unsigned long)p->addr, *probed_mod) &&
 		    !module_is_coming(*probed_mod)) {
 			module_put(*probed_mod);
-			ret = -ENOENT;
+			return -ENOENT;
 		}
 	}
 
-out:
-	jump_label_unlock();
-
-	return ret;
+	return 0;
 }
 
 static int __register_kprobe(struct kprobe *p)
@@ -1623,14 +1595,13 @@  static int __register_kprobe(struct kprobe *p)
 		/* Since this may unoptimize 'old_p', locking 'text_mutex'. */
 		return register_aggr_kprobe(old_p, p);
 
-	cpus_read_lock();
-	/* Prevent text modification */
-	mutex_lock(&text_mutex);
-	ret = prepare_kprobe(p);
-	mutex_unlock(&text_mutex);
-	cpus_read_unlock();
-	if (ret)
-		return ret;
+	scoped_guard(cpus_read_lock) {
+		/* Prevent text modification */
+		guard(mutex)(&text_mutex);
+		ret = prepare_kprobe(p);
+		if (ret)
+			return ret;
+	}
 
 	INIT_HLIST_NODE(&p->hlist);
 	hlist_add_head_rcu(&p->hlist,