diff mbox series

[v4,32/39] perf: Remove get_perf_callchain() 'crosstask' argument

Message ID 73229250c0279667f617c3d6da121c6621164e4d.1737511963.git.jpoimboe@kernel.org (mailing list archive)
State New
Headers show
Series unwind, perf: sframe user space unwinding | expand

Commit Message

Josh Poimboeuf Jan. 22, 2025, 2:31 a.m. UTC
get_perf_callchain() doesn't support cross-task unwinding, so it doesn't
make much sense to have 'crosstask' as an argument.

Acked-by: Namhyung Kim <namhyung@kernel.org>
Signed-off-by: Josh Poimboeuf <jpoimboe@kernel.org>
---
 include/linux/perf_event.h |  2 +-
 kernel/bpf/stackmap.c      | 12 ++++--------
 kernel/events/callchain.c  |  6 +-----
 kernel/events/core.c       |  9 +++++----
 4 files changed, 11 insertions(+), 18 deletions(-)

Comments

Andrii Nakryiko Jan. 24, 2025, 6:13 p.m. UTC | #1
On Tue, Jan 21, 2025 at 6:32 PM Josh Poimboeuf <jpoimboe@kernel.org> wrote:
>
> get_perf_callchain() doesn't support cross-task unwinding, so it doesn't
> make much sense to have 'crosstask' as an argument.
>
> Acked-by: Namhyung Kim <namhyung@kernel.org>
> Signed-off-by: Josh Poimboeuf <jpoimboe@kernel.org>
> ---
>  include/linux/perf_event.h |  2 +-
>  kernel/bpf/stackmap.c      | 12 ++++--------
>  kernel/events/callchain.c  |  6 +-----
>  kernel/events/core.c       |  9 +++++----
>  4 files changed, 11 insertions(+), 18 deletions(-)
>
> diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
> index 4c8ff7258c6a..1563dc2cd979 100644
> --- a/include/linux/perf_event.h
> +++ b/include/linux/perf_event.h
> @@ -1590,7 +1590,7 @@ extern void perf_callchain_user(struct perf_callchain_entry_ctx *entry, struct p
>  extern void perf_callchain_kernel(struct perf_callchain_entry_ctx *entry, struct pt_regs *regs);
>  extern struct perf_callchain_entry *
>  get_perf_callchain(struct pt_regs *regs, bool kernel, bool user,
> -                  u32 max_stack, bool crosstask, bool add_mark);
> +                  u32 max_stack, bool add_mark);
>  extern int get_callchain_buffers(int max_stack);
>  extern void put_callchain_buffers(void);
>  extern struct perf_callchain_entry *get_callchain_entry(int *rctx);
> diff --git a/kernel/bpf/stackmap.c b/kernel/bpf/stackmap.c
> index ec3a57a5fba1..ee9701337912 100644
> --- a/kernel/bpf/stackmap.c
> +++ b/kernel/bpf/stackmap.c
> @@ -314,8 +314,7 @@ BPF_CALL_3(bpf_get_stackid, struct pt_regs *, regs, struct bpf_map *, map,
>         if (max_depth > sysctl_perf_event_max_stack)
>                 max_depth = sysctl_perf_event_max_stack;
>
> -       trace = get_perf_callchain(regs, kernel, user, max_depth,
> -                                  false, false);
> +       trace = get_perf_callchain(regs, kernel, user, max_depth, false);
>
>         if (unlikely(!trace))
>                 /* couldn't fetch the stack trace */
> @@ -430,10 +429,8 @@ static long __bpf_get_stack(struct pt_regs *regs, struct task_struct *task,
>         if (task && user && !user_mode(regs))
>                 goto err_fault;
>
> -       /* get_perf_callchain does not support crosstask user stack walking
> -        * but returns an empty stack instead of NULL.
> -        */
> -       if (crosstask && user) {
> +       /* get_perf_callchain() does not support crosstask stack walking */
> +       if (crosstask) {

crosstask stack trace is supported for kernel stack traces (see
get_callchain_entry_for_task() call), so this is breaking that case

>                 err = -EOPNOTSUPP;
>                 goto clear;
>         }
> @@ -451,8 +448,7 @@ static long __bpf_get_stack(struct pt_regs *regs, struct task_struct *task,
>         else if (kernel && task)
>                 trace = get_callchain_entry_for_task(task, max_depth);
>         else
> -               trace = get_perf_callchain(regs, kernel, user, max_depth,
> -                                          crosstask, false);
> +               trace = get_perf_callchain(regs, kernel, user, max_depth,false);

nit: missing space

>
>         if (unlikely(!trace) || trace->nr < skip) {
>                 if (may_fault)
> diff --git a/kernel/events/callchain.c b/kernel/events/callchain.c
> index 83834203e144..655fb25a725b 100644
> --- a/kernel/events/callchain.c
> +++ b/kernel/events/callchain.c

[...]
Josh Poimboeuf Jan. 24, 2025, 10 p.m. UTC | #2
On Fri, Jan 24, 2025 at 10:13:23AM -0800, Andrii Nakryiko wrote:
> On Tue, Jan 21, 2025 at 6:32 PM Josh Poimboeuf <jpoimboe@kernel.org> wrote:
> > @@ -430,10 +429,8 @@ static long __bpf_get_stack(struct pt_regs *regs, struct task_struct *task,
> >         if (task && user && !user_mode(regs))
> >                 goto err_fault;
> >
> > -       /* get_perf_callchain does not support crosstask user stack walking
> > -        * but returns an empty stack instead of NULL.
> > -        */
> > -       if (crosstask && user) {
> > +       /* get_perf_callchain() does not support crosstask stack walking */
> > +       if (crosstask) {
> 
> crosstask stack trace is supported for kernel stack traces (see
> get_callchain_entry_for_task() call), so this is breaking that case

Oh I see, thanks.

BTW, that seems dubious, does it do anything to ensure the task isn't
running?   Otherwise the unwind is going to be a wild ride.
Andrii Nakryiko Jan. 28, 2025, 12:39 a.m. UTC | #3
On Fri, Jan 24, 2025 at 2:00 PM Josh Poimboeuf <jpoimboe@kernel.org> wrote:
>
> On Fri, Jan 24, 2025 at 10:13:23AM -0800, Andrii Nakryiko wrote:
> > On Tue, Jan 21, 2025 at 6:32 PM Josh Poimboeuf <jpoimboe@kernel.org> wrote:
> > > @@ -430,10 +429,8 @@ static long __bpf_get_stack(struct pt_regs *regs, struct task_struct *task,
> > >         if (task && user && !user_mode(regs))
> > >                 goto err_fault;
> > >
> > > -       /* get_perf_callchain does not support crosstask user stack walking
> > > -        * but returns an empty stack instead of NULL.
> > > -        */
> > > -       if (crosstask && user) {
> > > +       /* get_perf_callchain() does not support crosstask stack walking */
> > > +       if (crosstask) {
> >
> > crosstask stack trace is supported for kernel stack traces (see
> > get_callchain_entry_for_task() call), so this is breaking that case
>
> Oh I see, thanks.
>
> BTW, that seems dubious, does it do anything to ensure the task isn't
> running?   Otherwise the unwind is going to be a wild ride.

Yeah, I think it's very speculative and doesn't pause the task in any
way (just makes sure it doesn't go away). We just rely on
stack_trace_save_tsk() -> arch_stack_walk(), which just optimistically
tries to unwind, it seems.

It's still useful and if the user is prepared to handle a potentially
garbage stack trace, why not. People do a similar thing for user space
stack trace (with custom BPF code), and it's very useful (even if not
"reliable" by any means).


>
> --
> Josh
diff mbox series

Patch

diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 4c8ff7258c6a..1563dc2cd979 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -1590,7 +1590,7 @@  extern void perf_callchain_user(struct perf_callchain_entry_ctx *entry, struct p
 extern void perf_callchain_kernel(struct perf_callchain_entry_ctx *entry, struct pt_regs *regs);
 extern struct perf_callchain_entry *
 get_perf_callchain(struct pt_regs *regs, bool kernel, bool user,
-		   u32 max_stack, bool crosstask, bool add_mark);
+		   u32 max_stack, bool add_mark);
 extern int get_callchain_buffers(int max_stack);
 extern void put_callchain_buffers(void);
 extern struct perf_callchain_entry *get_callchain_entry(int *rctx);
diff --git a/kernel/bpf/stackmap.c b/kernel/bpf/stackmap.c
index ec3a57a5fba1..ee9701337912 100644
--- a/kernel/bpf/stackmap.c
+++ b/kernel/bpf/stackmap.c
@@ -314,8 +314,7 @@  BPF_CALL_3(bpf_get_stackid, struct pt_regs *, regs, struct bpf_map *, map,
 	if (max_depth > sysctl_perf_event_max_stack)
 		max_depth = sysctl_perf_event_max_stack;
 
-	trace = get_perf_callchain(regs, kernel, user, max_depth,
-				   false, false);
+	trace = get_perf_callchain(regs, kernel, user, max_depth, false);
 
 	if (unlikely(!trace))
 		/* couldn't fetch the stack trace */
@@ -430,10 +429,8 @@  static long __bpf_get_stack(struct pt_regs *regs, struct task_struct *task,
 	if (task && user && !user_mode(regs))
 		goto err_fault;
 
-	/* get_perf_callchain does not support crosstask user stack walking
-	 * but returns an empty stack instead of NULL.
-	 */
-	if (crosstask && user) {
+	/* get_perf_callchain() does not support crosstask user stack walking */
+	if (crosstask && user) {
 		err = -EOPNOTSUPP;
 		goto clear;
 	}
@@ -451,8 +448,7 @@  static long __bpf_get_stack(struct pt_regs *regs, struct task_struct *task,
 	else if (kernel && task)
 		trace = get_callchain_entry_for_task(task, max_depth);
 	else
-		trace = get_perf_callchain(regs, kernel, user, max_depth,
-					   crosstask, false);
+		trace = get_perf_callchain(regs, kernel, user, max_depth, false);
 
 	if (unlikely(!trace) || trace->nr < skip) {
 		if (may_fault)
diff --git a/kernel/events/callchain.c b/kernel/events/callchain.c
index 83834203e144..655fb25a725b 100644
--- a/kernel/events/callchain.c
+++ b/kernel/events/callchain.c
@@ -217,7 +217,7 @@  static void fixup_uretprobe_trampoline_entries(struct perf_callchain_entry *entr
 
 struct perf_callchain_entry *
 get_perf_callchain(struct pt_regs *regs, bool kernel, bool user,
-		   u32 max_stack, bool crosstask, bool add_mark)
+		   u32 max_stack, bool add_mark)
 {
 	struct perf_callchain_entry *entry;
 	struct perf_callchain_entry_ctx ctx;
@@ -248,9 +248,6 @@  get_perf_callchain(struct pt_regs *regs, bool kernel, bool user,
 		}
 
 		if (regs) {
-			if (crosstask)
-				goto exit_put;
-
 			if (add_mark)
 				perf_callchain_store_context(&ctx, PERF_CONTEXT_USER);
 
@@ -260,7 +257,6 @@  get_perf_callchain(struct pt_regs *regs, bool kernel, bool user,
 		}
 	}
 
-exit_put:
 	put_callchain_entry(rctx);
 
 	return entry;
diff --git a/kernel/events/core.c b/kernel/events/core.c
index ebe457bacf96..99f0f28feeb5 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -7793,16 +7793,17 @@  perf_callchain(struct perf_event *event, struct pt_regs *regs)
 {
 	bool kernel = !event->attr.exclude_callchain_kernel;
 	bool user   = !event->attr.exclude_callchain_user;
-	/* Disallow cross-task user callchains. */
-	bool crosstask = event->ctx->task && event->ctx->task != current;
 	const u32 max_stack = event->attr.sample_max_stack;
 	struct perf_callchain_entry *callchain;
 
 	if (!kernel && !user)
 		return &__empty_callchain;
 
-	callchain = get_perf_callchain(regs, kernel, user,
-				       max_stack, crosstask, true);
+	/* Disallow cross-task callchains. */
+	if (event->ctx->task && event->ctx->task != current)
+		return &__empty_callchain;
+
+	callchain = get_perf_callchain(regs, kernel, user, max_stack, true);
 	return callchain ?: &__empty_callchain;
 }