Message ID | 73229250c0279667f617c3d6da121c6621164e4d.1737511963.git.jpoimboe@kernel.org (mailing list archive) |
---|---|
State | New |
Headers | show |
Series | unwind, perf: sframe user space unwinding | expand |
On Tue, Jan 21, 2025 at 6:32 PM Josh Poimboeuf <jpoimboe@kernel.org> wrote: > > get_perf_callchain() doesn't support cross-task unwinding, so it doesn't > make much sense to have 'crosstask' as an argument. > > Acked-by: Namhyung Kim <namhyung@kernel.org> > Signed-off-by: Josh Poimboeuf <jpoimboe@kernel.org> > --- > include/linux/perf_event.h | 2 +- > kernel/bpf/stackmap.c | 12 ++++-------- > kernel/events/callchain.c | 6 +----- > kernel/events/core.c | 9 +++++---- > 4 files changed, 11 insertions(+), 18 deletions(-) > > diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h > index 4c8ff7258c6a..1563dc2cd979 100644 > --- a/include/linux/perf_event.h > +++ b/include/linux/perf_event.h > @@ -1590,7 +1590,7 @@ extern void perf_callchain_user(struct perf_callchain_entry_ctx *entry, struct p > extern void perf_callchain_kernel(struct perf_callchain_entry_ctx *entry, struct pt_regs *regs); > extern struct perf_callchain_entry * > get_perf_callchain(struct pt_regs *regs, bool kernel, bool user, > - u32 max_stack, bool crosstask, bool add_mark); > + u32 max_stack, bool add_mark); > extern int get_callchain_buffers(int max_stack); > extern void put_callchain_buffers(void); > extern struct perf_callchain_entry *get_callchain_entry(int *rctx); > diff --git a/kernel/bpf/stackmap.c b/kernel/bpf/stackmap.c > index ec3a57a5fba1..ee9701337912 100644 > --- a/kernel/bpf/stackmap.c > +++ b/kernel/bpf/stackmap.c > @@ -314,8 +314,7 @@ BPF_CALL_3(bpf_get_stackid, struct pt_regs *, regs, struct bpf_map *, map, > if (max_depth > sysctl_perf_event_max_stack) > max_depth = sysctl_perf_event_max_stack; > > - trace = get_perf_callchain(regs, kernel, user, max_depth, > - false, false); > + trace = get_perf_callchain(regs, kernel, user, max_depth, false); > > if (unlikely(!trace)) > /* couldn't fetch the stack trace */ > @@ -430,10 +429,8 @@ static long __bpf_get_stack(struct pt_regs *regs, struct task_struct *task, > if (task && user && !user_mode(regs)) > goto err_fault; > > - 
/* get_perf_callchain does not support crosstask user stack walking > - * but returns an empty stack instead of NULL. > - */ > - if (crosstask && user) { > + /* get_perf_callchain() does not support crosstask stack walking */ > + if (crosstask) { crosstask stack trace is supported for kernel stack traces (see get_callchain_entry_for_task() call), so this is breaking that case > err = -EOPNOTSUPP; > goto clear; > } > @@ -451,8 +448,7 @@ static long __bpf_get_stack(struct pt_regs *regs, struct task_struct *task, > else if (kernel && task) > trace = get_callchain_entry_for_task(task, max_depth); > else > - trace = get_perf_callchain(regs, kernel, user, max_depth, > - crosstask, false); > + trace = get_perf_callchain(regs, kernel, user, max_depth,false); nit: missing space > > if (unlikely(!trace) || trace->nr < skip) { > if (may_fault) > diff --git a/kernel/events/callchain.c b/kernel/events/callchain.c > index 83834203e144..655fb25a725b 100644 > --- a/kernel/events/callchain.c > +++ b/kernel/events/callchain.c [...]
On Fri, Jan 24, 2025 at 10:13:23AM -0800, Andrii Nakryiko wrote: > On Tue, Jan 21, 2025 at 6:32 PM Josh Poimboeuf <jpoimboe@kernel.org> wrote: > > @@ -430,10 +429,8 @@ static long __bpf_get_stack(struct pt_regs *regs, struct task_struct *task, > > if (task && user && !user_mode(regs)) > > goto err_fault; > > > > - /* get_perf_callchain does not support crosstask user stack walking > > - * but returns an empty stack instead of NULL. > > - */ > > - if (crosstask && user) { > > + /* get_perf_callchain() does not support crosstask stack walking */ > > + if (crosstask) { > > crosstask stack trace is supported for kernel stack traces (see > get_callchain_entry_for_task() call), so this is breaking that case Oh I see, thanks. BTW, that seems dubious, does it do anything to ensure the task isn't running? Otherwise the unwind is going to be a wild ride.
On Fri, Jan 24, 2025 at 2:00 PM Josh Poimboeuf <jpoimboe@kernel.org> wrote: > > On Fri, Jan 24, 2025 at 10:13:23AM -0800, Andrii Nakryiko wrote: > > On Tue, Jan 21, 2025 at 6:32 PM Josh Poimboeuf <jpoimboe@kernel.org> wrote: > > > @@ -430,10 +429,8 @@ static long __bpf_get_stack(struct pt_regs *regs, struct task_struct *task, > > > if (task && user && !user_mode(regs)) > > > goto err_fault; > > > > > > - /* get_perf_callchain does not support crosstask user stack walking > > > - * but returns an empty stack instead of NULL. > > > - */ > > > - if (crosstask && user) { > > > + /* get_perf_callchain() does not support crosstask stack walking */ > > > + if (crosstask) { > > > > crosstask stack trace is supported for kernel stack traces (see > > get_callchain_entry_for_task() call), so this is breaking that case > > Oh I see, thanks. > > BTW, that seems dubious, does it do anything to ensure the task isn't > running? Otherwise the unwind is going to be a wild ride. Yeah, I think it's very speculative and doesn't pause the task in any way (just makes sure it doesn't go away). We just rely on stack_trace_save_tsk() -> arch_stack_walk(), which just optimistically tries to unwind, it seems. It's still useful and if the user is prepared to handle a potentially garbage stack trace, why not. People do a similar thing for user-space stack traces (with custom BPF code), and it's very useful (even if not "reliable" by any means). > > -- > Josh
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 4c8ff7258c6a..1563dc2cd979 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -1590,7 +1590,7 @@ extern void perf_callchain_user(struct perf_callchain_entry_ctx *entry, struct p extern void perf_callchain_kernel(struct perf_callchain_entry_ctx *entry, struct pt_regs *regs); extern struct perf_callchain_entry * get_perf_callchain(struct pt_regs *regs, bool kernel, bool user, - u32 max_stack, bool crosstask, bool add_mark); + u32 max_stack, bool add_mark); extern int get_callchain_buffers(int max_stack); extern void put_callchain_buffers(void); extern struct perf_callchain_entry *get_callchain_entry(int *rctx); diff --git a/kernel/bpf/stackmap.c b/kernel/bpf/stackmap.c index ec3a57a5fba1..ee9701337912 100644 --- a/kernel/bpf/stackmap.c +++ b/kernel/bpf/stackmap.c @@ -314,8 +314,7 @@ BPF_CALL_3(bpf_get_stackid, struct pt_regs *, regs, struct bpf_map *, map, if (max_depth > sysctl_perf_event_max_stack) max_depth = sysctl_perf_event_max_stack; - trace = get_perf_callchain(regs, kernel, user, max_depth, - false, false); + trace = get_perf_callchain(regs, kernel, user, max_depth, false); if (unlikely(!trace)) /* couldn't fetch the stack trace */ @@ -430,10 +429,8 @@ static long __bpf_get_stack(struct pt_regs *regs, struct task_struct *task, if (task && user && !user_mode(regs)) goto err_fault; - /* get_perf_callchain does not support crosstask user stack walking - * but returns an empty stack instead of NULL. 
- */ - if (crosstask && user) { + /* get_perf_callchain() does not support crosstask stack walking */ + if (crosstask) { err = -EOPNOTSUPP; goto clear; } @@ -451,8 +448,7 @@ static long __bpf_get_stack(struct pt_regs *regs, struct task_struct *task, else if (kernel && task) trace = get_callchain_entry_for_task(task, max_depth); else - trace = get_perf_callchain(regs, kernel, user, max_depth, - crosstask, false); + trace = get_perf_callchain(regs, kernel, user, max_depth, false); if (unlikely(!trace) || trace->nr < skip) { if (may_fault) diff --git a/kernel/events/callchain.c b/kernel/events/callchain.c index 83834203e144..655fb25a725b 100644 --- a/kernel/events/callchain.c +++ b/kernel/events/callchain.c @@ -217,7 +217,7 @@ static void fixup_uretprobe_trampoline_entries(struct perf_callchain_entry *entr struct perf_callchain_entry * get_perf_callchain(struct pt_regs *regs, bool kernel, bool user, - u32 max_stack, bool crosstask, bool add_mark) + u32 max_stack, bool add_mark) { struct perf_callchain_entry *entry; struct perf_callchain_entry_ctx ctx; @@ -248,9 +248,6 @@ get_perf_callchain(struct pt_regs *regs, bool kernel, bool user, } if (regs) { - if (crosstask) - goto exit_put; - if (add_mark) perf_callchain_store_context(&ctx, PERF_CONTEXT_USER); @@ -260,7 +257,6 @@ get_perf_callchain(struct pt_regs *regs, bool kernel, bool user, } } -exit_put: put_callchain_entry(rctx); return entry; diff --git a/kernel/events/core.c b/kernel/events/core.c index ebe457bacf96..99f0f28feeb5 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -7793,16 +7793,17 @@ perf_callchain(struct perf_event *event, struct pt_regs *regs) { bool kernel = !event->attr.exclude_callchain_kernel; bool user = !event->attr.exclude_callchain_user; - /* Disallow cross-task user callchains. 
*/ - bool crosstask = event->ctx->task && event->ctx->task != current; const u32 max_stack = event->attr.sample_max_stack; struct perf_callchain_entry *callchain; if (!kernel && !user) return &__empty_callchain; - callchain = get_perf_callchain(regs, kernel, user, - max_stack, crosstask, true); + /* Disallow cross-task callchains. */ + if (event->ctx->task && event->ctx->task != current) + return &__empty_callchain; + + callchain = get_perf_callchain(regs, kernel, user, max_stack, true); return callchain ?: &__empty_callchain; }