| Message ID | 20220518224725.742882-5-namhyung@kernel.org (mailing list archive) |
|---|---|
| State | RFC |
| Delegated to: | BPF |
| Series | perf record: Implement off-cpu profiling with BPF (v3) |
| Context | Check | Description |
|---|---|---|
| bpf/vmtest-bpf-next-VM_Test-1 | success | Logs for Kernel LATEST on ubuntu-latest with gcc |
| bpf/vmtest-bpf-next-VM_Test-2 | success | Logs for Kernel LATEST on ubuntu-latest with llvm-15 |
| bpf/vmtest-bpf-next-VM_Test-3 | fail | Logs for Kernel LATEST on z15 with gcc |
| bpf/vmtest-bpf-next-PR | fail | PR summary |
| netdev/tree_selection | success | Not a local patch |
On Wed, May 18, 2022 at 3:47 PM Namhyung Kim <namhyung@kernel.org> wrote:
>
> Recently sched_switch tracepoint added a new argument for prev_state,
> but it's hard to handle the change in a BPF program. Instead, we can
> check the function prototype in BTF before loading the program.
>
> Signed-off-by: Namhyung Kim <namhyung@kernel.org>

Acked-by: Ian Rogers <irogers@google.com>

Thanks,
Ian
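For context, the change being detected here: the v5.18 kernel appended a prev_state argument to the sched_switch tracepoint, after prev and next. The sketch below shows the two shapes as plain C declarations (written for this page as an illustration, not code from the thread); in a tp_btf program the tracepoint arguments arrive in declaration order in ctx[], which is why the BPF side reads prev from ctx[1], next from ctx[2], and prev_state from ctx[3] only when the 4-argument prototype is found in BTF.

```c
/* Illustration only (assumption: v5.18 appended prev_state last).
 * These declarations mirror the sched_switch tracepoint arguments
 * as a tp_btf program sees them via ctx[0..3]. */
#include <stdbool.h>

struct task_struct;

/* Pre-v5.18: three arguments, so the state must be derived from
 * prev->__state (the get_task_state() fallback in the patch). */
void sched_switch_args_old(bool preempt,
			   struct task_struct *prev,
			   struct task_struct *next);

/* v5.18+: prev_state appended, i.e. btf_vlen() of the func_proto
 * is 4 and the value is available directly as ctx[3]. */
void sched_switch_args_new(bool preempt,
			   struct task_struct *prev,
			   struct task_struct *next,
			   unsigned int prev_state);
```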
Recently sched_switch tracepoint added a new argument for prev_state,
but it's hard to handle the change in a BPF program. Instead, we can
check the function prototype in BTF before loading the program.

Signed-off-by: Namhyung Kim <namhyung@kernel.org>

```diff
---
 tools/perf/util/bpf_off_cpu.c          | 28 +++++++++++++++++++++
 tools/perf/util/bpf_skel/off_cpu.bpf.c | 35 ++++++++++++++++++--------
 2 files changed, 52 insertions(+), 11 deletions(-)

diff --git a/tools/perf/util/bpf_off_cpu.c b/tools/perf/util/bpf_off_cpu.c
index b5e2d038da50..874856c55101 100644
--- a/tools/perf/util/bpf_off_cpu.c
+++ b/tools/perf/util/bpf_off_cpu.c
@@ -89,6 +89,33 @@ static void off_cpu_finish(void *arg __maybe_unused)
 	off_cpu_bpf__destroy(skel);
 }
 
+/* v5.18 kernel added prev_state arg, so it needs to check the signature */
+static void check_sched_switch_args(void)
+{
+	const struct btf *btf = bpf_object__btf(skel->obj);
+	const struct btf_type *t1, *t2, *t3;
+	u32 type_id;
+
+	type_id = btf__find_by_name_kind(btf, "bpf_trace_sched_switch",
+					 BTF_KIND_TYPEDEF);
+	if ((s32)type_id < 0)
+		return;
+
+	t1 = btf__type_by_id(btf, type_id);
+	if (t1 == NULL)
+		return;
+
+	t2 = btf__type_by_id(btf, t1->type);
+	if (t2 == NULL || !btf_is_ptr(t2))
+		return;
+
+	t3 = btf__type_by_id(btf, t2->type);
+	if (t3 && btf_is_func_proto(t3) && btf_vlen(t3) == 4) {
+		/* new format: pass prev_state as 4th arg */
+		skel->rodata->has_prev_state = true;
+	}
+}
+
 int off_cpu_prepare(struct evlist *evlist, struct target *target)
 {
 	int err, fd, i;
@@ -117,6 +144,7 @@ int off_cpu_prepare(struct evlist *evlist, struct target *target)
 	}
 
 	set_max_rlimit();
+	check_sched_switch_args();
 
 	err = off_cpu_bpf__load(skel);
 	if (err) {
diff --git a/tools/perf/util/bpf_skel/off_cpu.bpf.c b/tools/perf/util/bpf_skel/off_cpu.bpf.c
index 78cdcc8ff863..986d7db6e75d 100644
--- a/tools/perf/util/bpf_skel/off_cpu.bpf.c
+++ b/tools/perf/util/bpf_skel/off_cpu.bpf.c
@@ -72,6 +72,8 @@ int enabled = 0;
 int has_cpu = 0;
 int has_task = 0;
 
+const volatile bool has_prev_state = false;
+
 /*
  * Old kernel used to call it task_struct->state and now it's '__state'.
  * Use BPF CO-RE "ignored suffix rule" to deal with it like below:
@@ -121,22 +123,13 @@ static inline int can_record(struct task_struct *t, int state)
 	return 1;
 }
 
-SEC("tp_btf/sched_switch")
-int on_switch(u64 *ctx)
+static int off_cpu_stat(u64 *ctx, struct task_struct *prev,
+			struct task_struct *next, int state)
 {
 	__u64 ts;
-	int state;
 	__u32 stack_id;
-	struct task_struct *prev, *next;
 	struct tstamp_data *pelem;
 
-	if (!enabled)
-		return 0;
-
-	prev = (struct task_struct *)ctx[1];
-	next = (struct task_struct *)ctx[2];
-	state = get_task_state(prev);
-
 	ts = bpf_ktime_get_ns();
 
 	if (!can_record(prev, state))
@@ -180,4 +173,24 @@ int on_switch(u64 *ctx)
 	return 0;
 }
 
+SEC("tp_btf/sched_switch")
+int on_switch(u64 *ctx)
+{
+	struct task_struct *prev, *next;
+	int prev_state;
+
+	if (!enabled)
+		return 0;
+
+	prev = (struct task_struct *)ctx[1];
+	next = (struct task_struct *)ctx[2];
+
+	if (has_prev_state)
+		prev_state = (int)ctx[3];
+	else
+		prev_state = get_task_state(prev);
+
+	return off_cpu_stat(ctx, prev, next, prev_state);
+}
+
 char LICENSE[] SEC("license") = "Dual BSD/GPL";
```
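A side note on the mechanism this relies on: because has_prev_state is declared const volatile, it is placed in the skeleton's .rodata section. User space may write it only between open and load; once the program is loaded, the verifier treats the value as a known constant and can prune the untaken branch in on_switch() as dead code. Below is a minimal sketch of that open/set/load flow, assuming the usual bpftool-generated skeleton API for off_cpu.bpf.c (the header name off_cpu.skel.h and the off_cpu_bpf__open() function are the standard generated names, shown as an illustration rather than quoted from the patch):

```c
/* Sketch of the const-volatile .rodata pattern (illustration; assumes
 * the bpftool-generated skeleton off_cpu.skel.h for off_cpu.bpf.c). */
#include <stdbool.h>
#include <stddef.h>

#include "off_cpu.skel.h"

static struct off_cpu_bpf *open_and_load_off_cpu(bool kernel_has_prev_state)
{
	struct off_cpu_bpf *skel;

	skel = off_cpu_bpf__open();	/* open only: .rodata still writable */
	if (skel == NULL)
		return NULL;

	/* Must happen before load: the verifier then sees a constant
	 * and can dead-code-eliminate the branch that is not taken. */
	skel->rodata->has_prev_state = kernel_has_prev_state;

	if (off_cpu_bpf__load(skel)) {	/* .rodata is frozen at load time */
		off_cpu_bpf__destroy(skel);
		return NULL;
	}

	return skel;	/* caller attaches, then destroys when done */
}
```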