
[v4,bpf-next,2/3] bpf: introduce helper bpf_get_branch_snapshot

Message ID 20210901003517.3953145-3-songliubraving@fb.com (mailing list archive)
State Superseded
Delegated to: BPF
Series bpf: introduce bpf_get_branch_snapshot

Checks

Context Check Description
netdev/cover_letter success
netdev/fixes_present success
netdev/patch_count success
netdev/tree_selection success Clearly marked for bpf-next
netdev/subject_prefix success
netdev/cc_maintainers warning 15 maintainers not CCed: joe@cilium.io andrii@kernel.org daniel@iogearbox.net kpsingh@kernel.org revest@chromium.org netdev@vger.kernel.org yhs@fb.com brouer@redhat.com haoluo@google.com jackmanb@google.com ast@kernel.org quentin@isovalent.com rostedt@goodmis.org kafai@fb.com john.fastabend@gmail.com
netdev/source_inline success Was 0 now: 0
netdev/verify_signedoff success
netdev/module_param success Was 0 now: 0
netdev/build_32bit success Errors and warnings before: 11884 this patch: 11884
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/verify_fixes success
netdev/checkpatch warning WARNING: line length of 89 exceeds 80 columns
netdev/build_allmodconfig_warn success Errors and warnings before: 11408 this patch: 11408
netdev/header_inline success
bpf/vmtest-bpf-next fail VM_Test
bpf/vmtest-bpf-next-PR fail PR summary

Commit Message

Song Liu Sept. 1, 2021, 12:35 a.m. UTC
Introduce bpf_get_branch_snapshot(), which allows a tracing program to
get the branch trace from hardware (e.g. Intel LBR). To use the feature,
the user needs to create a perf_event with proper branch_record filtering
on each cpu, and then call bpf_get_branch_snapshot in the BPF program.
On Intel CPUs, the VLBR event (raw event 0x1b00) can be used for this.

Signed-off-by: Song Liu <songliubraving@fb.com>
---
 include/uapi/linux/bpf.h       | 22 +++++++++++++++++++
 kernel/bpf/trampoline.c        |  3 ++-
 kernel/trace/bpf_trace.c       | 40 ++++++++++++++++++++++++++++++++++
 tools/include/uapi/linux/bpf.h | 22 +++++++++++++++++++
 4 files changed, 86 insertions(+), 1 deletion(-)
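
For readers trying out the series, a minimal usage sketch follows. It is not
part of the patch: the attach point, the 16-entry buffer, and the
branch_sample_type flags are illustrative assumptions, and loading/attaching
the program (e.g. via a libbpf skeleton) is omitted.

/* BPF program side (sketch) */
#include "vmlinux.h"                    /* struct perf_branch_entry */
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>

char LICENSE[] SEC("license") = "GPL";  /* the helper is gpl_only */

struct perf_branch_entry entries[16] = {};  /* 16 entries is an arbitrary choice */
long nr_entries = 0;

SEC("fexit/bpf_fentry_test1")           /* attach point is only an example */
int BPF_PROG(snapshot_branches)
{
	long nbytes;

	/* size must be a multiple of sizeof(struct perf_branch_entry) */
	nbytes = bpf_get_branch_snapshot(entries, sizeof(entries), 0);
	if (nbytes < 0)
		return 0;               /* e.g. -ENOENT when no records are available */

	/* the snapshot also contains branches taken after the trigger point
	 * (trampoline, helper call); the consumer filters those out
	 */
	nr_entries = nbytes / sizeof(struct perf_branch_entry);
	return 0;
}

/* Userspace side (sketch): open one counting perf event per CPU so the LBR
 * facility is armed when the traced function fires. 0x1b00 is the Intel
 * "VLBR" raw event mentioned in the commit message; the branch filter flags
 * below are illustrative.
 */
#include <linux/perf_event.h>
#include <sys/syscall.h>
#include <unistd.h>

static void open_vlbr_events(int nr_cpus)
{
	struct perf_event_attr attr = {
		.type = PERF_TYPE_RAW,
		.size = sizeof(attr),
		.config = 0x1b00,
		.sample_type = PERF_SAMPLE_BRANCH_STACK,
		.branch_sample_type = PERF_SAMPLE_BRANCH_KERNEL |
				      PERF_SAMPLE_BRANCH_ANY_RETURN,
	};
	int cpu;

	for (cpu = 0; cpu < nr_cpus; cpu++)
		/* error handling omitted; keep the fds open for as long as
		 * snapshots are needed
		 */
		syscall(__NR_perf_event_open, &attr, -1 /* any pid */, cpu,
			-1 /* no group */, 0);
}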

Comments

Andrii Nakryiko Sept. 1, 2021, 4:02 a.m. UTC | #1
On Tue, Aug 31, 2021 at 7:01 PM Song Liu <songliubraving@fb.com> wrote:
>
> Introduce bpf_get_branch_snapshot(), which allows tracing pogram to get
> branch trace from hardware (e.g. Intel LBR). To use the feature, the
> user need to create perf_event with proper branch_record filtering
> on each cpu, and then calls bpf_get_branch_snapshot in the bpf function.
> On Intel CPUs, VLBR event (raw event 0x1b00) can be use for this.
>
> Signed-off-by: Song Liu <songliubraving@fb.com>
> ---
>  include/uapi/linux/bpf.h       | 22 +++++++++++++++++++
>  kernel/bpf/trampoline.c        |  3 ++-
>  kernel/trace/bpf_trace.c       | 40 ++++++++++++++++++++++++++++++++++
>  tools/include/uapi/linux/bpf.h | 22 +++++++++++++++++++
>  4 files changed, 86 insertions(+), 1 deletion(-)
>
> diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
> index 791f31dd0abee..c986e6fad5bc0 100644
> --- a/include/uapi/linux/bpf.h
> +++ b/include/uapi/linux/bpf.h
> @@ -4877,6 +4877,27 @@ union bpf_attr {
>   *             Get the struct pt_regs associated with **task**.
>   *     Return
>   *             A pointer to struct pt_regs.
> + *
> + * long bpf_get_branch_snapshot(void *entries, u32 size, u64 flags)
> + *     Description
> + *             Get branch trace from hardware engines like Intel LBR. The
> + *             branch trace is taken soon after the trigger point of the
> + *             BPF program, so it may contain some entries after the
> + *             trigger point. The user need to filter these entries
> + *             accordingly.
> + *
> + *             The data is stored as struct perf_branch_entry into output
> + *             buffer *entries*. *size* is the size of *entries* in bytes.
> + *             *flags* is reserved for now and must be zero.
> + *
> + *     Return
> + *             On success, number of bytes written to *buf*. On error, a
> + *             negative value.
> + *
> + *             **-EINVAL** if arguments invalid or **size** not a multiple
> + *             of **sizeof**\ (**struct perf_branch_entry**\ ).
> + *
> + *             **-ENOENT** if architecture does not support branch records.
>   */
>  #define __BPF_FUNC_MAPPER(FN)          \
>         FN(unspec),                     \
> @@ -5055,6 +5076,7 @@ union bpf_attr {
>         FN(get_func_ip),                \
>         FN(get_attach_cookie),          \
>         FN(task_pt_regs),               \
> +       FN(get_branch_snapshot),        \
>         /* */
>
>  /* integer value in 'imm' field of BPF_CALL instruction selects which helper
> diff --git a/kernel/bpf/trampoline.c b/kernel/bpf/trampoline.c
> index fe1e857324e66..39eaaff81953d 100644
> --- a/kernel/bpf/trampoline.c
> +++ b/kernel/bpf/trampoline.c
> @@ -10,6 +10,7 @@
>  #include <linux/rcupdate_trace.h>
>  #include <linux/rcupdate_wait.h>
>  #include <linux/module.h>
> +#include <linux/static_call.h>
>
>  /* dummy _ops. The verifier will operate on target program's ops. */
>  const struct bpf_verifier_ops bpf_extension_verifier_ops = {
> @@ -526,7 +527,7 @@ void bpf_trampoline_put(struct bpf_trampoline *tr)
>  }
>
>  #define NO_START_TIME 1
> -static u64 notrace bpf_prog_start_time(void)
> +static __always_inline u64 notrace bpf_prog_start_time(void)
>  {
>         u64 start = NO_START_TIME;
>
> diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
> index 8e2eb950aa829..a8ec3634a3329 100644
> --- a/kernel/trace/bpf_trace.c
> +++ b/kernel/trace/bpf_trace.c
> @@ -1017,6 +1017,44 @@ static const struct bpf_func_proto bpf_get_attach_cookie_proto_pe = {
>         .arg1_type      = ARG_PTR_TO_CTX,
>  };
>
> +static DEFINE_PER_CPU(struct perf_branch_snapshot, bpf_perf_branch_snapshot);
> +
> +BPF_CALL_3(bpf_get_branch_snapshot, void *, buf, u32, size, u64, flags)
> +{
> +#ifndef CONFIG_X86
> +       return -ENOENT;

nit: -EOPNOTSUPP probably makes more sense for this?

> +#else
> +       static const u32 br_entry_size = sizeof(struct perf_branch_entry);
> +       u32 to_copy;
> +
> +       if (unlikely(flags))
> +               return -EINVAL;
> +
> +       if (!buf || (size % br_entry_size != 0))
> +               return -EINVAL;
> +
> +       static_call(perf_snapshot_branch_stack)(this_cpu_ptr(&bpf_perf_branch_snapshot));

First, you have four this_cpu_ptr(&bpf_perf_branch_snapshot)
invocations in this function, probably cleaner to store the pointer in
local variable?

But second, this still has the reentrancy problem, right? And further,
we copy the same LBR data twice (to per-cpu buffer and into
user-provided destination).

What if we change perf_snapshot_branch_stack signature to this:

int perf_snapshot_branch_stack(struct perf_branch_entry *entries, int
max_nr_entries);

with the semantics that it will copy only min(max_nr_entreis,
PERF_MAX_BRANCH_RECORDS) * sizeof(struct perf_branch_entry) bytes.
That way we can copy directly into a user-provided buffer with no
per-cpu storage. Of course, perf_snapshot_branch_stack will return
number of entries copied, either as return result, or if static calls
don't support that, as another int *nr_entries output argument.


> +
> +       if (this_cpu_ptr(&bpf_perf_branch_snapshot)->nr == 0)
> +               return -ENOENT;
> +
> +       to_copy = this_cpu_ptr(&bpf_perf_branch_snapshot)->nr *
> +               sizeof(struct perf_branch_entry);
> +       to_copy = min_t(u32, size, to_copy);
> +       memcpy(buf, this_cpu_ptr(&bpf_perf_branch_snapshot)->entries, to_copy);
> +
> +       return to_copy;
> +#endif
> +}
> +

[...]
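
For illustration, the direction suggested above would let the helper copy
straight into the BPF-supplied buffer. A rough sketch of what the helper body
could look like under that interface (the exact signature and return-value
convention of the static call are assumptions, not something settled in this
thread):

BPF_CALL_3(bpf_get_branch_snapshot, void *, buf, u32, size, u64, flags)
{
	static const u32 br_entry_size = sizeof(struct perf_branch_entry);
	u32 max_nr;
	int nr;

	if (unlikely(flags))
		return -EINVAL;

	if (!buf || (size % br_entry_size != 0))
		return -EINVAL;

	max_nr = size / br_entry_size;
	/* assumed new signature: fill the caller's buffer with at most
	 * max_nr entries and return how many entries were captured
	 */
	nr = static_call(perf_snapshot_branch_stack)(buf, max_nr);
	if (nr <= 0)
		return -ENOENT;

	return nr * br_entry_size;
}

This would also sidestep the reentrancy concern raised above, since there is
no shared per-CPU scratch buffer left to race on.
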
Song Liu Sept. 1, 2021, 3:41 p.m. UTC | #2
> On Aug 31, 2021, at 9:02 PM, Andrii Nakryiko <andrii.nakryiko@gmail.com> wrote:
> 
> On Tue, Aug 31, 2021 at 7:01 PM Song Liu <songliubraving@fb.com> wrote:
>> 
>> Introduce bpf_get_branch_snapshot(), which allows tracing pogram to get
>> branch trace from hardware (e.g. Intel LBR). To use the feature, the
>> user need to create perf_event with proper branch_record filtering
>> on each cpu, and then calls bpf_get_branch_snapshot in the bpf function.
>> On Intel CPUs, VLBR event (raw event 0x1b00) can be use for this.
>> 
>> Signed-off-by: Song Liu <songliubraving@fb.com>
[...]
>> +BPF_CALL_3(bpf_get_branch_snapshot, void *, buf, u32, size, u64, flags)
>> +{
>> +#ifndef CONFIG_X86
>> +       return -ENOENT;
> 
> nit: -EOPNOTSUPP probably makes more sense for this?

I had -EOPNOTSUPP in earlier version. But bpf_read_branch_records uses
-ENOENT, so I updated here in v4. I guess -ENOENT also makes sense? I 
won't insist if you think -EOPNOTSUPP is better.  

> 
>> +#else
>> +       static const u32 br_entry_size = sizeof(struct perf_branch_entry);
>> +       u32 to_copy;
>> +
>> +       if (unlikely(flags))
>> +               return -EINVAL;
>> +
>> +       if (!buf || (size % br_entry_size != 0))
>> +               return -EINVAL;
>> +
>> +       static_call(perf_snapshot_branch_stack)(this_cpu_ptr(&bpf_perf_branch_snapshot));
> 
> First, you have four this_cpu_ptr(&bpf_perf_branch_snapshot)
> invocations in this function, probably cleaner to store the pointer in
> local variable?
> 
> But second, this still has the reentrancy problem, right? And further,
> we copy the same LBR data twice (to per-cpu buffer and into
> user-provided destination).
> 
> What if we change perf_snapshot_branch_stack signature to this:
> 
> int perf_snapshot_branch_stack(struct perf_branch_entry *entries, int
> max_nr_entries);
> 
> with the semantics that it will copy only min(max_nr_entreis,
> PERF_MAX_BRANCH_RECORDS) * sizeof(struct perf_branch_entry) bytes.
> That way we can copy directly into a user-provided buffer with no
> per-cpu storage. Of course, perf_snapshot_branch_stack will return
> number of entries copied, either as return result, or if static calls
> don't support that, as another int *nr_entries output argument.

I like this idea. Once we get feedback from Peter, I will change this 
in v5. 

Thanks,
Song
Andrii Nakryiko Sept. 1, 2021, 7 p.m. UTC | #3
On Wed, Sep 1, 2021 at 8:41 AM Song Liu <songliubraving@fb.com> wrote:
>
>
>
> > On Aug 31, 2021, at 9:02 PM, Andrii Nakryiko <andrii.nakryiko@gmail.com> wrote:
> >
> > On Tue, Aug 31, 2021 at 7:01 PM Song Liu <songliubraving@fb.com> wrote:
> >>
[...]
> >> +BPF_CALL_3(bpf_get_branch_snapshot, void *, buf, u32, size, u64, flags)
> >> +{
> >> +#ifndef CONFIG_X86
> >> +       return -ENOENT;
> >
> > nit: -EOPNOTSUPP probably makes more sense for this?
>
> I had -EOPNOTSUPP in earlier version. But bpf_read_branch_records uses
> -ENOENT, so I updated here in v4. I guess -ENOENT also makes sense? I
> won't insist if you think -EOPNOTSUPP is better.

Hm... ok, I guess consistency takes priority, let's keep -ENOENT then.

>
> >
> >> +#else
> >> +       static const u32 br_entry_size = sizeof(struct perf_branch_entry);
> >> +       u32 to_copy;
> >> +
> >> +       if (unlikely(flags))
> >> +               return -EINVAL;
> >> +
> >> +       if (!buf || (size % br_entry_size != 0))
> >> +               return -EINVAL;
> >> +
> >> +       static_call(perf_snapshot_branch_stack)(this_cpu_ptr(&bpf_perf_branch_snapshot));
> >
> > First, you have four this_cpu_ptr(&bpf_perf_branch_snapshot)
> > invocations in this function, probably cleaner to store the pointer in
> > local variable?
> >
> > But second, this still has the reentrancy problem, right? And further,
> > we copy the same LBR data twice (to per-cpu buffer and into
> > user-provided destination).
> >
> > What if we change perf_snapshot_branch_stack signature to this:
> >
> > int perf_snapshot_branch_stack(struct perf_branch_entry *entries, int
> > max_nr_entries);
> >
> > with the semantics that it will copy only min(max_nr_entreis,
> > PERF_MAX_BRANCH_RECORDS) * sizeof(struct perf_branch_entry) bytes.
> > That way we can copy directly into a user-provided buffer with no
> > per-cpu storage. Of course, perf_snapshot_branch_stack will return
> > number of entries copied, either as return result, or if static calls
> > don't support that, as another int *nr_entries output argument.
>
> I like this idea. Once we get feedback from Peter, I will change this
> in v5.

Sounds good, thanks!

>
> Thanks,
> Song
>

Patch

diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 791f31dd0abee..c986e6fad5bc0 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -4877,6 +4877,27 @@  union bpf_attr {
  *		Get the struct pt_regs associated with **task**.
  *	Return
  *		A pointer to struct pt_regs.
+ *
+ * long bpf_get_branch_snapshot(void *entries, u32 size, u64 flags)
+ *	Description
+ *		Get branch trace from hardware engines like Intel LBR. The
+ *		branch trace is taken soon after the trigger point of the
+ *		BPF program, so it may contain some entries after the
+ *		trigger point. The user need to filter these entries
+ *		accordingly.
+ *
+ *		The data is stored as struct perf_branch_entry into output
+ *		buffer *entries*. *size* is the size of *entries* in bytes.
+ *		*flags* is reserved for now and must be zero.
+ *
+ *	Return
+ *		On success, number of bytes written to *buf*. On error, a
+ *		negative value.
+ *
+ *		**-EINVAL** if arguments invalid or **size** not a multiple
+ *		of **sizeof**\ (**struct perf_branch_entry**\ ).
+ *
+ *		**-ENOENT** if architecture does not support branch records.
  */
 #define __BPF_FUNC_MAPPER(FN)		\
 	FN(unspec),			\
@@ -5055,6 +5076,7 @@  union bpf_attr {
 	FN(get_func_ip),		\
 	FN(get_attach_cookie),		\
 	FN(task_pt_regs),		\
+	FN(get_branch_snapshot),	\
 	/* */
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
diff --git a/kernel/bpf/trampoline.c b/kernel/bpf/trampoline.c
index fe1e857324e66..39eaaff81953d 100644
--- a/kernel/bpf/trampoline.c
+++ b/kernel/bpf/trampoline.c
@@ -10,6 +10,7 @@ 
 #include <linux/rcupdate_trace.h>
 #include <linux/rcupdate_wait.h>
 #include <linux/module.h>
+#include <linux/static_call.h>
 
 /* dummy _ops. The verifier will operate on target program's ops. */
 const struct bpf_verifier_ops bpf_extension_verifier_ops = {
@@ -526,7 +527,7 @@  void bpf_trampoline_put(struct bpf_trampoline *tr)
 }
 
 #define NO_START_TIME 1
-static u64 notrace bpf_prog_start_time(void)
+static __always_inline u64 notrace bpf_prog_start_time(void)
 {
 	u64 start = NO_START_TIME;
 
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index 8e2eb950aa829..a8ec3634a3329 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -1017,6 +1017,44 @@  static const struct bpf_func_proto bpf_get_attach_cookie_proto_pe = {
 	.arg1_type	= ARG_PTR_TO_CTX,
 };
 
+static DEFINE_PER_CPU(struct perf_branch_snapshot, bpf_perf_branch_snapshot);
+
+BPF_CALL_3(bpf_get_branch_snapshot, void *, buf, u32, size, u64, flags)
+{
+#ifndef CONFIG_X86
+	return -ENOENT;
+#else
+	static const u32 br_entry_size = sizeof(struct perf_branch_entry);
+	u32 to_copy;
+
+	if (unlikely(flags))
+		return -EINVAL;
+
+	if (!buf || (size % br_entry_size != 0))
+		return -EINVAL;
+
+	static_call(perf_snapshot_branch_stack)(this_cpu_ptr(&bpf_perf_branch_snapshot));
+
+	if (this_cpu_ptr(&bpf_perf_branch_snapshot)->nr == 0)
+		return -ENOENT;
+
+	to_copy = this_cpu_ptr(&bpf_perf_branch_snapshot)->nr *
+		sizeof(struct perf_branch_entry);
+	to_copy = min_t(u32, size, to_copy);
+	memcpy(buf, this_cpu_ptr(&bpf_perf_branch_snapshot)->entries, to_copy);
+
+	return to_copy;
+#endif
+}
+
+static const struct bpf_func_proto bpf_get_branch_snapshot_proto = {
+	.func		= bpf_get_branch_snapshot,
+	.gpl_only	= true,
+	.ret_type	= RET_INTEGER,
+	.arg1_type	= ARG_PTR_TO_UNINIT_MEM,
+	.arg2_type	= ARG_CONST_SIZE_OR_ZERO,
+};
+
 static const struct bpf_func_proto *
 bpf_tracing_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
 {
@@ -1132,6 +1170,8 @@  bpf_tracing_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
 		return &bpf_snprintf_proto;
 	case BPF_FUNC_get_func_ip:
 		return &bpf_get_func_ip_proto_tracing;
+	case BPF_FUNC_get_branch_snapshot:
+		return &bpf_get_branch_snapshot_proto;
 	default:
 		return bpf_base_func_proto(func_id);
 	}
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 791f31dd0abee..c986e6fad5bc0 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -4877,6 +4877,27 @@  union bpf_attr {
  *		Get the struct pt_regs associated with **task**.
  *	Return
  *		A pointer to struct pt_regs.
+ *
+ * long bpf_get_branch_snapshot(void *entries, u32 size, u64 flags)
+ *	Description
+ *		Get branch trace from hardware engines like Intel LBR. The
+ *		branch trace is taken soon after the trigger point of the
+ *		BPF program, so it may contain some entries after the
+ *		trigger point. The user need to filter these entries
+ *		accordingly.
+ *
+ *		The data is stored as struct perf_branch_entry into output
+ *		buffer *entries*. *size* is the size of *entries* in bytes.
+ *		*flags* is reserved for now and must be zero.
+ *
+ *	Return
+ *		On success, number of bytes written to *buf*. On error, a
+ *		negative value.
+ *
+ *		**-EINVAL** if arguments invalid or **size** not a multiple
+ *		of **sizeof**\ (**struct perf_branch_entry**\ ).
+ *
+ *		**-ENOENT** if architecture does not support branch records.
  */
 #define __BPF_FUNC_MAPPER(FN)		\
 	FN(unspec),			\
@@ -5055,6 +5076,7 @@  union bpf_attr {
 	FN(get_func_ip),		\
 	FN(get_attach_cookie),		\
 	FN(task_pt_regs),		\
+	FN(get_branch_snapshot),	\
 	/* */
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper