diff mbox series

[bpf-next] bpf: Cache the last valid build_id.

Message ID 20220223222002.1085114-1-haoluo@google.com (mailing list archive)
State Superseded
Delegated to: BPF
Headers show
Series [bpf-next] bpf: Cache the last valid build_id. | expand

Checks

Context Check Description
bpf/vmtest-bpf-next-PR pending PR summary
netdev/tree_selection success Clearly marked for bpf-next
netdev/fixes_present success Fixes tag not required for -next series
netdev/subject_prefix success Link
netdev/cover_letter success Single patches do not need cover letters
netdev/patch_count success Link
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 2 this patch: 2
netdev/cc_maintainers warning 5 maintainers not CCed: kpsingh@kernel.org john.fastabend@gmail.com kafai@fb.com yhs@fb.com netdev@vger.kernel.org
netdev/build_clang success Errors and warnings before: 18 this patch: 18
netdev/module_param success Was 0 now: 0
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn success Errors and warnings before: 7 this patch: 7
netdev/checkpatch success total: 0 errors, 0 warnings, 0 checks, 32 lines checked
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/source_inline success Was 0 now: 0
bpf/vmtest-bpf-next pending VM_Test

Commit Message

Hao Luo Feb. 23, 2022, 10:20 p.m. UTC
For binaries that are statically linked, consecutive stack frames are
likely to be in the same VMA and therefore have the same build id.
As an optimization for this case, we can cache the previous frame's
VMA, if the new frame has the same VMA as the previous one, reuse the
previous one's build id. We are holding the MM locks as reader across
the entire loop, so we don't need to worry about VMA going away.

Tested through "stacktrace_build_id" and "stacktrace_build_id_nmi" in
test_progs.

Suggested-by: Greg Thelen <gthelen@google.com>
Signed-off-by: Hao Luo <haoluo@google.com>
---
 kernel/bpf/stackmap.c | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)

Comments

Andrii Nakryiko Feb. 23, 2022, 10:52 p.m. UTC | #1
On Wed, Feb 23, 2022 at 2:20 PM Hao Luo <haoluo@google.com> wrote:
>
> For binaries that are statically linked, consecutive stack frames are
> likely to be in the same VMA and therefore have the same build id.
> As an optimization for this case, we can cache the previous frame's
> VMA, if the new frame has the same VMA as the previous one, reuse the
> previous one's build id. We are holding the MM locks as reader across
> the entire loop, so we don't need to worry about VMA going away.
>
> Tested through "stacktrace_build_id" and "stacktrace_build_id_nmi" in
> test_progs.
>
> Suggested-by: Greg Thelen <gthelen@google.com>
> Signed-off-by: Hao Luo <haoluo@google.com>
> ---
>  kernel/bpf/stackmap.c | 11 ++++++++++-
>  1 file changed, 10 insertions(+), 1 deletion(-)
>
> diff --git a/kernel/bpf/stackmap.c b/kernel/bpf/stackmap.c
> index 22c8ae94e4c1..280b9198af27 100644
> --- a/kernel/bpf/stackmap.c
> +++ b/kernel/bpf/stackmap.c
> @@ -132,7 +132,8 @@ static void stack_map_get_build_id_offset(struct bpf_stack_build_id *id_offs,
>         int i;
>         struct mmap_unlock_irq_work *work = NULL;
>         bool irq_work_busy = bpf_mmap_unlock_get_irq_work(&work);
> -       struct vm_area_struct *vma;
> +       struct vm_area_struct *vma, *prev_vma = NULL;
> +       const char *prev_build_id;
>
>         /* If the irq_work is in use, fall back to report ips. Same
>          * fallback is used for kernel stack (!user) on a stackmap with
> @@ -151,6 +152,11 @@ static void stack_map_get_build_id_offset(struct bpf_stack_build_id *id_offs,
>
>         for (i = 0; i < trace_nr; i++) {
>                 vma = find_vma(current->mm, ips[i]);

as a further optimization, shouldn't we first check if ips[i] is
within prev_vma and avoid rbtree walk altogether? Would this work:

if (prev_vma && range_in_vma(prev_vma, ips[i])) {
   /* reuse build_id */
}
vma = find_vma(current->mm, ips[i]);


?

> +               if (vma && vma == prev_vma) {
> +                       memcpy(id_offs[i].build_id, prev_build_id,
> +                              BUILD_ID_SIZE_MAX);
> +                       goto build_id_valid;
> +               }
>                 if (!vma || build_id_parse(vma, id_offs[i].build_id, NULL)) {
>                         /* per entry fall back to ips */
>                         id_offs[i].status = BPF_STACK_BUILD_ID_IP;
> @@ -158,9 +164,12 @@ static void stack_map_get_build_id_offset(struct bpf_stack_build_id *id_offs,
>                         memset(id_offs[i].build_id, 0, BUILD_ID_SIZE_MAX);
>                         continue;
>                 }
> +build_id_valid:
>                 id_offs[i].offset = (vma->vm_pgoff << PAGE_SHIFT) + ips[i]
>                         - vma->vm_start;
>                 id_offs[i].status = BPF_STACK_BUILD_ID_VALID;
> +               prev_vma = vma;
> +               prev_build_id = id_offs[i].build_id;
>         }
>         bpf_mmap_unlock_mm(work, current->mm);
>  }
> --
> 2.35.1.473.g83b2b277ed-goog
>
Greg Thelen Feb. 23, 2022, 11:16 p.m. UTC | #2
Andrii Nakryiko <andrii.nakryiko@gmail.com> wrote:

> On Wed, Feb 23, 2022 at 2:20 PM Hao Luo <haoluo@google.com> wrote:
>>
>> For binaries that are statically linked, consecutive stack frames are
>> likely to be in the same VMA and therefore have the same build id.
>> As an optimization for this case, we can cache the previous frame's
>> VMA, if the new frame has the same VMA as the previous one, reuse the
>> previous one's build id. We are holding the MM locks as reader across
>> the entire loop, so we don't need to worry about VMA going away.
>>
>> Tested through "stacktrace_build_id" and "stacktrace_build_id_nmi" in
>> test_progs.
>>
>> Suggested-by: Greg Thelen <gthelen@google.com>
>> Signed-off-by: Hao Luo <haoluo@google.com>
>> ---
>>  kernel/bpf/stackmap.c | 11 ++++++++++-
>>  1 file changed, 10 insertions(+), 1 deletion(-)
>>
>> diff --git a/kernel/bpf/stackmap.c b/kernel/bpf/stackmap.c
>> index 22c8ae94e4c1..280b9198af27 100644
>> --- a/kernel/bpf/stackmap.c
>> +++ b/kernel/bpf/stackmap.c
>> @@ -132,7 +132,8 @@ static void stack_map_get_build_id_offset(struct bpf_stack_build_id *id_offs,
>>         int i;
>>         struct mmap_unlock_irq_work *work = NULL;
>>         bool irq_work_busy = bpf_mmap_unlock_get_irq_work(&work);
>> -       struct vm_area_struct *vma;
>> +       struct vm_area_struct *vma, *prev_vma = NULL;
>> +       const char *prev_build_id;
>>
>>         /* If the irq_work is in use, fall back to report ips. Same
>>          * fallback is used for kernel stack (!user) on a stackmap with
>> @@ -151,6 +152,11 @@ static void stack_map_get_build_id_offset(struct bpf_stack_build_id *id_offs,
>>
>>         for (i = 0; i < trace_nr; i++) {
>>                 vma = find_vma(current->mm, ips[i]);
>
> as a further optimization, shouldn't we first check if ips[i] is
> within prev_vma and avoid rbtree walk altogether? Would this work:
>
> if (prev_vma && range_in_vma(prev_vma, ips[i])) {
>    /* reuse build_id */
> }
> vma = find_vma(current->mm, ips[i]);
>
>
> ?

Yes, that's a nice addition. Good idea.

>> +               if (vma && vma == prev_vma) {
>> +                       memcpy(id_offs[i].build_id, prev_build_id,
>> +                              BUILD_ID_SIZE_MAX);
>> +                       goto build_id_valid;
>> +               }
>>                 if (!vma || build_id_parse(vma, id_offs[i].build_id, NULL)) {
>>                         /* per entry fall back to ips */
>>                         id_offs[i].status = BPF_STACK_BUILD_ID_IP;
>> @@ -158,9 +164,12 @@ static void stack_map_get_build_id_offset(struct bpf_stack_build_id *id_offs,
>>                         memset(id_offs[i].build_id, 0, BUILD_ID_SIZE_MAX);
>>                         continue;
>>                 }
>> +build_id_valid:
>>                 id_offs[i].offset = (vma->vm_pgoff << PAGE_SHIFT) + ips[i]
>>                         - vma->vm_start;
>>                 id_offs[i].status = BPF_STACK_BUILD_ID_VALID;
>> +               prev_vma = vma;
>> +               prev_build_id = id_offs[i].build_id;
>>         }
>>         bpf_mmap_unlock_mm(work, current->mm);
>>  }
>> --
>> 2.35.1.473.g83b2b277ed-goog
>>
Hao Luo Feb. 23, 2022, 11:40 p.m. UTC | #3
On Wed, Feb 23, 2022 at 3:16 PM Greg Thelen <gthelen@google.com> wrote:
>
> Andrii Nakryiko <andrii.nakryiko@gmail.com> wrote:
>
> >
> > as a further optimization, shouldn't we first check if ips[i] is
> > within prev_vma and avoid rbtree walk altogether? Would this work:
> >
> > if (prev_vma && range_in_vma(prev_vma, ips[i])) {
> >    /* reuse build_id */
> > }
> > vma = find_vma(current->mm, ips[i]);
> >
> >
> > ?
>
> Yes, that's a nice addition. Good idea.

Yes, great point!

I noticed range_in_vma() already has a check on the null-ness of
prev_vma. I am going to send a v2.
diff mbox series

Patch

diff --git a/kernel/bpf/stackmap.c b/kernel/bpf/stackmap.c
index 22c8ae94e4c1..280b9198af27 100644
--- a/kernel/bpf/stackmap.c
+++ b/kernel/bpf/stackmap.c
@@ -132,7 +132,8 @@  static void stack_map_get_build_id_offset(struct bpf_stack_build_id *id_offs,
 	int i;
 	struct mmap_unlock_irq_work *work = NULL;
 	bool irq_work_busy = bpf_mmap_unlock_get_irq_work(&work);
-	struct vm_area_struct *vma;
+	struct vm_area_struct *vma, *prev_vma = NULL;
+	const char *prev_build_id;
 
 	/* If the irq_work is in use, fall back to report ips. Same
 	 * fallback is used for kernel stack (!user) on a stackmap with
@@ -151,6 +152,11 @@  static void stack_map_get_build_id_offset(struct bpf_stack_build_id *id_offs,
 
 	for (i = 0; i < trace_nr; i++) {
 		vma = find_vma(current->mm, ips[i]);
+		if (vma && vma == prev_vma) {
+			memcpy(id_offs[i].build_id, prev_build_id,
+			       BUILD_ID_SIZE_MAX);
+			goto build_id_valid;
+		}
 		if (!vma || build_id_parse(vma, id_offs[i].build_id, NULL)) {
 			/* per entry fall back to ips */
 			id_offs[i].status = BPF_STACK_BUILD_ID_IP;
@@ -158,9 +164,12 @@  static void stack_map_get_build_id_offset(struct bpf_stack_build_id *id_offs,
 			memset(id_offs[i].build_id, 0, BUILD_ID_SIZE_MAX);
 			continue;
 		}
+build_id_valid:
 		id_offs[i].offset = (vma->vm_pgoff << PAGE_SHIFT) + ips[i]
 			- vma->vm_start;
 		id_offs[i].status = BPF_STACK_BUILD_ID_VALID;
+		prev_vma = vma;
+		prev_build_id = id_offs[i].build_id;
 	}
 	bpf_mmap_unlock_mm(work, current->mm);
 }