diff mbox series

[2/4] perf lock contention: Use lock_stat_find{,new}

Message ID 20230202050455.2187592-3-namhyung@kernel.org (mailing list archive)
State Handled Elsewhere
Delegated to: BPF
Headers show
Series perf lock contention: Improve aggr x filter combination (v1) | expand

Checks

Context Check Description
netdev/tree_selection success Not a local patch
bpf/vmtest-bpf-PR success PR summary
bpf/vmtest-bpf-VM_Test-1 success Logs for ShellCheck
bpf/vmtest-bpf-VM_Test-2 success Logs for build for aarch64 with gcc
bpf/vmtest-bpf-VM_Test-3 success Logs for build for aarch64 with llvm-17
bpf/vmtest-bpf-VM_Test-4 success Logs for build for s390x with gcc
bpf/vmtest-bpf-VM_Test-5 success Logs for build for x86_64 with gcc
bpf/vmtest-bpf-VM_Test-6 success Logs for build for x86_64 with llvm-17
bpf/vmtest-bpf-VM_Test-7 success Logs for llvm-toolchain
bpf/vmtest-bpf-VM_Test-8 success Logs for set-matrix
bpf/vmtest-bpf-VM_Test-9 success Logs for test_maps on aarch64 with gcc
bpf/vmtest-bpf-VM_Test-10 success Logs for test_maps on aarch64 with llvm-17
bpf/vmtest-bpf-VM_Test-11 success Logs for test_maps on s390x with gcc
bpf/vmtest-bpf-VM_Test-12 success Logs for test_maps on x86_64 with gcc
bpf/vmtest-bpf-VM_Test-13 success Logs for test_maps on x86_64 with llvm-17
bpf/vmtest-bpf-VM_Test-14 success Logs for test_progs on aarch64 with gcc
bpf/vmtest-bpf-VM_Test-15 fail Logs for test_progs on aarch64 with llvm-17
bpf/vmtest-bpf-VM_Test-16 fail Logs for test_progs on s390x with gcc
bpf/vmtest-bpf-VM_Test-17 success Logs for test_progs on x86_64 with gcc
bpf/vmtest-bpf-VM_Test-18 fail Logs for test_progs on x86_64 with llvm-17
bpf/vmtest-bpf-VM_Test-19 fail Logs for test_progs_no_alu32 on aarch64 with gcc
bpf/vmtest-bpf-VM_Test-20 success Logs for test_progs_no_alu32 on aarch64 with llvm-17
bpf/vmtest-bpf-VM_Test-21 success Logs for test_progs_no_alu32 on s390x with gcc
bpf/vmtest-bpf-VM_Test-22 success Logs for test_progs_no_alu32 on x86_64 with gcc
bpf/vmtest-bpf-VM_Test-23 success Logs for test_progs_no_alu32 on x86_64 with llvm-17
bpf/vmtest-bpf-VM_Test-24 success Logs for test_progs_no_alu32_parallel on aarch64 with gcc
bpf/vmtest-bpf-VM_Test-25 success Logs for test_progs_no_alu32_parallel on aarch64 with llvm-17
bpf/vmtest-bpf-VM_Test-26 success Logs for test_progs_no_alu32_parallel on s390x with gcc
bpf/vmtest-bpf-VM_Test-27 success Logs for test_progs_no_alu32_parallel on x86_64 with gcc
bpf/vmtest-bpf-VM_Test-28 success Logs for test_progs_no_alu32_parallel on x86_64 with llvm-17
bpf/vmtest-bpf-VM_Test-29 success Logs for test_progs_parallel on aarch64 with gcc
bpf/vmtest-bpf-VM_Test-30 success Logs for test_progs_parallel on aarch64 with llvm-17
bpf/vmtest-bpf-VM_Test-31 success Logs for test_progs_parallel on s390x with gcc
bpf/vmtest-bpf-VM_Test-32 success Logs for test_progs_parallel on x86_64 with gcc
bpf/vmtest-bpf-VM_Test-33 success Logs for test_progs_parallel on x86_64 with llvm-17
bpf/vmtest-bpf-VM_Test-34 success Logs for test_verifier on aarch64 with gcc
bpf/vmtest-bpf-VM_Test-35 success Logs for test_verifier on aarch64 with llvm-17
bpf/vmtest-bpf-VM_Test-36 fail Logs for test_verifier on s390x with gcc
bpf/vmtest-bpf-VM_Test-37 success Logs for test_verifier on x86_64 with gcc
bpf/vmtest-bpf-VM_Test-38 success Logs for test_verifier on x86_64 with llvm-17

Commit Message

Namhyung Kim Feb. 2, 2023, 5:04 a.m. UTC
This is preparation work to support complex keys in BPF maps.  Currently
the map has a single-value key determined by the aggregation mode, such as
stack_id or pid, but we want to use a combination of those keys.

Then lock_contention_read() should still aggregate the result based on
the key that was requested by the user.  The other key info will be used
for filtering.

So instead of always creating a lock_stat entry, check whether it's already
there using lock_stat_find() first.

Signed-off-by: Namhyung Kim <namhyung@kernel.org>
---
 tools/perf/builtin-lock.c             |  4 +--
 tools/perf/util/bpf_lock_contention.c | 41 ++++++++++++++++-----------
 tools/perf/util/lock-contention.h     |  3 ++
 3 files changed, 30 insertions(+), 18 deletions(-)

Comments

Arnaldo Carvalho de Melo Feb. 2, 2023, 8:27 p.m. UTC | #1
Em Wed, Feb 01, 2023 at 09:04:53PM -0800, Namhyung Kim escreveu:
> This is a preparation work to support complex keys of BPF maps.  Now it
> has single value key according to the aggregation mode like stack_id or
> pid.  But we want to use a combination of those keys.
> 
> Then lock_contention_read() should still aggregate the result based on
> the key that was requested by user.  The other key info will be used for
> filtering.
> 
> So instead of creating a lock_stat entry always, Check if it's already
> there using lock_stat_find() first.

Hey, try building without libtraceevent-devel installed, which should be
equivalent to building with NO_LIBTRACEEVENT=1.

At this point I think you should move bpf_lock_contention.o inside
that CONFIG_LIBTRACEEVENT ifeq block.

perf-$(CONFIG_PERF_BPF_SKEL) += bpf_lock_contention.o

ifeq ($(CONFIG_LIBTRACEEVENT),y)
  perf-$(CONFIG_PERF_BPF_SKEL) += bpf_kwork.o
endif

I'm removing this series from tmp.perf/core for now.

- Arnaldo
 
> Signed-off-by: Namhyung Kim <namhyung@kernel.org>
> ---
>  tools/perf/builtin-lock.c             |  4 +--
>  tools/perf/util/bpf_lock_contention.c | 41 ++++++++++++++++-----------
>  tools/perf/util/lock-contention.h     |  3 ++
>  3 files changed, 30 insertions(+), 18 deletions(-)
> 
> diff --git a/tools/perf/builtin-lock.c b/tools/perf/builtin-lock.c
> index 216a9a252bf4..0593c6e636c6 100644
> --- a/tools/perf/builtin-lock.c
> +++ b/tools/perf/builtin-lock.c
> @@ -465,7 +465,7 @@ static struct lock_stat *pop_from_result(void)
>  	return container_of(node, struct lock_stat, rb);
>  }
>  
> -static struct lock_stat *lock_stat_find(u64 addr)
> +struct lock_stat *lock_stat_find(u64 addr)
>  {
>  	struct hlist_head *entry = lockhashentry(addr);
>  	struct lock_stat *ret;
> @@ -477,7 +477,7 @@ static struct lock_stat *lock_stat_find(u64 addr)
>  	return NULL;
>  }
>  
> -static struct lock_stat *lock_stat_findnew(u64 addr, const char *name, int flags)
> +struct lock_stat *lock_stat_findnew(u64 addr, const char *name, int flags)
>  {
>  	struct hlist_head *entry = lockhashentry(addr);
>  	struct lock_stat *ret, *new;
> diff --git a/tools/perf/util/bpf_lock_contention.c b/tools/perf/util/bpf_lock_contention.c
> index 967ce168f163..c6f2db603d5a 100644
> --- a/tools/perf/util/bpf_lock_contention.c
> +++ b/tools/perf/util/bpf_lock_contention.c
> @@ -254,12 +254,34 @@ int lock_contention_read(struct lock_contention *con)
>  	prev_key = NULL;
>  	while (!bpf_map_get_next_key(fd, prev_key, &key)) {
>  		s32 stack_id;
> +		const char *name;
>  
>  		/* to handle errors in the loop body */
>  		err = -1;
>  
>  		bpf_map_lookup_elem(fd, &key, &data);
> -		st = zalloc(sizeof(*st));
> +
> +		if (con->save_callstack) {
> +			stack_id = key.aggr_key;
> +			bpf_map_lookup_elem(stack, &stack_id, stack_trace);
> +		}
> +
> +		st = lock_stat_find(key.aggr_key);
> +		if (st != NULL) {
> +			st->wait_time_total += data.total_time;
> +			if (st->wait_time_max < data.max_time)
> +				st->wait_time_max = data.max_time;
> +			if (st->wait_time_min > data.min_time)
> +				st->wait_time_min = data.min_time;
> +
> +			st->nr_contended += data.count;
> +			if (st->nr_contended)
> +				st->avg_wait_time = st->wait_time_total / st->nr_contended;
> +			goto next;
> +		}
> +
> +		name = lock_contention_get_name(con, &key, stack_trace);
> +		st = lock_stat_findnew(key.aggr_key, name, data.flags);
>  		if (st == NULL)
>  			break;
>  
> @@ -272,14 +294,6 @@ int lock_contention_read(struct lock_contention *con)
>  			st->avg_wait_time = data.total_time / data.count;
>  
>  		st->flags = data.flags;
> -		st->addr = key.aggr_key;
> -
> -		stack_id = key.aggr_key;
> -		bpf_map_lookup_elem(stack, &stack_id, stack_trace);
> -
> -		st->name = strdup(lock_contention_get_name(con, &key, stack_trace));
> -		if (st->name == NULL)
> -			break;
>  
>  		if (con->save_callstack) {
>  			st->callstack = memdup(stack_trace, stack_size);
> @@ -287,19 +301,14 @@ int lock_contention_read(struct lock_contention *con)
>  				break;
>  		}
>  
> -		hlist_add_head(&st->hash_entry, con->result);
> +next:
>  		prev_key = &key;
>  
> -		/* we're fine now, reset the values */
> -		st = NULL;
> +		/* we're fine now, reset the error */
>  		err = 0;
>  	}
>  
>  	free(stack_trace);
> -	if (st) {
> -		free(st->name);
> -		free(st);
> -	}
>  
>  	return err;
>  }
> diff --git a/tools/perf/util/lock-contention.h b/tools/perf/util/lock-contention.h
> index 17e594d57a61..39d5bfc77f4e 100644
> --- a/tools/perf/util/lock-contention.h
> +++ b/tools/perf/util/lock-contention.h
> @@ -65,6 +65,9 @@ struct lock_stat {
>   */
>  #define MAX_LOCK_DEPTH 48
>  
> +struct lock_stat *lock_stat_find(u64 addr);
> +struct lock_stat *lock_stat_findnew(u64 addr, const char *name, int flags);
> +
>  /*
>   * struct lock_seq_stat:
>   * Place to put on state of one lock sequence
> -- 
> 2.39.1.456.gfc5497dd1b-goog
>
Namhyung Kim Feb. 2, 2023, 11:51 p.m. UTC | #2
Hi Arnaldo,

On Thu, Feb 2, 2023 at 12:27 PM Arnaldo Carvalho de Melo
<acme@kernel.org> wrote:
>
> Em Wed, Feb 01, 2023 at 09:04:53PM -0800, Namhyung Kim escreveu:
> > This is a preparation work to support complex keys of BPF maps.  Now it
> > has single value key according to the aggregation mode like stack_id or
> > pid.  But we want to use a combination of those keys.
> >
> > Then lock_contention_read() should still aggregate the result based on
> > the key that was requested by user.  The other key info will be used for
> > filtering.
> >
> > So instead of creating a lock_stat entry always, Check if it's already
> > there using lock_stat_find() first.
>
> Hey, try building without libtraceevent-devel installed, should be
> equivalent to NO_LIBTRACEEVENT=1.
>
> At this point I think you should move bpf_lock_contention.o to inside
> that CONFIG_LIBTRACEEVENT if block.
>
> perf-$(CONFIG_PERF_BPF_SKEL) += bpf_lock_contention.o
>
> ifeq ($(CONFIG_LIBTRACEEVENT),y)
>   perf-$(CONFIG_PERF_BPF_SKEL) += bpf_kwork.o
> endif
>
> I'm removing this series from tmp.perf/core for now.

Thanks for the suggestion.  I've verified that it builds with the change.
I will send v2.

Thanks,
Namhyung
diff mbox series

Patch

diff --git a/tools/perf/builtin-lock.c b/tools/perf/builtin-lock.c
index 216a9a252bf4..0593c6e636c6 100644
--- a/tools/perf/builtin-lock.c
+++ b/tools/perf/builtin-lock.c
@@ -465,7 +465,7 @@  static struct lock_stat *pop_from_result(void)
 	return container_of(node, struct lock_stat, rb);
 }
 
-static struct lock_stat *lock_stat_find(u64 addr)
+struct lock_stat *lock_stat_find(u64 addr)
 {
 	struct hlist_head *entry = lockhashentry(addr);
 	struct lock_stat *ret;
@@ -477,7 +477,7 @@  static struct lock_stat *lock_stat_find(u64 addr)
 	return NULL;
 }
 
-static struct lock_stat *lock_stat_findnew(u64 addr, const char *name, int flags)
+struct lock_stat *lock_stat_findnew(u64 addr, const char *name, int flags)
 {
 	struct hlist_head *entry = lockhashentry(addr);
 	struct lock_stat *ret, *new;
diff --git a/tools/perf/util/bpf_lock_contention.c b/tools/perf/util/bpf_lock_contention.c
index 967ce168f163..c6f2db603d5a 100644
--- a/tools/perf/util/bpf_lock_contention.c
+++ b/tools/perf/util/bpf_lock_contention.c
@@ -254,12 +254,34 @@  int lock_contention_read(struct lock_contention *con)
 	prev_key = NULL;
 	while (!bpf_map_get_next_key(fd, prev_key, &key)) {
 		s32 stack_id;
+		const char *name;
 
 		/* to handle errors in the loop body */
 		err = -1;
 
 		bpf_map_lookup_elem(fd, &key, &data);
-		st = zalloc(sizeof(*st));
+
+		if (con->save_callstack) {
+			stack_id = key.aggr_key;
+			bpf_map_lookup_elem(stack, &stack_id, stack_trace);
+		}
+
+		st = lock_stat_find(key.aggr_key);
+		if (st != NULL) {
+			st->wait_time_total += data.total_time;
+			if (st->wait_time_max < data.max_time)
+				st->wait_time_max = data.max_time;
+			if (st->wait_time_min > data.min_time)
+				st->wait_time_min = data.min_time;
+
+			st->nr_contended += data.count;
+			if (st->nr_contended)
+				st->avg_wait_time = st->wait_time_total / st->nr_contended;
+			goto next;
+		}
+
+		name = lock_contention_get_name(con, &key, stack_trace);
+		st = lock_stat_findnew(key.aggr_key, name, data.flags);
 		if (st == NULL)
 			break;
 
@@ -272,14 +294,6 @@  int lock_contention_read(struct lock_contention *con)
 			st->avg_wait_time = data.total_time / data.count;
 
 		st->flags = data.flags;
-		st->addr = key.aggr_key;
-
-		stack_id = key.aggr_key;
-		bpf_map_lookup_elem(stack, &stack_id, stack_trace);
-
-		st->name = strdup(lock_contention_get_name(con, &key, stack_trace));
-		if (st->name == NULL)
-			break;
 
 		if (con->save_callstack) {
 			st->callstack = memdup(stack_trace, stack_size);
@@ -287,19 +301,14 @@  int lock_contention_read(struct lock_contention *con)
 				break;
 		}
 
-		hlist_add_head(&st->hash_entry, con->result);
+next:
 		prev_key = &key;
 
-		/* we're fine now, reset the values */
-		st = NULL;
+		/* we're fine now, reset the error */
 		err = 0;
 	}
 
 	free(stack_trace);
-	if (st) {
-		free(st->name);
-		free(st);
-	}
 
 	return err;
 }
diff --git a/tools/perf/util/lock-contention.h b/tools/perf/util/lock-contention.h
index 17e594d57a61..39d5bfc77f4e 100644
--- a/tools/perf/util/lock-contention.h
+++ b/tools/perf/util/lock-contention.h
@@ -65,6 +65,9 @@  struct lock_stat {
  */
 #define MAX_LOCK_DEPTH 48
 
+struct lock_stat *lock_stat_find(u64 addr);
+struct lock_stat *lock_stat_findnew(u64 addr, const char *name, int flags);
+
 /*
  * struct lock_seq_stat:
  * Place to put on state of one lock sequence