diff mbox series

[v2,4/7] perf tools: Support "branch-misses:pp" on arm64

Message ID 20200123160734.3775-5-james.clark@arm.com (mailing list archive)
State New, archived
Headers show
Series perf tools: Add support for some spe events and precise ip | expand

Commit Message

James Clark Jan. 23, 2020, 4:07 p.m. UTC
From: Tan Xiaojun <tanxiaojun@huawei.com>

At the suggestion of James Clark, use SPE to support precise ip
for some events. Currently the only supported event is
branch-misses.

Example usage:

$ ./perf record -e branch-misses:pp dd if=/dev/zero of=/dev/null count=10000
(:p, :pp and :ppp all behave the same in this case.)

$ ./perf report --stdio
(--stdio is not necessary.)

--------------------------------------------------------------------
...
 # Samples: 14  of event 'branch-misses:pp'
 # Event count (approx.): 14
 #
 # Children      Self  Command  Shared Object      Symbol
 # ........  ........  .......  .................  ..........................
 #
    14.29%    14.29%  dd       [kernel.kallsyms]  [k] __arch_copy_from_user
    14.29%    14.29%  dd       libc-2.28.so       [.] _dl_addr
     7.14%     7.14%  dd       [kernel.kallsyms]  [k] __free_pages
     7.14%     7.14%  dd       [kernel.kallsyms]  [k] __pi_memcpy
     7.14%     7.14%  dd       [kernel.kallsyms]  [k] pagecache_get_page
     7.14%     7.14%  dd       [kernel.kallsyms]  [k] unmap_single_vma
     7.14%     7.14%  dd       dd                 [.] 0x00000000000025ec
     7.14%     7.14%  dd       ld-2.28.so         [.] _dl_lookup_symbol_x
     7.14%     7.14%  dd       ld-2.28.so         [.] check_match
     7.14%     7.14%  dd       libc-2.28.so       [.] __mpn_rshift
     7.14%     7.14%  dd       libc-2.28.so       [.] _nl_intern_locale_data
     7.14%     7.14%  dd       libc-2.28.so       [.] read_alias_file
...
--------------------------------------------------------------------

Signed-off-by: Tan Xiaojun <tanxiaojun@huawei.com>
Suggested-by: James Clark <James.Clark@arm.com>
Tested-by: Qi Liu <liuqi115@hisilicon.com>
Signed-off-by: James Clark <james.clark@arm.com>
Cc: Will Deacon <will@kernel.org>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Arnaldo Carvalho de Melo <acme@kernel.org>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Tan Xiaojun <tanxiaojun@huawei.com>
Cc: Al Grant <al.grant@arm.com>
Cc: Namhyung Kim <namhyung@kernel.org>
---
 tools/perf/util/arm-spe.c | 41 +++++++++++++++++++++++++++++++++++++++
 tools/perf/util/arm-spe.h |  3 +++
 tools/perf/util/evlist.c  |  2 ++
 3 files changed, 46 insertions(+)

Comments

Jiri Olsa Jan. 27, 2020, 12:31 p.m. UTC | #1
On Thu, Jan 23, 2020 at 04:07:31PM +0000, James Clark wrote:

SNIP

> diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c
> index 1548237b6558..b9c7e5271611 100644
> --- a/tools/perf/util/evlist.c
> +++ b/tools/perf/util/evlist.c
> @@ -9,6 +9,7 @@
>  #include <errno.h>
>  #include <inttypes.h>
>  #include <poll.h>
> +#include "arm-spe.h"
>  #include "cpumap.h"
>  #include "util/mmap.h"
>  #include "thread_map.h"
> @@ -179,6 +180,7 @@ void perf_evlist__splice_list_tail(struct evlist *evlist,
>  	struct evsel *evsel, *temp;
>  
>  	__evlist__for_each_entry_safe(list, temp, evsel) {
> +		arm_spe_precise_ip_support(evlist, evsel);

this is 'splice' function, you can't configure precise in here

do you need this 'config thing' to be executed on arm only?

if yes, please add something like arch_evsel__config, make it
weak for generic code and define it for arm

if no, just add the call at the end of perf_evsel__config I guess

thanks,
jirka

>  		list_del_init(&evsel->core.node);
>  		evlist__add(evlist, evsel);
>  	}
> -- 
> 2.25.0
>
diff mbox series

Patch

diff --git a/tools/perf/util/arm-spe.c b/tools/perf/util/arm-spe.c
index c99814c58745..0fcaefd386a6 100644
--- a/tools/perf/util/arm-spe.c
+++ b/tools/perf/util/arm-spe.c
@@ -35,6 +35,19 @@ 
 
 #define MAX_TIMESTAMP (~0ULL)
 
+#define SPE_ATTR_TS_ENABLE		BIT(0)
+#define SPE_ATTR_PA_ENABLE		BIT(1)
+#define SPE_ATTR_PCT_ENABLE		BIT(2)
+#define SPE_ATTR_JITTER			BIT(16)
+#define SPE_ATTR_BRANCH_FILTER		BIT(32)
+#define SPE_ATTR_LOAD_FILTER		BIT(33)
+#define SPE_ATTR_STORE_FILTER		BIT(34)
+
+#define SPE_ATTR_EV_RETIRED		BIT(1)
+#define SPE_ATTR_EV_CACHE		BIT(3)
+#define SPE_ATTR_EV_TLB			BIT(5)
+#define SPE_ATTR_EV_BRANCH		BIT(7)
+
 struct arm_spe {
 	struct auxtrace			auxtrace;
 	struct auxtrace_queues		queues;
@@ -778,6 +791,15 @@  arm_spe_synth_events(struct arm_spe *spe, struct perf_session *session)
 	attr.sample_id_all = evsel->core.attr.sample_id_all;
 	attr.read_format = evsel->core.attr.read_format;
 
+	/* If it is in the precise ip mode, there is no need to
+	 * synthesize new events. */
+	if (!strncmp(evsel->name, "branch-misses", 13)) {
+		spe->sample_branch_miss = true;
+		spe->branch_miss_id = evsel->core.id[0];
+
+		return 0;
+	}
+
 	/* create new id val to be a fixed offset from evsel id */
 	id = evsel->core.id[0] + 1000000000;
 
@@ -899,3 +921,22 @@  int arm_spe_process_auxtrace_info(union perf_event *event,
 	free(spe);
 	return err;
 }
+
+void arm_spe_precise_ip_support(struct evlist *evlist, struct evsel *evsel)
+{
+	struct perf_pmu *pmu;
+
+	/* Currently only supports precise_ip for branch-misses on arm64 */
+	if (!strcmp(perf_env__arch(evlist->env), "arm64")
+			&& evsel->core.attr.config == PERF_COUNT_HW_BRANCH_MISSES
+			&& evsel->core.attr.precise_ip) {
+		pmu = perf_pmu__find("arm_spe_0");
+		if (pmu) {
+			evsel->pmu_name = pmu->name;
+			evsel->core.attr.type = pmu->type;
+			evsel->core.attr.config = SPE_ATTR_TS_ENABLE
+						| SPE_ATTR_BRANCH_FILTER;
+			evsel->core.attr.config1 = SPE_ATTR_EV_BRANCH;
+		}
+	}
+}
diff --git a/tools/perf/util/arm-spe.h b/tools/perf/util/arm-spe.h
index 98d3235781c3..8b1fb191d03a 100644
--- a/tools/perf/util/arm-spe.h
+++ b/tools/perf/util/arm-spe.h
@@ -20,6 +20,8 @@  enum {
 union perf_event;
 struct perf_session;
 struct perf_pmu;
+struct evlist;
+struct evsel;
 
 struct auxtrace_record *arm_spe_recording_init(int *err,
 					       struct perf_pmu *arm_spe_pmu);
@@ -28,4 +30,5 @@  int arm_spe_process_auxtrace_info(union perf_event *event,
 				  struct perf_session *session);
 
 struct perf_event_attr *arm_spe_pmu_default_config(struct perf_pmu *arm_spe_pmu);
+void arm_spe_precise_ip_support(struct evlist *evlist, struct evsel *evsel);
 #endif
diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c
index 1548237b6558..b9c7e5271611 100644
--- a/tools/perf/util/evlist.c
+++ b/tools/perf/util/evlist.c
@@ -9,6 +9,7 @@ 
 #include <errno.h>
 #include <inttypes.h>
 #include <poll.h>
+#include "arm-spe.h"
 #include "cpumap.h"
 #include "util/mmap.h"
 #include "thread_map.h"
@@ -179,6 +180,7 @@  void perf_evlist__splice_list_tail(struct evlist *evlist,
 	struct evsel *evsel, *temp;
 
 	__evlist__for_each_entry_safe(list, temp, evsel) {
+		arm_spe_precise_ip_support(evlist, evsel);
 		list_del_init(&evsel->core.node);
 		evlist__add(evlist, evsel);
 	}