Message ID | 20241031213533.11148-3-ilkka@os.amperecomputing.com (mailing list archive) |
---|---|
State | New |
Headers | show |
Series | perf arm-spe: Add support for SPE Data Source packet on AmpereOne | expand |
On 31/10/2024 9:35 pm, Ilkka Koskinen wrote: > Decode SPE Data Source packets on AmpereOne. The field is IMPDEF. > > Signed-off-by: Ilkka Koskinen <ilkka@os.amperecomputing.com> > --- > .../util/arm-spe-decoder/arm-spe-decoder.h | 9 +++ > tools/perf/util/arm-spe.c | 65 +++++++++++++++++++ > 2 files changed, 74 insertions(+) > > diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h > index 358c611eeddb..4bcd627e859f 100644 > --- a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h > +++ b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h > @@ -67,6 +67,15 @@ enum arm_spe_common_data_source { > ARM_SPE_COMMON_DS_DRAM = 0xe, > }; > > +enum arm_spe_ampereone_data_source { > + ARM_SPE_AMPEREONE_LOCAL_CHIP_CACHE_OR_DEVICE = 0x0, > + ARM_SPE_AMPEREONE_SLC = 0x3, > + ARM_SPE_AMPEREONE_REMOTE_CHIP_CACHE = 0x5, > + ARM_SPE_AMPEREONE_DDR = 0x7, > + ARM_SPE_AMPEREONE_L1D = 0x8, > + ARM_SPE_AMPEREONE_L2D = 0x9, > +}; > + > struct arm_spe_record { > enum arm_spe_sample_type type; > int err; > diff --git a/tools/perf/util/arm-spe.c b/tools/perf/util/arm-spe.c > index 9586416be30a..700d4bc8d8ec 100644 > --- a/tools/perf/util/arm-spe.c > +++ b/tools/perf/util/arm-spe.c > @@ -103,6 +103,30 @@ struct arm_spe_queue { > u32 flags; > }; > > +struct arm_spe_source_mapping { > + u16 source; > + enum arm_spe_common_data_source common_src; > +}; > + > +#define MAP_SOURCE(src, common) \ > + { \ > + .source = ARM_SPE_##src, \ > + .common_src = ARM_SPE_COMMON_##common, \ > + } > + > +static int arm_spe__map_to_common_source(u16 source, > + struct arm_spe_source_mapping *tbl, > + int nr_sources) > +{ > + while (nr_sources--) { > + if (tbl->source == source) > + return tbl->common_src; > + tbl++; > + } > + Hi Ilkka, I think a simple switch statement here would be easier to follow than the loop, custom macro and then having the mappings in some other place: switch(source) case 0x0: /* AMPEREONE_LOCAL_CHIP_CACHE_OR_DEVICE */ return DS_PEER_CORE; etc... > + return -1; And the default case can return 0xfff directly, which avoids the if else later only to convert this -1 back into 0xfff. > +} > + > static void arm_spe_dump(struct arm_spe *spe __maybe_unused, > unsigned char *buf, size_t len) > { > @@ -443,6 +467,11 @@ static const struct midr_range common_ds_encoding_cpus[] = { > {}, > }; > > +static const struct midr_range ampereone_ds_encoding_cpus[] = { > + MIDR_ALL_VERSIONS(MIDR_AMPERE1A), > + {}, > +}; > + > static void arm_spe__sample_flags(struct arm_spe_queue *speq) > { > const struct arm_spe_record *record = &speq->decoder->record; > @@ -532,6 +561,38 @@ static void arm_spe__synth_data_source_common(const struct arm_spe_record *recor > } > } > > +static struct arm_spe_source_mapping ampereone_sources[] = { > + MAP_SOURCE(AMPEREONE_LOCAL_CHIP_CACHE_OR_DEVICE, DS_PEER_CORE), > + MAP_SOURCE(AMPEREONE_SLC, DS_SYS_CACHE), > + MAP_SOURCE(AMPEREONE_REMOTE_CHIP_CACHE, DS_REMOTE), > + MAP_SOURCE(AMPEREONE_DDR, DS_DRAM), > + MAP_SOURCE(AMPEREONE_L1D, DS_L1D), > + MAP_SOURCE(AMPEREONE_L2D, DS_L2), > +}; > + > +/* > + * Source is IMPDEF. Here we convert the source code used on AmpereOne cores > + * to the common (Neoverse, Cortex) to avoid duplicating the decoding code. > + */ > +static void arm_spe__synth_data_source_ampereone(const struct arm_spe_record *record, > + union perf_mem_data_src *data_src) > +{ > + int common_src; > + struct arm_spe_record common_record; > + > + common_src = arm_spe__map_to_common_source(record->source, > + ampereone_sources, > + ARRAY_SIZE(ampereone_sources)); > + if (common_src < 0) > + /* Assign a bogus value that's not used for common coding */ > + common_record.source = 0xfff; > + else > + common_record.source = common_src; > + > + common_record.op = record->op; > + arm_spe__synth_data_source_common(&common_record, data_src); > +} > + > static void arm_spe__synth_memory_level(const struct arm_spe_record *record, > union perf_mem_data_src *data_src) > { > @@ -606,6 +667,8 @@ static u64 arm_spe__synth_data_source(struct arm_spe_queue *speq, > union perf_mem_data_src data_src = { .mem_op = PERF_MEM_OP_NA }; > bool is_common = arm_spe__is_ds_encoding_supported(speq, > common_ds_encoding_cpus); > + bool is_ampereone = arm_spe__is_ds_encoding_supported(speq, > + ampereone_ds_encoding_cpus); I know this probably already works, but we don't really need is_common is_ampere etc, it will only grow anyway. All we need is a list of midrs and function pairs. That also avoids doing is_ampereone even after we already know is_common == true. static const struct data_src[] = { ... DS(MIDR_ALL_VERSIONS(MIDR_NEOVERSE_V2), common_ds), DS(MIDR_ALL_VERSIONS(MIDR_AMPERE1A), ampere_ds), {}, ... }; "arm_spe__is_ds_encoding_supported" then becomes a direct call to "arm_spe__synth_ds" and we can drop the is_ampereone and is_common vars. Then adding new ones doesn't require changing the function anymore. > > if (record->op & ARM_SPE_OP_LD) > data_src.mem_op = PERF_MEM_OP_LOAD; > @@ -616,6 +679,8 @@ static u64 arm_spe__synth_data_source(struct arm_spe_queue *speq, > > if (is_common) > arm_spe__synth_data_source_common(record, &data_src); > + else if (is_ampereone) > + arm_spe__synth_data_source_ampereone(record, &data_src); > else > arm_spe__synth_memory_level(record, &data_src); >
Hi James, On Mon, 4 Nov 2024, James Clark wrote: > On 31/10/2024 9:35 pm, Ilkka Koskinen wrote: >> Decode SPE Data Source packets on AmpereOne. The field is IMPDEF. >> >> Signed-off-by: Ilkka Koskinen <ilkka@os.amperecomputing.com> >> --- >> .../util/arm-spe-decoder/arm-spe-decoder.h | 9 +++ >> tools/perf/util/arm-spe.c | 65 +++++++++++++++++++ >> 2 files changed, 74 insertions(+) >> >> diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h >> b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h >> index 358c611eeddb..4bcd627e859f 100644 >> --- a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h >> +++ b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h >> @@ -67,6 +67,15 @@ enum arm_spe_common_data_source { >> ARM_SPE_COMMON_DS_DRAM = 0xe, >> }; >> +enum arm_spe_ampereone_data_source { >> + ARM_SPE_AMPEREONE_LOCAL_CHIP_CACHE_OR_DEVICE = 0x0, >> + ARM_SPE_AMPEREONE_SLC = 0x3, >> + ARM_SPE_AMPEREONE_REMOTE_CHIP_CACHE = 0x5, >> + ARM_SPE_AMPEREONE_DDR = 0x7, >> + ARM_SPE_AMPEREONE_L1D = 0x8, >> + ARM_SPE_AMPEREONE_L2D = 0x9, >> +}; >> + >> struct arm_spe_record { >> enum arm_spe_sample_type type; >> int err; >> diff --git a/tools/perf/util/arm-spe.c b/tools/perf/util/arm-spe.c >> index 9586416be30a..700d4bc8d8ec 100644 >> --- a/tools/perf/util/arm-spe.c >> +++ b/tools/perf/util/arm-spe.c >> @@ -103,6 +103,30 @@ struct arm_spe_queue { >> u32 flags; >> }; >> +struct arm_spe_source_mapping { >> + u16 source; >> + enum arm_spe_common_data_source common_src; >> +}; >> + >> +#define MAP_SOURCE(src, common) \ >> + { \ >> + .source = ARM_SPE_##src, \ >> + .common_src = ARM_SPE_COMMON_##common, \ >> + } >> + >> +static int arm_spe__map_to_common_source(u16 source, >> + struct arm_spe_source_mapping *tbl, >> + int nr_sources) >> +{ >> + while (nr_sources--) { >> + if (tbl->source == source) >> + return tbl->common_src; >> + tbl++; >> + } >> + > > Hi Ilkka, > > I think a simple switch statement here would be easier to follow than the > loop, custom macro and then having the mappings in some other place: > > switch(source) > case 0x0: /* AMPEREONE_LOCAL_CHIP_CACHE_OR_DEVICE */ > return DS_PEER_CORE; > > etc... > >> + return -1; > > And the default case can return 0xfff directly, which avoids the if else > later only to convert this -1 back into 0xfff. I can surely do that. > >> +} >> + >> static void arm_spe_dump(struct arm_spe *spe __maybe_unused, >> unsigned char *buf, size_t len) >> { >> @@ -443,6 +467,11 @@ static const struct midr_range >> common_ds_encoding_cpus[] = { >> {}, >> }; >> +static const struct midr_range ampereone_ds_encoding_cpus[] = { >> + MIDR_ALL_VERSIONS(MIDR_AMPERE1A), >> + {}, >> +}; >> + >> static void arm_spe__sample_flags(struct arm_spe_queue *speq) >> { >> const struct arm_spe_record *record = &speq->decoder->record; >> @@ -532,6 +561,38 @@ static void arm_spe__synth_data_source_common(const >> struct arm_spe_record *recor >> } >> } >> +static struct arm_spe_source_mapping ampereone_sources[] = { >> + MAP_SOURCE(AMPEREONE_LOCAL_CHIP_CACHE_OR_DEVICE, DS_PEER_CORE), >> + MAP_SOURCE(AMPEREONE_SLC, DS_SYS_CACHE), >> + MAP_SOURCE(AMPEREONE_REMOTE_CHIP_CACHE, DS_REMOTE), >> + MAP_SOURCE(AMPEREONE_DDR, DS_DRAM), >> + MAP_SOURCE(AMPEREONE_L1D, DS_L1D), >> + MAP_SOURCE(AMPEREONE_L2D, DS_L2), >> +}; >> + >> +/* >> + * Source is IMPDEF. Here we convert the source code used on AmpereOne >> cores >> + * to the common (Neoverse, Cortex) to avoid duplicating the decoding >> code. >> + */ >> +static void arm_spe__synth_data_source_ampereone(const struct >> arm_spe_record *record, >> + union perf_mem_data_src >> *data_src) >> +{ >> + int common_src; >> + struct arm_spe_record common_record; >> + >> + common_src = arm_spe__map_to_common_source(record->source, >> + ampereone_sources, >> + >> ARRAY_SIZE(ampereone_sources)); >> + if (common_src < 0) >> + /* Assign a bogus value that's not used for common coding */ >> + common_record.source = 0xfff; >> + else >> + common_record.source = common_src; >> + >> + common_record.op = record->op; >> + arm_spe__synth_data_source_common(&common_record, data_src); >> +} >> + >> static void arm_spe__synth_memory_level(const struct arm_spe_record >> *record, >> union perf_mem_data_src *data_src) >> { >> @@ -606,6 +667,8 @@ static u64 arm_spe__synth_data_source(struct >> arm_spe_queue *speq, >> union perf_mem_data_src data_src = { .mem_op = PERF_MEM_OP_NA }; >> bool is_common = arm_spe__is_ds_encoding_supported(speq, >> common_ds_encoding_cpus); >> + bool is_ampereone = arm_spe__is_ds_encoding_supported(speq, >> + ampereone_ds_encoding_cpus); > > I know this probably already works, but we don't really need is_common > is_ampere etc, it will only grow anyway. All we need is a list of midrs and > function pairs. That also avoids doing is_ampereone even after we already > know is_common == true. > > static const struct data_src[] = { > ... > DS(MIDR_ALL_VERSIONS(MIDR_NEOVERSE_V2), common_ds), > DS(MIDR_ALL_VERSIONS(MIDR_AMPERE1A), ampere_ds), > {}, > ... > }; > > "arm_spe__is_ds_encoding_supported" then becomes a direct call to > "arm_spe__synth_ds" and we can drop the is_ampereone and is_common vars. Then > adding new ones doesn't require changing the function anymore. To be honest, I'm not a big fan of the is_xyz() thing but I just didn't want to change it. Anyway, I'll change it for the next version. Cheers, Ilkka > >> if (record->op & ARM_SPE_OP_LD) >> data_src.mem_op = PERF_MEM_OP_LOAD; >> @@ -616,6 +679,8 @@ static u64 arm_spe__synth_data_source(struct >> arm_spe_queue *speq, >> if (is_common) >> arm_spe__synth_data_source_common(record, &data_src); >> + else if (is_ampereone) >> + arm_spe__synth_data_source_ampereone(record, &data_src); >> else >> arm_spe__synth_memory_level(record, &data_src); >> > >
diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h index 358c611eeddb..4bcd627e859f 100644 --- a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h +++ b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h @@ -67,6 +67,15 @@ enum arm_spe_common_data_source { ARM_SPE_COMMON_DS_DRAM = 0xe, }; +enum arm_spe_ampereone_data_source { + ARM_SPE_AMPEREONE_LOCAL_CHIP_CACHE_OR_DEVICE = 0x0, + ARM_SPE_AMPEREONE_SLC = 0x3, + ARM_SPE_AMPEREONE_REMOTE_CHIP_CACHE = 0x5, + ARM_SPE_AMPEREONE_DDR = 0x7, + ARM_SPE_AMPEREONE_L1D = 0x8, + ARM_SPE_AMPEREONE_L2D = 0x9, +}; + struct arm_spe_record { enum arm_spe_sample_type type; int err; diff --git a/tools/perf/util/arm-spe.c b/tools/perf/util/arm-spe.c index 9586416be30a..700d4bc8d8ec 100644 --- a/tools/perf/util/arm-spe.c +++ b/tools/perf/util/arm-spe.c @@ -103,6 +103,30 @@ struct arm_spe_queue { u32 flags; }; +struct arm_spe_source_mapping { + u16 source; + enum arm_spe_common_data_source common_src; +}; + +#define MAP_SOURCE(src, common) \ + { \ + .source = ARM_SPE_##src, \ + .common_src = ARM_SPE_COMMON_##common, \ + } + +static int arm_spe__map_to_common_source(u16 source, + struct arm_spe_source_mapping *tbl, + int nr_sources) +{ + while (nr_sources--) { + if (tbl->source == source) + return tbl->common_src; + tbl++; + } + + return -1; +} + static void arm_spe_dump(struct arm_spe *spe __maybe_unused, unsigned char *buf, size_t len) { @@ -443,6 +467,11 @@ static const struct midr_range common_ds_encoding_cpus[] = { {}, }; +static const struct midr_range ampereone_ds_encoding_cpus[] = { + MIDR_ALL_VERSIONS(MIDR_AMPERE1A), + {}, +}; + static void arm_spe__sample_flags(struct arm_spe_queue *speq) { const struct arm_spe_record *record = &speq->decoder->record; @@ -532,6 +561,38 @@ static void arm_spe__synth_data_source_common(const struct arm_spe_record *recor } } +static struct arm_spe_source_mapping ampereone_sources[] = { + MAP_SOURCE(AMPEREONE_LOCAL_CHIP_CACHE_OR_DEVICE, DS_PEER_CORE), + MAP_SOURCE(AMPEREONE_SLC, DS_SYS_CACHE), + MAP_SOURCE(AMPEREONE_REMOTE_CHIP_CACHE, DS_REMOTE), + MAP_SOURCE(AMPEREONE_DDR, DS_DRAM), + MAP_SOURCE(AMPEREONE_L1D, DS_L1D), + MAP_SOURCE(AMPEREONE_L2D, DS_L2), +}; + +/* + * Source is IMPDEF. Here we convert the source code used on AmpereOne cores + * to the common (Neoverse, Cortex) to avoid duplicating the decoding code. + */ +static void arm_spe__synth_data_source_ampereone(const struct arm_spe_record *record, + union perf_mem_data_src *data_src) +{ + int common_src; + struct arm_spe_record common_record; + + common_src = arm_spe__map_to_common_source(record->source, + ampereone_sources, + ARRAY_SIZE(ampereone_sources)); + if (common_src < 0) + /* Assign a bogus value that's not used for common coding */ + common_record.source = 0xfff; + else + common_record.source = common_src; + + common_record.op = record->op; + arm_spe__synth_data_source_common(&common_record, data_src); +} + static void arm_spe__synth_memory_level(const struct arm_spe_record *record, union perf_mem_data_src *data_src) { @@ -606,6 +667,8 @@ static u64 arm_spe__synth_data_source(struct arm_spe_queue *speq, union perf_mem_data_src data_src = { .mem_op = PERF_MEM_OP_NA }; bool is_common = arm_spe__is_ds_encoding_supported(speq, common_ds_encoding_cpus); + bool is_ampereone = arm_spe__is_ds_encoding_supported(speq, + ampereone_ds_encoding_cpus); if (record->op & ARM_SPE_OP_LD) data_src.mem_op = PERF_MEM_OP_LOAD; @@ -616,6 +679,8 @@ static u64 arm_spe__synth_data_source(struct arm_spe_queue *speq, if (is_common) arm_spe__synth_data_source_common(record, &data_src); + else if (is_ampereone) + arm_spe__synth_data_source_ampereone(record, &data_src); else arm_spe__synth_memory_level(record, &data_src);
Decode SPE Data Source packets on AmpereOne. The field is IMPDEF. Signed-off-by: Ilkka Koskinen <ilkka@os.amperecomputing.com> --- .../util/arm-spe-decoder/arm-spe-decoder.h | 9 +++ tools/perf/util/arm-spe.c | 65 +++++++++++++++++++ 2 files changed, 74 insertions(+)