Message ID | 20241024233035.7979-1-ilkka@os.amperecomputing.com (mailing list archive) |
---|---|
State | New |
Headers | show |
Series | perf arm-spe: Add support for SPE Data Source packet on AmpereOne | expand |
On 25/10/2024 12:30 am, Ilkka Koskinen wrote: > Decode SPE Data Source packets on AmpereOne. The field is IMPDEF. > > Signed-off-by: Ilkka Koskinen <ilkka@os.amperecomputing.com> > --- > .../util/arm-spe-decoder/arm-spe-decoder.h | 9 +++ > tools/perf/util/arm-spe.c | 61 +++++++++++++++++++ > 2 files changed, 70 insertions(+) > > diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h > index 1443c28545a9..e4115b1e92b2 100644 > --- a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h > +++ b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h > @@ -67,6 +67,15 @@ enum arm_spe_neoverse_data_source { > ARM_SPE_NV_DRAM = 0xe, > }; > > +enum arm_spe_ampereone_data_source { > + ARM_SPE_AMPEREONE_LOCAL_CHIP_CACHE_OR_DEVICE = 0x0, > + ARM_SPE_AMPEREONE_SLC = 0x3, > + ARM_SPE_AMPEREONE_REMOTE_CHIP_CACHE = 0x5, > + ARM_SPE_AMPEREONE_DDR = 0x7, > + ARM_SPE_AMPEREONE_L1D = 0x8, > + ARM_SPE_AMPEREONE_L2D = 0x9, > +}; > + > struct arm_spe_record { > enum arm_spe_sample_type type; > int err; > diff --git a/tools/perf/util/arm-spe.c b/tools/perf/util/arm-spe.c > index 138ffc71b32d..04bd21ad7ea8 100644 > --- a/tools/perf/util/arm-spe.c > +++ b/tools/perf/util/arm-spe.c > @@ -515,10 +515,69 @@ static void arm_spe__synth_data_source_generic(const struct arm_spe_record *reco > data_src->mem_lvl |= PERF_MEM_LVL_REM_CCE1; > } > > +static const struct midr_range ampereone_source_spe[] = { > + MIDR_ALL_VERSIONS(MIDR_AMPERE1A), > + {}, > +}; > + > +static void arm_spe__synth_data_source_ampereone(const struct arm_spe_record *record, > + union perf_mem_data_src *data_src, > + u64 midr) > +{ > + if (!is_midr_in_range_list(midr, ampereone_source_spe)) { > + arm_spe__synth_data_source_generic(record, data_src); > + return; > + } [...] > static u64 arm_spe__synth_data_source(const struct arm_spe_record *record, u64 midr) > { > union perf_mem_data_src data_src = { .mem_op = PERF_MEM_OP_NA }; > bool is_neoverse = is_midr_in_range_list(midr, neoverse_spe); > + bool is_ampereone = (read_cpuid_implementor() == ARM_CPU_IMP_AMPERE); > Hi Ilkka, I think this read_cpuid_implementor() is for the device that's running report, rather than record. You need to use the midr that's saved into the file. But it looks like you've done that for is_midr_in_range_list(midr, ampereone_source_spe) above. Is it possible to just do that and not read_cpuid_implementor() and then it's done the same way as neoverse and also works off target? Thanks James
On Thu, Oct 24, 2024 at 11:30:35PM +0000, Ilkka Koskinen wrote: > > Decode SPE Data Source packets on AmpereOne. The field is IMPDEF. > > Signed-off-by: Ilkka Koskinen <ilkka@os.amperecomputing.com> > --- > .../util/arm-spe-decoder/arm-spe-decoder.h | 9 +++ > tools/perf/util/arm-spe.c | 61 +++++++++++++++++++ > 2 files changed, 70 insertions(+) > > diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h > index 1443c28545a9..e4115b1e92b2 100644 > --- a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h > +++ b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h > @@ -67,6 +67,15 @@ enum arm_spe_neoverse_data_source { > ARM_SPE_NV_DRAM = 0xe, > }; > > +enum arm_spe_ampereone_data_source { > + ARM_SPE_AMPEREONE_LOCAL_CHIP_CACHE_OR_DEVICE = 0x0, > + ARM_SPE_AMPEREONE_SLC = 0x3, > + ARM_SPE_AMPEREONE_REMOTE_CHIP_CACHE = 0x5, > + ARM_SPE_AMPEREONE_DDR = 0x7, > + ARM_SPE_AMPEREONE_L1D = 0x8, > + ARM_SPE_AMPEREONE_L2D = 0x9, > +}; > + > struct arm_spe_record { > enum arm_spe_sample_type type; > int err; > diff --git a/tools/perf/util/arm-spe.c b/tools/perf/util/arm-spe.c > index 138ffc71b32d..04bd21ad7ea8 100644 > --- a/tools/perf/util/arm-spe.c > +++ b/tools/perf/util/arm-spe.c > @@ -515,10 +515,69 @@ static void arm_spe__synth_data_source_generic(const struct arm_spe_record *reco > data_src->mem_lvl |= PERF_MEM_LVL_REM_CCE1; > } > > +static const struct midr_range ampereone_source_spe[] = { > + MIDR_ALL_VERSIONS(MIDR_AMPERE1A), > + {}, > +}; > + > +static void arm_spe__synth_data_source_ampereone(const struct arm_spe_record *record, > + union perf_mem_data_src *data_src, > + u64 midr) > +{ > + if (!is_midr_in_range_list(midr, ampereone_source_spe)) { > + arm_spe__synth_data_source_generic(record, data_src); > + return; > + } With James' suggestion, I don't think here need to check the CPU variant again. All generic data source generating should run in the arm_spe__synth_data_source() function. > + > + if (record->op & ARM_SPE_OP_ST) { > + data_src->mem_lvl = PERF_MEM_LVL_NA; > + data_src->mem_lvl_num = PERF_MEM_LVLNUM_NA; > + data_src->mem_snoop = PERF_MEM_SNOOP_NA; > + return; > + } > + > + switch (record->source) { > + case ARM_SPE_AMPEREONE_LOCAL_CHIP_CACHE_OR_DEVICE: > + data_src->mem_lvl = PERF_MEM_LVL_L2 | PERF_MEM_LVL_HIT; > + data_src->mem_lvl_num = PERF_MEM_LVLNUM_L2; > + data_src->mem_snoopx = PERF_MEM_SNOOPX_PEER; > + break; > + case ARM_SPE_AMPEREONE_SLC: > + data_src->mem_lvl = PERF_MEM_LVL_L3 | PERF_MEM_LVL_HIT; > + data_src->mem_lvl_num = PERF_MEM_LVLNUM_L3; > + data_src->mem_snoop = PERF_MEM_SNOOP_HIT; > + break; > + case ARM_SPE_AMPEREONE_REMOTE_CHIP_CACHE: > + data_src->mem_lvl = PERF_MEM_LVL_REM_CCE1; > + data_src->mem_lvl_num = PERF_MEM_LVLNUM_ANY_CACHE; > + data_src->mem_remote = PERF_MEM_REMOTE_REMOTE; > + data_src->mem_snoopx = PERF_MEM_SNOOPX_PEER; > + break; > + case ARM_SPE_AMPEREONE_DDR: > + data_src->mem_lvl = PERF_MEM_LVL_LOC_RAM | PERF_MEM_LVL_HIT; > + data_src->mem_lvl_num = PERF_MEM_LVLNUM_RAM; > + data_src->mem_snoop = PERF_MEM_SNOOP_NONE; > + break; > + case ARM_SPE_AMPEREONE_L1D: > + data_src->mem_lvl = PERF_MEM_LVL_L1 | PERF_MEM_LVL_HIT; > + data_src->mem_lvl_num = PERF_MEM_LVLNUM_L1; > + data_src->mem_snoop = PERF_MEM_SNOOP_NONE; > + break; > + case ARM_SPE_AMPEREONE_L2D: > + data_src->mem_lvl = PERF_MEM_LVL_L2 | PERF_MEM_LVL_HIT; > + data_src->mem_lvl_num = PERF_MEM_LVLNUM_L2; > + data_src->mem_snoop = PERF_MEM_SNOOP_NONE; > + break; We have another way to do this. If convert the SoC specific data source to common data source values, e.g. ARM_SPE_AMPEREONE_LOCAL_CHIP_CACHE_OR_DEVICE -> ARM_SPE_NV_PEER_CORE ARM_SPE_AMPEREONE_SLC -> ARM_SPE_NV_SYS_CACHE ARM_SPE_AMPEREONE_REMOTE_CHIP_CACHE -> ARM_SPE_NV_REMOTE ARM_SPE_AMPEREONE_DDR -> ARM_SPE_NV_DRAM ... Then we don't need to maintain two functions with almost same setting. I have no strong opinion for this. A dedicated function for Ampere CPU might give a bit flexiblity for later tweaking. It is up to you. Last thing, please work on the the latest perf-tools-next branch: https://git.kernel.org/pub/scm/linux/kernel/git/perf/perf-tools-next.git branch: perf-tools-next Recently we have Arm SPE data source refactoring, please rebase on it. Thanks, Leo > + default: > + break; > + } > +} > + > static u64 arm_spe__synth_data_source(const struct arm_spe_record *record, u64 midr) > { > union perf_mem_data_src data_src = { .mem_op = PERF_MEM_OP_NA }; > bool is_neoverse = is_midr_in_range_list(midr, neoverse_spe); > + bool is_ampereone = (read_cpuid_implementor() == ARM_CPU_IMP_AMPERE); > > if (record->op & ARM_SPE_OP_LD) > data_src.mem_op = PERF_MEM_OP_LOAD; > @@ -529,6 +588,8 @@ static u64 arm_spe__synth_data_source(const struct arm_spe_record *record, u64 m > > if (is_neoverse) > arm_spe__synth_data_source_neoverse(record, &data_src); > + else if (is_ampereone) > + arm_spe__synth_data_source_ampereone(record, &data_src, midr); > else > arm_spe__synth_data_source_generic(record, &data_src); > > -- > 2.47.0 > >
Hi James, On Fri, 25 Oct 2024, James Clark wrote: > On 25/10/2024 12:30 am, Ilkka Koskinen wrote: >> Decode SPE Data Source packets on AmpereOne. The field is IMPDEF. >> >> Signed-off-by: Ilkka Koskinen <ilkka@os.amperecomputing.com> >> --- >> .../util/arm-spe-decoder/arm-spe-decoder.h | 9 +++ >> tools/perf/util/arm-spe.c | 61 +++++++++++++++++++ >> 2 files changed, 70 insertions(+) >> >> diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h >> b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h >> index 1443c28545a9..e4115b1e92b2 100644 >> --- a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h >> +++ b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h >> @@ -67,6 +67,15 @@ enum arm_spe_neoverse_data_source { >> ARM_SPE_NV_DRAM = 0xe, >> }; >> +enum arm_spe_ampereone_data_source { >> + ARM_SPE_AMPEREONE_LOCAL_CHIP_CACHE_OR_DEVICE = 0x0, >> + ARM_SPE_AMPEREONE_SLC = 0x3, >> + ARM_SPE_AMPEREONE_REMOTE_CHIP_CACHE = 0x5, >> + ARM_SPE_AMPEREONE_DDR = 0x7, >> + ARM_SPE_AMPEREONE_L1D = 0x8, >> + ARM_SPE_AMPEREONE_L2D = 0x9, >> +}; >> + >> struct arm_spe_record { >> enum arm_spe_sample_type type; >> int err; >> diff --git a/tools/perf/util/arm-spe.c b/tools/perf/util/arm-spe.c >> index 138ffc71b32d..04bd21ad7ea8 100644 >> --- a/tools/perf/util/arm-spe.c >> +++ b/tools/perf/util/arm-spe.c >> @@ -515,10 +515,69 @@ static void arm_spe__synth_data_source_generic(const >> struct arm_spe_record *reco >> data_src->mem_lvl |= PERF_MEM_LVL_REM_CCE1; >> } >> +static const struct midr_range ampereone_source_spe[] = { >> + MIDR_ALL_VERSIONS(MIDR_AMPERE1A), >> + {}, >> +}; >> + >> +static void arm_spe__synth_data_source_ampereone(const struct >> arm_spe_record *record, >> + union perf_mem_data_src >> *data_src, >> + u64 midr) >> +{ >> + if (!is_midr_in_range_list(midr, ampereone_source_spe)) { >> + arm_spe__synth_data_source_generic(record, data_src); >> + return; >> + } > [...] >> static u64 arm_spe__synth_data_source(const struct arm_spe_record >> *record, u64 midr) >> { >> union perf_mem_data_src data_src = { .mem_op = PERF_MEM_OP_NA }; >> bool is_neoverse = is_midr_in_range_list(midr, neoverse_spe); >> + bool is_ampereone = (read_cpuid_implementor() == ARM_CPU_IMP_AMPERE); >> > > Hi Ilkka, > > I think this read_cpuid_implementor() is for the device that's running > report, rather than record. You need to use the midr that's saved into the > file. Uh, that didn't come to my mind at all. > But it looks like you've done that for is_midr_in_range_list(midr, > ampereone_source_spe) above. Is it possible to just do that and not > read_cpuid_implementor() and then it's done the same way as neoverse and also > works off target? You're right, it's wrong and there's no even need for separate implementor id check. I fix it. Cheers, Ilkka > > Thanks > > James > >
Hi Leo, On Fri, 25 Oct 2024, Leo Yan wrote: > On Thu, Oct 24, 2024 at 11:30:35PM +0000, Ilkka Koskinen wrote: >> >> Decode SPE Data Source packets on AmpereOne. The field is IMPDEF. >> >> Signed-off-by: Ilkka Koskinen <ilkka@os.amperecomputing.com> >> --- >> .../util/arm-spe-decoder/arm-spe-decoder.h | 9 +++ >> tools/perf/util/arm-spe.c | 61 +++++++++++++++++++ >> 2 files changed, 70 insertions(+) >> >> diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h >> index 1443c28545a9..e4115b1e92b2 100644 >> --- a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h >> +++ b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h >> @@ -67,6 +67,15 @@ enum arm_spe_neoverse_data_source { >> ARM_SPE_NV_DRAM = 0xe, >> }; >> >> +enum arm_spe_ampereone_data_source { >> + ARM_SPE_AMPEREONE_LOCAL_CHIP_CACHE_OR_DEVICE = 0x0, >> + ARM_SPE_AMPEREONE_SLC = 0x3, >> + ARM_SPE_AMPEREONE_REMOTE_CHIP_CACHE = 0x5, >> + ARM_SPE_AMPEREONE_DDR = 0x7, >> + ARM_SPE_AMPEREONE_L1D = 0x8, >> + ARM_SPE_AMPEREONE_L2D = 0x9, >> +}; >> + >> struct arm_spe_record { >> enum arm_spe_sample_type type; >> int err; >> diff --git a/tools/perf/util/arm-spe.c b/tools/perf/util/arm-spe.c >> index 138ffc71b32d..04bd21ad7ea8 100644 >> --- a/tools/perf/util/arm-spe.c >> +++ b/tools/perf/util/arm-spe.c >> @@ -515,10 +515,69 @@ static void arm_spe__synth_data_source_generic(const struct arm_spe_record *reco >> data_src->mem_lvl |= PERF_MEM_LVL_REM_CCE1; >> } >> >> +static const struct midr_range ampereone_source_spe[] = { >> + MIDR_ALL_VERSIONS(MIDR_AMPERE1A), >> + {}, >> +}; >> + >> +static void arm_spe__synth_data_source_ampereone(const struct arm_spe_record *record, >> + union perf_mem_data_src *data_src, >> + u64 midr) >> +{ >> + if (!is_midr_in_range_list(midr, ampereone_source_spe)) { >> + arm_spe__synth_data_source_generic(record, data_src); >> + return; >> + } > > With James' suggestion, I don't think here need to check the CPU > variant again. All generic data source generating should run in the > arm_spe__synth_data_source() function. Yep, checking the implementor ID was just wrong and unnecessary. I fix that. > >> + >> + if (record->op & ARM_SPE_OP_ST) { >> + data_src->mem_lvl = PERF_MEM_LVL_NA; >> + data_src->mem_lvl_num = PERF_MEM_LVLNUM_NA; >> + data_src->mem_snoop = PERF_MEM_SNOOP_NA; >> + return; >> + } >> + >> + switch (record->source) { >> + case ARM_SPE_AMPEREONE_LOCAL_CHIP_CACHE_OR_DEVICE: >> + data_src->mem_lvl = PERF_MEM_LVL_L2 | PERF_MEM_LVL_HIT; >> + data_src->mem_lvl_num = PERF_MEM_LVLNUM_L2; >> + data_src->mem_snoopx = PERF_MEM_SNOOPX_PEER; >> + break; >> + case ARM_SPE_AMPEREONE_SLC: >> + data_src->mem_lvl = PERF_MEM_LVL_L3 | PERF_MEM_LVL_HIT; >> + data_src->mem_lvl_num = PERF_MEM_LVLNUM_L3; >> + data_src->mem_snoop = PERF_MEM_SNOOP_HIT; >> + break; >> + case ARM_SPE_AMPEREONE_REMOTE_CHIP_CACHE: >> + data_src->mem_lvl = PERF_MEM_LVL_REM_CCE1; >> + data_src->mem_lvl_num = PERF_MEM_LVLNUM_ANY_CACHE; >> + data_src->mem_remote = PERF_MEM_REMOTE_REMOTE; >> + data_src->mem_snoopx = PERF_MEM_SNOOPX_PEER; >> + break; >> + case ARM_SPE_AMPEREONE_DDR: >> + data_src->mem_lvl = PERF_MEM_LVL_LOC_RAM | PERF_MEM_LVL_HIT; >> + data_src->mem_lvl_num = PERF_MEM_LVLNUM_RAM; >> + data_src->mem_snoop = PERF_MEM_SNOOP_NONE; >> + break; >> + case ARM_SPE_AMPEREONE_L1D: >> + data_src->mem_lvl = PERF_MEM_LVL_L1 | PERF_MEM_LVL_HIT; >> + data_src->mem_lvl_num = PERF_MEM_LVLNUM_L1; >> + data_src->mem_snoop = PERF_MEM_SNOOP_NONE; >> + break; >> + case ARM_SPE_AMPEREONE_L2D: >> + data_src->mem_lvl = PERF_MEM_LVL_L2 | PERF_MEM_LVL_HIT; >> + data_src->mem_lvl_num = PERF_MEM_LVLNUM_L2; >> + data_src->mem_snoop = PERF_MEM_SNOOP_NONE; >> + break; > > We have another way to do this. If convert the SoC specific data source > to common data source values, e.g. > > ARM_SPE_AMPEREONE_LOCAL_CHIP_CACHE_OR_DEVICE -> ARM_SPE_NV_PEER_CORE > ARM_SPE_AMPEREONE_SLC -> ARM_SPE_NV_SYS_CACHE > ARM_SPE_AMPEREONE_REMOTE_CHIP_CACHE -> ARM_SPE_NV_REMOTE > ARM_SPE_AMPEREONE_DDR -> ARM_SPE_NV_DRAM > ... > > Then we don't need to maintain two functions with almost same setting. > > I have no strong opinion for this. A dedicated function for Ampere CPU > might give a bit flexiblity for later tweaking. It is up to you. Let me think about it and see how it would look like. > > Last thing, please work on the the latest perf-tools-next branch: > > https://git.kernel.org/pub/scm/linux/kernel/git/perf/perf-tools-next.git > branch: perf-tools-next > > Recently we have Arm SPE data source refactoring, please rebase on it. Uh, for some reason I rebased it on top of Will's arm64 tree. Sorry about that. Cheers, Ilkka > > Thanks, > Leo > >> + default: >> + break; >> + } >> +} >> + >> static u64 arm_spe__synth_data_source(const struct arm_spe_record *record, u64 midr) >> { >> union perf_mem_data_src data_src = { .mem_op = PERF_MEM_OP_NA }; >> bool is_neoverse = is_midr_in_range_list(midr, neoverse_spe); >> + bool is_ampereone = (read_cpuid_implementor() == ARM_CPU_IMP_AMPERE); >> >> if (record->op & ARM_SPE_OP_LD) >> data_src.mem_op = PERF_MEM_OP_LOAD; >> @@ -529,6 +588,8 @@ static u64 arm_spe__synth_data_source(const struct arm_spe_record *record, u64 m >> >> if (is_neoverse) >> arm_spe__synth_data_source_neoverse(record, &data_src); >> + else if (is_ampereone) >> + arm_spe__synth_data_source_ampereone(record, &data_src, midr); >> else >> arm_spe__synth_data_source_generic(record, &data_src); >> >> -- >> 2.47.0 >> >> >
diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h index 1443c28545a9..e4115b1e92b2 100644 --- a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h +++ b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h @@ -67,6 +67,15 @@ enum arm_spe_neoverse_data_source { ARM_SPE_NV_DRAM = 0xe, }; +enum arm_spe_ampereone_data_source { + ARM_SPE_AMPEREONE_LOCAL_CHIP_CACHE_OR_DEVICE = 0x0, + ARM_SPE_AMPEREONE_SLC = 0x3, + ARM_SPE_AMPEREONE_REMOTE_CHIP_CACHE = 0x5, + ARM_SPE_AMPEREONE_DDR = 0x7, + ARM_SPE_AMPEREONE_L1D = 0x8, + ARM_SPE_AMPEREONE_L2D = 0x9, +}; + struct arm_spe_record { enum arm_spe_sample_type type; int err; diff --git a/tools/perf/util/arm-spe.c b/tools/perf/util/arm-spe.c index 138ffc71b32d..04bd21ad7ea8 100644 --- a/tools/perf/util/arm-spe.c +++ b/tools/perf/util/arm-spe.c @@ -515,10 +515,69 @@ static void arm_spe__synth_data_source_generic(const struct arm_spe_record *reco data_src->mem_lvl |= PERF_MEM_LVL_REM_CCE1; } +static const struct midr_range ampereone_source_spe[] = { + MIDR_ALL_VERSIONS(MIDR_AMPERE1A), + {}, +}; + +static void arm_spe__synth_data_source_ampereone(const struct arm_spe_record *record, + union perf_mem_data_src *data_src, + u64 midr) +{ + if (!is_midr_in_range_list(midr, ampereone_source_spe)) { + arm_spe__synth_data_source_generic(record, data_src); + return; + } + + if (record->op & ARM_SPE_OP_ST) { + data_src->mem_lvl = PERF_MEM_LVL_NA; + data_src->mem_lvl_num = PERF_MEM_LVLNUM_NA; + data_src->mem_snoop = PERF_MEM_SNOOP_NA; + return; + } + + switch (record->source) { + case ARM_SPE_AMPEREONE_LOCAL_CHIP_CACHE_OR_DEVICE: + data_src->mem_lvl = PERF_MEM_LVL_L2 | PERF_MEM_LVL_HIT; + data_src->mem_lvl_num = PERF_MEM_LVLNUM_L2; + data_src->mem_snoopx = PERF_MEM_SNOOPX_PEER; + break; + case ARM_SPE_AMPEREONE_SLC: + data_src->mem_lvl = PERF_MEM_LVL_L3 | PERF_MEM_LVL_HIT; + data_src->mem_lvl_num = PERF_MEM_LVLNUM_L3; + data_src->mem_snoop = PERF_MEM_SNOOP_HIT; + break; + case ARM_SPE_AMPEREONE_REMOTE_CHIP_CACHE: + data_src->mem_lvl = PERF_MEM_LVL_REM_CCE1; + data_src->mem_lvl_num = PERF_MEM_LVLNUM_ANY_CACHE; + data_src->mem_remote = PERF_MEM_REMOTE_REMOTE; + data_src->mem_snoopx = PERF_MEM_SNOOPX_PEER; + break; + case ARM_SPE_AMPEREONE_DDR: + data_src->mem_lvl = PERF_MEM_LVL_LOC_RAM | PERF_MEM_LVL_HIT; + data_src->mem_lvl_num = PERF_MEM_LVLNUM_RAM; + data_src->mem_snoop = PERF_MEM_SNOOP_NONE; + break; + case ARM_SPE_AMPEREONE_L1D: + data_src->mem_lvl = PERF_MEM_LVL_L1 | PERF_MEM_LVL_HIT; + data_src->mem_lvl_num = PERF_MEM_LVLNUM_L1; + data_src->mem_snoop = PERF_MEM_SNOOP_NONE; + break; + case ARM_SPE_AMPEREONE_L2D: + data_src->mem_lvl = PERF_MEM_LVL_L2 | PERF_MEM_LVL_HIT; + data_src->mem_lvl_num = PERF_MEM_LVLNUM_L2; + data_src->mem_snoop = PERF_MEM_SNOOP_NONE; + break; + default: + break; + } +} + static u64 arm_spe__synth_data_source(const struct arm_spe_record *record, u64 midr) { union perf_mem_data_src data_src = { .mem_op = PERF_MEM_OP_NA }; bool is_neoverse = is_midr_in_range_list(midr, neoverse_spe); + bool is_ampereone = (read_cpuid_implementor() == ARM_CPU_IMP_AMPERE); if (record->op & ARM_SPE_OP_LD) data_src.mem_op = PERF_MEM_OP_LOAD; @@ -529,6 +588,8 @@ static u64 arm_spe__synth_data_source(const struct arm_spe_record *record, u64 m if (is_neoverse) arm_spe__synth_data_source_neoverse(record, &data_src); + else if (is_ampereone) + arm_spe__synth_data_source_ampereone(record, &data_src, midr); else arm_spe__synth_data_source_generic(record, &data_src);
Decode SPE Data Source packets on AmpereOne. The field is IMPDEF. Signed-off-by: Ilkka Koskinen <ilkka@os.amperecomputing.com> --- .../util/arm-spe-decoder/arm-spe-decoder.h | 9 +++ tools/perf/util/arm-spe.c | 61 +++++++++++++++++++ 2 files changed, 70 insertions(+)