Message ID | 20200211184542.29585-2-ionela.voinescu@arm.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | arm64: ARMv8.4 Activity Monitors support | expand |
Hi Ionela, On 11/02/2020 18:45, Ionela Voinescu wrote: > The activity monitors extension is an optional extension introduced > by the ARMv8.4 CPU architecture. This implements basic support for > version 1 of the activity monitors architecture, AMUv1. > > This support includes: > - Extension detection on each CPU (boot, secondary, hotplugged) > - Register interface for AMU aarch64 registers > - disable_amu kernel parameter to disable detection/counter access > at runtime > > Signed-off-by: Ionela Voinescu <ionela.voinescu@arm.com> > Cc: Catalin Marinas <catalin.marinas@arm.com> > Cc: Will Deacon <will@kernel.org> > Cc: Suzuki K Poulose <suzuki.poulose@arm.com> > Cc: Marc Zyngier <maz@kernel.org> > Cc: Mark Rutland <mark.rutland@arm.com> > --- > .../admin-guide/kernel-parameters.txt | 10 ++ > arch/arm64/Kconfig | 31 ++++++ > arch/arm64/include/asm/cpucaps.h | 3 +- > arch/arm64/include/asm/cpufeature.h | 5 + > arch/arm64/include/asm/sysreg.h | 38 ++++++++ > arch/arm64/kernel/cpufeature.c | 97 +++++++++++++++++++ > 6 files changed, 183 insertions(+), 1 deletion(-) > ... 
> diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c > index 04cf64e9f0c9..029a473ad273 100644 > --- a/arch/arm64/kernel/cpufeature.c > +++ b/arch/arm64/kernel/cpufeature.c > @@ -156,6 +156,7 @@ static const struct arm64_ftr_bits ftr_id_aa64pfr0[] = { > ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64PFR0_CSV3_SHIFT, 4, 0), > ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64PFR0_CSV2_SHIFT, 4, 0), > ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR0_DIT_SHIFT, 4, 0), > + ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64PFR0_AMU_SHIFT, 4, 0), > ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SVE), > FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR0_SVE_SHIFT, 4, 0), > ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR0_RAS_SHIFT, 4, 0), > @@ -1150,6 +1151,84 @@ static bool has_hw_dbm(const struct arm64_cpu_capabilities *cap, > > #endif > > +#ifdef CONFIG_ARM64_AMU_EXTN > + > +/* > + * The "amu_cpus" cpumask only signals that the CPU implementation for the > + * flagged CPUs supports the Activity Monitors Unit (AMU) but does not provide > + * information regarding all the events that it supports. When a CPU bit is > + * set in the cpumask, the user of this feature can only rely on the presence > + * of the 4 fixed counters for that CPU. But this does not guarantee that the > + * counters are enabled or access to these counters is enabled by code > + * executed at higher exception levels (firmware). 
> + */ > +static cpumask_var_t amu_cpus; > + > +bool cpu_has_amu_feat(int cpu) > +{ > + if (cpumask_available(amu_cpus)) > + return cpumask_test_cpu(cpu, amu_cpus); > + > + return false; > +} > + > +static void cpu_amu_enable(struct arm64_cpu_capabilities const *cap) > +{ > + if (has_cpuid_feature(cap, SCOPE_LOCAL_CPU)) { > + pr_info("detected CPU%d: Activity Monitors Unit (AMU)\n", > + smp_processor_id()); > + cpumask_set_cpu(smp_processor_id(), amu_cpus); > + } > +} > + > +/* > + * For known broken firmware, a kernel parameter ("disable_amu") is provided > + * to ensure access to AMU counter registers is not attempted. By default, > + * the feature is enabled, but disable_amu can both be used to disable or > + * enable the capability at runtime in case the default changes in the future. > + * > + * To be noted that for security considerations, this does not bypass the > + * setting of AMUSERENR_EL0 to trap accesses from EL0 (userspace) to EL1 > + * (kernel). Therefore, firmware should still ensure accesses to AMU registers > + * are not trapped in EL2/EL3. > + */ > +static bool disable_amu; > + > +static int __init set_disable_amu(char *str) > +{ > + int value = 0; > + > + disable_amu = get_option(&str, &value) ? !!value : true; minor nit: You could simply use strtobool(str) here, which accepts: disable_amu= [0/1/on/off/y/n] > + > + return 0; > +} > +early_param("disable_amu", set_disable_amu); > + > +static bool has_amu(const struct arm64_cpu_capabilities *cap, > + int __unused) > +{ > + /* > + * The AMU extension is a non-conflicting feature: the kernel can > + * safely run a mix of CPUs with and without support for the > + * activity monitors extension. Therefore, if not disabled through > + * the kernel command line early parameter, enable the capability > + * to allow any late CPU to use the feature. 
> + * > + * With this feature enabled, the cpu_enable function will be called > + * for all CPUs that match the criteria, including secondary and > + * hotplugged, marking this feature as present on that respective CPU. > + * The enable function will also print a detection message. > + */ > + > + if (!disable_amu && !zalloc_cpumask_var(&amu_cpus, GFP_KERNEL)) { This looks problematic. Don't we end up in allocating the memory during "each CPU" check and thus leaking memory ? Do we really need to allocate this dynamically ? > + pr_err("Activity Monitors Unit (AMU): fail to allocate memory"); > + disable_amu = true; > + } > + > + return !disable_amu; > +} > +#endif > + > #ifdef CONFIG_ARM64_VHE > static bool runs_at_el2(const struct arm64_cpu_capabilities *entry, int __unused) > { > @@ -1419,6 +1498,24 @@ static const struct arm64_cpu_capabilities arm64_features[] = { > .cpu_enable = cpu_clear_disr, > }, > #endif /* CONFIG_ARM64_RAS_EXTN */ > +#ifdef CONFIG_ARM64_AMU_EXTN > + { > + /* > + * The feature is enabled by default if CONFIG_ARM64_AMU_EXTN=y. > + * Therefore, don't provide .desc as we don't want the detection > + * message to be shown until at least one CPU is detected to > + * support the feature. > + */ > + .capability = ARM64_HAS_AMU_EXTN, > + .type = ARM64_CPUCAP_WEAK_LOCAL_CPU_FEATURE, > + .matches = has_amu, > + .sys_reg = SYS_ID_AA64PFR0_EL1, > + .sign = FTR_UNSIGNED, > + .field_pos = ID_AA64PFR0_AMU_SHIFT, > + .min_field_value = ID_AA64PFR0_AMU, > + .cpu_enable = cpu_amu_enable, > + }, > +#endif /* CONFIG_ARM64_AMU_EXTN */ > { > .desc = "Data cache clean to the PoU not required for I/D coherence", > .capability = ARM64_HAS_CACHE_IDC, > Thanks Suzuki
On 12/02/2020 11:30, Suzuki Kuruppassery Poulose wrote: >> +static bool has_amu(const struct arm64_cpu_capabilities *cap, >> + int __unused) >> +{ >> + /* >> + * The AMU extension is a non-conflicting feature: the kernel can >> + * safely run a mix of CPUs with and without support for the >> + * activity monitors extension. Therefore, if not disabled through >> + * the kernel command line early parameter, enable the capability >> + * to allow any late CPU to use the feature. >> + * >> + * With this feature enabled, the cpu_enable function will be called >> + * for all CPUs that match the criteria, including secondary and >> + * hotplugged, marking this feature as present on that respective CPU. >> + * The enable function will also print a detection message. >> + */ >> + >> + if (!disable_amu && !zalloc_cpumask_var(&amu_cpus, GFP_KERNEL)) { > > This looks problematic. Don't we end up in allocating the memory during > "each CPU" check and thus leaking memory ? Do we really need to allocate > this dynamically ? > For the static vs dynamic thing, I think it's not *too* important here since we don't risk pwning the stack because of the cpumask. That said, if we are somewhat pedantic about memory usage, the static allocation is done against NR_CPUS whereas the dynamic one is done against nr_cpu_ids. Pretty inconsequential for a single cpumask, but I guess it all adds up eventually...
Hi Suzuki, On Wednesday 12 Feb 2020 at 11:30:44 (+0000), Suzuki Kuruppassery Poulose wrote: > > +static int __init set_disable_amu(char *str) > > +{ > > + int value = 0; > > + > > + disable_amu = get_option(&str, &value) ? !!value : true; > > minor nit: You could simply use strtobool(str) here, which accepts: > > disable_amu= [0/1/on/off/y/n] > Yes, this was intentional as I wanted "disable_amu" to be a valid option as well, not only "disable_amu=<option>". If you don't mind I'd like to keep it like this. Currently the use of AMU is enabled by default, and the most common kernel parameter to disable it would be "disable_amu". Allowing "disable_amu=0" is just in case we change the default in the kernel to not support AMU and we'd like platforms to be able to enable it. > > > + > > + return 0; > > +} > > +early_param("disable_amu", set_disable_amu); > > + > > +static bool has_amu(const struct arm64_cpu_capabilities *cap, > > + int __unused) > > +{ > > + /* > > + * The AMU extension is a non-conflicting feature: the kernel can > > + * safely run a mix of CPUs with and without support for the > > + * activity monitors extension. Therefore, if not disabled through > > + * the kernel command line early parameter, enable the capability > > + * to allow any late CPU to use the feature. > > + * > > + * With this feature enabled, the cpu_enable function will be called > > + * for all CPUs that match the criteria, including secondary and > > + * hotplugged, marking this feature as present on that respective CPU. > > + * The enable function will also print a detection message. > > + */ > > + > > + if (!disable_amu && !zalloc_cpumask_var(&amu_cpus, GFP_KERNEL)) { > > This looks problematic. Don't we end up in allocating the memory during > "each CPU" check and thus leaking memory ? Do we really need to allocate > this dynamically ? > Yes, it does make some assumptions. 
Given that the AMU capability is a WEAK_LOCAL_CPU_FEATURE I relied on the match function being called only once, when the return value is true. If the return value is false, which will result in it being called multiple times, it's either because disable_amu == false, or it has become false due to a previous failed allocation, in which case a new allocation will not be attempted. For better handling I could have a cpumask_available check before the allocation just in case the capability type changes in the future, or to at least not rely on assumptions based on the type of the capability. The reason this is dynamic is that I wanted to avoid the memory being allocated when disable_amu is true - as Valentin mentioned in a comment in the meantime "the static allocation is done against NR_CPUS whereas the dynamic one is done against nr_cpu_ids". Would this be alright? diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index 182e05ca3410..4cee6b147ddd 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -1222,7 +1222,11 @@ static bool has_amu(const struct arm64_cpu_capabilities *cap, * The enable function will also print a detection message. */ - if (!disable_amu && !zalloc_cpumask_var(&amu_cpus, GFP_KERNEL)) { + if (disable_amu) + return false; + + if (!cpumask_available(amu_cpus) && + !zalloc_cpumask_var(&amu_cpus, GFP_KERNEL)) { pr_err("Activity Monitors Unit (AMU): fail to allocate memory"); disable_amu = true; } Otherwise I can go for static allocation. Thank you, Ionela.
Hi Ionela, On 12/02/2020 16:10, Ionela Voinescu wrote: > Hi Suzuki, > > On Wednesday 12 Feb 2020 at 11:30:44 (+0000), Suzuki Kuruppassery Poulose wrote: >>> +static int __init set_disable_amu(char *str) >>> +{ >>> + int value = 0; >>> + >>> + disable_amu = get_option(&str, &value) ? !!value : true; >> >> minor nit: You could simply use strtobool(str) here, which accepts: >> >> disable_amu= [0/1/on/off/y/n] >> > > Yes, this was intentional as I wanted "disable_amu" to be a valid option > as well, not only "disable_amu=<option>". > > If you don't mind I'd like to keep it like this. Currently the use of Sure, thats fine. >>> + >>> + return 0; >>> +} >>> +early_param("disable_amu", set_disable_amu); >>> + >>> +static bool has_amu(const struct arm64_cpu_capabilities *cap, >>> + int __unused) >>> +{ >>> + /* >>> + * The AMU extension is a non-conflicting feature: the kernel can >>> + * safely run a mix of CPUs with and without support for the >>> + * activity monitors extension. Therefore, if not disabled through >>> + * the kernel command line early parameter, enable the capability >>> + * to allow any late CPU to use the feature. >>> + * >>> + * With this feature enabled, the cpu_enable function will be called >>> + * for all CPUs that match the criteria, including secondary and >>> + * hotplugged, marking this feature as present on that respective CPU. >>> + * The enable function will also print a detection message. >>> + */ >>> + >>> + if (!disable_amu && !zalloc_cpumask_var(&amu_cpus, GFP_KERNEL)) { >> >> This looks problematic. Don't we end up in allocating the memory during >> "each CPU" check and thus leaking memory ? Do we really need to allocate >> this dynamically ? >> > > Yes, it does make some assumptions. Given that the AMU capability is > a WEAK_LOCAL_CPU_FEATURE I relied on the match function being called > only once, when the return value is true. If the return value is false, That is not correct. 
A WEAK_LOCAL_CPU_FEATURE is still SCOPE_LOCAL_CPU, implies it is run on all the booting CPUs (including the hotplugged ones). The WEAK is there to imply that it's "permitted" or "optional" for a hotplugged CPU. So, eventually you will re-allocate this variable every single time a CPU turns up, where you could also lose the current state. > which will result in it being called multiple times, it's either because > disable_amu == false, or it has become false due to a previous failed > allocation, in which case a new allocation will not be attempted. > > For better handling I could have a cpumask_available check before the > allocation just in case the capability type changes in the future, or to > at least not rely on assumptions based on the type of the capability. > > The reason this is dynamic is that I wanted to avoid the memory being > allocated when disable_amu is true - as Valentin mentioned in a comment > in the meantime "the static allocation is done against NR_CPUS whereas > the dynamic one is done against nr_cpu_ids". > > Would this be alright? > > diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c > index 182e05ca3410..4cee6b147ddd 100644 > --- a/arch/arm64/kernel/cpufeature.c > +++ b/arch/arm64/kernel/cpufeature.c > @@ -1222,7 +1222,11 @@ static bool has_amu(const struct arm64_cpu_capabilities *cap, > * The enable function will also print a detection message. > */ > > - if (!disable_amu && !zalloc_cpumask_var(&amu_cpus, GFP_KERNEL)) { > + if (disable_amu) > + return false; > + > + if (!cpumask_available(amu_cpus) && > + !zalloc_cpumask_var(&amu_cpus, GFP_KERNEL)) { > pr_err("Activity Monitors Unit (AMU): fail to allocate memory"); > disable_amu = true; > } This looks fine. Cheers Suzuki
Hi, On 2/12/20 4:10 PM, Ionela Voinescu wrote: > Hi Suzuki, > > On Wednesday 12 Feb 2020 at 11:30:44 (+0000), Suzuki Kuruppassery Poulose wrote: >>> +static int __init set_disable_amu(char *str) >>> +{ >>> + int value = 0; >>> + >>> + disable_amu = get_option(&str, &value) ? !!value : true; >> minor nit: You could simply use strtobool(str) here, which accepts: >> >> disable_amu= [0/1/on/off/y/n] >> > Yes, this was intentional as I wanted "disable_amu" to be a valid option > as well, not only "disable_amu=<option>". > > If you don't mind I'd like to keep it like this. Currently the use of > AMU is enabled by default, and the most common kernel parameter to > disable it would be "disable_amu". Allowing "disable_amu=0" is just in > case we change the default in the kernel to not support AMU and we'd > like platforms to be able to enable it. > Sorry for jumping into thread, but can we avoid negatives into naming which accept values? It is always tricky to get expected effect when both are combined. If value doesn't really matter then can it be just "noamu"? If value does matter can it be (per Suzuki) amu=[0/1/on/off/y/n]? Or can you postpone introduction of "just in case" option till that case happens? Cheers Vladimir
Hi Suzuki, On Wednesday 12 Feb 2020 at 16:20:56 (+0000), Suzuki Kuruppassery Poulose wrote: > > > > +static bool has_amu(const struct arm64_cpu_capabilities *cap, > > > > + int __unused) > > > > +{ > > > > + /* > > > > + * The AMU extension is a non-conflicting feature: the kernel can > > > > + * safely run a mix of CPUs with and without support for the > > > > + * activity monitors extension. Therefore, if not disabled through > > > > + * the kernel command line early parameter, enable the capability > > > > + * to allow any late CPU to use the feature. > > > > + * > > > > + * With this feature enabled, the cpu_enable function will be called > > > > + * for all CPUs that match the criteria, including secondary and > > > > + * hotplugged, marking this feature as present on that respective CPU. > > > > + * The enable function will also print a detection message. > > > > + */ > > > > + > > > > + if (!disable_amu && !zalloc_cpumask_var(&amu_cpus, GFP_KERNEL)) { > > > > > > This looks problematic. Don't we end up in allocating the memory during > > > "each CPU" check and thus leaking memory ? Do we really need to allocate > > > this dynamically ? > > > > > > > Yes, it does make some assumptions. Given that the AMU capability is > > a WEAK_LOCAL_CPU_FEATURE I relied on the match function being called > > only once, when the return value is true. If the return value is false, > > That is not correct. A WEAK_LOCAL_CPU_FEATURE is still SCOPE_LOCAL_CPU, > implies it is run on all the booting CPUs (including the hotplugged > ones). The WEAK is there to imply that its "permitted" or "optional" > for a hotplugged CPU. So, eventually you will re-allocate this variable > every single time a CPU turns up, where you could also loose the current > state. > > > which will result in it being called multiple times, it's either because > > disable_amu == false, or it has become false due to a previous failed > > allocation, in which case a new allocation will not be attempted. 
First of all, I agree with you that this should be corrected. But for completion (and my education) I retraced my steps in regards to my assumption above. While cpu_enable is called for all CPUs - boot, secondary, hotplugged, the matches function (in this case has_amu) is not always called for all CPUs, and that's where the confusion came from. Looking over the update_cpu_capabilities function, that's called from both setup_boot_cpu_capabilities and check_local_cpu_capabilities (secondary CPUs) for SCOPE_LOCAL_CPU: ----- static void update_cpu_capabilities(u16 scope_mask) { int i; const struct arm64_cpu_capabilities *caps; scope_mask &= ARM64_CPUCAP_SCOPE_MASK; for (i = 0; i < ARM64_NCAPS; i++) { caps = cpu_hwcaps_ptrs[i]; if (!caps || !(caps->type & scope_mask) || cpus_have_cap(caps->capability) || !caps->matches(caps, cpucap_default_scope(caps))) continue; --> The matches function is only called if !cpus_have_cap if (caps->desc) pr_info("detected: %s\n", caps->desc); cpus_set_cap(caps->capability); --> If matches returns true we mark it as present in cpu_hwcaps. if ((scope_mask & SCOPE_BOOT_CPU) && (caps->type & SCOPE_BOOT_CPU)) set_bit(caps->capability, boot_capabilities); } } --- Therefore caps->matches (in this case has_amu) will only be called as long as it returns false. This is where my assumption above came from. Also, this is the reason it was working nicely in my testing, as I did not test hotplug this time. Where the has_amu code breaks is when we end up calling verify_local_cpu_capabilities instead of update_cpu_capabilities after sys_caps_initialised, which will happen for hotplugged CPUs. In that case we call caps->matches for all CPUs. Also, if anyone in the future ends up calling this_cpu_has_cap for the AMU capability. I will fix this. Thank you, Ionela.
On Wednesday 12 Feb 2020 at 16:24:42 (+0000), Vladimir Murzin wrote: > Hi, > > On 2/12/20 4:10 PM, Ionela Voinescu wrote: > > Hi Suzuki, > > > > On Wednesday 12 Feb 2020 at 11:30:44 (+0000), Suzuki Kuruppassery Poulose wrote: > >>> +static int __init set_disable_amu(char *str) > >>> +{ > >>> + int value = 0; > >>> + > >>> + disable_amu = get_option(&str, &value) ? !!value : true; > >> minor nit: You could simply use strtobool(str) here, which accepts: > >> > >> disable_amu= [0/1/on/off/y/n] > >> > > Yes, this was intentional as I wanted "disable_amu" to be a valid option > > as well, not only "disable_amu=<option>". > > > > If you don't mind I'd like to keep it like this. Currently the use of > > AMU is enabled by default, and the most common kernel parameter to > > disable it would be "disable_amu". Allowing "disable_amu=0" is just in > > case we change the default in the kernel to not support AMU and we'd > > like platforms to be able to enable it. > > > > Sorry for jumping into thread, but can we avoid negatives into naming which > accept values? If is always tricky to get expected effect when both are combined. > > If value doesn't really mater than can it be just "noamu"? > > If value does matter can it be (per Suzuki) amu=[0/1/on/off/y/n]? > > Or can you postpone introduction of "just in case" option till that case happens? > No worries, thank you very much for the input. I'll change it to amu=[0/1/on/off/y/n] for clarity. Thank you, Ionela. > Cheers > Vladimir
Hi Ionela, On 02/12/2020 06:20 PM, Ionela Voinescu wrote: > Hi Suzuki, > > On Wednesday 12 Feb 2020 at 16:20:56 (+0000), Suzuki Kuruppassery Poulose wrote: >>>>> +static bool has_amu(const struct arm64_cpu_capabilities *cap, >>>>> + int __unused) >>>>> +{ >>>>> + /* >>>>> + * The AMU extension is a non-conflicting feature: the kernel can >>>>> + * safely run a mix of CPUs with and without support for the >>>>> + * activity monitors extension. Therefore, if not disabled through >>>>> + * the kernel command line early parameter, enable the capability >>>>> + * to allow any late CPU to use the feature. >>>>> + * >>>>> + * With this feature enabled, the cpu_enable function will be called >>>>> + * for all CPUs that match the criteria, including secondary and >>>>> + * hotplugged, marking this feature as present on that respective CPU. >>>>> + * The enable function will also print a detection message. >>>>> + */ >>>>> + >>>>> + if (!disable_amu && !zalloc_cpumask_var(&amu_cpus, GFP_KERNEL)) { >>>> >>>> This looks problematic. Don't we end up in allocating the memory during >>>> "each CPU" check and thus leaking memory ? Do we really need to allocate >>>> this dynamically ? >>>> >>> >>> Yes, it does make some assumptions. Given that the AMU capability is >>> a WEAK_LOCAL_CPU_FEATURE I relied on the match function being called >>> only once, when the return value is true. If the return value is false, >> >> That is not correct. A WEAK_LOCAL_CPU_FEATURE is still SCOPE_LOCAL_CPU, >> implies it is run on all the booting CPUs (including the hotplugged >> ones). The WEAK is there to imply that its "permitted" or "optional" >> for a hotplugged CPU. So, eventually you will re-allocate this variable >> every single time a CPU turns up, where you could also loose the current >> state. 
>> > >>> which will result in it being called multiple times, it's either because >>> disable_amu == false, or it has become false due to a previous failed >>> allocation, in which case a new allocation will not be attempted. > > First of all, I agree with you that this should be corrected. > > But for completion (and my education) I retraced my steps in regards > to my assumption above. While cpu_enable is called for all CPUs - boot, > secondary, hotplugged, the matches function (in this case has_amu) is > not always called for all CPUs, and that's where the confusion came > from. > > Looking over the update_cpu_capabilities function, that's called from > both setup_boot_cpu_capabilities and check_local_cpu_capabilities > (secondary CPUs) for SCOPE_LOCAL_CPU: > > ----- > static void update_cpu_capabilities(u16 scope_mask) > { > int i; > const struct arm64_cpu_capabilities *caps; > > scope_mask &= ARM64_CPUCAP_SCOPE_MASK; > for (i = 0; i < ARM64_NCAPS; i++) { > caps = cpu_hwcaps_ptrs[i]; > if (!caps || !(caps->type & scope_mask) || > cpus_have_cap(caps->capability) || > !caps->matches(caps, cpucap_default_scope(caps))) > continue; > > --> The matches function is only called if !cpus_have_cap Agreed. Your analysis is correct. This was done as a micro optimization(!) as it is pointless to check if something should be set, when it is already set. > > > if (caps->desc) > pr_info("detected: %s\n", caps->desc); > cpus_set_cap(caps->capability); > > --> If matches returns true we mark it as present in cpu_hwcaps. > > if ((scope_mask & SCOPE_BOOT_CPU) && (caps->type & SCOPE_BOOT_CPU)) > set_bit(caps->capability, boot_capabilities); > } > } > --- > > Therefore caps->matches (in this case has_amu) will only be called as > long as it returns false. This is where my assumption above came from. > Also, this is the reason it was working nicely in my testing, as I did > not test hotplug this time. 
> > Where the has_amu code breaks is when we end up calling > verify_local_cpu_capabilities instead of update_cpu_capabilities after > sys_caps_initialised, which will happen for hotplugged CPUs. > In that case we call caps->matches for all CPUs. Also, if anyone in the > future ends up calling this_cpu_has_cap for the AMU capability. True. > > I will fix this. Cheers Suzuki
Hi guys, On Wednesday 12 Feb 2020 at 16:20:56 (+0000), Suzuki Kuruppassery Poulose wrote: > > For better handling I could have a cpumask_available check before the > > allocation just in case the capability type changes in the future, or to > > at least not rely on assumptions based on the type of the capability. > > > > The reason this is dynamic is that I wanted to avoid the memory being > > allocated when disable_amu is true - as Valentin mentioned in a comment > > in the meantime "the static allocation is done against NR_CPUS whereas > > the dynamic one is done against nr_cpu_ids". > > > > Would this be alright? > > > > diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c > > index 182e05ca3410..4cee6b147ddd 100644 > > --- a/arch/arm64/kernel/cpufeature.c > > +++ b/arch/arm64/kernel/cpufeature.c > > @@ -1222,7 +1222,11 @@ static bool has_amu(const struct arm64_cpu_capabilities *cap, > > * The enable function will also print a detection message. > > */ > > - if (!disable_amu && !zalloc_cpumask_var(&amu_cpus, GFP_KERNEL)) { > > + if (disable_amu) > > + return false; > > + > > + if (!cpumask_available(amu_cpus) && > > + !zalloc_cpumask_var(&amu_cpus, GFP_KERNEL)) { > > pr_err("Activity Monitors Unit (AMU): fail to allocate memory"); > > disable_amu = true; > > } Going down the rabbit hole in regards to this section, it seems it's actually not fine. When CONFIG_CPUMASK_OFFSTACK==y it fails to allocate memory because zalloc_cpumask_var cannot be used before initializing the slub allocator (mm_init) to allocate a cpumask. The alternative alloc_bootmem_cpumask_var is an __init function that can be used only during the initialization phase, which is not the case for has_amu that can be called later (for hotplugged CPUs). Therefore, dynamic allocation is not an option here. Thanks, Ionela.
diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index ade4e6ec23e0..6f0c6d22fa4c 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -843,6 +843,16 @@ can be useful when debugging issues that require an SLB miss to occur. + disable_amu [ARM64] + Disables detection, enablement and access to counter + registers of the Activity Monitors Unit (AMU). By + default these are enabled. "disable_amu=0/1" is also + allowed. + disable_amu / disable_amu=1 - ensures access to AMU's + counter registers is not attempted. + disable_amu=0 - enables or maintain detection and + access to AMU's counter registers. + disable= [IPV6] See Documentation/networking/ipv6.txt. diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 3ab05857ca8f..b3408d7629fd 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -1484,6 +1484,37 @@ config ARM64_PTR_AUTH endmenu +menu "ARMv8.4 architectural features" + +config ARM64_AMU_EXTN + bool "Enable support for the Activity Monitors Unit CPU extension" + default y + help + The activity monitors extension is an optional extension introduced + by the ARMv8.4 CPU architecture. This enables support for version 1 + of the activity monitors architecture, AMUv1. + + To enable the use of this extension on CPUs that implement it, say Y. + + Note that for architectural reasons, firmware _must_ implement AMU + support when running on CPUs that present the activity monitors + extension. The required support is present in: + * Version 1.5 and later of the ARM Trusted Firmware + + For kernels that have this configuration enabled but boot with broken + firmware, you may need to say N here until the firmware is fixed. + Otherwise you may experience firmware panics or lockups when + accessing the counter registers. 
Even if you are not observing these + symptoms, the values returned by the register reads might not + correctly reflect reality. Most commonly, the value read will be 0, + indicating that the counter is not enabled. + + Alternatively, a kernel parameter is provided to disable detection, + enablement and access to the counter registers of the Activity + Monitors Unit at runtime. + +endmenu + config ARM64_SVE bool "ARM Scalable Vector Extension support" default y diff --git a/arch/arm64/include/asm/cpucaps.h b/arch/arm64/include/asm/cpucaps.h index b92683871119..7dde890bde50 100644 --- a/arch/arm64/include/asm/cpucaps.h +++ b/arch/arm64/include/asm/cpucaps.h @@ -56,7 +56,8 @@ #define ARM64_WORKAROUND_CAVIUM_TX2_219_PRFM 46 #define ARM64_WORKAROUND_1542419 47 #define ARM64_WORKAROUND_1319367 48 +#define ARM64_HAS_AMU_EXTN 49 -#define ARM64_NCAPS 49 +#define ARM64_NCAPS 50 #endif /* __ASM_CPUCAPS_H */ diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h index 4261d55e8506..5ae6e00ccabb 100644 --- a/arch/arm64/include/asm/cpufeature.h +++ b/arch/arm64/include/asm/cpufeature.h @@ -673,6 +673,11 @@ static inline bool cpu_has_hw_af(void) ID_AA64MMFR1_HADBS_SHIFT); } +#ifdef CONFIG_ARM64_AMU_EXTN +/* Check whether the current CPU supports the Activity Monitors Unit (AMU) */ +extern bool cpu_has_amu_feat(int cpu); +#endif + #endif /* __ASSEMBLY__ */ #endif diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index 6e919fafb43d..eba13b8994ce 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -382,6 +382,42 @@ #define SYS_TPIDR_EL0 sys_reg(3, 3, 13, 0, 2) #define SYS_TPIDRRO_EL0 sys_reg(3, 3, 13, 0, 3) +/* Definitions for system register interface to AMU for ARMv8.4 onwards */ +#define SYS_AM_EL0(crm, op2) sys_reg(3, 3, 13, (crm), (op2)) +#define SYS_AMCR_EL0 SYS_AM_EL0(2, 0) +#define SYS_AMCFGR_EL0 SYS_AM_EL0(2, 1) +#define SYS_AMCGCR_EL0 SYS_AM_EL0(2, 2) +#define SYS_AMUSERENR_EL0 
SYS_AM_EL0(2, 3) +#define SYS_AMCNTENCLR0_EL0 SYS_AM_EL0(2, 4) +#define SYS_AMCNTENSET0_EL0 SYS_AM_EL0(2, 5) +#define SYS_AMCNTENCLR1_EL0 SYS_AM_EL0(3, 0) +#define SYS_AMCNTENSET1_EL0 SYS_AM_EL0(3, 1) + +/* + * Group 0 of activity monitors (architected): + * op0 op1 CRn CRm op2 + * Counter: 11 011 1101 010:n<3> n<2:0> + * Type: 11 011 1101 011:n<3> n<2:0> + * n: 0-15 + * + * Group 1 of activity monitors (auxiliary): + * op0 op1 CRn CRm op2 + * Counter: 11 011 1101 110:n<3> n<2:0> + * Type: 11 011 1101 111:n<3> n<2:0> + * n: 0-15 + */ + +#define SYS_AMEVCNTR0_EL0(n) SYS_AM_EL0(4 + ((n) >> 3), (n) & 7) +#define SYS_AMEVTYPE0_EL0(n) SYS_AM_EL0(6 + ((n) >> 3), (n) & 7) +#define SYS_AMEVCNTR1_EL0(n) SYS_AM_EL0(12 + ((n) >> 3), (n) & 7) +#define SYS_AMEVTYPE1_EL0(n) SYS_AM_EL0(14 + ((n) >> 3), (n) & 7) + +/* AMU v1: Fixed (architecturally defined) activity monitors */ +#define SYS_AMEVCNTR0_CORE_EL0 SYS_AMEVCNTR0_EL0(0) +#define SYS_AMEVCNTR0_CONST_EL0 SYS_AMEVCNTR0_EL0(1) +#define SYS_AMEVCNTR0_INST_RET_EL0 SYS_AMEVCNTR0_EL0(2) +#define SYS_AMEVCNTR0_MEM_STALL SYS_AMEVCNTR0_EL0(3) + #define SYS_CNTFRQ_EL0 sys_reg(3, 3, 14, 0, 0) #define SYS_CNTP_TVAL_EL0 sys_reg(3, 3, 14, 2, 0) @@ -577,6 +613,7 @@ #define ID_AA64PFR0_CSV3_SHIFT 60 #define ID_AA64PFR0_CSV2_SHIFT 56 #define ID_AA64PFR0_DIT_SHIFT 48 +#define ID_AA64PFR0_AMU_SHIFT 44 #define ID_AA64PFR0_SVE_SHIFT 32 #define ID_AA64PFR0_RAS_SHIFT 28 #define ID_AA64PFR0_GIC_SHIFT 24 @@ -587,6 +624,7 @@ #define ID_AA64PFR0_EL1_SHIFT 4 #define ID_AA64PFR0_EL0_SHIFT 0 +#define ID_AA64PFR0_AMU 0x1 #define ID_AA64PFR0_SVE 0x1 #define ID_AA64PFR0_RAS_V1 0x1 #define ID_AA64PFR0_FP_NI 0xf diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index 04cf64e9f0c9..029a473ad273 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -156,6 +156,7 @@ static const struct arm64_ftr_bits ftr_id_aa64pfr0[] = { ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64PFR0_CSV3_SHIFT, 
4, 0), ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64PFR0_CSV2_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR0_DIT_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64PFR0_AMU_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SVE), FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR0_SVE_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR0_RAS_SHIFT, 4, 0), @@ -1150,6 +1151,84 @@ static bool has_hw_dbm(const struct arm64_cpu_capabilities *cap, #endif +#ifdef CONFIG_ARM64_AMU_EXTN + +/* + * The "amu_cpus" cpumask only signals that the CPU implementation for the + * flagged CPUs supports the Activity Monitors Unit (AMU) but does not provide + * information regarding all the events that it supports. When a CPU bit is + * set in the cpumask, the user of this feature can only rely on the presence + * of the 4 fixed counters for that CPU. But this does not guarantee that the + * counters are enabled or access to these counters is enabled by code + * executed at higher exception levels (firmware). + */ +static cpumask_var_t amu_cpus; + +bool cpu_has_amu_feat(int cpu) +{ + if (cpumask_available(amu_cpus)) + return cpumask_test_cpu(cpu, amu_cpus); + + return false; +} + +static void cpu_amu_enable(struct arm64_cpu_capabilities const *cap) +{ + if (has_cpuid_feature(cap, SCOPE_LOCAL_CPU)) { + pr_info("detected CPU%d: Activity Monitors Unit (AMU)\n", + smp_processor_id()); + cpumask_set_cpu(smp_processor_id(), amu_cpus); + } +} + +/* + * For known broken firmware, a kernel parameter ("disable_amu") is provided + * to ensure access to AMU counter registers is not attempted. By default, + * the feature is enabled, but disable_amu can be used to either disable or + * enable the capability at runtime in case the default changes in the future.
+ * + * Note that, for security reasons, this does not bypass the + * setting of AMUSERENR_EL0 to trap accesses from EL0 (userspace) to EL1 + * (kernel). Therefore, firmware should still ensure accesses to AMU registers + * are not trapped in EL2/EL3. + */ +static bool disable_amu; + +static int __init set_disable_amu(char *str) +{ + int value = 0; + + disable_amu = get_option(&str, &value) ? !!value : true; + + return 0; +} +early_param("disable_amu", set_disable_amu); + +static bool has_amu(const struct arm64_cpu_capabilities *cap, + int __unused) +{ + /* + * The AMU extension is a non-conflicting feature: the kernel can + * safely run a mix of CPUs with and without support for the + * activity monitors extension. Therefore, if not disabled through + * the kernel command line early parameter, enable the capability + * to allow any late CPU to use the feature. + * + * With this feature enabled, the cpu_enable function will be called + * for all CPUs that match the criteria, including secondary and + * hotplugged, marking this feature as present on that respective CPU. + * The enable function will also print a detection message. + */ + + if (!disable_amu && !zalloc_cpumask_var(&amu_cpus, GFP_KERNEL)) { + pr_err("Activity Monitors Unit (AMU): failed to allocate memory\n"); + disable_amu = true; + } + + return !disable_amu; +} +#endif + #ifdef CONFIG_ARM64_VHE static bool runs_at_el2(const struct arm64_cpu_capabilities *entry, int __unused) { @@ -1419,6 +1498,24 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .cpu_enable = cpu_clear_disr, }, #endif /* CONFIG_ARM64_RAS_EXTN */ +#ifdef CONFIG_ARM64_AMU_EXTN + { + /* + * The feature is enabled by default if CONFIG_ARM64_AMU_EXTN=y. + * Therefore, don't provide .desc as we don't want the detection + * message to be shown until at least one CPU is detected to + * support the feature.
+ */ + .capability = ARM64_HAS_AMU_EXTN, + .type = ARM64_CPUCAP_WEAK_LOCAL_CPU_FEATURE, + .matches = has_amu, + .sys_reg = SYS_ID_AA64PFR0_EL1, + .sign = FTR_UNSIGNED, + .field_pos = ID_AA64PFR0_AMU_SHIFT, + .min_field_value = ID_AA64PFR0_AMU, + .cpu_enable = cpu_amu_enable, + }, +#endif /* CONFIG_ARM64_AMU_EXTN */ { .desc = "Data cache clean to the PoU not required for I/D coherence", .capability = ARM64_HAS_CACHE_IDC,
The activity monitors extension is an optional extension introduced by the ARMv8.4 CPU architecture. This implements basic support for version 1 of the activity monitors architecture, AMUv1. This support includes: - Extension detection on each CPU (boot, secondary, hotplugged) - Register interface for AMU aarch64 registers - disable_amu kernel parameter to disable detection/counter access at runtime Signed-off-by: Ionela Voinescu <ionela.voinescu@arm.com> Cc: Catalin Marinas <catalin.marinas@arm.com> Cc: Will Deacon <will@kernel.org> Cc: Suzuki K Poulose <suzuki.poulose@arm.com> Cc: Marc Zyngier <maz@kernel.org> Cc: Mark Rutland <mark.rutland@arm.com> --- .../admin-guide/kernel-parameters.txt | 10 ++ arch/arm64/Kconfig | 31 ++++++ arch/arm64/include/asm/cpucaps.h | 3 +- arch/arm64/include/asm/cpufeature.h | 5 + arch/arm64/include/asm/sysreg.h | 38 ++++++++ arch/arm64/kernel/cpufeature.c | 97 +++++++++++++++++++ 6 files changed, 183 insertions(+), 1 deletion(-)