diff mbox series

[v3,1/7] arm64: add support for the AMU extension v1

Message ID 20200211184542.29585-2-ionela.voinescu@arm.com (mailing list archive)
State Not Applicable, archived
Headers show
Series arm64: ARMv8.4 Activity Monitors support | expand

Commit Message

Ionela Voinescu Feb. 11, 2020, 6:45 p.m. UTC
The activity monitors extension is an optional extension introduced
by the ARMv8.4 CPU architecture. This implements basic support for
version 1 of the activity monitors architecture, AMUv1.

This support includes:
- Extension detection on each CPU (boot, secondary, hotplugged)
- Register interface for AMU aarch64 registers
- disable_amu kernel parameter to disable detection/counter access
  at runtime

Signed-off-by: Ionela Voinescu <ionela.voinescu@arm.com>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Will Deacon <will@kernel.org>
Cc: Suzuki K Poulose <suzuki.poulose@arm.com>
Cc: Marc Zyngier <maz@kernel.org>
Cc: Mark Rutland <mark.rutland@arm.com>
---
 .../admin-guide/kernel-parameters.txt         | 10 ++
 arch/arm64/Kconfig                            | 31 ++++++
 arch/arm64/include/asm/cpucaps.h              |  3 +-
 arch/arm64/include/asm/cpufeature.h           |  5 +
 arch/arm64/include/asm/sysreg.h               | 38 ++++++++
 arch/arm64/kernel/cpufeature.c                | 97 +++++++++++++++++++
 6 files changed, 183 insertions(+), 1 deletion(-)

Comments

Suzuki K Poulose Feb. 12, 2020, 11:30 a.m. UTC | #1
Hi Ionela,

On 11/02/2020 18:45, Ionela Voinescu wrote:
> The activity monitors extension is an optional extension introduced
> by the ARMv8.4 CPU architecture. This implements basic support for
> version 1 of the activity monitors architecture, AMUv1.
> 
> This support includes:
> - Extension detection on each CPU (boot, secondary, hotplugged)
> - Register interface for AMU aarch64 registers
> - disable_amu kernel parameter to disable detection/counter access
>    at runtime
> 
> Signed-off-by: Ionela Voinescu <ionela.voinescu@arm.com>
> Cc: Catalin Marinas <catalin.marinas@arm.com>
> Cc: Will Deacon <will@kernel.org>
> Cc: Suzuki K Poulose <suzuki.poulose@arm.com>
> Cc: Marc Zyngier <maz@kernel.org>
> Cc: Mark Rutland <mark.rutland@arm.com>
> ---
>   .../admin-guide/kernel-parameters.txt         | 10 ++
>   arch/arm64/Kconfig                            | 31 ++++++
>   arch/arm64/include/asm/cpucaps.h              |  3 +-
>   arch/arm64/include/asm/cpufeature.h           |  5 +
>   arch/arm64/include/asm/sysreg.h               | 38 ++++++++
>   arch/arm64/kernel/cpufeature.c                | 97 +++++++++++++++++++
>   6 files changed, 183 insertions(+), 1 deletion(-)
> 
...

> diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
> index 04cf64e9f0c9..029a473ad273 100644
> --- a/arch/arm64/kernel/cpufeature.c
> +++ b/arch/arm64/kernel/cpufeature.c
> @@ -156,6 +156,7 @@ static const struct arm64_ftr_bits ftr_id_aa64pfr0[] = {
>   	ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64PFR0_CSV3_SHIFT, 4, 0),
>   	ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64PFR0_CSV2_SHIFT, 4, 0),
>   	ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR0_DIT_SHIFT, 4, 0),
> +	ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64PFR0_AMU_SHIFT, 4, 0),
>   	ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SVE),
>   				   FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR0_SVE_SHIFT, 4, 0),
>   	ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR0_RAS_SHIFT, 4, 0),
> @@ -1150,6 +1151,84 @@ static bool has_hw_dbm(const struct arm64_cpu_capabilities *cap,
>   
>   #endif
>   
> +#ifdef CONFIG_ARM64_AMU_EXTN
> +
> +/*
> + * The "amu_cpus" cpumask only signals that the CPU implementation for the
> + * flagged CPUs supports the Activity Monitors Unit (AMU) but does not provide
> + * information regarding all the events that it supports. When a CPU bit is
> + * set in the cpumask, the user of this feature can only rely on the presence
> + * of the 4 fixed counters for that CPU. But this does not guarantee that the
> + * counters are enabled or access to these counters is enabled by code
> + * executed at higher exception levels (firmware).
> + */
> +static cpumask_var_t amu_cpus;
> +
> +bool cpu_has_amu_feat(int cpu)
> +{
> +	if (cpumask_available(amu_cpus))
> +		return cpumask_test_cpu(cpu, amu_cpus);
> +
> +	return false;
> +}
> +
> +static void cpu_amu_enable(struct arm64_cpu_capabilities const *cap)
> +{
> +	if (has_cpuid_feature(cap, SCOPE_LOCAL_CPU)) {
> +		pr_info("detected CPU%d: Activity Monitors Unit (AMU)\n",
> +			smp_processor_id());
> +		cpumask_set_cpu(smp_processor_id(), amu_cpus);
> +	}
> +}
> +
> +/*
> + * For known broken firmware, a kernel parameter ("disable_amu") is provided
> + * to ensure access to AMU counter registers is not attempted. By default,
> + * the feature is enabled, but disable_amu can both be used to disable or
> + * enable the capability at runtime in case the default changes in the future.
> + *
> + * To be noted that for security considerations, this does not bypass the
> + * setting of AMUSERENR_EL0 to trap accesses from EL0 (userspace) to EL1
> + * (kernel). Therefore, firmware should still ensure accesses to AMU registers
> + * are not trapped in EL2/EL3.
> + */
> +static bool disable_amu;
> +
> +static int __init set_disable_amu(char *str)
> +{
> +	int value = 0;
> +
> +	disable_amu = get_option(&str, &value) ? !!value : true;

minor nit: You could simply use strtobool(str) here, which accepts:

disable_amu= [0/1/on/off/y/n]


> +
> +	return 0;
> +}
> +early_param("disable_amu", set_disable_amu);
> +
> +static bool has_amu(const struct arm64_cpu_capabilities *cap,
> +		       int __unused)
> +{
> +	/*
> +	 * The AMU extension is a non-conflicting feature: the kernel can
> +	 * safely run a mix of CPUs with and without support for the
> +	 * activity monitors extension. Therefore, if not disabled through
> +	 * the kernel command line early parameter, enable the capability
> +	 * to allow any late CPU to use the feature.
> +	 *
> +	 * With this feature enabled, the cpu_enable function will be called
> +	 * for all CPUs that match the criteria, including secondary and
> +	 * hotplugged, marking this feature as present on that respective CPU.
> +	 * The enable function will also print a detection message.
> +	 */
> +
> +	if (!disable_amu && !zalloc_cpumask_var(&amu_cpus, GFP_KERNEL)) {

This looks problematic. Don't we end up in allocating the memory during
"each CPU" check and thus leaking memory ? Do we really need to allocate
this dynamically ?

> +		pr_err("Activity Monitors Unit (AMU): fail to allocate memory");
> +		disable_amu = true;
> +	}
> +
> +	return !disable_amu;
> +}
> +#endif
> +
>   #ifdef CONFIG_ARM64_VHE
>   static bool runs_at_el2(const struct arm64_cpu_capabilities *entry, int __unused)
>   {
> @@ -1419,6 +1498,24 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
>   		.cpu_enable = cpu_clear_disr,
>   	},
>   #endif /* CONFIG_ARM64_RAS_EXTN */
> +#ifdef CONFIG_ARM64_AMU_EXTN
> +	{
> +		/*
> +		 * The feature is enabled by default if CONFIG_ARM64_AMU_EXTN=y.
> +		 * Therefore, don't provide .desc as we don't want the detection
> +		 * message to be shown until at least one CPU is detected to
> +		 * support the feature.
> +		 */
> +		.capability = ARM64_HAS_AMU_EXTN,
> +		.type = ARM64_CPUCAP_WEAK_LOCAL_CPU_FEATURE,
> +		.matches = has_amu,
> +		.sys_reg = SYS_ID_AA64PFR0_EL1,
> +		.sign = FTR_UNSIGNED,
> +		.field_pos = ID_AA64PFR0_AMU_SHIFT,
> +		.min_field_value = ID_AA64PFR0_AMU,
> +		.cpu_enable = cpu_amu_enable,
> +	},
> +#endif /* CONFIG_ARM64_AMU_EXTN */
>   	{
>   		.desc = "Data cache clean to the PoU not required for I/D coherence",
>   		.capability = ARM64_HAS_CACHE_IDC,
> 


Thanks
Suzuki
Valentin Schneider Feb. 12, 2020, 2:54 p.m. UTC | #2
On 12/02/2020 11:30, Suzuki Kuruppassery Poulose wrote:
>> +static bool has_amu(const struct arm64_cpu_capabilities *cap,
>> +               int __unused)
>> +{
>> +    /*
>> +     * The AMU extension is a non-conflicting feature: the kernel can
>> +     * safely run a mix of CPUs with and without support for the
>> +     * activity monitors extension. Therefore, if not disabled through
>> +     * the kernel command line early parameter, enable the capability
>> +     * to allow any late CPU to use the feature.
>> +     *
>> +     * With this feature enabled, the cpu_enable function will be called
>> +     * for all CPUs that match the criteria, including secondary and
>> +     * hotplugged, marking this feature as present on that respective CPU.
>> +     * The enable function will also print a detection message.
>> +     */
>> +
>> +    if (!disable_amu && !zalloc_cpumask_var(&amu_cpus, GFP_KERNEL)) {
> 
> This looks problematic. Don't we end up in allocating the memory during
> "each CPU" check and thus leaking memory ? Do we really need to allocate
> this dynamically ?
> 

For the static vs dynamic thing, I think it's not *too* important here since
we don't risk pwning the stack because of the cpumask. That said, if we are
somewhat pedantic about memory usage, the static allocation is done
against NR_CPUS whereas the dynamic one is done against nr_cpu_ids.
Pretty inconsequential for a single cpumask, but I guess it all adds up
eventually...
Ionela Voinescu Feb. 12, 2020, 4:10 p.m. UTC | #3
Hi Suzuki,

On Wednesday 12 Feb 2020 at 11:30:44 (+0000), Suzuki Kuruppassery Poulose wrote:
> > +static int __init set_disable_amu(char *str)
> > +{
> > +	int value = 0;
> > +
> > +	disable_amu = get_option(&str, &value) ? !!value : true;
> 
> minor nit: You could simply use strtobool(str) here, which accepts:
> 
> disable_amu= [0/1/on/off/y/n]
>

Yes, this was intentional as I wanted "disable_amu" to be a valid option
as well, not only "disable_amu=<option>".

If you don't mind I'd like to keep it like this. Currently the use of
AMU is enabled by default, and the most common kernel parameter to
disable it would be "disable_amu". Allowing "disable_amu=0" is just in
case we change the default in the kernel to not support AMU and we'd
like platforms to be able to enable it. 

> 
> > +
> > +	return 0;
> > +}
> > +early_param("disable_amu", set_disable_amu);
> > +
> > +static bool has_amu(const struct arm64_cpu_capabilities *cap,
> > +		       int __unused)
> > +{
> > +	/*
> > +	 * The AMU extension is a non-conflicting feature: the kernel can
> > +	 * safely run a mix of CPUs with and without support for the
> > +	 * activity monitors extension. Therefore, if not disabled through
> > +	 * the kernel command line early parameter, enable the capability
> > +	 * to allow any late CPU to use the feature.
> > +	 *
> > +	 * With this feature enabled, the cpu_enable function will be called
> > +	 * for all CPUs that match the criteria, including secondary and
> > +	 * hotplugged, marking this feature as present on that respective CPU.
> > +	 * The enable function will also print a detection message.
> > +	 */
> > +
> > +	if (!disable_amu && !zalloc_cpumask_var(&amu_cpus, GFP_KERNEL)) {
> 
> This looks problematic. Don't we end up in allocating the memory during
> "each CPU" check and thus leaking memory ? Do we really need to allocate
> this dynamically ?
> 

Yes, it does make some assumptions. Given that the AMU capability is
a WEAK_LOCAL_CPU_FEATURE I relied on the match function being called
only once, when the return value is true. If the return value is false,
which will result in it being called multiple times, it's either because
disable_amu == false, or it has become false due to a previous failed
allocation, in which case a new allocation will not be attempted.

For better handling I could have a cpumask_available check before the
allocation just in case the capability type changes in the future, or to
at least not rely on assumptions based on the type of the capability.

The reason this is dynamic is that I wanted to avoid the memory being
allocated when disable_amu is true - as Valentin mentioned in a comment
in the meantime "the static allocation is done against NR_CPUS whereas
the dynamic one is done against nr_cpu_ids".

Would this be alright?

diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
index 182e05ca3410..4cee6b147ddd 100644
--- a/arch/arm64/kernel/cpufeature.c
+++ b/arch/arm64/kernel/cpufeature.c
@@ -1222,7 +1222,11 @@ static bool has_amu(const struct arm64_cpu_capabilities *cap,
         * The enable function will also print a detection message.
         */
 
-       if (!disable_amu && !zalloc_cpumask_var(&amu_cpus, GFP_KERNEL)) {
+       if (disable_amu)
+               return false;
+
+       if (!cpumask_available(amu_cpus) &&
+           !zalloc_cpumask_var(&amu_cpus, GFP_KERNEL)) {
                pr_err("Activity Monitors Unit (AMU): fail to allocate memory");
                disable_amu = true;
        }

Otherwise I can go for static allocation.

Thank you,
Ionela.
Suzuki K Poulose Feb. 12, 2020, 4:20 p.m. UTC | #4
Hi Ionela,

On 12/02/2020 16:10, Ionela Voinescu wrote:
> Hi Suzuki,
> 
> On Wednesday 12 Feb 2020 at 11:30:44 (+0000), Suzuki Kuruppassery Poulose wrote:
>>> +static int __init set_disable_amu(char *str)
>>> +{
>>> +	int value = 0;
>>> +
>>> +	disable_amu = get_option(&str, &value) ? !!value : true;
>>
>> minor nit: You could simply use strtobool(str) here, which accepts:
>>
>> disable_amu= [0/1/on/off/y/n]
>>
> 
> Yes, this was intentional as I wanted "disable_amu" to be a valid option
> as well, not only "disable_amu=<option>".
> 
> If you don't mind I'd like to keep it like this. Currently the use of

Sure, thats fine.

>>> +
>>> +	return 0;
>>> +}
>>> +early_param("disable_amu", set_disable_amu);
>>> +
>>> +static bool has_amu(const struct arm64_cpu_capabilities *cap,
>>> +		       int __unused)
>>> +{
>>> +	/*
>>> +	 * The AMU extension is a non-conflicting feature: the kernel can
>>> +	 * safely run a mix of CPUs with and without support for the
>>> +	 * activity monitors extension. Therefore, if not disabled through
>>> +	 * the kernel command line early parameter, enable the capability
>>> +	 * to allow any late CPU to use the feature.
>>> +	 *
>>> +	 * With this feature enabled, the cpu_enable function will be called
>>> +	 * for all CPUs that match the criteria, including secondary and
>>> +	 * hotplugged, marking this feature as present on that respective CPU.
>>> +	 * The enable function will also print a detection message.
>>> +	 */
>>> +
>>> +	if (!disable_amu && !zalloc_cpumask_var(&amu_cpus, GFP_KERNEL)) {
>>
>> This looks problematic. Don't we end up in allocating the memory during
>> "each CPU" check and thus leaking memory ? Do we really need to allocate
>> this dynamically ?
>>
> 
> Yes, it does make some assumptions. Given that the AMU capability is
> a WEAK_LOCAL_CPU_FEATURE I relied on the match function being called
> only once, when the return value is true. If the return value is false,

That is not correct. A WEAK_LOCAL_CPU_FEATURE is still SCOPE_LOCAL_CPU,
implies it is run on all the booting CPUs (including the hotplugged
ones). The WEAK is there to imply that its "permitted" or "optional"
for a hotplugged CPU. So, eventually you will re-allocate this variable
every single time a CPU turns up, where you could also loose the current
state.

> which will result in it being called multiple times, it's either because
> disable_amu == false, or it has become false due to a previous failed
> allocation, in which case a new allocation will not be attempted.
> 
> For better handling I could have a cpumask_available check before the
> allocation just in case the capability type changes in the future, or to
> at least not rely on assumptions based on the type of the capability.
> 
> The reason this is dynamic is that I wanted to avoid the memory being
> allocated when disable_amu is true - as Valentin mentioned in a comment
> in the meantime "the static allocation is done against NR_CPUS whereas
> the dynamic one is done against nr_cpu_ids".
> 
> Would this be alright?
> 
> diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
> index 182e05ca3410..4cee6b147ddd 100644
> --- a/arch/arm64/kernel/cpufeature.c
> +++ b/arch/arm64/kernel/cpufeature.c
> @@ -1222,7 +1222,11 @@ static bool has_amu(const struct arm64_cpu_capabilities *cap,
>           * The enable function will also print a detection message.
>           */
>   
> -       if (!disable_amu && !zalloc_cpumask_var(&amu_cpus, GFP_KERNEL)) {
> +       if (disable_amu)
> +               return false;
> +
> +       if (!cpumask_available(amu_cpus) &&
> +           !zalloc_cpumask_var(&amu_cpus, GFP_KERNEL)) {
>                  pr_err("Activity Monitors Unit (AMU): fail to allocate memory");
>                  disable_amu = true;
>          }

This looks fine.

Cheers
Suzuki
Vladimir Murzin Feb. 12, 2020, 4:24 p.m. UTC | #5
Hi,

On 2/12/20 4:10 PM, Ionela Voinescu wrote:
> Hi Suzuki,
> 
> On Wednesday 12 Feb 2020 at 11:30:44 (+0000), Suzuki Kuruppassery Poulose wrote:
>>> +static int __init set_disable_amu(char *str)
>>> +{
>>> +	int value = 0;
>>> +
>>> +	disable_amu = get_option(&str, &value) ? !!value : true;
>> minor nit: You could simply use strtobool(str) here, which accepts:
>>
>> disable_amu= [0/1/on/off/y/n]
>>
> Yes, this was intentional as I wanted "disable_amu" to be a valid option
> as well, not only "disable_amu=<option>".
> 
> If you don't mind I'd like to keep it like this. Currently the use of
> AMU is enabled by default, and the most common kernel parameter to
> disable it would be "disable_amu". Allowing "disable_amu=0" is just in
> case we change the default in the kernel to not support AMU and we'd
> like platforms to be able to enable it. 
> 

Sorry for jumping into thread, but can we avoid negatives into naming which
accept values? If is always tricky to get expected effect when both are combined.

If value doesn't really mater than can it be just "noamu"?

If value does matter can it be (per Suzuki) amu=[0/1/on/off/y/n]?

Or can you postpone introduction of "just in case" option till that case happens?

Cheers
Vladimir
Ionela Voinescu Feb. 12, 2020, 6:20 p.m. UTC | #6
Hi Suzuki,

On Wednesday 12 Feb 2020 at 16:20:56 (+0000), Suzuki Kuruppassery Poulose wrote:
> > > > +static bool has_amu(const struct arm64_cpu_capabilities *cap,
> > > > +		       int __unused)
> > > > +{
> > > > +	/*
> > > > +	 * The AMU extension is a non-conflicting feature: the kernel can
> > > > +	 * safely run a mix of CPUs with and without support for the
> > > > +	 * activity monitors extension. Therefore, if not disabled through
> > > > +	 * the kernel command line early parameter, enable the capability
> > > > +	 * to allow any late CPU to use the feature.
> > > > +	 *
> > > > +	 * With this feature enabled, the cpu_enable function will be called
> > > > +	 * for all CPUs that match the criteria, including secondary and
> > > > +	 * hotplugged, marking this feature as present on that respective CPU.
> > > > +	 * The enable function will also print a detection message.
> > > > +	 */
> > > > +
> > > > +	if (!disable_amu && !zalloc_cpumask_var(&amu_cpus, GFP_KERNEL)) {
> > > 
> > > This looks problematic. Don't we end up in allocating the memory during
> > > "each CPU" check and thus leaking memory ? Do we really need to allocate
> > > this dynamically ?
> > > 
> > 
> > Yes, it does make some assumptions. Given that the AMU capability is
> > a WEAK_LOCAL_CPU_FEATURE I relied on the match function being called
> > only once, when the return value is true. If the return value is false,
> 
> That is not correct. A WEAK_LOCAL_CPU_FEATURE is still SCOPE_LOCAL_CPU,
> implies it is run on all the booting CPUs (including the hotplugged
> ones). The WEAK is there to imply that its "permitted" or "optional"
> for a hotplugged CPU. So, eventually you will re-allocate this variable
> every single time a CPU turns up, where you could also loose the current
> state.
> 

> > which will result in it being called multiple times, it's either because
> > disable_amu == false, or it has become false due to a previous failed
> > allocation, in which case a new allocation will not be attempted.

First of all, I agree with you that this should be corrected.

But for completion (and my education) I retraced my steps in regards
to my assumption above. While cpu_enable is called for all CPUs - boot,
secondary, hotplugged, the matches function (in this case has_amu) is
not always called for all CPUs, and that's where the confusion came
from.

Looking over the update_cpu_capabilities function, that's called from
both setup_boot_cpu_capabilities and check_local_cpu_capabilities
(secondary CPUs) for SCOPE_LOCAL_CPU:

-----
static void update_cpu_capabilities(u16 scope_mask)
{
        int i;
        const struct arm64_cpu_capabilities *caps;

        scope_mask &= ARM64_CPUCAP_SCOPE_MASK;
        for (i = 0; i < ARM64_NCAPS; i++) {
                caps = cpu_hwcaps_ptrs[i];
                if (!caps || !(caps->type & scope_mask) ||
                    cpus_have_cap(caps->capability) ||
                    !caps->matches(caps, cpucap_default_scope(caps)))
                        continue;

--> The matches function is only called if !cpus_have_cap


                if (caps->desc)
                        pr_info("detected: %s\n", caps->desc);
                cpus_set_cap(caps->capability);

--> If matches returns true we mark it as present in cpu_hwcaps.

                if ((scope_mask & SCOPE_BOOT_CPU) && (caps->type & SCOPE_BOOT_CPU))
                        set_bit(caps->capability, boot_capabilities);
        }   
}
---

Therefore caps->matches (in this case has_amu) will only be called as
long as it returns false. This is where my assumption above came from.
Also, this is the reason it was working nicely in my testing, as I did
not test hotplug this time.

Where the has_amu code breaks is when we end up calling
verify_local_cpu_capabilities instead of update_cpu_capabilities after
sys_caps_initialised, which will happen for hotplugged CPUs.
In that case we call caps->matches for all CPUs. Also, if anyone in the
future ends up calling this_cpu_has_cap for the AMU capability.

I will fix this.

Thank you,
Ionela.
Ionela Voinescu Feb. 12, 2020, 6:27 p.m. UTC | #7
On Wednesday 12 Feb 2020 at 16:24:42 (+0000), Vladimir Murzin wrote:
> Hi,
> 
> On 2/12/20 4:10 PM, Ionela Voinescu wrote:
> > Hi Suzuki,
> > 
> > On Wednesday 12 Feb 2020 at 11:30:44 (+0000), Suzuki Kuruppassery Poulose wrote:
> >>> +static int __init set_disable_amu(char *str)
> >>> +{
> >>> +	int value = 0;
> >>> +
> >>> +	disable_amu = get_option(&str, &value) ? !!value : true;
> >> minor nit: You could simply use strtobool(str) here, which accepts:
> >>
> >> disable_amu= [0/1/on/off/y/n]
> >>
> > Yes, this was intentional as I wanted "disable_amu" to be a valid option
> > as well, not only "disable_amu=<option>".
> > 
> > If you don't mind I'd like to keep it like this. Currently the use of
> > AMU is enabled by default, and the most common kernel parameter to
> > disable it would be "disable_amu". Allowing "disable_amu=0" is just in
> > case we change the default in the kernel to not support AMU and we'd
> > like platforms to be able to enable it. 
> > 
> 
> Sorry for jumping into thread, but can we avoid negatives into naming which
> accept values? If is always tricky to get expected effect when both are combined.
> 
> If value doesn't really mater than can it be just "noamu"?
> 
> If value does matter can it be (per Suzuki) amu=[0/1/on/off/y/n]?
> 
> Or can you postpone introduction of "just in case" option till that case happens?
>

No worries, thank you very much for the input. I'll change it to
amu=[0/1/on/off/y/n] for clarity.

Thank you,
Ionela.

> Cheers
> Vladimir
Suzuki K Poulose Feb. 12, 2020, 7:24 p.m. UTC | #8
Hi Ionela,

On 02/12/2020 06:20 PM, Ionela Voinescu wrote:
> Hi Suzuki,
> 
> On Wednesday 12 Feb 2020 at 16:20:56 (+0000), Suzuki Kuruppassery Poulose wrote:
>>>>> +static bool has_amu(const struct arm64_cpu_capabilities *cap,
>>>>> +		       int __unused)
>>>>> +{
>>>>> +	/*
>>>>> +	 * The AMU extension is a non-conflicting feature: the kernel can
>>>>> +	 * safely run a mix of CPUs with and without support for the
>>>>> +	 * activity monitors extension. Therefore, if not disabled through
>>>>> +	 * the kernel command line early parameter, enable the capability
>>>>> +	 * to allow any late CPU to use the feature.
>>>>> +	 *
>>>>> +	 * With this feature enabled, the cpu_enable function will be called
>>>>> +	 * for all CPUs that match the criteria, including secondary and
>>>>> +	 * hotplugged, marking this feature as present on that respective CPU.
>>>>> +	 * The enable function will also print a detection message.
>>>>> +	 */
>>>>> +
>>>>> +	if (!disable_amu && !zalloc_cpumask_var(&amu_cpus, GFP_KERNEL)) {
>>>>
>>>> This looks problematic. Don't we end up in allocating the memory during
>>>> "each CPU" check and thus leaking memory ? Do we really need to allocate
>>>> this dynamically ?
>>>>
>>>
>>> Yes, it does make some assumptions. Given that the AMU capability is
>>> a WEAK_LOCAL_CPU_FEATURE I relied on the match function being called
>>> only once, when the return value is true. If the return value is false,
>>
>> That is not correct. A WEAK_LOCAL_CPU_FEATURE is still SCOPE_LOCAL_CPU,
>> implies it is run on all the booting CPUs (including the hotplugged
>> ones). The WEAK is there to imply that its "permitted" or "optional"
>> for a hotplugged CPU. So, eventually you will re-allocate this variable
>> every single time a CPU turns up, where you could also loose the current
>> state.
>>
> 
>>> which will result in it being called multiple times, it's either because
>>> disable_amu == false, or it has become false due to a previous failed
>>> allocation, in which case a new allocation will not be attempted.
> 
> First of all, I agree with you that this should be corrected.
> 
> But for completion (and my education) I retraced my steps in regards
> to my assumption above. While cpu_enable is called for all CPUs - boot,
> secondary, hotplugged, the matches function (in this case has_amu) is
> not always called for all CPUs, and that's where the confusion came
> from.
> 
> Looking over the update_cpu_capabilities function, that's called from
> both setup_boot_cpu_capabilities and check_local_cpu_capabilities
> (secondary CPUs) for SCOPE_LOCAL_CPU:
> 
> -----
> static void update_cpu_capabilities(u16 scope_mask)
> {
>          int i;
>          const struct arm64_cpu_capabilities *caps;
> 
>          scope_mask &= ARM64_CPUCAP_SCOPE_MASK;
>          for (i = 0; i < ARM64_NCAPS; i++) {
>                  caps = cpu_hwcaps_ptrs[i];
>                  if (!caps || !(caps->type & scope_mask) ||
>                      cpus_have_cap(caps->capability) ||
>                      !caps->matches(caps, cpucap_default_scope(caps)))
>                          continue;
> 
> --> The matches function is only called if !cpus_have_cap

Agreed. Your analysis is correct. This was done as a micro
optimization(!) as it is pointless to check if something should be set, 
when it is already set.

> 
> 
>                  if (caps->desc)
>                          pr_info("detected: %s\n", caps->desc);
>                  cpus_set_cap(caps->capability);
> 
> --> If matches returns true we mark it as present in cpu_hwcaps.
> 
>                  if ((scope_mask & SCOPE_BOOT_CPU) && (caps->type & SCOPE_BOOT_CPU))
>                          set_bit(caps->capability, boot_capabilities);
>          }
> }
> ---
> 
> Therefore caps->matches (in this case has_amu) will only be called as
> long as it returns false. This is where my assumption above came from.
> Also, this is the reason it was working nicely in my testing, as I did
> not test hotplug this time.
> 
> Where the has_amu code breaks is when we end up calling
> verify_local_cpu_capabilities instead of update_cpu_capabilities after
> sys_caps_initialised, which will happen for hotplugged CPUs.
> In that case we call caps->matches for all CPUs. Also, if anyone in the
> future ends up calling this_cpu_has_cap for the AMU capability.

True.

> 
> I will fix this.

Cheers
Suzuki
Ionela Voinescu Feb. 12, 2020, 8:19 p.m. UTC | #9
Hi guys,

On Wednesday 12 Feb 2020 at 16:20:56 (+0000), Suzuki Kuruppassery Poulose wrote:
> > For better handling I could have a cpumask_available check before the
> > allocation just in case the capability type changes in the future, or to
> > at least not rely on assumptions based on the type of the capability.
> > 
> > The reason this is dynamic is that I wanted to avoid the memory being
> > allocated when disable_amu is true - as Valentin mentioned in a comment
> > in the meantime "the static allocation is done against NR_CPUS whereas
> > the dynamic one is done against nr_cpu_ids".
> > 
> > Would this be alright?
> > 
> > diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
> > index 182e05ca3410..4cee6b147ddd 100644
> > --- a/arch/arm64/kernel/cpufeature.c
> > +++ b/arch/arm64/kernel/cpufeature.c
> > @@ -1222,7 +1222,11 @@ static bool has_amu(const struct arm64_cpu_capabilities *cap,
> >           * The enable function will also print a detection message.
> >           */
> > -       if (!disable_amu && !zalloc_cpumask_var(&amu_cpus, GFP_KERNEL)) {
> > +       if (disable_amu)
> > +               return false;
> > +
> > +       if (!cpumask_available(amu_cpus) &&
> > +           !zalloc_cpumask_var(&amu_cpus, GFP_KERNEL)) {
> >                  pr_err("Activity Monitors Unit (AMU): fail to allocate memory");
> >                  disable_amu = true;
> >          }

Going down the rabbit hole in regards to this section, it seems it's
actually not fine. When CONFIG_CPUMASK_OFFSTACK==y it fails to
allocate memory because zalloc_cpumask_var cannot be used before
initializing the slub allocator (mm_init) to allocate a cpumask.

The alternative alloc_bootmem_cpumask_var is an __init function that
can be used only during the initialization phase, which is not the case
for has_amu that can be called later (for hotplugged CPUs). Therefore,
dynamic allocation is not an option here.

Thanks,
Ionela.
diff mbox series

Patch

diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index ade4e6ec23e0..6f0c6d22fa4c 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -843,6 +843,16 @@ 
 			can be useful when debugging issues that require an SLB
 			miss to occur.
 
+	disable_amu	[ARM64]
+			Disables detection, enablement and access to counter
+			registers of the Activity Monitors Unit (AMU). By
+			default these are enabled. "disable_amu=0/1" is also
+			allowed.
+			disable_amu / disable_amu=1 - ensures access to AMU's
+			 counter registers is not attempted.
+			disable_amu=0 - enables or maintain detection and
+			 access to AMU's counter registers.
+
 	disable=	[IPV6]
 			See Documentation/networking/ipv6.txt.
 
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 3ab05857ca8f..b3408d7629fd 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -1484,6 +1484,37 @@  config ARM64_PTR_AUTH
 
 endmenu
 
+menu "ARMv8.4 architectural features"
+
+config ARM64_AMU_EXTN
+	bool "Enable support for the Activity Monitors Unit CPU extension"
+	default y
+	help
+          The activity monitors extension is an optional extension introduced
+          by the ARMv8.4 CPU architecture. This enables support for version 1
+          of the activity monitors architecture, AMUv1.
+
+          To enable the use of this extension on CPUs that implement it, say Y.
+
+          Note that for architectural reasons, firmware _must_ implement AMU
+          support when running on CPUs that present the activity monitors
+          extension. The required support is present in:
+            * Version 1.5 and later of the ARM Trusted Firmware
+
+          For kernels that have this configuration enabled but boot with broken
+          firmware, you may need to say N here until the firmware is fixed.
+          Otherwise you may experience firmware panics or lockups when
+          accessing the counter registers. Even if you are not observing these
+          symptoms, the values returned by the register reads might not
+          correctly reflect reality. Most commonly, the value read will be 0,
+          indicating that the counter is not enabled.
+
+          Alternatively, a kernel parameter is provided to disable detection,
+          enablement and access to the counter registers of the Activity
+          Monitors Unit at runtime.
+
+endmenu
+
 config ARM64_SVE
 	bool "ARM Scalable Vector Extension support"
 	default y
diff --git a/arch/arm64/include/asm/cpucaps.h b/arch/arm64/include/asm/cpucaps.h
index b92683871119..7dde890bde50 100644
--- a/arch/arm64/include/asm/cpucaps.h
+++ b/arch/arm64/include/asm/cpucaps.h
@@ -56,7 +56,8 @@ 
 #define ARM64_WORKAROUND_CAVIUM_TX2_219_PRFM	46
 #define ARM64_WORKAROUND_1542419		47
 #define ARM64_WORKAROUND_1319367		48
+#define ARM64_HAS_AMU_EXTN			49
 
-#define ARM64_NCAPS				49
+#define ARM64_NCAPS				50
 
 #endif /* __ASM_CPUCAPS_H */
diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h
index 4261d55e8506..5ae6e00ccabb 100644
--- a/arch/arm64/include/asm/cpufeature.h
+++ b/arch/arm64/include/asm/cpufeature.h
@@ -673,6 +673,11 @@  static inline bool cpu_has_hw_af(void)
 						ID_AA64MMFR1_HADBS_SHIFT);
 }
 
+#ifdef CONFIG_ARM64_AMU_EXTN
+/* Check whether the current CPU supports the Activity Monitors Unit (AMU) */
+extern bool cpu_has_amu_feat(int cpu);
+#endif
+
 #endif /* __ASSEMBLY__ */
 
 #endif
diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h
index 6e919fafb43d..eba13b8994ce 100644
--- a/arch/arm64/include/asm/sysreg.h
+++ b/arch/arm64/include/asm/sysreg.h
@@ -382,6 +382,42 @@ 
 #define SYS_TPIDR_EL0			sys_reg(3, 3, 13, 0, 2)
 #define SYS_TPIDRRO_EL0			sys_reg(3, 3, 13, 0, 3)
 
+/* Definitions for system register interface to AMU for ARMv8.4 onwards */
+#define SYS_AM_EL0(crm, op2)		sys_reg(3, 3, 13, (crm), (op2))
+#define SYS_AMCR_EL0			SYS_AM_EL0(2, 0)
+#define SYS_AMCFGR_EL0			SYS_AM_EL0(2, 1)
+#define SYS_AMCGCR_EL0			SYS_AM_EL0(2, 2)
+#define SYS_AMUSERENR_EL0		SYS_AM_EL0(2, 3)
+#define SYS_AMCNTENCLR0_EL0		SYS_AM_EL0(2, 4)
+#define SYS_AMCNTENSET0_EL0		SYS_AM_EL0(2, 5)
+#define SYS_AMCNTENCLR1_EL0		SYS_AM_EL0(3, 0)
+#define SYS_AMCNTENSET1_EL0		SYS_AM_EL0(3, 1)
+
+/*
+ * Group 0 of activity monitors (architected):
+ *                op0  op1  CRn   CRm       op2
+ * Counter:       11   011  1101  010:n<3>  n<2:0>
+ * Type:          11   011  1101  011:n<3>  n<2:0>
+ * n: 0-15
+ *
+ * Group 1 of activity monitors (auxiliary):
+ *                op0  op1  CRn   CRm       op2
+ * Counter:       11   011  1101  110:n<3>  n<2:0>
+ * Type:          11   011  1101  111:n<3>  n<2:0>
+ * n: 0-15
+ */
+
+#define SYS_AMEVCNTR0_EL0(n)		SYS_AM_EL0(4 + ((n) >> 3), (n) & 7)
+#define SYS_AMEVTYPE0_EL0(n)		SYS_AM_EL0(6 + ((n) >> 3), (n) & 7)
+#define SYS_AMEVCNTR1_EL0(n)		SYS_AM_EL0(12 + ((n) >> 3), (n) & 7)
+#define SYS_AMEVTYPE1_EL0(n)		SYS_AM_EL0(14 + ((n) >> 3), (n) & 7)
+
+/* AMU v1: Fixed (architecturally defined) activity monitors */
+#define SYS_AMEVCNTR0_CORE_EL0          SYS_AMEVCNTR0_EL0(0)
+#define SYS_AMEVCNTR0_CONST_EL0         SYS_AMEVCNTR0_EL0(1)
+#define SYS_AMEVCNTR0_INST_RET_EL0      SYS_AMEVCNTR0_EL0(2)
+#define SYS_AMEVCNTR0_MEM_STALL         SYS_AMEVCNTR0_EL0(3)
+
 #define SYS_CNTFRQ_EL0			sys_reg(3, 3, 14, 0, 0)
 
 #define SYS_CNTP_TVAL_EL0		sys_reg(3, 3, 14, 2, 0)
@@ -577,6 +613,7 @@ 
 #define ID_AA64PFR0_CSV3_SHIFT		60
 #define ID_AA64PFR0_CSV2_SHIFT		56
 #define ID_AA64PFR0_DIT_SHIFT		48
+#define ID_AA64PFR0_AMU_SHIFT		44
 #define ID_AA64PFR0_SVE_SHIFT		32
 #define ID_AA64PFR0_RAS_SHIFT		28
 #define ID_AA64PFR0_GIC_SHIFT		24
@@ -587,6 +624,7 @@ 
 #define ID_AA64PFR0_EL1_SHIFT		4
 #define ID_AA64PFR0_EL0_SHIFT		0
 
+#define ID_AA64PFR0_AMU			0x1
 #define ID_AA64PFR0_SVE			0x1
 #define ID_AA64PFR0_RAS_V1		0x1
 #define ID_AA64PFR0_FP_NI		0xf
diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
index 04cf64e9f0c9..029a473ad273 100644
--- a/arch/arm64/kernel/cpufeature.c
+++ b/arch/arm64/kernel/cpufeature.c
@@ -156,6 +156,7 @@  static const struct arm64_ftr_bits ftr_id_aa64pfr0[] = {
 	ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64PFR0_CSV3_SHIFT, 4, 0),
 	ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64PFR0_CSV2_SHIFT, 4, 0),
 	ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR0_DIT_SHIFT, 4, 0),
+	ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64PFR0_AMU_SHIFT, 4, 0),
 	ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SVE),
 				   FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR0_SVE_SHIFT, 4, 0),
 	ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR0_RAS_SHIFT, 4, 0),
@@ -1150,6 +1151,84 @@  static bool has_hw_dbm(const struct arm64_cpu_capabilities *cap,
 
 #endif
 
+#ifdef CONFIG_ARM64_AMU_EXTN
+
+/*
+ * The "amu_cpus" cpumask only signals that the CPU implementation for the
+ * flagged CPUs supports the Activity Monitors Unit (AMU) but does not provide
+ * information regarding all the events that it supports. When a CPU bit is
+ * set in the cpumask, the user of this feature can only rely on the presence
+ * of the 4 fixed counters for that CPU. But this does not guarantee that the
+ * counters are enabled or access to these counters is enabled by code
+ * executed at higher exception levels (firmware).
+ */
+static cpumask_var_t amu_cpus;
+
+bool cpu_has_amu_feat(int cpu)
+{
+	if (cpumask_available(amu_cpus))
+		return cpumask_test_cpu(cpu, amu_cpus);
+
+	return false;
+}
+
+static void cpu_amu_enable(struct arm64_cpu_capabilities const *cap)
+{
+	if (has_cpuid_feature(cap, SCOPE_LOCAL_CPU)) {
+		pr_info("detected CPU%d: Activity Monitors Unit (AMU)\n",
+			smp_processor_id());
+		cpumask_set_cpu(smp_processor_id(), amu_cpus);
+	}
+}
+
+/*
+ * For known broken firmware, a kernel parameter ("disable_amu") is provided
+ * to ensure access to AMU counter registers is not attempted. By default,
+ * the feature is enabled, but disable_amu can both be used to disable or
+ * enable the capability at runtime in case the default changes in the future.
+ *
+ * To be noted that for security considerations, this does not bypass the
+ * setting of AMUSERENR_EL0 to trap accesses from EL0 (userspace) to EL1
+ * (kernel). Therefore, firmware should still ensure accesses to AMU registers
+ * are not trapped in EL2/EL3.
+ */
+static bool disable_amu;
+
+static int __init set_disable_amu(char *str)
+{
+	int value = 0;
+
+	disable_amu = get_option(&str, &value) ? !!value : true;
+
+	return 0;
+}
+early_param("disable_amu", set_disable_amu);
+
+static bool has_amu(const struct arm64_cpu_capabilities *cap,
+		       int __unused)
+{
+	/*
+	 * The AMU extension is a non-conflicting feature: the kernel can
+	 * safely run a mix of CPUs with and without support for the
+	 * activity monitors extension. Therefore, if not disabled through
+	 * the kernel command line early parameter, enable the capability
+	 * to allow any late CPU to use the feature.
+	 *
+	 * With this feature enabled, the cpu_enable function will be called
+	 * for all CPUs that match the criteria, including secondary and
+	 * hotplugged, marking this feature as present on that respective CPU.
+	 * The enable function will also print a detection message.
+	 */
+
+	if (!disable_amu && !zalloc_cpumask_var(&amu_cpus, GFP_KERNEL)) {
+		pr_err("Activity Monitors Unit (AMU): fail to allocate memory");
+		disable_amu = true;
+	}
+
+	return !disable_amu;
+}
+#endif
+
 #ifdef CONFIG_ARM64_VHE
 static bool runs_at_el2(const struct arm64_cpu_capabilities *entry, int __unused)
 {
@@ -1419,6 +1498,24 @@  static const struct arm64_cpu_capabilities arm64_features[] = {
 		.cpu_enable = cpu_clear_disr,
 	},
 #endif /* CONFIG_ARM64_RAS_EXTN */
+#ifdef CONFIG_ARM64_AMU_EXTN
+	{
+		/*
+		 * The feature is enabled by default if CONFIG_ARM64_AMU_EXTN=y.
+		 * Therefore, don't provide .desc as we don't want the detection
+		 * message to be shown until at least one CPU is detected to
+		 * support the feature.
+		 */
+		.capability = ARM64_HAS_AMU_EXTN,
+		.type = ARM64_CPUCAP_WEAK_LOCAL_CPU_FEATURE,
+		.matches = has_amu,
+		.sys_reg = SYS_ID_AA64PFR0_EL1,
+		.sign = FTR_UNSIGNED,
+		.field_pos = ID_AA64PFR0_AMU_SHIFT,
+		.min_field_value = ID_AA64PFR0_AMU,
+		.cpu_enable = cpu_amu_enable,
+	},
+#endif /* CONFIG_ARM64_AMU_EXTN */
 	{
 		.desc = "Data cache clean to the PoU not required for I/D coherence",
 		.capability = ARM64_HAS_CACHE_IDC,