
[v5,06/26] arm64/sve: Check SVE virtualisability

Message ID 1550519559-15915-7-git-send-email-Dave.Martin@arm.com (mailing list archive)
State New, archived
Series: KVM: arm64: SVE guest support

Commit Message

Dave Martin Feb. 18, 2019, 7:52 p.m. UTC
Due to the way the effective SVE vector length is controlled and
trapped at different exception levels, certain mismatches in the
sets of vector lengths supported by different physical CPUs in the
system may prevent straightforward virtualisation of SVE at parity
with the host.

This patch analyses the extent to which SVE can be virtualised
safely without interfering with migration of vcpus between physical
CPUs, and rejects late secondary CPUs that would erode the
situation further.

It is left up to KVM to decide what to do with this information.

Signed-off-by: Dave Martin <Dave.Martin@arm.com>
---
 arch/arm64/include/asm/fpsimd.h |  1 +
 arch/arm64/kernel/cpufeature.c  |  2 +-
 arch/arm64/kernel/fpsimd.c      | 86 ++++++++++++++++++++++++++++++++++-------
 3 files changed, 73 insertions(+), 16 deletions(-)
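
As a rough editorial illustration (not part of the patch): a guest can only safely be offered vector lengths that every physical CPU supports, otherwise migrating a vcpu could expose it to a set of vector lengths different from the one it probed. The small userspace model below sketches that set logic; the CPU sets are invented, and the kernel's vq_to_bit() bitmap encoding is ignored for simplicity.

/*
 * Editorial sketch, not kernel code: model the per-CPU sets of supported
 * vector quadword counts (VQ; vl = 16 * vq bytes) and compute the largest
 * VL that is safe to offer to guests.  The CPU sets here are invented.
 */
#include <stdint.h>
#include <stdio.h>

#define VQ_MAX	16	/* architectural maximum: VL up to 256 bytes */

/* Bit (vq - 1) set => this CPU supports that VQ. */
static const uint32_t cpu_vqs[] = {
	0x0000ffff,	/* CPU0: VQ 1..16 */
	0x000000ff,	/* CPU1: VQ 1..8 only */
};

int main(void)
{
	uint32_t all = ~0u, any = 0;	/* intersection / union of per-CPU sets */
	unsigned int i, vq, max_virt_vq = 0;

	for (i = 0; i < sizeof(cpu_vqs) / sizeof(cpu_vqs[0]); i++) {
		all &= cpu_vqs[i];
		any |= cpu_vqs[i];
	}

	/*
	 * A VQ is only safely virtualisable if every CPU agrees on it, so
	 * stop at the first VQ that is present somewhere but not everywhere.
	 */
	for (vq = 1; vq <= VQ_MAX; vq++) {
		uint32_t bit = 1u << (vq - 1);

		if ((any & bit) && !(all & bit))
			break;
		if (all & bit)
			max_virt_vq = vq;
	}

	printf("max virtualisable VL: %u bytes\n", max_virt_vq * 16);
	return 0;
}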

Comments

Julien Thierry Feb. 20, 2019, 11:12 a.m. UTC | #1
Hi Dave,

On 18/02/2019 19:52, Dave Martin wrote:
> Due to the way the effective SVE vector length is controlled and
> trapped at different exception levels, certain mismatches in the
> sets of vector lengths supported by different physical CPUs in the
> system may prevent straightforward virtualisation of SVE at parity
> with the host.
> 
> This patch analyses the extent to which SVE can be virtualised
> safely without interfering with migration of vcpus between physical
> CPUs, and rejects late secondary CPUs that would erode the
> situation further.
> 
> It is left up to KVM to decide what to do with this information.
> 
> Signed-off-by: Dave Martin <Dave.Martin@arm.com>
> ---
>  arch/arm64/include/asm/fpsimd.h |  1 +
>  arch/arm64/kernel/cpufeature.c  |  2 +-
>  arch/arm64/kernel/fpsimd.c      | 86 ++++++++++++++++++++++++++++++++++-------
>  3 files changed, 73 insertions(+), 16 deletions(-)
> 
> diff --git a/arch/arm64/include/asm/fpsimd.h b/arch/arm64/include/asm/fpsimd.h
> index dd1ad39..964adc9 100644
> --- a/arch/arm64/include/asm/fpsimd.h
> +++ b/arch/arm64/include/asm/fpsimd.h
> @@ -87,6 +87,7 @@ extern void sve_kernel_enable(const struct arm64_cpu_capabilities *__unused);
>  extern u64 read_zcr_features(void);
>  
>  extern int __ro_after_init sve_max_vl;
> +extern int __ro_after_init sve_max_virtualisable_vl;
>  
>  #ifdef CONFIG_ARM64_SVE
>  
> diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
> index f6d84e2..5eaacb4 100644
> --- a/arch/arm64/kernel/cpufeature.c
> +++ b/arch/arm64/kernel/cpufeature.c
> @@ -1825,7 +1825,7 @@ static void verify_sve_features(void)
>  	unsigned int len = zcr & ZCR_ELx_LEN_MASK;
>  
>  	if (len < safe_len || sve_verify_vq_map()) {
> -		pr_crit("CPU%d: SVE: required vector length(s) missing\n",
> +		pr_crit("CPU%d: SVE: vector length support mismatch\n",
>  			smp_processor_id());
>  		cpu_die_early();
>  	}
> diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c
> index 62c37f0..64729e2 100644
> --- a/arch/arm64/kernel/fpsimd.c
> +++ b/arch/arm64/kernel/fpsimd.c
> @@ -18,6 +18,7 @@
>   */
>  
>  #include <linux/bitmap.h>
> +#include <linux/bitops.h>
>  #include <linux/bottom_half.h>
>  #include <linux/bug.h>
>  #include <linux/cache.h>
> @@ -48,6 +49,7 @@
>  #include <asm/sigcontext.h>
>  #include <asm/sysreg.h>
>  #include <asm/traps.h>
> +#include <asm/virt.h>
>  
>  #define FPEXC_IOF	(1 << 0)
>  #define FPEXC_DZF	(1 << 1)
> @@ -130,14 +132,18 @@ static int sve_default_vl = -1;
>  
>  /* Maximum supported vector length across all CPUs (initially poisoned) */
>  int __ro_after_init sve_max_vl = SVE_VL_MIN;
> +int __ro_after_init sve_max_virtualisable_vl = SVE_VL_MIN;
>  /* Set of available vector lengths, as vq_to_bit(vq): */
>  static __ro_after_init DECLARE_BITMAP(sve_vq_map, SVE_VQ_MAX);
> +/* Set of vector lengths present on at least one cpu: */
> +static __ro_after_init DECLARE_BITMAP(sve_vq_partial_map, SVE_VQ_MAX);
>  static void __percpu *efi_sve_state;
>  
>  #else /* ! CONFIG_ARM64_SVE */
>  
>  /* Dummy declaration for code that will be optimised out: */
>  extern __ro_after_init DECLARE_BITMAP(sve_vq_map, SVE_VQ_MAX);
> +extern __ro_after_init DECLARE_BITMAP(sve_vq_partial_map, SVE_VQ_MAX);
>  extern void __percpu *efi_sve_state;
>  
>  #endif /* ! CONFIG_ARM64_SVE */
> @@ -623,12 +629,6 @@ int sve_get_current_vl(void)
>  	return sve_prctl_status(0);
>  }
>  
> -/*
> - * Bitmap for temporary storage of the per-CPU set of supported vector lengths
> - * during secondary boot.
> - */
> -static DECLARE_BITMAP(sve_secondary_vq_map, SVE_VQ_MAX);
> -
>  static void sve_probe_vqs(DECLARE_BITMAP(map, SVE_VQ_MAX))
>  {
>  	unsigned int vq, vl;
> @@ -650,6 +650,7 @@ static void sve_probe_vqs(DECLARE_BITMAP(map, SVE_VQ_MAX))
>  void __init sve_init_vq_map(void)
>  {
>  	sve_probe_vqs(sve_vq_map);
> +	bitmap_copy(sve_vq_partial_map, sve_vq_map, SVE_VQ_MAX);
>  }
>  
>  /*
> @@ -658,25 +659,58 @@ void __init sve_init_vq_map(void)
>   */
>  void sve_update_vq_map(void)
>  {
> -	sve_probe_vqs(sve_secondary_vq_map);
> -	bitmap_and(sve_vq_map, sve_vq_map, sve_secondary_vq_map, SVE_VQ_MAX);
> +	DECLARE_BITMAP(tmp_map, SVE_VQ_MAX);
> +
> +	sve_probe_vqs(tmp_map);
> +	bitmap_and(sve_vq_map, sve_vq_map, tmp_map, SVE_VQ_MAX);
> +	bitmap_or(sve_vq_partial_map, sve_vq_partial_map, tmp_map, SVE_VQ_MAX);
>  }
>  
>  /* Check whether the current CPU supports all VQs in the committed set */
>  int sve_verify_vq_map(void)
>  {
> -	int ret = 0;
> +	DECLARE_BITMAP(tmp_map, SVE_VQ_MAX);
> +	unsigned long b;
>  
> -	sve_probe_vqs(sve_secondary_vq_map);
> -	bitmap_andnot(sve_secondary_vq_map, sve_vq_map, sve_secondary_vq_map,
> -		      SVE_VQ_MAX);
> -	if (!bitmap_empty(sve_secondary_vq_map, SVE_VQ_MAX)) {
> +	sve_probe_vqs(tmp_map);
> +
> +	bitmap_complement(tmp_map, tmp_map, SVE_VQ_MAX);
> +	if (bitmap_intersects(tmp_map, sve_vq_map, SVE_VQ_MAX)) {
>  		pr_warn("SVE: cpu%d: Required vector length(s) missing\n",
>  			smp_processor_id());
> -		ret = -EINVAL;
> +		return -EINVAL;
>  	}
>  
> -	return ret;
> +	if (!IS_ENABLED(CONFIG_KVM) || !is_hyp_mode_available())
> +		return 0;
> +
> +	/*
> +	 * For KVM, it is necessary to ensure that this CPU doesn't
> +	 * support any vector length that guests may have probed as
> +	 * unsupported.
> +	 */
> +
> +	/* Recover the set of supported VQs: */
> +	bitmap_complement(tmp_map, tmp_map, SVE_VQ_MAX);
> +	/* Find VQs supported that are not globally supported: */
> +	bitmap_andnot(tmp_map, tmp_map, sve_vq_map, SVE_VQ_MAX);
> +
> +	/* Find the lowest such VQ, if any: */
> +	b = find_last_bit(tmp_map, SVE_VQ_MAX);
> +	if (b >= SVE_VQ_MAX)
> +		return 0; /* no mismatches */
> +
> +	/*
> +	 * Mismatches above sve_max_virtualisable_vl are fine, since
> +	 * no guest is allowed to configure ZCR_EL2.LEN to exceed this:
> +	 */
> +	if (sve_vl_from_vq(bit_to_vq(b)) <= sve_max_virtualisable_vl) {
> +		pr_warn("SVE: cpu%d: Unsupported vector length(s) present\n",

Nit: it might be good to specify that the vector length is unsupported for
virtualisation.

Also, since KVM is the one deciding what to do with the information,
should we have a warning here? That said, I can see it is useful to know
which CPUs introduce unsupported vector lengths, so maybe use pr_devel()
instead of pr_warn().


In any case, the logic looks good to me:

Reviewed-by: Julien Thierry <julien.thierry@arm.com>

Cheers,
Julien Grall Feb. 21, 2019, 1:36 p.m. UTC | #2
Hi Dave,

On 18/02/2019 19:52, Dave Martin wrote:
> +	/*
> +	 * Mismatches above sve_max_virtualisable_vl are fine, since
> +	 * no guest is allowed to configure ZCR_EL2.LEN to exceed this:
> +	 */
> +	if (sve_vl_from_vq(bit_to_vq(b)) <= sve_max_virtualisable_vl) {
> +		pr_warn("SVE: cpu%d: Unsupported vector length(s) present\n",
> +			smp_processor_id());

Would it be worth printing the unsupported vector length?

Cheers,
Dave Martin Feb. 26, 2019, 12:06 p.m. UTC | #3
On Wed, Feb 20, 2019 at 11:12:49AM +0000, Julien Thierry wrote:
> Hi Dave,
> 
> On 18/02/2019 19:52, Dave Martin wrote:
> > Due to the way the effective SVE vector length is controlled and
> > trapped at different exception levels, certain mismatches in the
> > sets of vector lengths supported by different physical CPUs in the
> > system may prevent straightforward virtualisation of SVE at parity
> > with the host.
> > 
> > This patch analyses the extent to which SVE can be virtualised
> > safely without interfering with migration of vcpus between physical
> > CPUs, and rejects late secondary CPUs that would erode the
> > situation further.
> > 
> > It is left up to KVM to decide what to do with this information.
> > 
> > Signed-off-by: Dave Martin <Dave.Martin@arm.com>
> > ---
> >  arch/arm64/include/asm/fpsimd.h |  1 +
> >  arch/arm64/kernel/cpufeature.c  |  2 +-
> >  arch/arm64/kernel/fpsimd.c      | 86 ++++++++++++++++++++++++++++++++++-------
> >  3 files changed, 73 insertions(+), 16 deletions(-)
> > 
> > diff --git a/arch/arm64/include/asm/fpsimd.h b/arch/arm64/include/asm/fpsimd.h
> > index dd1ad39..964adc9 100644
> > --- a/arch/arm64/include/asm/fpsimd.h
> > +++ b/arch/arm64/include/asm/fpsimd.h
> > @@ -87,6 +87,7 @@ extern void sve_kernel_enable(const struct arm64_cpu_capabilities *__unused);
> >  extern u64 read_zcr_features(void);
> >  
> >  extern int __ro_after_init sve_max_vl;
> > +extern int __ro_after_init sve_max_virtualisable_vl;
> >  
> >  #ifdef CONFIG_ARM64_SVE
> >  
> > diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
> > index f6d84e2..5eaacb4 100644
> > --- a/arch/arm64/kernel/cpufeature.c
> > +++ b/arch/arm64/kernel/cpufeature.c
> > @@ -1825,7 +1825,7 @@ static void verify_sve_features(void)
> >  	unsigned int len = zcr & ZCR_ELx_LEN_MASK;
> >  
> >  	if (len < safe_len || sve_verify_vq_map()) {
> > -		pr_crit("CPU%d: SVE: required vector length(s) missing\n",
> > +		pr_crit("CPU%d: SVE: vector length support mismatch\n",
> >  			smp_processor_id());
> >  		cpu_die_early();
> >  	}
> > diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c
> > index 62c37f0..64729e2 100644
> > --- a/arch/arm64/kernel/fpsimd.c
> > +++ b/arch/arm64/kernel/fpsimd.c
> > @@ -18,6 +18,7 @@
> >   */
> >  
> >  #include <linux/bitmap.h>
> > +#include <linux/bitops.h>
> >  #include <linux/bottom_half.h>
> >  #include <linux/bug.h>
> >  #include <linux/cache.h>
> > @@ -48,6 +49,7 @@
> >  #include <asm/sigcontext.h>
> >  #include <asm/sysreg.h>
> >  #include <asm/traps.h>
> > +#include <asm/virt.h>
> >  
> >  #define FPEXC_IOF	(1 << 0)
> >  #define FPEXC_DZF	(1 << 1)
> > @@ -130,14 +132,18 @@ static int sve_default_vl = -1;
> >  
> >  /* Maximum supported vector length across all CPUs (initially poisoned) */
> >  int __ro_after_init sve_max_vl = SVE_VL_MIN;
> > +int __ro_after_init sve_max_virtualisable_vl = SVE_VL_MIN;
> >  /* Set of available vector lengths, as vq_to_bit(vq): */
> >  static __ro_after_init DECLARE_BITMAP(sve_vq_map, SVE_VQ_MAX);
> > +/* Set of vector lengths present on at least one cpu: */
> > +static __ro_after_init DECLARE_BITMAP(sve_vq_partial_map, SVE_VQ_MAX);
> >  static void __percpu *efi_sve_state;
> >  
> >  #else /* ! CONFIG_ARM64_SVE */
> >  
> >  /* Dummy declaration for code that will be optimised out: */
> >  extern __ro_after_init DECLARE_BITMAP(sve_vq_map, SVE_VQ_MAX);
> > +extern __ro_after_init DECLARE_BITMAP(sve_vq_partial_map, SVE_VQ_MAX);
> >  extern void __percpu *efi_sve_state;
> >  
> >  #endif /* ! CONFIG_ARM64_SVE */
> > @@ -623,12 +629,6 @@ int sve_get_current_vl(void)
> >  	return sve_prctl_status(0);
> >  }
> >  
> > -/*
> > - * Bitmap for temporary storage of the per-CPU set of supported vector lengths
> > - * during secondary boot.
> > - */
> > -static DECLARE_BITMAP(sve_secondary_vq_map, SVE_VQ_MAX);
> > -
> >  static void sve_probe_vqs(DECLARE_BITMAP(map, SVE_VQ_MAX))
> >  {
> >  	unsigned int vq, vl;
> > @@ -650,6 +650,7 @@ static void sve_probe_vqs(DECLARE_BITMAP(map, SVE_VQ_MAX))
> >  void __init sve_init_vq_map(void)
> >  {
> >  	sve_probe_vqs(sve_vq_map);
> > +	bitmap_copy(sve_vq_partial_map, sve_vq_map, SVE_VQ_MAX);
> >  }
> >  
> >  /*
> > @@ -658,25 +659,58 @@ void __init sve_init_vq_map(void)
> >   */
> >  void sve_update_vq_map(void)
> >  {
> > -	sve_probe_vqs(sve_secondary_vq_map);
> > -	bitmap_and(sve_vq_map, sve_vq_map, sve_secondary_vq_map, SVE_VQ_MAX);
> > +	DECLARE_BITMAP(tmp_map, SVE_VQ_MAX);
> > +
> > +	sve_probe_vqs(tmp_map);
> > +	bitmap_and(sve_vq_map, sve_vq_map, tmp_map, SVE_VQ_MAX);
> > +	bitmap_or(sve_vq_partial_map, sve_vq_partial_map, tmp_map, SVE_VQ_MAX);
> >  }
> >  
> >  /* Check whether the current CPU supports all VQs in the committed set */
> >  int sve_verify_vq_map(void)
> >  {
> > -	int ret = 0;
> > +	DECLARE_BITMAP(tmp_map, SVE_VQ_MAX);
> > +	unsigned long b;
> >  
> > -	sve_probe_vqs(sve_secondary_vq_map);
> > -	bitmap_andnot(sve_secondary_vq_map, sve_vq_map, sve_secondary_vq_map,
> > -		      SVE_VQ_MAX);
> > -	if (!bitmap_empty(sve_secondary_vq_map, SVE_VQ_MAX)) {
> > +	sve_probe_vqs(tmp_map);
> > +
> > +	bitmap_complement(tmp_map, tmp_map, SVE_VQ_MAX);
> > +	if (bitmap_intersects(tmp_map, sve_vq_map, SVE_VQ_MAX)) {
> >  		pr_warn("SVE: cpu%d: Required vector length(s) missing\n",
> >  			smp_processor_id());
> > -		ret = -EINVAL;
> > +		return -EINVAL;
> >  	}
> >  
> > -	return ret;
> > +	if (!IS_ENABLED(CONFIG_KVM) || !is_hyp_mode_available())
> > +		return 0;
> > +
> > +	/*
> > +	 * For KVM, it is necessary to ensure that this CPU doesn't
> > +	 * support any vector length that guests may have probed as
> > +	 * unsupported.
> > +	 */
> > +
> > +	/* Recover the set of supported VQs: */
> > +	bitmap_complement(tmp_map, tmp_map, SVE_VQ_MAX);
> > +	/* Find VQs supported that are not globally supported: */
> > +	bitmap_andnot(tmp_map, tmp_map, sve_vq_map, SVE_VQ_MAX);
> > +
> > +	/* Find the lowest such VQ, if any: */
> > +	b = find_last_bit(tmp_map, SVE_VQ_MAX);
> > +	if (b >= SVE_VQ_MAX)
> > +		return 0; /* no mismatches */
> > +
> > +	/*
> > +	 * Mismatches above sve_max_virtualisable_vl are fine, since
> > +	 * no guest is allowed to configure ZCR_EL2.LEN to exceed this:
> > +	 */
> > +	if (sve_vl_from_vq(bit_to_vq(b)) <= sve_max_virtualisable_vl) {
> > +		pr_warn("SVE: cpu%d: Unsupported vector length(s) present\n",
> 
> Nit: it might be good to specify that the vector length is unsupported for
> virtualisation.
> 
> Also, since KVM is the one deciding what to do with the information,
> should we have a warning here? That said, I can see it is useful to know
> which CPUs introduce unsupported vector lengths, so maybe use pr_devel()
> instead of pr_warn().

These warnings are really for consumption by SoC vendors, not users:
my aim is to flag up systems that we consider broken (or at least,
unsuitable for running KVM).

So I prefer to make this noisy and limit the amount of "useful"
information people might be tempted to programmatically scrape from
dmesg.

cpufeatures uses pr_warn("SANITY CHECK: [...]") here.  Maybe I should
stick "SANITY CHECK" in here too?  I will also try to make the commit
message more explicit and/or add comments to make the intent of the code
clearer.

It may also make sense to make this noise even if KVM isn't enabled
(which is a rare case anyhow).

Thoughts?

> In any case, the logic looks good to me:
> 
> Reviewed-by: Julien Thierry <julien.thierry@arm.com>

Thanks
---Dave
Dave Martin Feb. 26, 2019, 12:06 p.m. UTC | #4
On Thu, Feb 21, 2019 at 01:36:26PM +0000, Julien Grall wrote:
> Hi Dave,
> 
> On 18/02/2019 19:52, Dave Martin wrote:
> >+	/*
> >+	 * Mismatches above sve_max_virtualisable_vl are fine, since
> >+	 * no guest is allowed to configure ZCR_EL2.LEN to exceed this:
> >+	 */
> >+	if (sve_vl_from_vq(bit_to_vq(b)) <= sve_max_virtualisable_vl) {
> >+		pr_warn("SVE: cpu%d: Unsupported vector length(s) present\n",
> >+			smp_processor_id());
> 
> Would it be worth printing the unsupported vector length?

Possibly not, but admittedly the intent is a bit unclear in this patch.

See my reply to Julien Thierry (and respond on that subthread if you
have comments, so that we don't end up with two subthreads discussing
the same thing...)

Thanks
---Dave
Julien Grall Feb. 26, 2019, 3:43 p.m. UTC | #5
Hi Dave,

On 26/02/2019 12:06, Dave Martin wrote:
> On Thu, Feb 21, 2019 at 01:36:26PM +0000, Julien Grall wrote:
>> Hi Dave,
>>
>> On 18/02/2019 19:52, Dave Martin wrote:
>>> +	/*
>>> +	 * Mismatches above sve_max_virtualisable_vl are fine, since
>>> +	 * no guest is allowed to configure ZCR_EL2.LEN to exceed this:
>>> +	 */
>>> +	if (sve_vl_from_vq(bit_to_vq(b)) <= sve_max_virtualisable_vl) {
>>> +		pr_warn("SVE: cpu%d: Unsupported vector length(s) present\n",
>>> +			smp_processor_id());
>>
>> Would it be worth printing the unsupported vector length?
> 
> Possibly not, but admittedly the intent is a bit unclear in this patch.
> 
> See my reply to Julien Thierry (and respond on that subthread if you
> have comments, so that we don't end up with two subthreads discussing
> the same thing...)

I will have a look at the thread.

Cheers,
Julien Thierry March 1, 2019, 12:39 p.m. UTC | #6
On 26/02/2019 12:06, Dave Martin wrote:
> On Wed, Feb 20, 2019 at 11:12:49AM +0000, Julien Thierry wrote:
>> Hi Dave,
>>
>> On 18/02/2019 19:52, Dave Martin wrote:
>>> Due to the way the effective SVE vector length is controlled and
>>> trapped at different exception levels, certain mismatches in the
>>> sets of vector lengths supported by different physical CPUs in the
>>> system may prevent straightforward virtualisation of SVE at parity
>>> with the host.
>>>
>>> This patch analyses the extent to which SVE can be virtualised
>>> safely without interfering with migration of vcpus between physical
>>> CPUs, and rejects late secondary CPUs that would erode the
>>> situation further.
>>>
>>> It is left up to KVM to decide what to do with this information.
>>>
>>> Signed-off-by: Dave Martin <Dave.Martin@arm.com>
>>> ---
>>>  arch/arm64/include/asm/fpsimd.h |  1 +
>>>  arch/arm64/kernel/cpufeature.c  |  2 +-
>>>  arch/arm64/kernel/fpsimd.c      | 86 ++++++++++++++++++++++++++++++++++-------
>>>  3 files changed, 73 insertions(+), 16 deletions(-)
>>>
>>> diff --git a/arch/arm64/include/asm/fpsimd.h b/arch/arm64/include/asm/fpsimd.h
>>> index dd1ad39..964adc9 100644
>>> --- a/arch/arm64/include/asm/fpsimd.h
>>> +++ b/arch/arm64/include/asm/fpsimd.h
>>> @@ -87,6 +87,7 @@ extern void sve_kernel_enable(const struct arm64_cpu_capabilities *__unused);
>>>  extern u64 read_zcr_features(void);
>>>  
>>>  extern int __ro_after_init sve_max_vl;
>>> +extern int __ro_after_init sve_max_virtualisable_vl;
>>>  
>>>  #ifdef CONFIG_ARM64_SVE
>>>  
>>> diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
>>> index f6d84e2..5eaacb4 100644
>>> --- a/arch/arm64/kernel/cpufeature.c
>>> +++ b/arch/arm64/kernel/cpufeature.c
>>> @@ -1825,7 +1825,7 @@ static void verify_sve_features(void)
>>>  	unsigned int len = zcr & ZCR_ELx_LEN_MASK;
>>>  
>>>  	if (len < safe_len || sve_verify_vq_map()) {
>>> -		pr_crit("CPU%d: SVE: required vector length(s) missing\n",
>>> +		pr_crit("CPU%d: SVE: vector length support mismatch\n",
>>>  			smp_processor_id());
>>>  		cpu_die_early();
>>>  	}
>>> diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c
>>> index 62c37f0..64729e2 100644
>>> --- a/arch/arm64/kernel/fpsimd.c
>>> +++ b/arch/arm64/kernel/fpsimd.c
>>> @@ -18,6 +18,7 @@
>>>   */
>>>  
>>>  #include <linux/bitmap.h>
>>> +#include <linux/bitops.h>
>>>  #include <linux/bottom_half.h>
>>>  #include <linux/bug.h>
>>>  #include <linux/cache.h>
>>> @@ -48,6 +49,7 @@
>>>  #include <asm/sigcontext.h>
>>>  #include <asm/sysreg.h>
>>>  #include <asm/traps.h>
>>> +#include <asm/virt.h>
>>>  
>>>  #define FPEXC_IOF	(1 << 0)
>>>  #define FPEXC_DZF	(1 << 1)
>>> @@ -130,14 +132,18 @@ static int sve_default_vl = -1;
>>>  
>>>  /* Maximum supported vector length across all CPUs (initially poisoned) */
>>>  int __ro_after_init sve_max_vl = SVE_VL_MIN;
>>> +int __ro_after_init sve_max_virtualisable_vl = SVE_VL_MIN;
>>>  /* Set of available vector lengths, as vq_to_bit(vq): */
>>>  static __ro_after_init DECLARE_BITMAP(sve_vq_map, SVE_VQ_MAX);
>>> +/* Set of vector lengths present on at least one cpu: */
>>> +static __ro_after_init DECLARE_BITMAP(sve_vq_partial_map, SVE_VQ_MAX);
>>>  static void __percpu *efi_sve_state;
>>>  
>>>  #else /* ! CONFIG_ARM64_SVE */
>>>  
>>>  /* Dummy declaration for code that will be optimised out: */
>>>  extern __ro_after_init DECLARE_BITMAP(sve_vq_map, SVE_VQ_MAX);
>>> +extern __ro_after_init DECLARE_BITMAP(sve_vq_partial_map, SVE_VQ_MAX);
>>>  extern void __percpu *efi_sve_state;
>>>  
>>>  #endif /* ! CONFIG_ARM64_SVE */
>>> @@ -623,12 +629,6 @@ int sve_get_current_vl(void)
>>>  	return sve_prctl_status(0);
>>>  }
>>>  
>>> -/*
>>> - * Bitmap for temporary storage of the per-CPU set of supported vector lengths
>>> - * during secondary boot.
>>> - */
>>> -static DECLARE_BITMAP(sve_secondary_vq_map, SVE_VQ_MAX);
>>> -
>>>  static void sve_probe_vqs(DECLARE_BITMAP(map, SVE_VQ_MAX))
>>>  {
>>>  	unsigned int vq, vl;
>>> @@ -650,6 +650,7 @@ static void sve_probe_vqs(DECLARE_BITMAP(map, SVE_VQ_MAX))
>>>  void __init sve_init_vq_map(void)
>>>  {
>>>  	sve_probe_vqs(sve_vq_map);
>>> +	bitmap_copy(sve_vq_partial_map, sve_vq_map, SVE_VQ_MAX);
>>>  }
>>>  
>>>  /*
>>> @@ -658,25 +659,58 @@ void __init sve_init_vq_map(void)
>>>   */
>>>  void sve_update_vq_map(void)
>>>  {
>>> -	sve_probe_vqs(sve_secondary_vq_map);
>>> -	bitmap_and(sve_vq_map, sve_vq_map, sve_secondary_vq_map, SVE_VQ_MAX);
>>> +	DECLARE_BITMAP(tmp_map, SVE_VQ_MAX);
>>> +
>>> +	sve_probe_vqs(tmp_map);
>>> +	bitmap_and(sve_vq_map, sve_vq_map, tmp_map, SVE_VQ_MAX);
>>> +	bitmap_or(sve_vq_partial_map, sve_vq_partial_map, tmp_map, SVE_VQ_MAX);
>>>  }
>>>  
>>>  /* Check whether the current CPU supports all VQs in the committed set */
>>>  int sve_verify_vq_map(void)
>>>  {
>>> -	int ret = 0;
>>> +	DECLARE_BITMAP(tmp_map, SVE_VQ_MAX);
>>> +	unsigned long b;
>>>  
>>> -	sve_probe_vqs(sve_secondary_vq_map);
>>> -	bitmap_andnot(sve_secondary_vq_map, sve_vq_map, sve_secondary_vq_map,
>>> -		      SVE_VQ_MAX);
>>> -	if (!bitmap_empty(sve_secondary_vq_map, SVE_VQ_MAX)) {
>>> +	sve_probe_vqs(tmp_map);
>>> +
>>> +	bitmap_complement(tmp_map, tmp_map, SVE_VQ_MAX);
>>> +	if (bitmap_intersects(tmp_map, sve_vq_map, SVE_VQ_MAX)) {
>>>  		pr_warn("SVE: cpu%d: Required vector length(s) missing\n",
>>>  			smp_processor_id());
>>> -		ret = -EINVAL;
>>> +		return -EINVAL;
>>>  	}
>>>  
>>> -	return ret;
>>> +	if (!IS_ENABLED(CONFIG_KVM) || !is_hyp_mode_available())
>>> +		return 0;
>>> +
>>> +	/*
>>> +	 * For KVM, it is necessary to ensure that this CPU doesn't
>>> +	 * support any vector length that guests may have probed as
>>> +	 * unsupported.
>>> +	 */
>>> +
>>> +	/* Recover the set of supported VQs: */
>>> +	bitmap_complement(tmp_map, tmp_map, SVE_VQ_MAX);
>>> +	/* Find VQs supported that are not globally supported: */
>>> +	bitmap_andnot(tmp_map, tmp_map, sve_vq_map, SVE_VQ_MAX);
>>> +
>>> +	/* Find the lowest such VQ, if any: */
>>> +	b = find_last_bit(tmp_map, SVE_VQ_MAX);
>>> +	if (b >= SVE_VQ_MAX)
>>> +		return 0; /* no mismatches */
>>> +
>>> +	/*
>>> +	 * Mismatches above sve_max_virtualisable_vl are fine, since
>>> +	 * no guest is allowed to configure ZCR_EL2.LEN to exceed this:
>>> +	 */
>>> +	if (sve_vl_from_vq(bit_to_vq(b)) <= sve_max_virtualisable_vl) {
>>> +		pr_warn("SVE: cpu%d: Unsupported vector length(s) present\n",
>>
>> Nit: it might be good to specify that the vector length is unsupported for
>> virtualisation.
>>
>> Also, since KVM is the one deciding what to do with the information,
>> should we have a warning here? That said, I can see it is useful to know
>> which CPUs introduce unsupported vector lengths, so maybe use pr_devel()
>> instead of pr_warn().
> 
> These warnings are really for consumption by SoC vendors, not users:
> my aim is to flag up systems that we consider broken (or at least,
> unsuitable for running KVM).
> 
> So I prefer to make this noisy and limit the amount of "useful"
> information people might be tempted to programmatically scrape from
> dmesg.
> 
> cpufeatures uses pr_warn("SANITY CHECK: [...]") here.  Maybe I should
> stick "SANITY CHECK" in here too?  I will also try to make the commit
> message more explicit and/or add comments to make the intent of the code
> clearer.
> 
> It may also make sense to make this noise even if KVM isn't enabled
> (which is a rare case anyhow).
> 
> Thoughts?

As I explained later in the patch series, I missed the fact that this
function was for late secondary CPUs. I think things are fine like this
(just add the bit about the vector length not being supported for
virtualisation).

Cheers,
Dave Martin March 1, 2019, 2:44 p.m. UTC | #7
On Fri, Mar 01, 2019 at 12:39:52PM +0000, Julien Thierry wrote:
> 
> 
> On 26/02/2019 12:06, Dave Martin wrote:
> > On Wed, Feb 20, 2019 at 11:12:49AM +0000, Julien Thierry wrote:
> >> Hi Dave,
> >>
> >> On 18/02/2019 19:52, Dave Martin wrote:

[...]

> >>> +	/*
> >>> +	 * Mismatches above sve_max_virtualisable_vl are fine, since
> >>> +	 * no guest is allowed to configure ZCR_EL2.LEN to exceed this:
> >>> +	 */
> >>> +	if (sve_vl_from_vq(bit_to_vq(b)) <= sve_max_virtualisable_vl) {
> >>> +		pr_warn("SVE: cpu%d: Unsupported vector length(s) present\n",
> >>
> >> Nit: it might be good to specify that the vector length is unsupported for
> >> virtualisation.
> >>
> >> Also, since KVM is the one deciding what to do with the information,
> >> should we have a warning here? That said, I can see it is useful to know
> >> which CPUs introduce unsupported vector lengths, so maybe use pr_devel()
> >> instead of pr_warn().
> > 
> > These warnings are really for consumption by SoC vendors, not users:
> > my aim is to flag up systems that we consider broken (or at least,
> > unsuitable for running KVM).
> > 
> > So I prefer to make this noisy and limit the amount of "useful"
> > information people might be tempted to programmatically scrape from
> > dmesg.
> > 
> > cpufeatures uses pr_warn("SANITY CHECK: [...]") here.  Maybe I should
> > stick "SANITY CHECK" in here too?  I will also try to make the commit
> > message more explicit and/or add comments to make the intent of the code
> > clearer.
> > 
> > It may also make sense to make this noise even if KVM isn't enabled
> > (which is a rare case anyhow).
> > 
> > Thoughts?
> 
> As I explained later in the patch series, I missed the fact that this
> function was for late secondary CPUs. I think things are fine like this
> (just add the bit about the vector length not being supported for
> virtualisation).

I've now reworked all this a bit: I probe for the largest vector length
that can be offered to guests, and if this is less than the host's maximum
vector length then I print a one-off warning saying what limit KVM is
clamping guests' vector lengths to.

Elsewhere, I now just use the probed value as the maximum vector length,
rather than duplicating the bounds checking logic.

This approach seems simpler, and is hopefully a bit more self-
explanatory -- so please take a look when I repost :)

Cheers
---Dave
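
A rough sketch of the reworked shape Dave describes above (clamp guests to a probed maximum and warn once). The names kvm_sve_max_vl and kvm_arm_init_sve() are hypothetical, chosen only for illustration; this is not the code from the reposted series.

/*
 * Editorial sketch only: kvm_sve_max_vl and kvm_arm_init_sve() are
 * hypothetical names for illustration, not code from the reposted series.
 */
#include <linux/kernel.h>
#include <asm/cpufeature.h>
#include <asm/fpsimd.h>

int kvm_sve_max_vl;

int kvm_arm_init_sve(void)
{
	if (system_supports_sve()) {
		/* Largest VL that every physical CPU can offer to guests: */
		kvm_sve_max_vl = sve_max_virtualisable_vl;

		/* One-off warning if guests get less than the host's maximum: */
		if (kvm_sve_max_vl < sve_max_vl)
			pr_warn("KVM: SVE vector length for guests limited to %d bytes\n",
				kvm_sve_max_vl);
	}

	return 0;
}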

Patch

diff --git a/arch/arm64/include/asm/fpsimd.h b/arch/arm64/include/asm/fpsimd.h
index dd1ad39..964adc9 100644
--- a/arch/arm64/include/asm/fpsimd.h
+++ b/arch/arm64/include/asm/fpsimd.h
@@ -87,6 +87,7 @@  extern void sve_kernel_enable(const struct arm64_cpu_capabilities *__unused);
 extern u64 read_zcr_features(void);
 
 extern int __ro_after_init sve_max_vl;
+extern int __ro_after_init sve_max_virtualisable_vl;
 
 #ifdef CONFIG_ARM64_SVE
 
diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
index f6d84e2..5eaacb4 100644
--- a/arch/arm64/kernel/cpufeature.c
+++ b/arch/arm64/kernel/cpufeature.c
@@ -1825,7 +1825,7 @@  static void verify_sve_features(void)
 	unsigned int len = zcr & ZCR_ELx_LEN_MASK;
 
 	if (len < safe_len || sve_verify_vq_map()) {
-		pr_crit("CPU%d: SVE: required vector length(s) missing\n",
+		pr_crit("CPU%d: SVE: vector length support mismatch\n",
 			smp_processor_id());
 		cpu_die_early();
 	}
diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c
index 62c37f0..64729e2 100644
--- a/arch/arm64/kernel/fpsimd.c
+++ b/arch/arm64/kernel/fpsimd.c
@@ -18,6 +18,7 @@ 
  */
 
 #include <linux/bitmap.h>
+#include <linux/bitops.h>
 #include <linux/bottom_half.h>
 #include <linux/bug.h>
 #include <linux/cache.h>
@@ -48,6 +49,7 @@ 
 #include <asm/sigcontext.h>
 #include <asm/sysreg.h>
 #include <asm/traps.h>
+#include <asm/virt.h>
 
 #define FPEXC_IOF	(1 << 0)
 #define FPEXC_DZF	(1 << 1)
@@ -130,14 +132,18 @@  static int sve_default_vl = -1;
 
 /* Maximum supported vector length across all CPUs (initially poisoned) */
 int __ro_after_init sve_max_vl = SVE_VL_MIN;
+int __ro_after_init sve_max_virtualisable_vl = SVE_VL_MIN;
 /* Set of available vector lengths, as vq_to_bit(vq): */
 static __ro_after_init DECLARE_BITMAP(sve_vq_map, SVE_VQ_MAX);
+/* Set of vector lengths present on at least one cpu: */
+static __ro_after_init DECLARE_BITMAP(sve_vq_partial_map, SVE_VQ_MAX);
 static void __percpu *efi_sve_state;
 
 #else /* ! CONFIG_ARM64_SVE */
 
 /* Dummy declaration for code that will be optimised out: */
 extern __ro_after_init DECLARE_BITMAP(sve_vq_map, SVE_VQ_MAX);
+extern __ro_after_init DECLARE_BITMAP(sve_vq_partial_map, SVE_VQ_MAX);
 extern void __percpu *efi_sve_state;
 
 #endif /* ! CONFIG_ARM64_SVE */
@@ -623,12 +629,6 @@  int sve_get_current_vl(void)
 	return sve_prctl_status(0);
 }
 
-/*
- * Bitmap for temporary storage of the per-CPU set of supported vector lengths
- * during secondary boot.
- */
-static DECLARE_BITMAP(sve_secondary_vq_map, SVE_VQ_MAX);
-
 static void sve_probe_vqs(DECLARE_BITMAP(map, SVE_VQ_MAX))
 {
 	unsigned int vq, vl;
@@ -650,6 +650,7 @@  static void sve_probe_vqs(DECLARE_BITMAP(map, SVE_VQ_MAX))
 void __init sve_init_vq_map(void)
 {
 	sve_probe_vqs(sve_vq_map);
+	bitmap_copy(sve_vq_partial_map, sve_vq_map, SVE_VQ_MAX);
 }
 
 /*
@@ -658,25 +659,58 @@  void __init sve_init_vq_map(void)
  */
 void sve_update_vq_map(void)
 {
-	sve_probe_vqs(sve_secondary_vq_map);
-	bitmap_and(sve_vq_map, sve_vq_map, sve_secondary_vq_map, SVE_VQ_MAX);
+	DECLARE_BITMAP(tmp_map, SVE_VQ_MAX);
+
+	sve_probe_vqs(tmp_map);
+	bitmap_and(sve_vq_map, sve_vq_map, tmp_map, SVE_VQ_MAX);
+	bitmap_or(sve_vq_partial_map, sve_vq_partial_map, tmp_map, SVE_VQ_MAX);
 }
 
 /* Check whether the current CPU supports all VQs in the committed set */
 int sve_verify_vq_map(void)
 {
-	int ret = 0;
+	DECLARE_BITMAP(tmp_map, SVE_VQ_MAX);
+	unsigned long b;
 
-	sve_probe_vqs(sve_secondary_vq_map);
-	bitmap_andnot(sve_secondary_vq_map, sve_vq_map, sve_secondary_vq_map,
-		      SVE_VQ_MAX);
-	if (!bitmap_empty(sve_secondary_vq_map, SVE_VQ_MAX)) {
+	sve_probe_vqs(tmp_map);
+
+	bitmap_complement(tmp_map, tmp_map, SVE_VQ_MAX);
+	if (bitmap_intersects(tmp_map, sve_vq_map, SVE_VQ_MAX)) {
 		pr_warn("SVE: cpu%d: Required vector length(s) missing\n",
 			smp_processor_id());
-		ret = -EINVAL;
+		return -EINVAL;
 	}
 
-	return ret;
+	if (!IS_ENABLED(CONFIG_KVM) || !is_hyp_mode_available())
+		return 0;
+
+	/*
+	 * For KVM, it is necessary to ensure that this CPU doesn't
+	 * support any vector length that guests may have probed as
+	 * unsupported.
+	 */
+
+	/* Recover the set of supported VQs: */
+	bitmap_complement(tmp_map, tmp_map, SVE_VQ_MAX);
+	/* Find VQs supported that are not globally supported: */
+	bitmap_andnot(tmp_map, tmp_map, sve_vq_map, SVE_VQ_MAX);
+
+	/* Find the lowest such VQ, if any: */
+	b = find_last_bit(tmp_map, SVE_VQ_MAX);
+	if (b >= SVE_VQ_MAX)
+		return 0; /* no mismatches */
+
+	/*
+	 * Mismatches above sve_max_virtualisable_vl are fine, since
+	 * no guest is allowed to configure ZCR_EL2.LEN to exceed this:
+	 */
+	if (sve_vl_from_vq(bit_to_vq(b)) <= sve_max_virtualisable_vl) {
+		pr_warn("SVE: cpu%d: Unsupported vector length(s) present\n",
+			smp_processor_id());
+		return -EINVAL;
+	}
+
+	return 0;
 }
 
 static void __init sve_efi_setup(void)
@@ -743,6 +777,8 @@  u64 read_zcr_features(void)
 void __init sve_setup(void)
 {
 	u64 zcr;
+	DECLARE_BITMAP(tmp_map, SVE_VQ_MAX);
+	unsigned long b;
 
 	if (!system_supports_sve())
 		return;
@@ -771,11 +807,31 @@  void __init sve_setup(void)
 	 */
 	sve_default_vl = find_supported_vector_length(64);
 
+	bitmap_andnot(tmp_map, sve_vq_partial_map, sve_vq_map,
+		      SVE_VQ_MAX);
+
+	b = find_last_bit(tmp_map, SVE_VQ_MAX);
+	if (b >= SVE_VQ_MAX)
+		/* No non-virtualisable VLs found */
+		sve_max_virtualisable_vl = SVE_VQ_MAX;
+	else if (WARN_ON(b == SVE_VQ_MAX - 1))
+		/* No virtualisable VLs?  This is architecturally forbidden. */
+		sve_max_virtualisable_vl = SVE_VQ_MIN;
+	else /* b + 1 < SVE_VQ_MAX */
+		sve_max_virtualisable_vl = sve_vl_from_vq(bit_to_vq(b + 1));
+
+	if (sve_max_virtualisable_vl > sve_max_vl)
+		sve_max_virtualisable_vl = sve_max_vl;
+
 	pr_info("SVE: maximum available vector length %u bytes per vector\n",
 		sve_max_vl);
 	pr_info("SVE: default vector length %u bytes per vector\n",
 		sve_default_vl);
 
+	/* KVM decides whether to support mismatched systems. Just warn here: */
+	if (sve_max_virtualisable_vl < sve_max_vl)
+		pr_info("SVE: unvirtualisable vector lengths present\n");
+
 	sve_efi_setup();
 }
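
A small editorial worked example of the sve_max_virtualisable_vl computation in the sve_setup() hunk above. It assumes the existing reversed bitmap encoding, vq_to_bit(vq) == SVE_VQ_MAX - vq, which is why find_last_bit() picks out the lowest mismatched VQ; the CPU sets and the small SVE_VQ_MAX value are invented to keep the numbers readable.

/* Editorial model, not kernel code. */
#include <assert.h>

#define SVE_VQ_MAX	16			/* model value only */
#define vq_to_bit(vq)	(SVE_VQ_MAX - (vq))
#define bit_to_vq(bit)	(SVE_VQ_MAX - (bit))
#define sve_vl_from_vq(vq) ((vq) * 16)		/* 16 bytes per quadword */

int main(void)
{
	/*
	 * Example: VQ 1..4 are supported on every CPU (sve_max_vl == 64),
	 * but one CPU additionally supports VQ 8.  After
	 * bitmap_andnot(tmp_map, sve_vq_partial_map, sve_vq_map, ...)
	 * the only bit set is vq_to_bit(8).
	 */
	unsigned int b = vq_to_bit(8);	/* what find_last_bit() would return */
	int sve_max_vl = sve_vl_from_vq(4);
	int sve_max_virtualisable_vl;

	/* Reversed encoding: the last set bit is the lowest mismatched VQ. */
	assert(bit_to_vq(b) == 8);

	/* b + 1 is one VQ lower, i.e. the largest VQ below the mismatch: */
	sve_max_virtualisable_vl = sve_vl_from_vq(bit_to_vq(b + 1));
	assert(sve_max_virtualisable_vl == 112);

	/* The final clamp keeps the limit within the host's own maximum: */
	if (sve_max_virtualisable_vl > sve_max_vl)
		sve_max_virtualisable_vl = sve_max_vl;
	assert(sve_max_virtualisable_vl == 64);

	return 0;
}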