diff mbox series

[v2] xen/arm: Add workaround for Cortex-A76/Neoverse-N1 erratum #1286807

Message ID 20201116121140.26763-1-michal.orzel@arm.com (mailing list archive)
State New, archived
Headers show
Series [v2] xen/arm: Add workaround for Cortex-A76/Neoverse-N1 erratum #1286807 | expand

Commit Message

Michal Orzel Nov. 16, 2020, 12:11 p.m. UTC
On the affected Cortex-A76/Neoverse-N1 cores (r0p0 to r3p0),
if a virtual address for a cacheable mapping of a location is being
accessed by a core while another core is remapping the virtual
address to a new physical page using the recommended break-before-make
sequence, then under very rare circumstances TLBI+DSB completes before
a read using the translation being invalidated has been observed by
other observers. The workaround repeats the TLBI+DSB operation
for all the TLB flush operations on purpose.

Signed-off-by: Michal Orzel <michal.orzel@arm.com>
---
 docs/misc/arm/silicon-errata.txt     |  2 ++
 xen/arch/arm/Kconfig                 | 23 +++++++++++++++++++++
 xen/arch/arm/cpuerrata.c             | 14 +++++++++++++
 xen/include/asm-arm/arm64/flushtlb.h | 30 +++++++++++++++++++---------
 xen/include/asm-arm/cpufeature.h     |  3 ++-
 5 files changed, 62 insertions(+), 10 deletions(-)

Comments

Bertrand Marquis Nov. 16, 2020, 1:46 p.m. UTC | #1
Hi,

> On 16 Nov 2020, at 12:11, Michal Orzel <Michal.Orzel@arm.com> wrote:
> 
> On the affected Cortex-A76/Neoverse-N1 cores (r0p0 to r3p0),
> if a virtual address for a cacheable mapping of a location is being
> accessed by a core while another core is remapping the virtual
> address to a new physical page using the recommended break-before-make
> sequence, then under very rare circumstances TLBI+DSB completes before
> a read using the translation being invalidated has been observed by
> other observers. The workaround repeats the TLBI+DSB operation
> for all the TLB flush operations on purpose.
> 
> Signed-off-by: Michal Orzel <michal.orzel@arm.com>

Reviewed-by: Bertrand Marquis <bertrand.marquis@arm.com>

Cheers
Bertrand

> ---
> docs/misc/arm/silicon-errata.txt     |  2 ++
> xen/arch/arm/Kconfig                 | 23 +++++++++++++++++++++
> xen/arch/arm/cpuerrata.c             | 14 +++++++++++++
> xen/include/asm-arm/arm64/flushtlb.h | 30 +++++++++++++++++++---------
> xen/include/asm-arm/cpufeature.h     |  3 ++-
> 5 files changed, 62 insertions(+), 10 deletions(-)
> 
> diff --git a/docs/misc/arm/silicon-errata.txt b/docs/misc/arm/silicon-errata.txt
> index 552c4151d3..d183ba543f 100644
> --- a/docs/misc/arm/silicon-errata.txt
> +++ b/docs/misc/arm/silicon-errata.txt
> @@ -53,5 +53,7 @@ stable hypervisors.
> | ARM            | Cortex-A72      | #853709         | N/A                     |
> | ARM            | Cortex-A73      | #858921         | ARM_ERRATUM_858921      |
> | ARM            | Cortex-A76      | #1165522        | N/A                     |
> +| ARM            | Cortex-A76      | #1286807        | ARM64_ERRATUM_1286807   |
> | ARM            | Neoverse-N1     | #1165522        | N/A
> +| ARM            | Neoverse-N1     | #1286807        | ARM64_ERRATUM_1286807   |
> | ARM            | MMU-500         | #842869         | N/A                     |
> diff --git a/xen/arch/arm/Kconfig b/xen/arch/arm/Kconfig
> index f938dd21bd..8171b8d04a 100644
> --- a/xen/arch/arm/Kconfig
> +++ b/xen/arch/arm/Kconfig
> @@ -244,6 +244,29 @@ config ARM_ERRATUM_858921
> 
> 	  If unsure, say Y.
> 
> +config ARM64_WORKAROUND_REPEAT_TLBI
> +	bool
> +
> +config ARM64_ERRATUM_1286807
> +	bool "Cortex-A76/Neoverse-N1: 1286807: Modification of the translation table for a virtual address might lead to read-after-read ordering violation"
> +	default y
> +	select ARM64_WORKAROUND_REPEAT_TLBI
> +	depends on ARM_64
> +	help
> +	  This option adds a workaround for ARM Cortex-A76/Neoverse-N1 erratum 1286807.
> +
> +	  On the affected Cortex-A76/Neoverse-N1 cores (r0p0 to r3p0), if a virtual
> +	  address for a cacheable mapping of a location is being
> +	  accessed by a core while another core is remapping the virtual
> +	  address to a new physical page using the recommended
> +	  break-before-make sequence, then under very rare circumstances
> +	  TLBI+DSB completes before a read using the translation being
> +	  invalidated has been observed by other observers. The
> +	  workaround repeats the TLBI+DSB operation for all the TLB flush
> +	  operations on purpose.
> +
> +	  If unsure, say Y.
> +
> endmenu
> 
> config ARM64_HARDEN_BRANCH_PREDICTOR
> diff --git a/xen/arch/arm/cpuerrata.c b/xen/arch/arm/cpuerrata.c
> index 567911d380..cb4795beec 100644
> --- a/xen/arch/arm/cpuerrata.c
> +++ b/xen/arch/arm/cpuerrata.c
> @@ -424,6 +424,20 @@ static const struct arm_cpu_capabilities arm_errata[] = {
>                    (1 << MIDR_VARIANT_SHIFT) | 2),
>     },
> #endif
> +#ifdef CONFIG_ARM64_ERRATUM_1286807
> +    {
> +        /* Cortex-A76 r0p0 - r3p0 */
> +        .desc = "ARM erratum 1286807",
> +        .capability = ARM64_WORKAROUND_REPEAT_TLBI,
> +        MIDR_RANGE(MIDR_CORTEX_A76, 0, 3 << MIDR_VARIANT_SHIFT),
> +    },
> +    {
> +        /* Neoverse-N1 r0p0 - r3p0 */
> +        .desc = "ARM erratum 1286807",
> +        .capability = ARM64_WORKAROUND_REPEAT_TLBI,
> +        MIDR_RANGE(MIDR_NEOVERSE_N1, 0, 3 << MIDR_VARIANT_SHIFT),
> +    },
> +#endif
> #ifdef CONFIG_ARM64_HARDEN_BRANCH_PREDICTOR
>     {
>         .capability = ARM_HARDEN_BRANCH_PREDICTOR,
> diff --git a/xen/include/asm-arm/arm64/flushtlb.h b/xen/include/asm-arm/arm64/flushtlb.h
> index ceec59542e..8f2abfaf1d 100644
> --- a/xen/include/asm-arm/arm64/flushtlb.h
> +++ b/xen/include/asm-arm/arm64/flushtlb.h
> @@ -9,6 +9,12 @@
>  * DSB ISH          // Ensure the TLB invalidation has completed
>  * ISB              // See explanation below
>  *
> + * ARM64_WORKAROUND_REPEAT_TLBI:
> + * Modification of the translation table for a virtual address might lead to
> + * read-after-read ordering violation.
> + * The workaround repeats TLBI+DSB operation for all the TLB flush operations
> + * on purpose.
> + *
>  * For Xen page-tables the ISB will discard any instructions fetched
>  * from the old mappings.
>  *
> @@ -16,15 +22,21 @@
>  * (and therefore the TLB invalidation) before continuing. So we know
>  * the TLBs cannot contain an entry for a mapping we may have removed.
>  */
> -#define TLB_HELPER(name, tlbop) \
> -static inline void name(void)   \
> -{                               \
> -    asm volatile(               \
> -        "dsb  ishst;"           \
> -        "tlbi "  # tlbop  ";"   \
> -        "dsb  ish;"             \
> -        "isb;"                  \
> -        : : : "memory");        \
> +#define TLB_HELPER(name, tlbop)                  \
> +static inline void name(void)                    \
> +{                                                \
> +    asm volatile(                                \
> +        "dsb  ishst;"                            \
> +        "tlbi "  # tlbop  ";"                    \
> +        ALTERNATIVE(                             \
> +            "nop; nop;",                         \
> +            "dsb  ish;"                          \
> +            "tlbi "  # tlbop  ";",               \
> +            ARM64_WORKAROUND_REPEAT_TLBI,        \
> +            CONFIG_ARM64_WORKAROUND_REPEAT_TLBI) \
> +        "dsb  ish;"                              \
> +        "isb;"                                   \
> +        : : : "memory");                         \
> }
> 
> /* Flush local TLBs, current VMID only. */
> diff --git a/xen/include/asm-arm/cpufeature.h b/xen/include/asm-arm/cpufeature.h
> index 016a9fe203..c7b5052992 100644
> --- a/xen/include/asm-arm/cpufeature.h
> +++ b/xen/include/asm-arm/cpufeature.h
> @@ -46,8 +46,9 @@
> #define ARM_SMCCC_1_1 8
> #define ARM64_WORKAROUND_AT_SPECULATE 9
> #define ARM_WORKAROUND_858921 10
> +#define ARM64_WORKAROUND_REPEAT_TLBI 11
> 
> -#define ARM_NCAPS           11
> +#define ARM_NCAPS           12
> 
> #ifndef __ASSEMBLY__
> 
> -- 
> 2.28.0
>
Stefano Stabellini Nov. 16, 2020, 10:15 p.m. UTC | #2
On Mon, 16 Nov 2020, Michal Orzel wrote:
> On the affected Cortex-A76/Neoverse-N1 cores (r0p0 to r3p0),
> if a virtual address for a cacheable mapping of a location is being
> accessed by a core while another core is remapping the virtual
> address to a new physical page using the recommended break-before-make
> sequence, then under very rare circumstances TLBI+DSB completes before
> a read using the translation being invalidated has been observed by
> other observers. The workaround repeats the TLBI+DSB operation
> for all the TLB flush operations on purpose.
> 
> Signed-off-by: Michal Orzel <michal.orzel@arm.com>

Looks good and it looks like you addressed all Julien's comments, so:

Reviewed-by: Stefano Stabellini <sstabellini@kernel.org>


> ---
>  docs/misc/arm/silicon-errata.txt     |  2 ++
>  xen/arch/arm/Kconfig                 | 23 +++++++++++++++++++++
>  xen/arch/arm/cpuerrata.c             | 14 +++++++++++++
>  xen/include/asm-arm/arm64/flushtlb.h | 30 +++++++++++++++++++---------
>  xen/include/asm-arm/cpufeature.h     |  3 ++-
>  5 files changed, 62 insertions(+), 10 deletions(-)
> 
> diff --git a/docs/misc/arm/silicon-errata.txt b/docs/misc/arm/silicon-errata.txt
> index 552c4151d3..d183ba543f 100644
> --- a/docs/misc/arm/silicon-errata.txt
> +++ b/docs/misc/arm/silicon-errata.txt
> @@ -53,5 +53,7 @@ stable hypervisors.
>  | ARM            | Cortex-A72      | #853709         | N/A                     |
>  | ARM            | Cortex-A73      | #858921         | ARM_ERRATUM_858921      |
>  | ARM            | Cortex-A76      | #1165522        | N/A                     |
> +| ARM            | Cortex-A76      | #1286807        | ARM64_ERRATUM_1286807   |
>  | ARM            | Neoverse-N1     | #1165522        | N/A
> +| ARM            | Neoverse-N1     | #1286807        | ARM64_ERRATUM_1286807   |
>  | ARM            | MMU-500         | #842869         | N/A                     |
> diff --git a/xen/arch/arm/Kconfig b/xen/arch/arm/Kconfig
> index f938dd21bd..8171b8d04a 100644
> --- a/xen/arch/arm/Kconfig
> +++ b/xen/arch/arm/Kconfig
> @@ -244,6 +244,29 @@ config ARM_ERRATUM_858921
>  
>  	  If unsure, say Y.
>  
> +config ARM64_WORKAROUND_REPEAT_TLBI
> +	bool
> +
> +config ARM64_ERRATUM_1286807
> +	bool "Cortex-A76/Neoverse-N1: 1286807: Modification of the translation table for a virtual address might lead to read-after-read ordering violation"
> +	default y
> +	select ARM64_WORKAROUND_REPEAT_TLBI
> +	depends on ARM_64
> +	help
> +	  This option adds a workaround for ARM Cortex-A76/Neoverse-N1 erratum 1286807.
> +
> +	  On the affected Cortex-A76/Neoverse-N1 cores (r0p0 to r3p0), if a virtual
> +	  address for a cacheable mapping of a location is being
> +	  accessed by a core while another core is remapping the virtual
> +	  address to a new physical page using the recommended
> +	  break-before-make sequence, then under very rare circumstances
> +	  TLBI+DSB completes before a read using the translation being
> +	  invalidated has been observed by other observers. The
> +	  workaround repeats the TLBI+DSB operation for all the TLB flush
> +	  operations on purpose.
> +
> +	  If unsure, say Y.
> +
>  endmenu
>  
>  config ARM64_HARDEN_BRANCH_PREDICTOR
> diff --git a/xen/arch/arm/cpuerrata.c b/xen/arch/arm/cpuerrata.c
> index 567911d380..cb4795beec 100644
> --- a/xen/arch/arm/cpuerrata.c
> +++ b/xen/arch/arm/cpuerrata.c
> @@ -424,6 +424,20 @@ static const struct arm_cpu_capabilities arm_errata[] = {
>                     (1 << MIDR_VARIANT_SHIFT) | 2),
>      },
>  #endif
> +#ifdef CONFIG_ARM64_ERRATUM_1286807
> +    {
> +        /* Cortex-A76 r0p0 - r3p0 */
> +        .desc = "ARM erratum 1286807",
> +        .capability = ARM64_WORKAROUND_REPEAT_TLBI,
> +        MIDR_RANGE(MIDR_CORTEX_A76, 0, 3 << MIDR_VARIANT_SHIFT),
> +    },
> +    {
> +        /* Neoverse-N1 r0p0 - r3p0 */
> +        .desc = "ARM erratum 1286807",
> +        .capability = ARM64_WORKAROUND_REPEAT_TLBI,
> +        MIDR_RANGE(MIDR_NEOVERSE_N1, 0, 3 << MIDR_VARIANT_SHIFT),
> +    },
> +#endif
>  #ifdef CONFIG_ARM64_HARDEN_BRANCH_PREDICTOR
>      {
>          .capability = ARM_HARDEN_BRANCH_PREDICTOR,
> diff --git a/xen/include/asm-arm/arm64/flushtlb.h b/xen/include/asm-arm/arm64/flushtlb.h
> index ceec59542e..8f2abfaf1d 100644
> --- a/xen/include/asm-arm/arm64/flushtlb.h
> +++ b/xen/include/asm-arm/arm64/flushtlb.h
> @@ -9,6 +9,12 @@
>   * DSB ISH          // Ensure the TLB invalidation has completed
>   * ISB              // See explanation below
>   *
> + * ARM64_WORKAROUND_REPEAT_TLBI:
> + * Modification of the translation table for a virtual address might lead to
> + * read-after-read ordering violation.
> + * The workaround repeats TLBI+DSB operation for all the TLB flush operations
> + * on purpose.
> + *
>   * For Xen page-tables the ISB will discard any instructions fetched
>   * from the old mappings.
>   *
> @@ -16,15 +22,21 @@
>   * (and therefore the TLB invalidation) before continuing. So we know
>   * the TLBs cannot contain an entry for a mapping we may have removed.
>   */
> -#define TLB_HELPER(name, tlbop) \
> -static inline void name(void)   \
> -{                               \
> -    asm volatile(               \
> -        "dsb  ishst;"           \
> -        "tlbi "  # tlbop  ";"   \
> -        "dsb  ish;"             \
> -        "isb;"                  \
> -        : : : "memory");        \
> +#define TLB_HELPER(name, tlbop)                  \
> +static inline void name(void)                    \
> +{                                                \
> +    asm volatile(                                \
> +        "dsb  ishst;"                            \
> +        "tlbi "  # tlbop  ";"                    \
> +        ALTERNATIVE(                             \
> +            "nop; nop;",                         \
> +            "dsb  ish;"                          \
> +            "tlbi "  # tlbop  ";",               \
> +            ARM64_WORKAROUND_REPEAT_TLBI,        \
> +            CONFIG_ARM64_WORKAROUND_REPEAT_TLBI) \
> +        "dsb  ish;"                              \
> +        "isb;"                                   \
> +        : : : "memory");                         \
>  }
>  
>  /* Flush local TLBs, current VMID only. */
> diff --git a/xen/include/asm-arm/cpufeature.h b/xen/include/asm-arm/cpufeature.h
> index 016a9fe203..c7b5052992 100644
> --- a/xen/include/asm-arm/cpufeature.h
> +++ b/xen/include/asm-arm/cpufeature.h
> @@ -46,8 +46,9 @@
>  #define ARM_SMCCC_1_1 8
>  #define ARM64_WORKAROUND_AT_SPECULATE 9
>  #define ARM_WORKAROUND_858921 10
> +#define ARM64_WORKAROUND_REPEAT_TLBI 11
>  
> -#define ARM_NCAPS           11
> +#define ARM_NCAPS           12
>  
>  #ifndef __ASSEMBLY__
>  
> -- 
> 2.28.0
>
Julien Grall Nov. 17, 2020, 5:30 p.m. UTC | #3
Hi Michal,

On 16/11/2020 12:11, Michal Orzel wrote:
> On the affected Cortex-A76/Neoverse-N1 cores (r0p0 to r3p0),
> if a virtual address for a cacheable mapping of a location is being
> accessed by a core while another core is remapping the virtual
> address to a new physical page using the recommended break-before-make
> sequence, then under very rare circumstances TLBI+DSB completes before
> a read using the translation being invalidated has been observed by
> other observers. The workaround repeats the TLBI+DSB operation
> for all the TLB flush operations on purpose.

Sorry for nitpicking, the commit message should contain enough 
information for future reader to understand why this was done "on purpose".

So how about:

"The workaround repeats the TLBI+DSB operation for all the TLB flush 
operations. While this is stricly not necessary, we don't want to take 
any risk.".

I can fix it on commit.

Reviewed-by: Julien Grall <jgrall@amazon.com>

> 
> Signed-off-by: Michal Orzel <michal.orzel@arm.com>
> ---
>   docs/misc/arm/silicon-errata.txt     |  2 ++
>   xen/arch/arm/Kconfig                 | 23 +++++++++++++++++++++
>   xen/arch/arm/cpuerrata.c             | 14 +++++++++++++
>   xen/include/asm-arm/arm64/flushtlb.h | 30 +++++++++++++++++++---------
>   xen/include/asm-arm/cpufeature.h     |  3 ++-
>   5 files changed, 62 insertions(+), 10 deletions(-)
> 
> diff --git a/docs/misc/arm/silicon-errata.txt b/docs/misc/arm/silicon-errata.txt
> index 552c4151d3..d183ba543f 100644
> --- a/docs/misc/arm/silicon-errata.txt
> +++ b/docs/misc/arm/silicon-errata.txt
> @@ -53,5 +53,7 @@ stable hypervisors.
>   | ARM            | Cortex-A72      | #853709         | N/A                     |
>   | ARM            | Cortex-A73      | #858921         | ARM_ERRATUM_858921      |
>   | ARM            | Cortex-A76      | #1165522        | N/A                     |
> +| ARM            | Cortex-A76      | #1286807        | ARM64_ERRATUM_1286807   |
>   | ARM            | Neoverse-N1     | #1165522        | N/A
> +| ARM            | Neoverse-N1     | #1286807        | ARM64_ERRATUM_1286807   |
>   | ARM            | MMU-500         | #842869         | N/A                     |
> diff --git a/xen/arch/arm/Kconfig b/xen/arch/arm/Kconfig
> index f938dd21bd..8171b8d04a 100644
> --- a/xen/arch/arm/Kconfig
> +++ b/xen/arch/arm/Kconfig
> @@ -244,6 +244,29 @@ config ARM_ERRATUM_858921
>   
>   	  If unsure, say Y.
>   
> +config ARM64_WORKAROUND_REPEAT_TLBI
> +	bool
> +
> +config ARM64_ERRATUM_1286807
> +	bool "Cortex-A76/Neoverse-N1: 1286807: Modification of the translation table for a virtual address might lead to read-after-read ordering violation"
> +	default y
> +	select ARM64_WORKAROUND_REPEAT_TLBI
> +	depends on ARM_64
> +	help
> +	  This option adds a workaround for ARM Cortex-A76/Neoverse-N1 erratum 1286807.
> +
> +	  On the affected Cortex-A76/Neoverse-N1 cores (r0p0 to r3p0), if a virtual
> +	  address for a cacheable mapping of a location is being
> +	  accessed by a core while another core is remapping the virtual
> +	  address to a new physical page using the recommended
> +	  break-before-make sequence, then under very rare circumstances
> +	  TLBI+DSB completes before a read using the translation being
> +	  invalidated has been observed by other observers. The
> +	  workaround repeats the TLBI+DSB operation for all the TLB flush
> +	  operations on purpose.
If you agree with what I wrote above, I will update this section and ...

> +
> +	  If unsure, say Y.
> +
>   endmenu
>   
>   config ARM64_HARDEN_BRANCH_PREDICTOR
> diff --git a/xen/arch/arm/cpuerrata.c b/xen/arch/arm/cpuerrata.c
> index 567911d380..cb4795beec 100644
> --- a/xen/arch/arm/cpuerrata.c
> +++ b/xen/arch/arm/cpuerrata.c
> @@ -424,6 +424,20 @@ static const struct arm_cpu_capabilities arm_errata[] = {
>                      (1 << MIDR_VARIANT_SHIFT) | 2),
>       },
>   #endif
> +#ifdef CONFIG_ARM64_ERRATUM_1286807
> +    {
> +        /* Cortex-A76 r0p0 - r3p0 */
> +        .desc = "ARM erratum 1286807",
> +        .capability = ARM64_WORKAROUND_REPEAT_TLBI,
> +        MIDR_RANGE(MIDR_CORTEX_A76, 0, 3 << MIDR_VARIANT_SHIFT),
> +    },
> +    {
> +        /* Neoverse-N1 r0p0 - r3p0 */
> +        .desc = "ARM erratum 1286807",
> +        .capability = ARM64_WORKAROUND_REPEAT_TLBI,
> +        MIDR_RANGE(MIDR_NEOVERSE_N1, 0, 3 << MIDR_VARIANT_SHIFT),
> +    },
> +#endif
>   #ifdef CONFIG_ARM64_HARDEN_BRANCH_PREDICTOR
>       {
>           .capability = ARM_HARDEN_BRANCH_PREDICTOR,
> diff --git a/xen/include/asm-arm/arm64/flushtlb.h b/xen/include/asm-arm/arm64/flushtlb.h
> index ceec59542e..8f2abfaf1d 100644
> --- a/xen/include/asm-arm/arm64/flushtlb.h
> +++ b/xen/include/asm-arm/arm64/flushtlb.h
> @@ -9,6 +9,12 @@
>    * DSB ISH          // Ensure the TLB invalidation has completed
>    * ISB              // See explanation below
>    *
> + * ARM64_WORKAROUND_REPEAT_TLBI:
> + * Modification of the translation table for a virtual address might lead to
> + * read-after-read ordering violation.
> + * The workaround repeats TLBI+DSB operation for all the TLB flush operations
> + * on purpose.

... this section while committing.

Cheers,
Michal Orzel Nov. 18, 2020, 7:12 a.m. UTC | #4
Hi Julien,

On 17.11.2020 18:30, Julien Grall wrote:
> Hi Michal,
> 
> On 16/11/2020 12:11, Michal Orzel wrote:
>> On the affected Cortex-A76/Neoverse-N1 cores (r0p0 to r3p0),
>> if a virtual address for a cacheable mapping of a location is being
>> accessed by a core while another core is remapping the virtual
>> address to a new physical page using the recommended break-before-make
>> sequence, then under very rare circumstances TLBI+DSB completes before
>> a read using the translation being invalidated has been observed by
>> other observers. The workaround repeats the TLBI+DSB operation
>> for all the TLB flush operations on purpose.
> 
> Sorry for nitpicking, the commit message should contain enough information for future reader to understand why this was done "on purpose".
> 
> So how about:
> 
> "The workaround repeats the TLBI+DSB operation for all the TLB flush operations. While this is stricly not necessary, we don't want to take any risk.".
> 
> I can fix it on commit.
> 
Ok I agree to add this extra clarification.
Please go on and fix it on commit/etc.

Thanks,
Michal
> Reviewed-by: Julien Grall <jgrall@amazon.com>
> 
>>
>> Signed-off-by: Michal Orzel <michal.orzel@arm.com>
>> ---
>>   docs/misc/arm/silicon-errata.txt     |  2 ++
>>   xen/arch/arm/Kconfig                 | 23 +++++++++++++++++++++
>>   xen/arch/arm/cpuerrata.c             | 14 +++++++++++++
>>   xen/include/asm-arm/arm64/flushtlb.h | 30 +++++++++++++++++++---------
>>   xen/include/asm-arm/cpufeature.h     |  3 ++-
>>   5 files changed, 62 insertions(+), 10 deletions(-)
>>
>> diff --git a/docs/misc/arm/silicon-errata.txt b/docs/misc/arm/silicon-errata.txt
>> index 552c4151d3..d183ba543f 100644
>> --- a/docs/misc/arm/silicon-errata.txt
>> +++ b/docs/misc/arm/silicon-errata.txt
>> @@ -53,5 +53,7 @@ stable hypervisors.
>>   | ARM            | Cortex-A72      | #853709         | N/A                     |
>>   | ARM            | Cortex-A73      | #858921         | ARM_ERRATUM_858921      |
>>   | ARM            | Cortex-A76      | #1165522        | N/A                     |
>> +| ARM            | Cortex-A76      | #1286807        | ARM64_ERRATUM_1286807   |
>>   | ARM            | Neoverse-N1     | #1165522        | N/A
>> +| ARM            | Neoverse-N1     | #1286807        | ARM64_ERRATUM_1286807   |
>>   | ARM            | MMU-500         | #842869         | N/A                     |
>> diff --git a/xen/arch/arm/Kconfig b/xen/arch/arm/Kconfig
>> index f938dd21bd..8171b8d04a 100644
>> --- a/xen/arch/arm/Kconfig
>> +++ b/xen/arch/arm/Kconfig
>> @@ -244,6 +244,29 @@ config ARM_ERRATUM_858921
>>           If unsure, say Y.
>>   +config ARM64_WORKAROUND_REPEAT_TLBI
>> +    bool
>> +
>> +config ARM64_ERRATUM_1286807
>> +    bool "Cortex-A76/Neoverse-N1: 1286807: Modification of the translation table for a virtual address might lead to read-after-read ordering violation"
>> +    default y
>> +    select ARM64_WORKAROUND_REPEAT_TLBI
>> +    depends on ARM_64
>> +    help
>> +      This option adds a workaround for ARM Cortex-A76/Neoverse-N1 erratum 1286807.
>> +
>> +      On the affected Cortex-A76/Neoverse-N1 cores (r0p0 to r3p0), if a virtual
>> +      address for a cacheable mapping of a location is being
>> +      accessed by a core while another core is remapping the virtual
>> +      address to a new physical page using the recommended
>> +      break-before-make sequence, then under very rare circumstances
>> +      TLBI+DSB completes before a read using the translation being
>> +      invalidated has been observed by other observers. The
>> +      workaround repeats the TLBI+DSB operation for all the TLB flush
>> +      operations on purpose.
> If you agree with what I wrote above, I will update this section and ...
> 
>> +
>> +      If unsure, say Y.
>> +
>>   endmenu
>>     config ARM64_HARDEN_BRANCH_PREDICTOR
>> diff --git a/xen/arch/arm/cpuerrata.c b/xen/arch/arm/cpuerrata.c
>> index 567911d380..cb4795beec 100644
>> --- a/xen/arch/arm/cpuerrata.c
>> +++ b/xen/arch/arm/cpuerrata.c
>> @@ -424,6 +424,20 @@ static const struct arm_cpu_capabilities arm_errata[] = {
>>                      (1 << MIDR_VARIANT_SHIFT) | 2),
>>       },
>>   #endif
>> +#ifdef CONFIG_ARM64_ERRATUM_1286807
>> +    {
>> +        /* Cortex-A76 r0p0 - r3p0 */
>> +        .desc = "ARM erratum 1286807",
>> +        .capability = ARM64_WORKAROUND_REPEAT_TLBI,
>> +        MIDR_RANGE(MIDR_CORTEX_A76, 0, 3 << MIDR_VARIANT_SHIFT),
>> +    },
>> +    {
>> +        /* Neoverse-N1 r0p0 - r3p0 */
>> +        .desc = "ARM erratum 1286807",
>> +        .capability = ARM64_WORKAROUND_REPEAT_TLBI,
>> +        MIDR_RANGE(MIDR_NEOVERSE_N1, 0, 3 << MIDR_VARIANT_SHIFT),
>> +    },
>> +#endif
>>   #ifdef CONFIG_ARM64_HARDEN_BRANCH_PREDICTOR
>>       {
>>           .capability = ARM_HARDEN_BRANCH_PREDICTOR,
>> diff --git a/xen/include/asm-arm/arm64/flushtlb.h b/xen/include/asm-arm/arm64/flushtlb.h
>> index ceec59542e..8f2abfaf1d 100644
>> --- a/xen/include/asm-arm/arm64/flushtlb.h
>> +++ b/xen/include/asm-arm/arm64/flushtlb.h
>> @@ -9,6 +9,12 @@
>>    * DSB ISH          // Ensure the TLB invalidation has completed
>>    * ISB              // See explanation below
>>    *
>> + * ARM64_WORKAROUND_REPEAT_TLBI:
>> + * Modification of the translation table for a virtual address might lead to
>> + * read-after-read ordering violation.
>> + * The workaround repeats TLBI+DSB operation for all the TLB flush operations
>> + * on purpose.
> 
> ... this section while committing.
> 
> Cheers,
>
Julien Grall Nov. 18, 2020, 3:26 p.m. UTC | #5
On 18/11/2020 07:12, Michal Orzel wrote:
> Hi Julien,

Hi Michal,

> On 17.11.2020 18:30, Julien Grall wrote:
>> Hi Michal,
>>
>> On 16/11/2020 12:11, Michal Orzel wrote:
>>> On the affected Cortex-A76/Neoverse-N1 cores (r0p0 to r3p0),
>>> if a virtual address for a cacheable mapping of a location is being
>>> accessed by a core while another core is remapping the virtual
>>> address to a new physical page using the recommended break-before-make
>>> sequence, then under very rare circumstances TLBI+DSB completes before
>>> a read using the translation being invalidated has been observed by
>>> other observers. The workaround repeats the TLBI+DSB operation
>>> for all the TLB flush operations on purpose.
>>
>> Sorry for nitpicking, the commit message should contain enough information for future reader to understand why this was done "on purpose".
>>
>> So how about:
>>
>> "The workaround repeats the TLBI+DSB operation for all the TLB flush operations. While this is stricly not necessary, we don't want to take any risk.".
>>
>> I can fix it on commit.
>>
> Ok I agree to add this extra clarification.
> Please go on and fix it on commit/etc.

Thanks. I have committed now with an extra small change (I hope that's 
fine). I decided to not add the extra-clarification in the Kconfig for 
two reasons:
  1) The description is already pretty long and I wanted to keep it 
short. A more user interested with the workaround would likely go and 
read the code.
  2) There are a risk the description will get desynchronized in the 
future. So better to keep the justification close to the implementation.

Cheers,
diff mbox series

Patch

diff --git a/docs/misc/arm/silicon-errata.txt b/docs/misc/arm/silicon-errata.txt
index 552c4151d3..d183ba543f 100644
--- a/docs/misc/arm/silicon-errata.txt
+++ b/docs/misc/arm/silicon-errata.txt
@@ -53,5 +53,7 @@  stable hypervisors.
 | ARM            | Cortex-A72      | #853709         | N/A                     |
 | ARM            | Cortex-A73      | #858921         | ARM_ERRATUM_858921      |
 | ARM            | Cortex-A76      | #1165522        | N/A                     |
+| ARM            | Cortex-A76      | #1286807        | ARM64_ERRATUM_1286807   |
 | ARM            | Neoverse-N1     | #1165522        | N/A
+| ARM            | Neoverse-N1     | #1286807        | ARM64_ERRATUM_1286807   |
 | ARM            | MMU-500         | #842869         | N/A                     |
diff --git a/xen/arch/arm/Kconfig b/xen/arch/arm/Kconfig
index f938dd21bd..8171b8d04a 100644
--- a/xen/arch/arm/Kconfig
+++ b/xen/arch/arm/Kconfig
@@ -244,6 +244,29 @@  config ARM_ERRATUM_858921
 
 	  If unsure, say Y.
 
+config ARM64_WORKAROUND_REPEAT_TLBI
+	bool
+
+config ARM64_ERRATUM_1286807
+	bool "Cortex-A76/Neoverse-N1: 1286807: Modification of the translation table for a virtual address might lead to read-after-read ordering violation"
+	default y
+	select ARM64_WORKAROUND_REPEAT_TLBI
+	depends on ARM_64
+	help
+	  This option adds a workaround for ARM Cortex-A76/Neoverse-N1 erratum 1286807.
+
+	  On the affected Cortex-A76/Neoverse-N1 cores (r0p0 to r3p0), if a virtual
+	  address for a cacheable mapping of a location is being
+	  accessed by a core while another core is remapping the virtual
+	  address to a new physical page using the recommended
+	  break-before-make sequence, then under very rare circumstances
+	  TLBI+DSB completes before a read using the translation being
+	  invalidated has been observed by other observers. The
+	  workaround repeats the TLBI+DSB operation for all the TLB flush
+	  operations on purpose.
+
+	  If unsure, say Y.
+
 endmenu
 
 config ARM64_HARDEN_BRANCH_PREDICTOR
diff --git a/xen/arch/arm/cpuerrata.c b/xen/arch/arm/cpuerrata.c
index 567911d380..cb4795beec 100644
--- a/xen/arch/arm/cpuerrata.c
+++ b/xen/arch/arm/cpuerrata.c
@@ -424,6 +424,20 @@  static const struct arm_cpu_capabilities arm_errata[] = {
                    (1 << MIDR_VARIANT_SHIFT) | 2),
     },
 #endif
+#ifdef CONFIG_ARM64_ERRATUM_1286807
+    {
+        /* Cortex-A76 r0p0 - r3p0 */
+        .desc = "ARM erratum 1286807",
+        .capability = ARM64_WORKAROUND_REPEAT_TLBI,
+        MIDR_RANGE(MIDR_CORTEX_A76, 0, 3 << MIDR_VARIANT_SHIFT),
+    },
+    {
+        /* Neoverse-N1 r0p0 - r3p0 */
+        .desc = "ARM erratum 1286807",
+        .capability = ARM64_WORKAROUND_REPEAT_TLBI,
+        MIDR_RANGE(MIDR_NEOVERSE_N1, 0, 3 << MIDR_VARIANT_SHIFT),
+    },
+#endif
 #ifdef CONFIG_ARM64_HARDEN_BRANCH_PREDICTOR
     {
         .capability = ARM_HARDEN_BRANCH_PREDICTOR,
diff --git a/xen/include/asm-arm/arm64/flushtlb.h b/xen/include/asm-arm/arm64/flushtlb.h
index ceec59542e..8f2abfaf1d 100644
--- a/xen/include/asm-arm/arm64/flushtlb.h
+++ b/xen/include/asm-arm/arm64/flushtlb.h
@@ -9,6 +9,12 @@ 
  * DSB ISH          // Ensure the TLB invalidation has completed
  * ISB              // See explanation below
  *
+ * ARM64_WORKAROUND_REPEAT_TLBI:
+ * Modification of the translation table for a virtual address might lead to
+ * read-after-read ordering violation.
+ * The workaround repeats TLBI+DSB operation for all the TLB flush operations
+ * on purpose.
+ *
  * For Xen page-tables the ISB will discard any instructions fetched
  * from the old mappings.
  *
@@ -16,15 +22,21 @@ 
  * (and therefore the TLB invalidation) before continuing. So we know
  * the TLBs cannot contain an entry for a mapping we may have removed.
  */
-#define TLB_HELPER(name, tlbop) \
-static inline void name(void)   \
-{                               \
-    asm volatile(               \
-        "dsb  ishst;"           \
-        "tlbi "  # tlbop  ";"   \
-        "dsb  ish;"             \
-        "isb;"                  \
-        : : : "memory");        \
+#define TLB_HELPER(name, tlbop)                  \
+static inline void name(void)                    \
+{                                                \
+    asm volatile(                                \
+        "dsb  ishst;"                            \
+        "tlbi "  # tlbop  ";"                    \
+        ALTERNATIVE(                             \
+            "nop; nop;",                         \
+            "dsb  ish;"                          \
+            "tlbi "  # tlbop  ";",               \
+            ARM64_WORKAROUND_REPEAT_TLBI,        \
+            CONFIG_ARM64_WORKAROUND_REPEAT_TLBI) \
+        "dsb  ish;"                              \
+        "isb;"                                   \
+        : : : "memory");                         \
 }
 
 /* Flush local TLBs, current VMID only. */
diff --git a/xen/include/asm-arm/cpufeature.h b/xen/include/asm-arm/cpufeature.h
index 016a9fe203..c7b5052992 100644
--- a/xen/include/asm-arm/cpufeature.h
+++ b/xen/include/asm-arm/cpufeature.h
@@ -46,8 +46,9 @@ 
 #define ARM_SMCCC_1_1 8
 #define ARM64_WORKAROUND_AT_SPECULATE 9
 #define ARM_WORKAROUND_858921 10
+#define ARM64_WORKAROUND_REPEAT_TLBI 11
 
-#define ARM_NCAPS           11
+#define ARM_NCAPS           12
 
 #ifndef __ASSEMBLY__