diff mbox series

[v2,1/1] arm64: Add workaround for Fujitsu A64FX erratum 010001

Message ID 8898674D84E3B24BA3A2D289B872026A6A2A32EB@G01JPEXMBKW03 (mailing list archive)
State New, archived
Headers show
Series arm64: Add workaround for Fujitsu A64FX erratum 010001 | expand

Commit Message

Zhang, Lei Jan. 22, 2019, 8:54 a.m. UTC
On some variants of the Fujitsu-A64FX cores ver(1.0, 1.1),
memory accesses may cause an undefined fault (Data abort,
DFSC=0b111111) due to a CPU erratum (Fujitsu #010001).

This patch introduces the workaround to the problem.
The workaround is to change the fault handler for Data abort
DFSC=0b111111 to ignore this undefined fault, which will only
affect the Fujitsu-A64FX.

Signed-off-by: Lei Zhang <zhang.lei@jp.fujitsu.com>
Tested-by: Lei Zhang <zhang.lei@jp.fujitsu.com>
---
 Documentation/arm64/silicon-errata.txt |  1 +
 arch/arm64/Kconfig                     | 13 +++++++++++++
 arch/arm64/include/asm/cpucaps.h       |  3 ++-
 arch/arm64/include/asm/cputype.h       |  4 ++++
 arch/arm64/kernel/cpu_errata.c         |  8 ++++++++
 arch/arm64/mm/fault.c                  | 24 +++++++++++++++++++++++-
 6 files changed, 51 insertions(+), 2 deletions(-)

Comments

Catalin Marinas Jan. 25, 2019, 6:08 p.m. UTC | #1
On Tue, Jan 22, 2019 at 08:54:33AM +0000, Zhang, Lei wrote:
> diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c
> index efb7b2c..37e4f18 100644
> --- a/arch/arm64/mm/fault.c
> +++ b/arch/arm64/mm/fault.c
> @@ -666,6 +666,28 @@ static int do_sea(unsigned long addr, unsigned int esr, struct pt_regs *regs)
>  	return 0;
>  }
>  
> +static int do_bad_unknown_63(unsigned long addr, unsigned int esr, struct pt_regs *regs)
> +{
> +	/*
> +	 * On some variants of the Fujitsu-A64FX cores ver(1.0, 1.1),
> +	 * memory accesses may spuriously trigger data aborts with
> +	 * DFSC=0b111111.
> +	 */
> +	if (IS_ENABLED(CONFIG_FUJITSU_ERRATUM_010001)) {
> +		if (cpus_have_cap(ARM64_WORKAROUND_FUJITSU_A64FX_0100001)) {
> +			return 0;
> +		} else { /* cpu capabilities maybe not ready*/
> +			unsigned int current_cpu_midr = read_cpuid_id();
> +			const struct midr_range fujitsu_a64fx_midr_range = {
> +				MIDR_FUJITSU_A64FX, MIDR_CPU_VAR_REV(0, 0), MIDR_CPU_VAR_REV(1, 0)
> +			};
> +			if (is_midr_in_range(current_cpu_midr, &fujitsu_a64fx_midr_range) == TRUE)
> +				return 0;
> +		}
> +	}
> +	return do_bad(addr, esr, regs);
> +}

IIUC, this can happen very early when the errata framework isn't yet
ready. Given that this is not on a fast path (you already took a fault),
I don't think it's worth optimising for cpus_have_cap() (and
ARM64_WORKAROUND_FUJITSU_A64FX_0100001). I've seen Mark's comments on
why checking MIDR in a preemptible context is not a good idea but I
suspect your platform is homogeneous (i.e. not big.LITTLE).
Zhang, Lei Jan. 29, 2019, 10:54 a.m. UTC | #2
Hi Catalin,
> -----Original Message-----
> From: linux-arm-kernel
> [mailto:linux-arm-kernel-bounces@lists.infradead.org] On Behalf Of
> Catalin Marinas
> Sent: Saturday, January 26, 2019 3:08 AM
> To: Zhang, Lei/張 雷
> Cc: 'Mark Rutland'; 'will.deacon@arm.com';
> 'linux-kernel@vger.kernel.org';
> 'linux-arm-kernel@lists.infradead.org'
> Subject: Re: [PATCH v2 1/1] arm64: Add workaround for Fujitsu A64FX
> erratum 010001
> 
> IIUC, this can happen very early when the errata framework isn't yet
> ready. Given that this is not on a fast path (you already took a fault),
> I don't think it's worth optimising for cpus_have_cap() (and
> ARM64_WORKAROUND_FUJITSU_A64FX_0100001). I've seen Mark's comments on
> why checking MIDR in a preemptible context is not a good idea but I
> suspect your platform is homogeneous (i.e. not big.LITTLE).
Thanks for the comment.
I will post a new patch today to address the fast-path comment.
By the way, our platform is homogeneous.


Best Regards,
Lei Zhang
zhang.lei@jp.fujitsu.com
diff mbox series

Patch

diff --git a/Documentation/arm64/silicon-errata.txt b/Documentation/arm64/silicon-errata.txt
index 1f09d04..26d64e9 100644
--- a/Documentation/arm64/silicon-errata.txt
+++ b/Documentation/arm64/silicon-errata.txt
@@ -80,3 +80,4 @@  stable kernels.
 | Qualcomm Tech. | Falkor v1       | E1009           | QCOM_FALKOR_ERRATUM_1009    |
 | Qualcomm Tech. | QDF2400 ITS     | E0065           | QCOM_QDF2400_ERRATUM_0065   |
 | Qualcomm Tech. | Falkor v{1,2}   | E1041           | QCOM_FALKOR_ERRATUM_1041    |
+| Fujitsu        | A64FX           | E#010001        | FUJITSU_ERRATUM_010001      |
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index a4168d3..9c09b2b 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -643,6 +643,19 @@  config QCOM_FALKOR_ERRATUM_E1041
 
 	  If unsure, say Y.
 
+config FUJITSU_ERRATUM_010001
+	bool "Fujitsu-A64FX erratum E#010001: Undefined fault may occur wrongly"
+	default y
+	help
+	  This option adds a workaround for Fujitsu-A64FX erratum E#010001.
+	  On some variants of the Fujitsu-A64FX cores ver(1.0, 1.1), memory accesses
+	  may cause an undefined fault (Data abort, DFSC=0b111111).
+	  The workaround is to replace the fault handler for Data abort DFSC=0b111111
+	  with a new one that ignores this undefined fault, which will only affect
+	  the Fujitsu-A64FX.
+
+	  If unsure, say Y.
+
 endmenu
 
 
diff --git a/arch/arm64/include/asm/cpucaps.h b/arch/arm64/include/asm/cpucaps.h
index 82e9099..3a0b375 100644
--- a/arch/arm64/include/asm/cpucaps.h
+++ b/arch/arm64/include/asm/cpucaps.h
@@ -60,7 +60,8 @@ 
 #define ARM64_HAS_ADDRESS_AUTH_IMP_DEF		39
 #define ARM64_HAS_GENERIC_AUTH_ARCH		40
 #define ARM64_HAS_GENERIC_AUTH_IMP_DEF		41
+#define ARM64_WORKAROUND_FUJITSU_A64FX_0100001 42
 
-#define ARM64_NCAPS				42
+#define ARM64_NCAPS				43
 
 #endif /* __ASM_CPUCAPS_H */
diff --git a/arch/arm64/include/asm/cputype.h b/arch/arm64/include/asm/cputype.h
index 951ed1a..70203f9 100644
--- a/arch/arm64/include/asm/cputype.h
+++ b/arch/arm64/include/asm/cputype.h
@@ -76,6 +76,7 @@ 
 #define ARM_CPU_IMP_BRCM		0x42
 #define ARM_CPU_IMP_QCOM		0x51
 #define ARM_CPU_IMP_NVIDIA		0x4E
+#define ARM_CPU_IMP_FUJITSU		0x46
 
 #define ARM_CPU_PART_AEM_V8		0xD0F
 #define ARM_CPU_PART_FOUNDATION		0xD00
@@ -104,6 +105,8 @@ 
 #define NVIDIA_CPU_PART_DENVER		0x003
 #define NVIDIA_CPU_PART_CARMEL		0x004
 
+#define FUJITSU_CPU_PART_A64FX		0x001
+
 #define MIDR_CORTEX_A53 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A53)
 #define MIDR_CORTEX_A57 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A57)
 #define MIDR_CORTEX_A72 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A72)
@@ -122,6 +125,7 @@ 
 #define MIDR_QCOM_KRYO MIDR_CPU_MODEL(ARM_CPU_IMP_QCOM, QCOM_CPU_PART_KRYO)
 #define MIDR_NVIDIA_DENVER MIDR_CPU_MODEL(ARM_CPU_IMP_NVIDIA, NVIDIA_CPU_PART_DENVER)
 #define MIDR_NVIDIA_CARMEL MIDR_CPU_MODEL(ARM_CPU_IMP_NVIDIA, NVIDIA_CPU_PART_CARMEL)
+#define MIDR_FUJITSU_A64FX MIDR_CPU_MODEL(ARM_CPU_IMP_FUJITSU, FUJITSU_CPU_PART_A64FX)
 
 #ifndef __ASSEMBLY__
 
diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c
index 9950bb0..fc0737f 100644
--- a/arch/arm64/kernel/cpu_errata.c
+++ b/arch/arm64/kernel/cpu_errata.c
@@ -739,6 +739,14 @@  static bool has_ssbd_mitigation(const struct arm64_cpu_capabilities *entry,
 		ERRATA_MIDR_RANGE(MIDR_CORTEX_A76, 0, 0, 2, 0),
 	},
 #endif
+#ifdef CONFIG_FUJITSU_ERRATUM_010001
+	{
+		.desc = "Fujitsu erratum 010001",
+		.capability = ARM64_WORKAROUND_FUJITSU_A64FX_0100001,
+		ERRATA_MIDR_RANGE(MIDR_FUJITSU_A64FX, 0, 0, 1, 0),
+	},
+#endif
+
 	{
 	}
 };
diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c
index efb7b2c..37e4f18 100644
--- a/arch/arm64/mm/fault.c
+++ b/arch/arm64/mm/fault.c
@@ -666,6 +666,28 @@  static int do_sea(unsigned long addr, unsigned int esr, struct pt_regs *regs)
 	return 0;
 }
 
+static int do_bad_unknown_63(unsigned long addr, unsigned int esr, struct pt_regs *regs)
+{
+	/*
+	 * Fujitsu A64FX erratum 010001: affected cores may spuriously raise
+	 * a data abort with DFSC=0b111111; ignore it by returning 0.
+	 * Any other CPU falls through to do_bad().
+	 */
+	if (IS_ENABLED(CONFIG_FUJITSU_ERRATUM_010001)) {
+		if (cpus_have_cap(ARM64_WORKAROUND_FUJITSU_A64FX_0100001)) {
+			return 0;
+		} else { /* CPU capabilities may not be ready yet: check MIDR */
+			unsigned int current_cpu_midr = read_cpuid_id();
+			const struct midr_range fujitsu_a64fx_midr_range = {
+				MIDR_FUJITSU_A64FX, MIDR_CPU_VAR_REV(0, 0), MIDR_CPU_VAR_REV(1, 0)
+			};
+			if (is_midr_in_range(current_cpu_midr, &fujitsu_a64fx_midr_range))
+				return 0;
+		}
+	}
+	return do_bad(addr, esr, regs);
+}
+
 static const struct fault_info fault_info[] = {
 	{ do_bad,		SIGKILL, SI_KERNEL,	"ttbr address size fault"	},
 	{ do_bad,		SIGKILL, SI_KERNEL,	"level 1 address size fault"	},
@@ -730,7 +752,7 @@  static int do_sea(unsigned long addr, unsigned int esr, struct pt_regs *regs)
 	{ do_bad,		SIGKILL, SI_KERNEL,	"unknown 60"			},
 	{ do_bad,		SIGKILL, SI_KERNEL,	"section domain fault"		},
 	{ do_bad,		SIGKILL, SI_KERNEL,	"page domain fault"		},
-	{ do_bad,		SIGKILL, SI_KERNEL,	"unknown 63"			},
+	{ do_bad_unknown_63,	SIGKILL, SI_KERNEL,	"unknown 63"			},
 };
 
 int handle_guest_sea(phys_addr_t addr, unsigned int esr)