| Message ID | 20210511182322.3830-3-catalin.marinas@arm.com (mailing list archive) |
|---|---|
| State | New, archived |
| Headers | show |
| Series | arm64: Taint the kernel on different GMID_EL1.BS |
On Tue, May 11, 2021 at 07:23:22PM +0100, Catalin Marinas wrote: > The GMID_EL1.BS field determines the number of tags accessed by the > LDGM/STGM instructions (EL1 and up), used by the kernel for copying or > zeroing page tags. > > Taint the kernel if GMID_EL1.BS differs between CPUs. > > Signed-off-by: Catalin Marinas <catalin.marinas@arm.com> > Cc: Will Deacon <will@kernel.org> > Cc: Mark Rutland <mark.rutland@arm.com> > Cc: Suzuki K Poulose <Suzuki.Poulose@arm.com> Acked-by: Mark Rutland <mark.rutland@arm.com> Mark. > --- > arch/arm64/include/asm/cpu.h | 1 + > arch/arm64/kernel/cpufeature.c | 17 +++++++++++++++++ > arch/arm64/kernel/cpuinfo.c | 1 + > 3 files changed, 19 insertions(+) > > diff --git a/arch/arm64/include/asm/cpu.h b/arch/arm64/include/asm/cpu.h > index fe5a8499ddc2..9088e72c7cf6 100644 > --- a/arch/arm64/include/asm/cpu.h > +++ b/arch/arm64/include/asm/cpu.h > @@ -20,6 +20,7 @@ struct cpuinfo_arm64 { > u64 reg_dczid; > u64 reg_midr; > u64 reg_revidr; > + u64 reg_gmid; > > u64 reg_id_aa64dfr0; > u64 reg_id_aa64dfr1; > diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c > index ca66a61bb396..3b9089ca52dc 100644 > --- a/arch/arm64/kernel/cpufeature.c > +++ b/arch/arm64/kernel/cpufeature.c > @@ -401,6 +401,11 @@ static const struct arm64_ftr_bits ftr_dczid[] = { > ARM64_FTR_END, > }; > > +static const struct arm64_ftr_bits ftr_gmid[] = { > + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, SYS_GMID_EL1_BS_SHIFT, 4, 0), > + ARM64_FTR_END, > +}; > + > static const struct arm64_ftr_bits ftr_id_isar0[] = { > ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_ISAR0_DIVIDE_SHIFT, 4, 0), > ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_ISAR0_DEBUG_SHIFT, 4, 0), > @@ -618,6 +623,9 @@ static const struct __ftr_reg_entry { > /* Op1 = 0, CRn = 1, CRm = 2 */ > ARM64_FTR_REG(SYS_ZCR_EL1, ftr_zcr), > > + /* Op1 = 1, CRn = 0, CRm = 0 */ > + ARM64_FTR_REG(SYS_GMID_EL1, ftr_gmid), > + > /* Op1 = 3, CRn = 0, CRm = 0 */ 
> { SYS_CTR_EL0, &arm64_ftr_reg_ctrel0 }, > ARM64_FTR_REG(SYS_DCZID_EL0, ftr_dczid), > @@ -872,6 +880,7 @@ void __init init_cpu_features(struct cpuinfo_arm64 *info) > init_cpu_ftr_reg(SYS_CTR_EL0, info->reg_ctr); > init_cpu_ftr_reg(SYS_DCZID_EL0, info->reg_dczid); > init_cpu_ftr_reg(SYS_CNTFRQ_EL0, info->reg_cntfrq); > + init_cpu_ftr_reg(SYS_GMID_EL1, info->reg_gmid); > init_cpu_ftr_reg(SYS_ID_AA64DFR0_EL1, info->reg_id_aa64dfr0); > init_cpu_ftr_reg(SYS_ID_AA64DFR1_EL1, info->reg_id_aa64dfr1); > init_cpu_ftr_reg(SYS_ID_AA64ISAR0_EL1, info->reg_id_aa64isar0); > @@ -1082,6 +1091,14 @@ void update_cpu_features(int cpu, > taint |= check_update_ftr_reg(SYS_DCZID_EL0, cpu, > info->reg_dczid, boot->reg_dczid); > > + /* > + * The kernel uses the LDGM/STGM instructions and the number of tags > + * they read/write depends on the GMID_EL1.BS field. Check that the > + * value is the same on all CPUs. > + */ > + taint |= check_update_ftr_reg(SYS_GMID_EL1, cpu, > + info->reg_gmid, boot->reg_gmid); > + > /* If different, timekeeping will be broken (especially with KVM) */ > taint |= check_update_ftr_reg(SYS_CNTFRQ_EL0, cpu, > info->reg_cntfrq, boot->reg_cntfrq); > diff --git a/arch/arm64/kernel/cpuinfo.c b/arch/arm64/kernel/cpuinfo.c > index 4bea701117d4..cd9f2d51285b 100644 > --- a/arch/arm64/kernel/cpuinfo.c > +++ b/arch/arm64/kernel/cpuinfo.c > @@ -359,6 +359,7 @@ static void __cpuinfo_store_cpu(struct cpuinfo_arm64 *info) > info->reg_dczid = read_cpuid(DCZID_EL0); > info->reg_midr = read_cpuid_id(); > info->reg_revidr = read_cpuid(REVIDR_EL1); > + info->reg_gmid = read_cpuid(GMID_EL1); > > info->reg_id_aa64dfr0 = read_cpuid(ID_AA64DFR0_EL1); > info->reg_id_aa64dfr1 = read_cpuid(ID_AA64DFR1_EL1);
On 11/05/2021 19:23, Catalin Marinas wrote: > The GMID_EL1.BS field determines the number of tags accessed by the > LDGM/STGM instructions (EL1 and up), used by the kernel for copying or > zeroing page tags. > > Taint the kernel if GMID_EL1.BS differs between CPUs. > > Signed-off-by: Catalin Marinas <catalin.marinas@arm.com> > Cc: Will Deacon <will@kernel.org> > Cc: Mark Rutland <mark.rutland@arm.com> > Cc: Suzuki K Poulose <Suzuki.Poulose@arm.com> > --- > arch/arm64/include/asm/cpu.h | 1 + > arch/arm64/kernel/cpufeature.c | 17 +++++++++++++++++ > arch/arm64/kernel/cpuinfo.c | 1 + > 3 files changed, 19 insertions(+) > > diff --git a/arch/arm64/include/asm/cpu.h b/arch/arm64/include/asm/cpu.h > index fe5a8499ddc2..9088e72c7cf6 100644 > --- a/arch/arm64/include/asm/cpu.h > +++ b/arch/arm64/include/asm/cpu.h > @@ -20,6 +20,7 @@ struct cpuinfo_arm64 { > u64 reg_dczid; > u64 reg_midr; > u64 reg_revidr; > + u64 reg_gmid; > > u64 reg_id_aa64dfr0; > u64 reg_id_aa64dfr1; > diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c > index ca66a61bb396..3b9089ca52dc 100644 > --- a/arch/arm64/kernel/cpufeature.c > +++ b/arch/arm64/kernel/cpufeature.c > @@ -401,6 +401,11 @@ static const struct arm64_ftr_bits ftr_dczid[] = { > ARM64_FTR_END, > }; > > +static const struct arm64_ftr_bits ftr_gmid[] = { > + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, SYS_GMID_EL1_BS_SHIFT, 4, 0), > + ARM64_FTR_END, > +}; > + > static const struct arm64_ftr_bits ftr_id_isar0[] = { > ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_ISAR0_DIVIDE_SHIFT, 4, 0), > ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_ISAR0_DEBUG_SHIFT, 4, 0), > @@ -618,6 +623,9 @@ static const struct __ftr_reg_entry { > /* Op1 = 0, CRn = 1, CRm = 2 */ > ARM64_FTR_REG(SYS_ZCR_EL1, ftr_zcr), > > + /* Op1 = 1, CRn = 0, CRm = 0 */ > + ARM64_FTR_REG(SYS_GMID_EL1, ftr_gmid), > + > /* Op1 = 3, CRn = 0, CRm = 0 */ > { SYS_CTR_EL0, &arm64_ftr_reg_ctrel0 }, > ARM64_FTR_REG(SYS_DCZID_EL0, 
ftr_dczid), > @@ -872,6 +880,7 @@ void __init init_cpu_features(struct cpuinfo_arm64 *info) > init_cpu_ftr_reg(SYS_CTR_EL0, info->reg_ctr); > init_cpu_ftr_reg(SYS_DCZID_EL0, info->reg_dczid); > init_cpu_ftr_reg(SYS_CNTFRQ_EL0, info->reg_cntfrq); > + init_cpu_ftr_reg(SYS_GMID_EL1, info->reg_gmid); > init_cpu_ftr_reg(SYS_ID_AA64DFR0_EL1, info->reg_id_aa64dfr0); > init_cpu_ftr_reg(SYS_ID_AA64DFR1_EL1, info->reg_id_aa64dfr1); > init_cpu_ftr_reg(SYS_ID_AA64ISAR0_EL1, info->reg_id_aa64isar0); > @@ -1082,6 +1091,14 @@ void update_cpu_features(int cpu, > taint |= check_update_ftr_reg(SYS_DCZID_EL0, cpu, > info->reg_dczid, boot->reg_dczid); > > + /* > + * The kernel uses the LDGM/STGM instructions and the number of tags > + * they read/write depends on the GMID_EL1.BS field. Check that the > + * value is the same on all CPUs. > + */ > + taint |= check_update_ftr_reg(SYS_GMID_EL1, cpu, > + info->reg_gmid, boot->reg_gmid); > + > /* If different, timekeeping will be broken (especially with KVM) */ > taint |= check_update_ftr_reg(SYS_CNTFRQ_EL0, cpu, > info->reg_cntfrq, boot->reg_cntfrq); > diff --git a/arch/arm64/kernel/cpuinfo.c b/arch/arm64/kernel/cpuinfo.c > index 4bea701117d4..cd9f2d51285b 100644 > --- a/arch/arm64/kernel/cpuinfo.c > +++ b/arch/arm64/kernel/cpuinfo.c > @@ -359,6 +359,7 @@ static void __cpuinfo_store_cpu(struct cpuinfo_arm64 *info) > info->reg_dczid = read_cpuid(DCZID_EL0); > info->reg_midr = read_cpuid_id(); > info->reg_revidr = read_cpuid(REVIDR_EL1); > + info->reg_gmid = read_cpuid(GMID_EL1); > > info->reg_id_aa64dfr0 = read_cpuid(ID_AA64DFR0_EL1); > info->reg_id_aa64dfr1 = read_cpuid(ID_AA64DFR1_EL1); I am seeing the following undefined instruction crash on all our ARM64 Tegra devices on today's -next and bisect is pointing to this patch. Reverting this patch on top of -next does fix the problem. Let me know if you have any thoughts. Thanks! 
Jon [ 0.000000] ------------[ cut here ]------------ [ 0.000000] kernel BUG at /dvs/git/dirty/git-master_l4t-upstream/kernel/arch/arm64/kernel/traps.c:406! [ 0.000000] Internal error: Oops - BUG: 0 [#1] PREEMPT SMP [ 0.000000] Modules linked in: [ 0.000000] CPU: 0 PID: 0 Comm: swapper Not tainted 5.13.0-rc3-next-20210526-gf6b46ef27317 #1 [ 0.000000] Hardware name: NVIDIA Jetson TX2 Developer Kit (DT) [ 0.000000] pstate: 600000c5 (nZCv daIF -PAN -UAO -TCO BTYPE=--) [ 0.000000] pc : do_undefinstr+0x298/0x2b0 [ 0.000000] lr : do_undefinstr+0x2a8/0x2b0 [ 0.000000] sp : ffff800011cb3c10 [ 0.000000] x29: ffff800011cb3c10 x28: ffff800011cc3540 x27: 0000000000000002 [ 0.000000] x26: ffff800011760008 x25: ffff0001f4deccc0 x24: ffff800011cb9000 [ 0.000000] x23: ffff800011fb50f8 x22: ffff800011fb5000 x21: 00000000d5390080 [ 0.000000] x20: ffff800011cb3c90 x19: ffff800011cb9000 x18: ffffffffffffffff [ 0.000000] x17: 0000000000017000 x16: 0000000000000000 x15: 000000000000001e [ 0.000000] x14: 0000000000200000 x13: 0000000275e00000 x12: 0000001000000000 [ 0.000000] x11: 00000000009fc580 x10: 0000000274e03a80 x9 : 0000001000000000 [ 0.000000] x8 : 0000000000200000 x7 : 0000000000000003 x6 : 0000000000000000 [ 0.000000] x5 : ffff800011cc5910 x4 : 0000000000000000 x3 : ffff800011fb50f8 [ 0.000000] x2 : 0000000000000000 x1 : ffff800011cc3540 x0 : 0000000000000005 [ 0.000000] Call trace: [ 0.000000] do_undefinstr+0x298/0x2b0 [ 0.000000] el1_undef+0x2c/0x48 [ 0.000000] el1_sync_handler+0xb4/0xd0 [ 0.000000] el1_sync+0x74/0x100 [ 0.000000] __cpuinfo_store_cpu+0x5c/0x248 [ 0.000000] cpuinfo_store_boot_cpu+0x28/0x54 [ 0.000000] smp_prepare_boot_cpu+0x2c/0x38 [ 0.000000] start_kernel+0x1a4/0x62c [ 0.000000] __primary_switched+0x8c/0x90 [ 0.000000] Code: b5fffe40 b94047b5 17ffffca d503201f (d4210000) [ 0.000000] random: get_random_bytes called from print_oops_end_marker+0x4c/0x68 with crng_init=0 [ 0.000000] ---[ end trace 0000000000000000 ]---
On Wed, May 26, 2021 at 08:47:16PM +0100, Jon Hunter wrote: > On 11/05/2021 19:23, Catalin Marinas wrote: > > diff --git a/arch/arm64/kernel/cpuinfo.c b/arch/arm64/kernel/cpuinfo.c > > index 4bea701117d4..cd9f2d51285b 100644 > > --- a/arch/arm64/kernel/cpuinfo.c > > +++ b/arch/arm64/kernel/cpuinfo.c > > @@ -359,6 +359,7 @@ static void __cpuinfo_store_cpu(struct cpuinfo_arm64 *info) > > info->reg_dczid = read_cpuid(DCZID_EL0); > > info->reg_midr = read_cpuid_id(); > > info->reg_revidr = read_cpuid(REVIDR_EL1); > > + info->reg_gmid = read_cpuid(GMID_EL1); > > > > info->reg_id_aa64dfr0 = read_cpuid(ID_AA64DFR0_EL1); > > info->reg_id_aa64dfr1 = read_cpuid(ID_AA64DFR1_EL1); > > > I am seeing the following undefined instruction crash on all our > ARM64 Tegra devices on today's -next and bisect is pointing to > this patch. Reverting this patch on top of -next does fix the > problem. Let me know if you have any thoughts. Catalin just posted a fixed version, so this should be solved asap (I'll push a new branch shortly). Thanks! Will
On Wed, May 26, 2021 at 08:47:16PM +0100, Jon Hunter wrote: > On 11/05/2021 19:23, Catalin Marinas wrote: > > The GMID_EL1.BS field determines the number of tags accessed by the > > LDGM/STGM instructions (EL1 and up), used by the kernel for copying or > > zeroing page tags. > > > > Taint the kernel if GMID_EL1.BS differs between CPUs. > > > > Signed-off-by: Catalin Marinas <catalin.marinas@arm.com> > > Cc: Will Deacon <will@kernel.org> > > Cc: Mark Rutland <mark.rutland@arm.com> > > Cc: Suzuki K Poulose <Suzuki.Poulose@arm.com> [...] > I am seeing the following undefined instruction crash on all our > ARM64 Tegra devices on today's -next and bisect is pointing to > this patch. Reverting this patch on top of -next does fix the > problem. Let me know if you have any thoughts. Yeah, sorry about that. Posted a new version here, better tested: https://lore.kernel.org/r/20210526193621.21559-1-catalin.marinas@arm.com Will should have dropped the old one from linux-next but it takes a few hours before Stephen re-creates the tree.
On 26/05/2021 22:48, Catalin Marinas wrote: > On Wed, May 26, 2021 at 08:47:16PM +0100, Jon Hunter wrote: >> On 11/05/2021 19:23, Catalin Marinas wrote: >>> The GMID_EL1.BS field determines the number of tags accessed by the >>> LDGM/STGM instructions (EL1 and up), used by the kernel for copying or >>> zeroing page tags. >>> >>> Taint the kernel if GMID_EL1.BS differs between CPUs. >>> >>> Signed-off-by: Catalin Marinas <catalin.marinas@arm.com> >>> Cc: Will Deacon <will@kernel.org> >>> Cc: Mark Rutland <mark.rutland@arm.com> >>> Cc: Suzuki K Poulose <Suzuki.Poulose@arm.com> > [...] >> I am seeing the following undefined instruction crash on all our >> ARM64 Tegra devices on today's -next and bisect is pointing to >> this patch. Reverting this patch on top of -next does fix the >> problem. Let me know if you have any thoughts. > > Yeah, sorry about that. Posted a new version here, better tested: > > https://lore.kernel.org/r/20210526193621.21559-1-catalin.marinas@arm.com > > Will should have dropped the old one from linux-next but it takes a few > hours before Stephen re-creates the tree. No problem. Thanks for the quick fix! Jon
diff --git a/arch/arm64/include/asm/cpu.h b/arch/arm64/include/asm/cpu.h index fe5a8499ddc2..9088e72c7cf6 100644 --- a/arch/arm64/include/asm/cpu.h +++ b/arch/arm64/include/asm/cpu.h @@ -20,6 +20,7 @@ struct cpuinfo_arm64 { u64 reg_dczid; u64 reg_midr; u64 reg_revidr; + u64 reg_gmid; u64 reg_id_aa64dfr0; u64 reg_id_aa64dfr1; diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index ca66a61bb396..3b9089ca52dc 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -401,6 +401,11 @@ static const struct arm64_ftr_bits ftr_dczid[] = { ARM64_FTR_END, }; +static const struct arm64_ftr_bits ftr_gmid[] = { + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, SYS_GMID_EL1_BS_SHIFT, 4, 0), + ARM64_FTR_END, +}; + static const struct arm64_ftr_bits ftr_id_isar0[] = { ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_ISAR0_DIVIDE_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_ISAR0_DEBUG_SHIFT, 4, 0), @@ -618,6 +623,9 @@ static const struct __ftr_reg_entry { /* Op1 = 0, CRn = 1, CRm = 2 */ ARM64_FTR_REG(SYS_ZCR_EL1, ftr_zcr), + /* Op1 = 1, CRn = 0, CRm = 0 */ + ARM64_FTR_REG(SYS_GMID_EL1, ftr_gmid), + /* Op1 = 3, CRn = 0, CRm = 0 */ { SYS_CTR_EL0, &arm64_ftr_reg_ctrel0 }, ARM64_FTR_REG(SYS_DCZID_EL0, ftr_dczid), @@ -872,6 +880,7 @@ void __init init_cpu_features(struct cpuinfo_arm64 *info) init_cpu_ftr_reg(SYS_CTR_EL0, info->reg_ctr); init_cpu_ftr_reg(SYS_DCZID_EL0, info->reg_dczid); init_cpu_ftr_reg(SYS_CNTFRQ_EL0, info->reg_cntfrq); + init_cpu_ftr_reg(SYS_GMID_EL1, info->reg_gmid); init_cpu_ftr_reg(SYS_ID_AA64DFR0_EL1, info->reg_id_aa64dfr0); init_cpu_ftr_reg(SYS_ID_AA64DFR1_EL1, info->reg_id_aa64dfr1); init_cpu_ftr_reg(SYS_ID_AA64ISAR0_EL1, info->reg_id_aa64isar0); @@ -1082,6 +1091,14 @@ void update_cpu_features(int cpu, taint |= check_update_ftr_reg(SYS_DCZID_EL0, cpu, info->reg_dczid, boot->reg_dczid); + /* + * The kernel uses the LDGM/STGM instructions and the number of tags + * they 
read/write depends on the GMID_EL1.BS field. Check that the + * value is the same on all CPUs. + */ + taint |= check_update_ftr_reg(SYS_GMID_EL1, cpu, + info->reg_gmid, boot->reg_gmid); + /* If different, timekeeping will be broken (especially with KVM) */ taint |= check_update_ftr_reg(SYS_CNTFRQ_EL0, cpu, info->reg_cntfrq, boot->reg_cntfrq); diff --git a/arch/arm64/kernel/cpuinfo.c b/arch/arm64/kernel/cpuinfo.c index 4bea701117d4..cd9f2d51285b 100644 --- a/arch/arm64/kernel/cpuinfo.c +++ b/arch/arm64/kernel/cpuinfo.c @@ -359,6 +359,7 @@ static void __cpuinfo_store_cpu(struct cpuinfo_arm64 *info) info->reg_dczid = read_cpuid(DCZID_EL0); info->reg_midr = read_cpuid_id(); info->reg_revidr = read_cpuid(REVIDR_EL1); + info->reg_gmid = read_cpuid(GMID_EL1); info->reg_id_aa64dfr0 = read_cpuid(ID_AA64DFR0_EL1); info->reg_id_aa64dfr1 = read_cpuid(ID_AA64DFR1_EL1);
The GMID_EL1.BS field determines the number of tags accessed by the LDGM/STGM instructions (EL1 and up), used by the kernel for copying or zeroing page tags. Taint the kernel if GMID_EL1.BS differs between CPUs. Signed-off-by: Catalin Marinas <catalin.marinas@arm.com> Cc: Will Deacon <will@kernel.org> Cc: Mark Rutland <mark.rutland@arm.com> Cc: Suzuki K Poulose <Suzuki.Poulose@arm.com> --- arch/arm64/include/asm/cpu.h | 1 + arch/arm64/kernel/cpufeature.c | 17 +++++++++++++++++ arch/arm64/kernel/cpuinfo.c | 1 + 3 files changed, 19 insertions(+)