diff mbox

[3/3] arm64: add runtime system sanity checks

Message ID 1399905470-26500-4-git-send-email-mark.rutland@arm.com (mailing list archive)
State New, archived
Headers show

Commit Message

Mark Rutland May 12, 2014, 2:37 p.m. UTC
Unexpected variation in certain system register values across CPUs is an
indicator of potential problems with a system. The kernel expects CPUs
to be mostly identical in terms of supported features, even in systems
with homogeneous CPUs, with uniform instruction set support being
critical for the correct operation of userspace.

To help detect issues early where hardware violates the expectations of
the kernel, this patch adds simple runtime sanity checks on important ID
registers in the bring up path of each CPU.

Signed-off-by: Mark Rutland <mark.rutland@arm.com>
---
 arch/arm64/include/asm/cpu.h | 21 +++++++++-
 arch/arm64/kernel/cpuinfo.c  | 97 +++++++++++++++++++++++++++++++++++++++++++-
 2 files changed, 116 insertions(+), 2 deletions(-)

Comments

Will Deacon May 12, 2014, 3:11 p.m. UTC | #1
On Mon, May 12, 2014 at 03:37:50PM +0100, Mark Rutland wrote:
> Unexpected variation in certain system register values across CPUs is an
> indicator of potential problems with a system. The kernel expects CPUs
> to be mostly identical in terms of supported features, even in systems
> with homogeneous CPUs, with uniform instruction set support being

You mean heterogeneous, right?

> critical for the correct operation of userspace.
> 
> To help detect issues early where hardware violates the expectations of
> the kernel, this patch adds simple runtime sanity checks on important ID
> registers in the bring up path of each CPU.
> 
> Signed-off-by: Mark Rutland <mark.rutland@arm.com>
> ---
>  arch/arm64/include/asm/cpu.h | 21 +++++++++-
>  arch/arm64/kernel/cpuinfo.c  | 97 +++++++++++++++++++++++++++++++++++++++++++-
>  2 files changed, 116 insertions(+), 2 deletions(-)
> 
> diff --git a/arch/arm64/include/asm/cpu.h b/arch/arm64/include/asm/cpu.h
> index 74bf9bb..33a0e70 100644
> --- a/arch/arm64/include/asm/cpu.h
> +++ b/arch/arm64/include/asm/cpu.h
> @@ -16,11 +16,30 @@
>  /*
>   * Records attributes of an individual CPU.
>   *
> - * This is used to cache data for /proc/cpuinfo.
> + * This is used to cache data for /proc/cpuinfo and run-time sanity checks.
>   */
>  struct cpuinfo_arm64 {
>  	struct cpu	cpu;
> +	u32		reg_ctr;
> +	u32		reg_cntfrq;
>  	u32		reg_midr;
> +
> +	u64		reg_id_aa64isar0;
> +	u64		reg_id_aa64mmfr0;
> +	u64		reg_id_aa64pfr0;
> +
> +	u32		reg_id_isar0;
> +	u32		reg_id_isar1;
> +	u32		reg_id_isar2;
> +	u32		reg_id_isar3;
> +	u32		reg_id_isar4;
> +	u32		reg_id_isar5;
> +	u32		reg_id_mmfr0;
> +	u32		reg_id_mmfr1;
> +	u32		reg_id_mmfr2;
> +	u32		reg_id_mmfr3;
> +	u32		reg_id_pfr0;
> +	u32		reg_id_pfr1;
>  };
>  
>  DECLARE_PER_CPU(struct cpuinfo_arm64, cpu_data);
> diff --git a/arch/arm64/kernel/cpuinfo.c b/arch/arm64/kernel/cpuinfo.c
> index b9e18c5..bccee60 100644
> --- a/arch/arm64/kernel/cpuinfo.c
> +++ b/arch/arm64/kernel/cpuinfo.c
> @@ -1,5 +1,6 @@
>  /*
> - * Record CPU attributes for later retrieval
> + * Record CPU attributes for later retrieval, and sanity-check that processor
> + * features do not vary unexpectedly.
>   *
>   * Copyright (C) 2014 ARM Ltd.
>   * This program is free software; you can redistribute it and/or modify
> @@ -15,16 +16,110 @@
>   * along with this program.  If not, see <http://www.gnu.org/licenses/>.
>   */
>  #include <asm/cpu.h>
> +#include <asm/arch_timer.h>
>  
> +#include <linux/printk.h>
>  #include <linux/smp.h>
> +#include <linux/types.h>
>  
>  DEFINE_PER_CPU(struct cpuinfo_arm64, cpu_data);
>  
> +static void check_reg_mask(char *name, u64 mask, u64 boot, u64 cur, int cpu)
> +{
> +	if ((boot & mask) == (cur & mask))
> +		return;
> +
> +	pr_warn("SANITY CHECK: Unexpected variation in %s. cpu0: %#016lx, cpu%d: %#016lx\n",
> +		name, (unsigned long)boot, cpu, (unsigned long)cur);

Use could use pr_fmt for the prefix.

> +}
> +
> +#define	CHECK_MASK(field, mask, boot, cur, cpu)	\
> +	check_reg_mask(#field, mask, (boot)->reg_ ## field, (cur)->reg_ ## field, cpu)
> +
> +#define CHECK(field, boot, cur, cpu)	\
> +	CHECK_MASK(field, (u64)-1, boot, cur, cpu)
> +
> +/*
> + * Verify that CPUs don't have unexpected differences that will cause problems.
> + */
> +void cpuinfo_sanity_check(struct cpuinfo_arm64 *cur)
> +{
> +	struct cpuinfo_arm64 *boot = &per_cpu(cpu_data, 0);
> +	int cpu = smp_processor_id();
> +
> +	/*
> +	 * The kernel can handle differing I-cache policies, but otherwise
> +	 * caches should look identical. Userspace JITs will make use of
> +	 * *minLine.
> +	 */
> +	CHECK_MASK(ctr, 0xffff3fff, boot, cur, cpu);

Actually, if we have different I-cache policies we need to make sure that
icache_is_aliasing reports true on all CPUs, otherwise GDB will break
(via flush_ptrace_access).

Otherwise, looks like a good series to me.

Will
Mark Rutland May 12, 2014, 3:41 p.m. UTC | #2
On Mon, May 12, 2014 at 04:11:14PM +0100, Will Deacon wrote:
> On Mon, May 12, 2014 at 03:37:50PM +0100, Mark Rutland wrote:
> > Unexpected variation in certain system register values across CPUs is an
> > indicator of potential problems with a system. The kernel expects CPUs
> > to be mostly identical in terms of supported features, even in systems
> > with homogeneous CPUs, with uniform instruction set support being
> 
> You mean heterogeneous, right?

Yes, it appears I do.

> > critical for the correct operation of userspace.
> > 
> > To help detect issues early where hardware violates the expectations of
> > the kernel, this patch adds simple runtime sanity checks on important ID
> > registers in the bring up path of each CPU.
> > 
> > Signed-off-by: Mark Rutland <mark.rutland@arm.com>
> > ---
> >  arch/arm64/include/asm/cpu.h | 21 +++++++++-
> >  arch/arm64/kernel/cpuinfo.c  | 97 +++++++++++++++++++++++++++++++++++++++++++-
> >  2 files changed, 116 insertions(+), 2 deletions(-)
> > 
> > diff --git a/arch/arm64/include/asm/cpu.h b/arch/arm64/include/asm/cpu.h
> > index 74bf9bb..33a0e70 100644
> > --- a/arch/arm64/include/asm/cpu.h
> > +++ b/arch/arm64/include/asm/cpu.h
> > @@ -16,11 +16,30 @@
> >  /*
> >   * Records attributes of an individual CPU.
> >   *
> > - * This is used to cache data for /proc/cpuinfo.
> > + * This is used to cache data for /proc/cpuinfo and run-time sanity checks.
> >   */
> >  struct cpuinfo_arm64 {
> >  	struct cpu	cpu;
> > +	u32		reg_ctr;
> > +	u32		reg_cntfrq;
> >  	u32		reg_midr;
> > +
> > +	u64		reg_id_aa64isar0;
> > +	u64		reg_id_aa64mmfr0;
> > +	u64		reg_id_aa64pfr0;
> > +
> > +	u32		reg_id_isar0;
> > +	u32		reg_id_isar1;
> > +	u32		reg_id_isar2;
> > +	u32		reg_id_isar3;
> > +	u32		reg_id_isar4;
> > +	u32		reg_id_isar5;
> > +	u32		reg_id_mmfr0;
> > +	u32		reg_id_mmfr1;
> > +	u32		reg_id_mmfr2;
> > +	u32		reg_id_mmfr3;
> > +	u32		reg_id_pfr0;
> > +	u32		reg_id_pfr1;
> >  };
> >  
> >  DECLARE_PER_CPU(struct cpuinfo_arm64, cpu_data);
> > diff --git a/arch/arm64/kernel/cpuinfo.c b/arch/arm64/kernel/cpuinfo.c
> > index b9e18c5..bccee60 100644
> > --- a/arch/arm64/kernel/cpuinfo.c
> > +++ b/arch/arm64/kernel/cpuinfo.c
> > @@ -1,5 +1,6 @@
> >  /*
> > - * Record CPU attributes for later retrieval
> > + * Record CPU attributes for later retrieval, and sanity-check that processor
> > + * features do not vary unexpectedly.
> >   *
> >   * Copyright (C) 2014 ARM Ltd.
> >   * This program is free software; you can redistribute it and/or modify
> > @@ -15,16 +16,110 @@
> >   * along with this program.  If not, see <http://www.gnu.org/licenses/>.
> >   */
> >  #include <asm/cpu.h>
> > +#include <asm/arch_timer.h>
> >  
> > +#include <linux/printk.h>
> >  #include <linux/smp.h>
> > +#include <linux/types.h>
> >  
> >  DEFINE_PER_CPU(struct cpuinfo_arm64, cpu_data);
> >  
> > +static void check_reg_mask(char *name, u64 mask, u64 boot, u64 cur, int cpu)
> > +{
> > +	if ((boot & mask) == (cur & mask))
> > +		return;
> > +
> > +	pr_warn("SANITY CHECK: Unexpected variation in %s. cpu0: %#016lx, cpu%d: %#016lx\n",
> > +		name, (unsigned long)boot, cpu, (unsigned long)cur);
> 
> Use could use pr_fmt for the prefix.

Originally I was going to fold the /proc/cpuinfo seq_file code in here
too (which I'd still like to do), for which I didn't want the "SANITY
CHECK" prefix. It doesn't look like that affects seq_printf though, so I
guess I can.

> 
> > +}
> > +
> > +#define	CHECK_MASK(field, mask, boot, cur, cpu)	\
> > +	check_reg_mask(#field, mask, (boot)->reg_ ## field, (cur)->reg_ ## field, cpu)
> > +
> > +#define CHECK(field, boot, cur, cpu)	\
> > +	CHECK_MASK(field, (u64)-1, boot, cur, cpu)
> > +
> > +/*
> > + * Verify that CPUs don't have unexpected differences that will cause problems.
> > + */
> > +void cpuinfo_sanity_check(struct cpuinfo_arm64 *cur)
> > +{
> > +	struct cpuinfo_arm64 *boot = &per_cpu(cpu_data, 0);
> > +	int cpu = smp_processor_id();
> > +
> > +	/*
> > +	 * The kernel can handle differing I-cache policies, but otherwise
> > +	 * caches should look identical. Userspace JITs will make use of
> > +	 * *minLine.
> > +	 */
> > +	CHECK_MASK(ctr, 0xffff3fff, boot, cur, cpu);
> 
> Actually, if we have different I-cache policies we need to make sure that
> icache_is_aliasing reports true on all CPUs, otherwise GDB will break
> (via flush_ptrace_access).

That's a point. I'll go and investigate that.

Cheers,
Mark.
diff mbox

Patch

diff --git a/arch/arm64/include/asm/cpu.h b/arch/arm64/include/asm/cpu.h
index 74bf9bb..33a0e70 100644
--- a/arch/arm64/include/asm/cpu.h
+++ b/arch/arm64/include/asm/cpu.h
@@ -16,11 +16,30 @@ 
 /*
  * Records attributes of an individual CPU.
  *
- * This is used to cache data for /proc/cpuinfo.
+ * This is used to cache data for /proc/cpuinfo and run-time sanity checks.
  */
 struct cpuinfo_arm64 {
 	struct cpu	cpu;
+	u32		reg_ctr;
+	u32		reg_cntfrq;
 	u32		reg_midr;
+
+	u64		reg_id_aa64isar0;
+	u64		reg_id_aa64mmfr0;
+	u64		reg_id_aa64pfr0;
+
+	u32		reg_id_isar0;
+	u32		reg_id_isar1;
+	u32		reg_id_isar2;
+	u32		reg_id_isar3;
+	u32		reg_id_isar4;
+	u32		reg_id_isar5;
+	u32		reg_id_mmfr0;
+	u32		reg_id_mmfr1;
+	u32		reg_id_mmfr2;
+	u32		reg_id_mmfr3;
+	u32		reg_id_pfr0;
+	u32		reg_id_pfr1;
 };
 
 DECLARE_PER_CPU(struct cpuinfo_arm64, cpu_data);
diff --git a/arch/arm64/kernel/cpuinfo.c b/arch/arm64/kernel/cpuinfo.c
index b9e18c5..bccee60 100644
--- a/arch/arm64/kernel/cpuinfo.c
+++ b/arch/arm64/kernel/cpuinfo.c
@@ -1,5 +1,6 @@ 
 /*
- * Record CPU attributes for later retrieval
+ * Record CPU attributes for later retrieval, and sanity-check that processor
+ * features do not vary unexpectedly.
  *
  * Copyright (C) 2014 ARM Ltd.
  * This program is free software; you can redistribute it and/or modify
@@ -15,16 +16,110 @@ 
  * along with this program.  If not, see <http://www.gnu.org/licenses/>.
  */
 #include <asm/cpu.h>
+#include <asm/arch_timer.h>
 
+#include <linux/printk.h>
 #include <linux/smp.h>
+#include <linux/types.h>
 
 DEFINE_PER_CPU(struct cpuinfo_arm64, cpu_data);
 
+static void check_reg_mask(char *name, u64 mask, u64 boot, u64 cur, int cpu)
+{
+	if ((boot & mask) == (cur & mask))
+		return;
+
+	pr_warn("SANITY CHECK: Unexpected variation in %s. cpu0: %#016lx, cpu%d: %#016lx\n",
+		name, (unsigned long)boot, cpu, (unsigned long)cur);
+}
+
+#define	CHECK_MASK(field, mask, boot, cur, cpu)	\
+	check_reg_mask(#field, mask, (boot)->reg_ ## field, (cur)->reg_ ## field, cpu)
+
+#define CHECK(field, boot, cur, cpu)	\
+	CHECK_MASK(field, (u64)-1, boot, cur, cpu)
+
+/*
+ * Verify that CPUs don't have unexpected differences that will cause problems.
+ */
+void cpuinfo_sanity_check(struct cpuinfo_arm64 *cur)
+{
+	struct cpuinfo_arm64 *boot = &per_cpu(cpu_data, 0);
+	int cpu = smp_processor_id();
+
+	/*
+	 * The kernel can handle differing I-cache policies, but otherwise
+	 * caches should look identical. Userspace JITs will make use of
+	 * *minLine.
+	 */
+	CHECK_MASK(ctr, 0xffff3fff, boot, cur, cpu);
+
+	/* If different, timekeeping will be broken (especially with KVM) */
+	CHECK(cntfrq, boot, cur, cpu);
+
+	/*
+	 * Even in big.LITTLE, processors should be identical instruction-set
+	 * wise.
+	 */
+	CHECK(id_aa64isar0, boot, cur, cpu);
+
+	/*
+	 * Differing PARange support is fine as long as all peripherals and
+	 * memory are mapped within the minimum PARange of all CPUs.
+	 * Linux should not care about secure memory.
+	 */
+	CHECK_MASK(id_aa64mmfr0, 0xffffffffffff0ff0, boot, cur, cpu);
+
+	/*
+	 * EL3 is not our concern, and GIC system register support only matters
+	 * if GICv3 is in use.
+	 */
+	CHECK_MASK(id_aa64pfr0, 0xfffffffff0ff0fff, boot, cur, cpu);
+
+	/*
+	 * If we have AArch32, we care about 32-bit features for compat. These
+	 * registers should be RES0 otherwise.
+	 */
+	CHECK(id_isar0, boot, cur, cpu);
+	CHECK(id_isar1, boot, cur, cpu);
+	CHECK(id_isar2, boot, cur, cpu);
+	CHECK(id_isar3, boot, cur, cpu);
+	CHECK(id_isar4, boot, cur, cpu);
+	CHECK(id_isar5, boot, cur, cpu);
+	CHECK(id_mmfr0, boot, cur, cpu);
+	CHECK(id_mmfr1, boot, cur, cpu);
+	CHECK(id_mmfr2, boot, cur, cpu);
+	CHECK(id_mmfr3, boot, cur, cpu);
+	CHECK(id_pfr0, boot, cur, cpu);
+	CHECK(id_pfr1, boot, cur, cpu);
+}
+
 void cpuinfo_store_cpu(void)
 {
 	int cpu = smp_processor_id();
 	struct cpuinfo_arm64 *cpuinfo = &per_cpu(cpu_data, cpu);
 
+	cpuinfo->reg_ctr = read_cpuid_cachetype();
+	cpuinfo->reg_cntfrq = arch_timer_get_cntfrq();
 	cpuinfo->reg_midr = read_cpuid_id();
+
+	cpuinfo->reg_id_aa64isar0 = read_cpuid(ID_AA64ISAR0_EL1);
+	cpuinfo->reg_id_aa64mmfr0 = read_cpuid(ID_AA64MMFR0_EL1);
+	cpuinfo->reg_id_aa64pfr0 = read_cpuid(ID_AA64PFR0_EL1);
+
+	cpuinfo->reg_id_isar0 = read_cpuid(ID_ISAR0_EL1);
+	cpuinfo->reg_id_isar1 = read_cpuid(ID_ISAR1_EL1);
+	cpuinfo->reg_id_isar2 = read_cpuid(ID_ISAR2_EL1);
+	cpuinfo->reg_id_isar3 = read_cpuid(ID_ISAR3_EL1);
+	cpuinfo->reg_id_isar4 = read_cpuid(ID_ISAR4_EL1);
+	cpuinfo->reg_id_isar5 = read_cpuid(ID_ISAR5_EL1);
+	cpuinfo->reg_id_mmfr0 = read_cpuid(ID_MMFR0_EL1);
+	cpuinfo->reg_id_mmfr1 = read_cpuid(ID_MMFR1_EL1);
+	cpuinfo->reg_id_mmfr2 = read_cpuid(ID_MMFR2_EL1);
+	cpuinfo->reg_id_mmfr3 = read_cpuid(ID_MMFR3_EL1);
+	cpuinfo->reg_id_pfr0 = read_cpuid(ID_PFR0_EL1);
+	cpuinfo->reg_id_pfr1 = read_cpuid(ID_PFR1_EL1);
+
+	cpuinfo_sanity_check(cpuinfo);
 }