
[v2,3/5] arm64: Fix incorrect irqflag restore for priority masking

Message ID 1556553607-46531-4-git-send-email-julien.thierry@arm.com (mailing list archive)
State New, archived
Series arm64: IRQ priority masking and Pseudo-NMI fixes

Commit Message

Julien Thierry April 29, 2019, 4 p.m. UTC
When using IRQ priority masking to disable interrupts, in order to deal
with the PSR.I state, local_irq_save() would convert the I bit into a
PMR value (GIC_PRIO_IRQOFF). This resulted in local_irq_restore()
potentially modifying the value of PMR in undesired locations, depending
on the state of PSR.I at the time the flags were saved [1].

In an attempt to solve this issue in a less hackish manner, introduce
a bit (GIC_PRIO_IGNORE_PMR) in the PMR values that represents
whether PSR.I is being used to disable interrupts, in which case it
takes precedence over the status of interrupt masking via PMR.

GIC_PRIO_IGNORE_PMR is chosen such that (<pmr_value> |
GIC_PRIO_IGNORE_PMR) does not mask more interrupts than <pmr_value>, as
some sections (e.g. arch_cpu_idle(), the interrupt acknowledge path)
require PMR not to mask interrupts that could be signaled to the
CPU when using only PSR.I.

[1] https://www.spinics.net/lists/arm-kernel/msg716956.html
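
As an illustration of that property (a sketch based on the values this
patch defines in ptrace.h; lower PMR values mask more interrupts):

	#define GIC_PRIO_IRQON		0xc0	/* no IRQs masked by PMR */
	#define GIC_PRIO_IRQOFF		(GIC_PRIO_IRQON & ~0x80)	/* 0x40 */
	#define GIC_PRIO_IGNORE_PMR	(1 << 4)	/* 0x10 */

	/*
	 * ORing GIC_PRIO_IGNORE_PMR into either value can only raise it
	 * (0x40 -> 0x50, 0xc0 -> 0xd0), so it never masks additional
	 * interrupts.
	 */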

Fixes: 4a503217ce37 ("arm64: irqflags: Use ICC_PMR_EL1 for interrupt masking")
Signed-off-by: Julien Thierry <julien.thierry@arm.com>
Reported-by: Zenghui Yu <yuzenghui@huawei.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Wei Li <liwei391@huawei.com>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Will Deacon <will.deacon@arm.com>
Cc: Christoffer Dall <christoffer.dall@arm.com>
Cc: Marc Zyngier <marc.zyngier@arm.com>
Cc: James Morse <james.morse@arm.com>
Cc: Suzuki K Pouloze <suzuki.poulose@arm.com>
Cc: Oleg Nesterov <oleg@redhat.com>
---
 arch/arm64/include/asm/arch_gicv3.h |  4 ++-
 arch/arm64/include/asm/assembler.h  |  9 +++++
 arch/arm64/include/asm/daifflags.h  | 22 ++++++++----
 arch/arm64/include/asm/irqflags.h   | 69 ++++++++++++++++---------------------
 arch/arm64/include/asm/kvm_host.h   |  4 ++-
 arch/arm64/include/asm/ptrace.h     | 10 ++++--
 arch/arm64/kernel/entry.S           | 30 +++++++++++++---
 arch/arm64/kernel/process.c         |  2 +-
 arch/arm64/kernel/smp.c             |  8 +++--
 arch/arm64/kvm/hyp/switch.c         |  2 +-
 10 files changed, 100 insertions(+), 60 deletions(-)

--
1.9.1

Comments

Marc Zyngier May 7, 2019, 8:36 a.m. UTC | #1
On 29/04/2019 17:00, Julien Thierry wrote:
> When using IRQ priority masking to disable interrupts, in order to deal
> with the PSR.I state, local_irq_save() would convert the I bit into a
> PMR value (GIC_PRIO_IRQOFF). This resulted in local_irq_restore()
> potentially modifying the value of PMR in undesired locations, depending
> on the state of PSR.I at the time the flags were saved [1].
> 
> In an attempt to solve this issue in a less hackish manner, introduce
> a bit (GIC_PRIO_IGNORE_PMR) in the PMR values that represents
> whether PSR.I is being used to disable interrupts, in which case it
> takes precedence over the status of interrupt masking via PMR.
> 
> GIC_PRIO_IGNORE_PMR is chosen such that (<pmr_value> |
> GIC_PRIO_IGNORE_PMR) does not mask more interrupts than <pmr_value>, as
> some sections (e.g. arch_cpu_idle(), the interrupt acknowledge path)
> require PMR not to mask interrupts that could be signaled to the
> CPU when using only PSR.I.
> 
> [1] https://www.spinics.net/lists/arm-kernel/msg716956.html
> 
> Fixes: 4a503217ce37 ("arm64: irqflags: Use ICC_PMR_EL1 for interrupt masking")
> Signed-off-by: Julien Thierry <julien.thierry@arm.com>
> Reported-by: Zenghui Yu <yuzenghui@huawei.com>
> Cc: Steven Rostedt <rostedt@goodmis.org>
> Cc: Wei Li <liwei391@huawei.com>
> Cc: Catalin Marinas <catalin.marinas@arm.com>
> Cc: Will Deacon <will.deacon@arm.com>
> Cc: Christoffer Dall <christoffer.dall@arm.com>
> Cc: Marc Zyngier <marc.zyngier@arm.com>
> Cc: James Morse <james.morse@arm.com>
> Cc: Suzuki K Pouloze <suzuki.poulose@arm.com>
> Cc: Oleg Nesterov <oleg@redhat.com>
> ---
>  arch/arm64/include/asm/arch_gicv3.h |  4 ++-
>  arch/arm64/include/asm/assembler.h  |  9 +++++
>  arch/arm64/include/asm/daifflags.h  | 22 ++++++++----
>  arch/arm64/include/asm/irqflags.h   | 69 ++++++++++++++++---------------------
>  arch/arm64/include/asm/kvm_host.h   |  4 ++-
>  arch/arm64/include/asm/ptrace.h     | 10 ++++--
>  arch/arm64/kernel/entry.S           | 30 +++++++++++++---
>  arch/arm64/kernel/process.c         |  2 +-
>  arch/arm64/kernel/smp.c             |  8 +++--
>  arch/arm64/kvm/hyp/switch.c         |  2 +-
>  10 files changed, 100 insertions(+), 60 deletions(-)
> 
> diff --git a/arch/arm64/include/asm/arch_gicv3.h b/arch/arm64/include/asm/arch_gicv3.h
> index 14b41dd..3102c9a 100644
> --- a/arch/arm64/include/asm/arch_gicv3.h
> +++ b/arch/arm64/include/asm/arch_gicv3.h
> @@ -163,7 +163,9 @@ static inline bool gic_prio_masking_enabled(void)
> 
>  static inline void gic_pmr_mask_irqs(void)
>  {
> -	BUILD_BUG_ON(GICD_INT_DEF_PRI <= GIC_PRIO_IRQOFF);
> +	BUILD_BUG_ON(GICD_INT_DEF_PRI < (GIC_PRIO_IRQOFF |
> +					 GIC_PRIO_IGNORE_PMR));
> +	BUILD_BUG_ON(GICD_INT_DEF_PRI >= GIC_PRIO_IRQON);
>  	gic_write_pmr(GIC_PRIO_IRQOFF);
>  }
> 
> diff --git a/arch/arm64/include/asm/assembler.h b/arch/arm64/include/asm/assembler.h
> index c5308d0..601154d 100644
> --- a/arch/arm64/include/asm/assembler.h
> +++ b/arch/arm64/include/asm/assembler.h
> @@ -62,6 +62,15 @@
>  	msr	daifclr, #(8 | 4 | 1)
>  	.endm
> 
> +	.macro	suspend_irq_prio_masking, tmp:req
> +#ifdef CONFIG_ARM64_PSEUDO_NMI
> +	alternative_if ARM64_HAS_IRQ_PRIO_MASKING
> +	mov	\tmp, #(GIC_PRIO_IRQON | GIC_PRIO_IGNORE_PMR)
> +	msr_s	SYS_ICC_PMR_EL1, \tmp
> +	alternative_else_nop_endif
> +#endif
> +	.endm
> +
>  /*
>   * Save/restore interrupts.
>   */
> diff --git a/arch/arm64/include/asm/daifflags.h b/arch/arm64/include/asm/daifflags.h
> index db452aa..a32ece9 100644
> --- a/arch/arm64/include/asm/daifflags.h
> +++ b/arch/arm64/include/asm/daifflags.h
> @@ -18,6 +18,7 @@
> 
>  #include <linux/irqflags.h>
> 
> +#include <asm/arch_gicv3.h>
>  #include <asm/cpufeature.h>
> 
>  #define DAIF_PROCCTX		0
> @@ -32,6 +33,11 @@ static inline void local_daif_mask(void)
>  		:
>  		:
>  		: "memory");
> +
> +	/* Don't really care for a dsb here, we don't intend to enable IRQs */
> +	if (system_uses_irq_prio_masking())
> +		gic_write_pmr(GIC_PRIO_IRQON | GIC_PRIO_IGNORE_PMR);
> +
>  	trace_hardirqs_off();
>  }
> 
> @@ -43,7 +49,7 @@ static inline unsigned long local_daif_save(void)
> 
>  	if (system_uses_irq_prio_masking()) {
>  		/* If IRQs are masked with PMR, reflect it in the flags */
> -		if (read_sysreg_s(SYS_ICC_PMR_EL1) <= GIC_PRIO_IRQOFF)
> +		if (read_sysreg_s(SYS_ICC_PMR_EL1) != GIC_PRIO_IRQON)
>  			flags |= PSR_I_BIT;
>  	}
> 
> @@ -59,14 +65,16 @@ static inline void local_daif_restore(unsigned long flags)
>  	if (!irq_disabled) {
>  		trace_hardirqs_on();
> 
> -		if (system_uses_irq_prio_masking())
> -			arch_local_irq_enable();
> -	} else if (!(flags & PSR_A_BIT)) {
> +		if (system_uses_irq_prio_masking()) {
> +			gic_write_pmr(GIC_PRIO_IRQON);
> +			dsb(sy);
> +		}
> +	} else if (system_uses_irq_prio_masking()) {
>  		/*
>  		 * If interrupts are disabled but we can take
>  		 * asynchronous errors, we can take NMIs
>  		 */
> -		if (system_uses_irq_prio_masking()) {
> +		if (!(flags & PSR_A_BIT)) {
>  			flags &= ~PSR_I_BIT;
>  			/*
>  			 * There has been concern that the write to daif
> @@ -87,7 +95,9 @@ static inline void local_daif_restore(unsigned long flags)
>  			 *
>  			 * So we don't need additional synchronization here.
>  			 */
> -			arch_local_irq_disable();
> +			gic_write_pmr(GIC_PRIO_IRQOFF);
> +		} else {
> +			gic_write_pmr(GIC_PRIO_IRQON | GIC_PRIO_IGNORE_PMR);
>  		}

Nit: move the write to PMR outside of the if/else clause with an
intermediate variable to hold the PMR value. Not a big deal, but I think
it helps with the nesting.
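
Something along those lines, presumably (an untested sketch of the
suggested shape, not the final code):

	} else if (system_uses_irq_prio_masking()) {
		u32 pmr;

		if (!(flags & PSR_A_BIT)) {
			/* If we can take aborts, we can take NMIs */
			flags &= ~PSR_I_BIT;
			pmr = GIC_PRIO_IRQOFF;
		} else {
			pmr = GIC_PRIO_IRQON | GIC_PRIO_IGNORE_PMR;
		}

		gic_write_pmr(pmr);
	}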

>  	}
> 
> diff --git a/arch/arm64/include/asm/irqflags.h b/arch/arm64/include/asm/irqflags.h
> index 43d8366..516cdfc 100644
> --- a/arch/arm64/include/asm/irqflags.h
> +++ b/arch/arm64/include/asm/irqflags.h
> @@ -67,43 +67,48 @@ static inline void arch_local_irq_disable(void)
>   */
>  static inline unsigned long arch_local_save_flags(void)
>  {
> -	unsigned long daif_bits;
>  	unsigned long flags;
> 
> -	daif_bits = read_sysreg(daif);
> -
> -	/*
> -	 * The asm is logically equivalent to:
> -	 *
> -	 * if (system_uses_irq_prio_masking())
> -	 *	flags = (daif_bits & PSR_I_BIT) ?
> -	 *			GIC_PRIO_IRQOFF :
> -	 *			read_sysreg_s(SYS_ICC_PMR_EL1);
> -	 * else
> -	 *	flags = daif_bits;
> -	 */
>  	asm volatile(ALTERNATIVE(
> -			"mov	%0, %1\n"
> -			"nop\n"
> -			"nop",
> -			"mrs_s	%0, " __stringify(SYS_ICC_PMR_EL1) "\n"
> -			"ands	%1, %1, " __stringify(PSR_I_BIT) "\n"
> -			"csel	%0, %0, %2, eq",
> -			ARM64_HAS_IRQ_PRIO_MASKING)
> -		: "=&r" (flags), "+r" (daif_bits)
> -		: "r" ((unsigned long) GIC_PRIO_IRQOFF)
> +		"mrs	%0, daif",
> +		"mrs_s	%0, " __stringify(SYS_ICC_PMR_EL1) "\n",
> +		ARM64_HAS_IRQ_PRIO_MASKING)
> +		: "=&r" (flags)
> +		:
>  		: "memory");
> 
>  	return flags;
>  }
> 
> +static inline int arch_irqs_disabled_flags(unsigned long flags)
> +{
> +	int res;
> +
> +	asm volatile(ALTERNATIVE(
> +		"and	%w0, %w1, #" __stringify(PSR_I_BIT) "\n"
> +		"nop",
> +		"cmp	%w1, #" __stringify(GIC_PRIO_IRQON) "\n"
> +		"cset	%w0, ne",
> +		ARM64_HAS_IRQ_PRIO_MASKING)
> +		: "=&r" (res)
> +		: "r" ((int) flags)
> +		: "memory");

I wonder if this should have "cc" as part of the clobber list.

> +
> +	return res;
> +}
> +
>  static inline unsigned long arch_local_irq_save(void)
>  {
>  	unsigned long flags;
> 
>  	flags = arch_local_save_flags();
> 
> -	arch_local_irq_disable();
> +	/*
> +	 * There are too many states with IRQs disabled, just keep the current
> +	 * state if interrupts are already disabled/masked.
> +	 */
> +	if (!arch_irqs_disabled_flags(flags))
> +		arch_local_irq_disable();
> 
>  	return flags;
>  }
> @@ -119,26 +124,10 @@ static inline void arch_local_irq_restore(unsigned long flags)
>  			"msr_s	" __stringify(SYS_ICC_PMR_EL1) ", %0\n"
>  			"dsb	sy",
>  			ARM64_HAS_IRQ_PRIO_MASKING)
> -		: "+r" (flags)
>  		:
> +		: "r" (flags)
>  		: "memory");
>  }
> 
> -static inline int arch_irqs_disabled_flags(unsigned long flags)
> -{
> -	int res;
> -
> -	asm volatile(ALTERNATIVE(
> -			"and	%w0, %w1, #" __stringify(PSR_I_BIT) "\n"
> -			"nop",
> -			"cmp	%w1, #" __stringify(GIC_PRIO_IRQOFF) "\n"
> -			"cset	%w0, ls",
> -			ARM64_HAS_IRQ_PRIO_MASKING)
> -		: "=&r" (res)
> -		: "r" ((int) flags)
> -		: "memory");
> -
> -	return res;
> -}
>  #endif
>  #endif
> diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
> index a01fe087..0ec398c 100644
> --- a/arch/arm64/include/asm/kvm_host.h
> +++ b/arch/arm64/include/asm/kvm_host.h
> @@ -532,9 +532,11 @@ static inline void kvm_arm_vhe_guest_enter(void)
>  	 * will not signal the CPU of interrupts of lower priority, and the
>  	 * only way to get out will be via guest exceptions.
>  	 * Naturally, we want to avoid this.
> +	 *
> +	 * local_daif_mask() already sets IGNORE_PMR, we just need a
> +	 * dsb to ensure the redistributor forwards EL2 IRQs to the CPU.
>  	 */
>  	if (system_uses_irq_prio_masking()) {
> -		gic_write_pmr(GIC_PRIO_IRQON);
>  		dsb(sy);
>  	}

nit: drop the (now superfluous) brackets.

>  }
> diff --git a/arch/arm64/include/asm/ptrace.h b/arch/arm64/include/asm/ptrace.h
> index ec60174..7861a5d 100644
> --- a/arch/arm64/include/asm/ptrace.h
> +++ b/arch/arm64/include/asm/ptrace.h
> @@ -35,9 +35,15 @@
>   * means masking more IRQs (or at least that the same IRQs remain masked).
>   *
>   * To mask interrupts, we clear the most significant bit of PMR.
> + *
> + * Some code sections either automatically switch back to PSR.I or explicitly
> + * require priority masking not to be used. If bit GIC_PRIO_IGNORE_PMR is
> + * included in the priority mask, it indicates that PSR.I should be set and
> + * that interrupt disabling temporarily does not rely on IRQ priorities.
>   */
> -#define GIC_PRIO_IRQON		0xf0
> -#define GIC_PRIO_IRQOFF		(GIC_PRIO_IRQON & ~0x80)
> +#define GIC_PRIO_IRQON			0xc0
> +#define GIC_PRIO_IRQOFF			(GIC_PRIO_IRQON & ~0x80)
> +#define GIC_PRIO_IGNORE_PMR		(1 << 4)

The naming of this last value is still a bit odd: it says "ignore PMR",
and yet it only works because it is a valid PMR value. So it is not so
much that PMR is not used (it is), but that PSTATE.I is set, thus
overriding the interrupt signaling.

How about something like GIC_PRIO_PSTATE_I_SET, which has the advantage
of exactly matching the above comment?
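
i.e., illustratively:

	#define GIC_PRIO_PSTATE_I_SET	(1 << 4)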

> 
>  /* Additional SPSR bits not exposed in the UABI */
>  #define PSR_IL_BIT		(1 << 20)
> diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S
> index 00c1f21..e6df781 100644
> --- a/arch/arm64/kernel/entry.S
> +++ b/arch/arm64/kernel/entry.S
> @@ -258,6 +258,7 @@ alternative_else_nop_endif
>  	/*
>  	 * Registers that may be useful after this macro is invoked:
>  	 *
> +	 * x20 - ICC_PMR_EL1
>  	 * x21 - aborted SP
>  	 * x22 - aborted PC
>  	 * x23 - aborted PSTATE
> @@ -434,6 +435,16 @@ alternative_endif
>  	.endm
>  #endif
> 
> +
> +	.macro irq_entry_ignore_pmr, pmr:req, tmp:req
> +#ifdef CONFIG_ARM64_PSEUDO_NMI
> +	alternative_if ARM64_HAS_IRQ_PRIO_MASKING
> +	orr	\tmp, \pmr, #GIC_PRIO_IGNORE_PMR
> +	msr_s	SYS_ICC_PMR_EL1, \tmp
> +	alternative_else_nop_endif
> +#endif
> +	.endm
> +
>  	.text
> 
>  /*
> @@ -612,6 +623,7 @@ el1_dbg:
>  	cmp	x24, #ESR_ELx_EC_BRK64		// if BRK64
>  	cinc	x24, x24, eq			// set bit '0'
>  	tbz	x24, #0, el1_inv		// EL1 only
> +	suspend_irq_prio_masking tmp=x21
>  	mrs	x0, far_el1
>  	mov	x2, sp				// struct pt_regs
>  	bl	do_debug_exception
> @@ -629,12 +641,10 @@ ENDPROC(el1_sync)
>  	.align	6
>  el1_irq:
>  	kernel_entry 1
> +	irq_entry_ignore_pmr pmr=x20, tmp=x0

What are the conditions under which we use 'suspend', and those under
which we use 'ignore'? They are obviously very similar, and spelling out
the rationale wouldn't hurt.

Of course, should you decide to adopt my naming suggestion above, some
names need to change too... ;-)

>  	enable_da_f
> 
>  #ifdef CONFIG_ARM64_PSEUDO_NMI
> -alternative_if ARM64_HAS_IRQ_PRIO_MASKING
> -	ldr	x20, [sp, #S_PMR_SAVE]
> -alternative_else_nop_endif
>  	test_irqs_unmasked	res=x0, pmr=x20
>  	cbz	x0, 1f
>  	bl	asm_nmi_enter
> @@ -664,8 +674,9 @@ alternative_else_nop_endif
> 
>  #ifdef CONFIG_ARM64_PSEUDO_NMI
>  	/*
> -	 * if IRQs were disabled when we received the interrupt, we have an NMI
> -	 * and we are not re-enabling interrupt upon eret. Skip tracing.
> +	 * When using IRQ priority masking, we can get spurious interrupts while
> +	 * PMR is set to GIC_PRIO_IRQOFF. An NMI might also have occurred in a
> +	 * section with interrupts disabled. Skip tracing in those cases.
>  	 */
>  	test_irqs_unmasked	res=x0, pmr=x20
>  	cbz	x0, 1f
> @@ -794,6 +805,7 @@ el0_ia:
>  	 * Instruction abort handling
>  	 */
>  	mrs	x26, far_el1
> +	suspend_irq_prio_masking tmp=x0
>  	enable_da_f
>  #ifdef CONFIG_TRACE_IRQFLAGS
>  	bl	trace_hardirqs_off
> @@ -839,6 +851,7 @@ el0_sp_pc:
>  	 * Stack or PC alignment exception handling
>  	 */
>  	mrs	x26, far_el1
> +	suspend_irq_prio_masking tmp=x0
>  	enable_da_f
>  #ifdef CONFIG_TRACE_IRQFLAGS
>  	bl	trace_hardirqs_off
> @@ -873,6 +886,7 @@ el0_dbg:
>  	 * Debug exception handling
>  	 */
>  	tbnz	x24, #0, el0_inv		// EL0 only
> +	suspend_irq_prio_masking tmp=x0
>  	mrs	x0, far_el1
>  	mov	x1, x25
>  	mov	x2, sp
> @@ -894,7 +908,9 @@ ENDPROC(el0_sync)
>  el0_irq:
>  	kernel_entry 0
>  el0_irq_naked:
> +	irq_entry_ignore_pmr pmr=x20, tmp=x0
>  	enable_da_f
> +
>  #ifdef CONFIG_TRACE_IRQFLAGS
>  	bl	trace_hardirqs_off
>  #endif
> @@ -916,6 +932,7 @@ ENDPROC(el0_irq)
>  el1_error:
>  	kernel_entry 1
>  	mrs	x1, esr_el1
> +	suspend_irq_prio_masking tmp=x0
>  	enable_dbg
>  	mov	x0, sp
>  	bl	do_serror
> @@ -926,6 +943,7 @@ el0_error:
>  	kernel_entry 0
>  el0_error_naked:
>  	mrs	x1, esr_el1
> +	suspend_irq_prio_masking tmp=x0
>  	enable_dbg
>  	mov	x0, sp
>  	bl	do_serror
> @@ -950,6 +968,7 @@ work_pending:
>   */
>  ret_to_user:
>  	disable_daif
> +	suspend_irq_prio_masking tmp=x0
>  	ldr	x1, [tsk, #TSK_TI_FLAGS]
>  	and	x2, x1, #_TIF_WORK_MASK
>  	cbnz	x2, work_pending
> @@ -966,6 +985,7 @@ ENDPROC(ret_to_user)
>   */
>  	.align	6
>  el0_svc:
> +	suspend_irq_prio_masking tmp=x1
>  	mov	x0, sp
>  	bl	el0_svc_handler
>  	b	ret_to_user
> diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c
> index 3767fb2..f5bae97 100644
> --- a/arch/arm64/kernel/process.c
> +++ b/arch/arm64/kernel/process.c
> @@ -94,7 +94,7 @@ static void __cpu_do_idle_irqprio(void)
>  	 * be raised.
>  	 */
>  	pmr = gic_read_pmr();
> -	gic_write_pmr(GIC_PRIO_IRQON);
> +	gic_write_pmr(GIC_PRIO_IRQON | GIC_PRIO_IGNORE_PMR);
> 
>  	__cpu_do_idle();
> 
> diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c
> index 824de70..2a6d0dd1 100644
> --- a/arch/arm64/kernel/smp.c
> +++ b/arch/arm64/kernel/smp.c
> @@ -192,11 +192,13 @@ static void init_gic_priority_masking(void)
> 
>  	WARN_ON(!(cpuflags & PSR_I_BIT));
> 
> -	gic_write_pmr(GIC_PRIO_IRQOFF);
> -
>  	/* We can only unmask PSR.I if we can take aborts */
> -	if (!(cpuflags & PSR_A_BIT))
> +	if (!(cpuflags & PSR_A_BIT)) {
> +		gic_write_pmr(GIC_PRIO_IRQOFF);
>  		write_sysreg(cpuflags & ~PSR_I_BIT, daif);
> +	} else {
> +		gic_write_pmr(GIC_PRIO_IRQON | GIC_PRIO_IGNORE_PMR);
> +	}
>  }
> 
>  /*
> diff --git a/arch/arm64/kvm/hyp/switch.c b/arch/arm64/kvm/hyp/switch.c
> index 3563fe6..083e319 100644
> --- a/arch/arm64/kvm/hyp/switch.c
> +++ b/arch/arm64/kvm/hyp/switch.c
> @@ -533,7 +533,7 @@ int __hyp_text __kvm_vcpu_run_nvhe(struct kvm_vcpu *vcpu)
>  	 * Naturally, we want to avoid this.
>  	 */
>  	if (system_uses_irq_prio_masking()) {
> -		gic_write_pmr(GIC_PRIO_IRQON);
> +		gic_write_pmr(GIC_PRIO_IRQON | GIC_PRIO_IGNORE_PMR);
>  		dsb(sy);
>  	}
> 
> --
> 1.9.1
> 

Overall, I like the way it looks, and my comments are more cosmetic than
anything else.

Thanks,

	M.
Julien Thierry May 14, 2019, 9:25 a.m. UTC | #2
On 07/05/2019 09:36, Marc Zyngier wrote:
> On 29/04/2019 17:00, Julien Thierry wrote:
>> When using IRQ priority masking to disable interrupts, in order to deal
>> with the PSR.I state, local_irq_save() would convert the I bit into a
>> PMR value (GIC_PRIO_IRQOFF). This resulted in local_irq_restore()
>> potentially modifying the value of PMR in undesired locations, depending
>> on the state of PSR.I at the time the flags were saved [1].
>>
>> In an attempt to solve this issue in a less hackish manner, introduce
>> a bit (GIC_PRIO_IGNORE_PMR) in the PMR values that represents
>> whether PSR.I is being used to disable interrupts, in which case it
>> takes precedence over the status of interrupt masking via PMR.
>>
>> GIC_PRIO_IGNORE_PMR is chosen such that (<pmr_value> |
>> GIC_PRIO_IGNORE_PMR) does not mask more interrupts than <pmr_value>, as
>> some sections (e.g. arch_cpu_idle(), the interrupt acknowledge path)
>> require PMR not to mask interrupts that could be signaled to the
>> CPU when using only PSR.I.
>>
>> [1] https://www.spinics.net/lists/arm-kernel/msg716956.html
>>
>> Fixes: 4a503217ce37 ("arm64: irqflags: Use ICC_PMR_EL1 for interrupt masking")
>> Signed-off-by: Julien Thierry <julien.thierry@arm.com>
>> Reported-by: Zenghui Yu <yuzenghui@huawei.com>
>> Cc: Steven Rostedt <rostedt@goodmis.org>
>> Cc: Wei Li <liwei391@huawei.com>
>> Cc: Catalin Marinas <catalin.marinas@arm.com>
>> Cc: Will Deacon <will.deacon@arm.com>
>> Cc: Christoffer Dall <christoffer.dall@arm.com>
>> Cc: Marc Zyngier <marc.zyngier@arm.com>
>> Cc: James Morse <james.morse@arm.com>
>> Cc: Suzuki K Pouloze <suzuki.poulose@arm.com>
>> Cc: Oleg Nesterov <oleg@redhat.com>
>> ---
>>  arch/arm64/include/asm/arch_gicv3.h |  4 ++-
>>  arch/arm64/include/asm/assembler.h  |  9 +++++
>>  arch/arm64/include/asm/daifflags.h  | 22 ++++++++----
>>  arch/arm64/include/asm/irqflags.h   | 69 ++++++++++++++++---------------------
>>  arch/arm64/include/asm/kvm_host.h   |  4 ++-
>>  arch/arm64/include/asm/ptrace.h     | 10 ++++--
>>  arch/arm64/kernel/entry.S           | 30 +++++++++++++---
>>  arch/arm64/kernel/process.c         |  2 +-
>>  arch/arm64/kernel/smp.c             |  8 +++--
>>  arch/arm64/kvm/hyp/switch.c         |  2 +-
>>  10 files changed, 100 insertions(+), 60 deletions(-)
>>
>> diff --git a/arch/arm64/include/asm/arch_gicv3.h b/arch/arm64/include/asm/arch_gicv3.h
>> index 14b41dd..3102c9a 100644
>> --- a/arch/arm64/include/asm/arch_gicv3.h
>> +++ b/arch/arm64/include/asm/arch_gicv3.h
>> @@ -163,7 +163,9 @@ static inline bool gic_prio_masking_enabled(void)
>>
>>  static inline void gic_pmr_mask_irqs(void)
>>  {
>> -	BUILD_BUG_ON(GICD_INT_DEF_PRI <= GIC_PRIO_IRQOFF);
>> +	BUILD_BUG_ON(GICD_INT_DEF_PRI < (GIC_PRIO_IRQOFF |
>> +					 GIC_PRIO_IGNORE_PMR));
>> +	BUILD_BUG_ON(GICD_INT_DEF_PRI >= GIC_PRIO_IRQON);
>>  	gic_write_pmr(GIC_PRIO_IRQOFF);
>>  }
>>
>> diff --git a/arch/arm64/include/asm/assembler.h b/arch/arm64/include/asm/assembler.h
>> index c5308d0..601154d 100644
>> --- a/arch/arm64/include/asm/assembler.h
>> +++ b/arch/arm64/include/asm/assembler.h
>> @@ -62,6 +62,15 @@
>>  	msr	daifclr, #(8 | 4 | 1)
>>  	.endm
>>
>> +	.macro	suspend_irq_prio_masking, tmp:req
>> +#ifdef CONFIG_ARM64_PSEUDO_NMI
>> +	alternative_if ARM64_HAS_IRQ_PRIO_MASKING
>> +	mov	\tmp, #(GIC_PRIO_IRQON | GIC_PRIO_IGNORE_PMR)
>> +	msr_s	SYS_ICC_PMR_EL1, \tmp
>> +	alternative_else_nop_endif
>> +#endif
>> +	.endm
>> +
>>  /*
>>   * Save/restore interrupts.
>>   */
>> diff --git a/arch/arm64/include/asm/daifflags.h b/arch/arm64/include/asm/daifflags.h
>> index db452aa..a32ece9 100644
>> --- a/arch/arm64/include/asm/daifflags.h
>> +++ b/arch/arm64/include/asm/daifflags.h
>> @@ -18,6 +18,7 @@
>>
>>  #include <linux/irqflags.h>
>>
>> +#include <asm/arch_gicv3.h>
>>  #include <asm/cpufeature.h>
>>
>>  #define DAIF_PROCCTX		0
>> @@ -32,6 +33,11 @@ static inline void local_daif_mask(void)
>>  		:
>>  		:
>>  		: "memory");
>> +
>> +	/* Don't really care for a dsb here, we don't intend to enable IRQs */
>> +	if (system_uses_irq_prio_masking())
>> +		gic_write_pmr(GIC_PRIO_IRQON | GIC_PRIO_IGNORE_PMR);
>> +
>>  	trace_hardirqs_off();
>>  }
>>
>> @@ -43,7 +49,7 @@ static inline unsigned long local_daif_save(void)
>>
>>  	if (system_uses_irq_prio_masking()) {
>>  		/* If IRQs are masked with PMR, reflect it in the flags */
>> -		if (read_sysreg_s(SYS_ICC_PMR_EL1) <= GIC_PRIO_IRQOFF)
>> +		if (read_sysreg_s(SYS_ICC_PMR_EL1) != GIC_PRIO_IRQON)
>>  			flags |= PSR_I_BIT;
>>  	}
>>
>> @@ -59,14 +65,16 @@ static inline void local_daif_restore(unsigned long flags)
>>  	if (!irq_disabled) {
>>  		trace_hardirqs_on();
>>
>> -		if (system_uses_irq_prio_masking())
>> -			arch_local_irq_enable();
>> -	} else if (!(flags & PSR_A_BIT)) {
>> +		if (system_uses_irq_prio_masking()) {
>> +			gic_write_pmr(GIC_PRIO_IRQON);
>> +			dsb(sy);
>> +		}
>> +	} else if (system_uses_irq_prio_masking()) {
>>  		/*
>>  		 * If interrupts are disabled but we can take
>>  		 * asynchronous errors, we can take NMIs
>>  		 */
>> -		if (system_uses_irq_prio_masking()) {
>> +		if (!(flags & PSR_A_BIT)) {
>>  			flags &= ~PSR_I_BIT;
>>  			/*
>>  			 * There has been concern that the write to daif
>> @@ -87,7 +95,9 @@ static inline void local_daif_restore(unsigned long flags)
>>  			 *
>>  			 * So we don't need additional synchronization here.
>>  			 */
>> -			arch_local_irq_disable();
>> +			gic_write_pmr(GIC_PRIO_IRQOFF);
>> +		} else {
>> +			gic_write_pmr(GIC_PRIO_IRQON | GIC_PRIO_IGNORE_PMR);
>>  		}
> 
> Nit: move the write to PMR outside of the if/else clause with an
> intermediate variable to hold the PMR value. Not a big deal, but I think
> it helps with the nesting.
> 

Done.

>>  	}
>>
>> diff --git a/arch/arm64/include/asm/irqflags.h b/arch/arm64/include/asm/irqflags.h
>> index 43d8366..516cdfc 100644
>> --- a/arch/arm64/include/asm/irqflags.h
>> +++ b/arch/arm64/include/asm/irqflags.h
>> @@ -67,43 +67,48 @@ static inline void arch_local_irq_disable(void)
>>   */
>>  static inline unsigned long arch_local_save_flags(void)
>>  {
>> -	unsigned long daif_bits;
>>  	unsigned long flags;
>>
>> -	daif_bits = read_sysreg(daif);
>> -
>> -	/*
>> -	 * The asm is logically equivalent to:
>> -	 *
>> -	 * if (system_uses_irq_prio_masking())
>> -	 *	flags = (daif_bits & PSR_I_BIT) ?
>> -	 *			GIC_PRIO_IRQOFF :
>> -	 *			read_sysreg_s(SYS_ICC_PMR_EL1);
>> -	 * else
>> -	 *	flags = daif_bits;
>> -	 */
>>  	asm volatile(ALTERNATIVE(
>> -			"mov	%0, %1\n"
>> -			"nop\n"
>> -			"nop",
>> -			"mrs_s	%0, " __stringify(SYS_ICC_PMR_EL1) "\n"
>> -			"ands	%1, %1, " __stringify(PSR_I_BIT) "\n"
>> -			"csel	%0, %0, %2, eq",
>> -			ARM64_HAS_IRQ_PRIO_MASKING)
>> -		: "=&r" (flags), "+r" (daif_bits)
>> -		: "r" ((unsigned long) GIC_PRIO_IRQOFF)
>> +		"mrs	%0, daif",
>> +		"mrs_s	%0, " __stringify(SYS_ICC_PMR_EL1) "\n",
>> +		ARM64_HAS_IRQ_PRIO_MASKING)
>> +		: "=&r" (flags)
>> +		:
>>  		: "memory");
>>
>>  	return flags;
>>  }
>>
>> +static inline int arch_irqs_disabled_flags(unsigned long flags)
>> +{
>> +	int res;
>> +
>> +	asm volatile(ALTERNATIVE(
>> +		"and	%w0, %w1, #" __stringify(PSR_I_BIT) "\n"
>> +		"nop",
>> +		"cmp	%w1, #" __stringify(GIC_PRIO_IRQON) "\n"
>> +		"cset	%w0, ne",
>> +		ARM64_HAS_IRQ_PRIO_MASKING)
>> +		: "=&r" (res)
>> +		: "r" ((int) flags)
>> +		: "memory");
> 
> I wonder if this should have "cc" as part of the clobber list.

Does "cc" have any special semantics on arm64? All I can find is that
in the general case it indicates that the asm is modifying the "flags" register.

Is your suggestion only for the PMR case? Or is it something that we
should add regardless of PMR?
The latter makes sense to me, but for the former, I fail to understand
why this should affect only PMR.

> 
>> +
>> +	return res;
>> +}
>> +
>>  static inline unsigned long arch_local_irq_save(void)
>>  {
>>  	unsigned long flags;
>>
>>  	flags = arch_local_save_flags();
>>
>> -	arch_local_irq_disable();
>> +	/*
>> +	 * There are too many states with IRQs disabled, just keep the current
>> +	 * state if interrupts are already disabled/masked.
>> +	 */
>> +	if (!arch_irqs_disabled_flags(flags))
>> +		arch_local_irq_disable();
>>
>>  	return flags;
>>  }
>> @@ -119,26 +124,10 @@ static inline void arch_local_irq_restore(unsigned long flags)
>>  			"msr_s	" __stringify(SYS_ICC_PMR_EL1) ", %0\n"
>>  			"dsb	sy",
>>  			ARM64_HAS_IRQ_PRIO_MASKING)
>> -		: "+r" (flags)
>>  		:
>> +		: "r" (flags)
>>  		: "memory");
>>  }
>>
>> -static inline int arch_irqs_disabled_flags(unsigned long flags)
>> -{
>> -	int res;
>> -
>> -	asm volatile(ALTERNATIVE(
>> -			"and	%w0, %w1, #" __stringify(PSR_I_BIT) "\n"
>> -			"nop",
>> -			"cmp	%w1, #" __stringify(GIC_PRIO_IRQOFF) "\n"
>> -			"cset	%w0, ls",
>> -			ARM64_HAS_IRQ_PRIO_MASKING)
>> -		: "=&r" (res)
>> -		: "r" ((int) flags)
>> -		: "memory");
>> -
>> -	return res;
>> -}
>>  #endif
>>  #endif
>> diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
>> index a01fe087..0ec398c 100644
>> --- a/arch/arm64/include/asm/kvm_host.h
>> +++ b/arch/arm64/include/asm/kvm_host.h
>> @@ -532,9 +532,11 @@ static inline void kvm_arm_vhe_guest_enter(void)
>>  	 * will not signal the CPU of interrupts of lower priority, and the
>>  	 * only way to get out will be via guest exceptions.
>>  	 * Naturally, we want to avoid this.
>> +	 *
>> +	 * local_daif_mask() already sets IGNORE_PMR, we just need a
>> +	 * dsb to ensure the redistributor forwards EL2 IRQs to the CPU.
>>  	 */
>>  	if (system_uses_irq_prio_masking()) {
>> -		gic_write_pmr(GIC_PRIO_IRQON);
>>  		dsb(sy);
>>  	}
> 
> nit: drop the (now superfluous) brackets.
> 

Done.

>>  }
>> diff --git a/arch/arm64/include/asm/ptrace.h b/arch/arm64/include/asm/ptrace.h
>> index ec60174..7861a5d 100644
>> --- a/arch/arm64/include/asm/ptrace.h
>> +++ b/arch/arm64/include/asm/ptrace.h
>> @@ -35,9 +35,15 @@
>>   * means masking more IRQs (or at least that the same IRQs remain masked).
>>   *
>>   * To mask interrupts, we clear the most significant bit of PMR.
>> + *
>> + * Some code sections either automatically switch back to PSR.I or explicitly
>> + * require priority masking not to be used. If bit GIC_PRIO_IGNORE_PMR is
>> + * included in the priority mask, it indicates that PSR.I should be set and
>> + * that interrupt disabling temporarily does not rely on IRQ priorities.
>>   */
>> -#define GIC_PRIO_IRQON		0xf0
>> -#define GIC_PRIO_IRQOFF		(GIC_PRIO_IRQON & ~0x80)
>> +#define GIC_PRIO_IRQON			0xc0
>> +#define GIC_PRIO_IRQOFF			(GIC_PRIO_IRQON & ~0x80)
>> +#define GIC_PRIO_IGNORE_PMR		(1 << 4)
> 
> The naming of this last value is still a bit odd: it says "ignore PMR",
> and yet it only works because it is a valid PMR value. So it is not so
> much that PMR is not used (it is), but that PSTATE.I is set, thus
> overriding the interrupt signaling.
> 
> How about something like GIC_PRIO_PSTATE_I_SET, which has the advantage
> of exactly matching the above comment?
> 

Sounds good to me, though it also depends on how I go with the next comment.

>>
>>  /* Additional SPSR bits not exposed in the UABI */
>>  #define PSR_IL_BIT		(1 << 20)
>> diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S
>> index 00c1f21..e6df781 100644
>> --- a/arch/arm64/kernel/entry.S
>> +++ b/arch/arm64/kernel/entry.S
>> @@ -258,6 +258,7 @@ alternative_else_nop_endif
>>  	/*
>>  	 * Registers that may be useful after this macro is invoked:
>>  	 *
>> +	 * x20 - ICC_PMR_EL1
>>  	 * x21 - aborted SP
>>  	 * x22 - aborted PC
>>  	 * x23 - aborted PSTATE
>> @@ -434,6 +435,16 @@ alternative_endif
>>  	.endm
>>  #endif
>>
>> +
>> +	.macro irq_entry_ignore_pmr, pmr:req, tmp:req
>> +#ifdef CONFIG_ARM64_PSEUDO_NMI
>> +	alternative_if ARM64_HAS_IRQ_PRIO_MASKING
>> +	orr	\tmp, \pmr, #GIC_PRIO_IGNORE_PMR
>> +	msr_s	SYS_ICC_PMR_EL1, \tmp
>> +	alternative_else_nop_endif
>> +#endif
>> +	.endm
>> +
>>  	.text
>>
>>  /*
>> @@ -612,6 +623,7 @@ el1_dbg:
>>  	cmp	x24, #ESR_ELx_EC_BRK64		// if BRK64
>>  	cinc	x24, x24, eq			// set bit '0'
>>  	tbz	x24, #0, el1_inv		// EL1 only
>> +	suspend_irq_prio_masking tmp=x21
>>  	mrs	x0, far_el1
>>  	mov	x2, sp				// struct pt_regs
>>  	bl	do_debug_exception
>> @@ -629,12 +641,10 @@ ENDPROC(el1_sync)
>>  	.align	6
>>  el1_irq:
>>  	kernel_entry 1
>> +	irq_entry_ignore_pmr pmr=x20, tmp=x0
> 
> What are the conditions under which we use 'suspend', and those under
> which we use 'ignore'? They are obviously very similar, and spelling out
> the rationale wouldn't hurt.
> 

The conditions are probably the wrong ones. Or at least they don't
really warrant slapping names on them like I did.

I wanted to factor the assembly code by using helpers while avoiding
adding unnecessary instructions in said helpers. And since there are at
least two cases (one where we have to modify the current value of PMR
and another where we can just set an immediate value), I ended up with
two helpers, trying to put semantics onto them.
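
For reference, the two shapes in question, as they appear in the patch:

	/* set PMR to a fixed value (suspend_irq_prio_masking) */
	mov	\tmp, #(GIC_PRIO_IRQON | GIC_PRIO_IGNORE_PMR)
	msr_s	SYS_ICC_PMR_EL1, \tmp

	/* modify the saved PMR value (irq_entry_ignore_pmr) */
	orr	\tmp, \pmr, #GIC_PRIO_IGNORE_PMR
	msr_s	SYS_ICC_PMR_EL1, \tmp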

I'll try to get something cleaner.

> Of course, should you decide to adopt my naming suggestion above, some
> names need to change too... ;-)
> 
>>  	enable_da_f
>>
>>  #ifdef CONFIG_ARM64_PSEUDO_NMI
>> -alternative_if ARM64_HAS_IRQ_PRIO_MASKING
>> -	ldr	x20, [sp, #S_PMR_SAVE]
>> -alternative_else_nop_endif
>>  	test_irqs_unmasked	res=x0, pmr=x20
>>  	cbz	x0, 1f
>>  	bl	asm_nmi_enter
>> @@ -664,8 +674,9 @@ alternative_else_nop_endif
>>
>>  #ifdef CONFIG_ARM64_PSEUDO_NMI
>>  	/*
>> -	 * if IRQs were disabled when we received the interrupt, we have an NMI
>> -	 * and we are not re-enabling interrupt upon eret. Skip tracing.
>> +	 * When using IRQ priority masking, we can get spurious interrupts while
>> +	 * PMR is set to GIC_PRIO_IRQOFF. An NMI might also have occurred in a
>> +	 * section with interrupts disabled. Skip tracing in those cases.
>>  	 */
>>  	test_irqs_unmasked	res=x0, pmr=x20
>>  	cbz	x0, 1f
>> @@ -794,6 +805,7 @@ el0_ia:
>>  	 * Instruction abort handling
>>  	 */
>>  	mrs	x26, far_el1
>> +	suspend_irq_prio_masking tmp=x0
>>  	enable_da_f
>>  #ifdef CONFIG_TRACE_IRQFLAGS
>>  	bl	trace_hardirqs_off
>> @@ -839,6 +851,7 @@ el0_sp_pc:
>>  	 * Stack or PC alignment exception handling
>>  	 */
>>  	mrs	x26, far_el1
>> +	suspend_irq_prio_masking tmp=x0
>>  	enable_da_f
>>  #ifdef CONFIG_TRACE_IRQFLAGS
>>  	bl	trace_hardirqs_off
>> @@ -873,6 +886,7 @@ el0_dbg:
>>  	 * Debug exception handling
>>  	 */
>>  	tbnz	x24, #0, el0_inv		// EL0 only
>> +	suspend_irq_prio_masking tmp=x0
>>  	mrs	x0, far_el1
>>  	mov	x1, x25
>>  	mov	x2, sp
>> @@ -894,7 +908,9 @@ ENDPROC(el0_sync)
>>  el0_irq:
>>  	kernel_entry 0
>>  el0_irq_naked:
>> +	irq_entry_ignore_pmr pmr=x20, tmp=x0
>>  	enable_da_f
>> +
>>  #ifdef CONFIG_TRACE_IRQFLAGS
>>  	bl	trace_hardirqs_off
>>  #endif
>> @@ -916,6 +932,7 @@ ENDPROC(el0_irq)
>>  el1_error:
>>  	kernel_entry 1
>>  	mrs	x1, esr_el1
>> +	suspend_irq_prio_masking tmp=x0
>>  	enable_dbg
>>  	mov	x0, sp
>>  	bl	do_serror
>> @@ -926,6 +943,7 @@ el0_error:
>>  	kernel_entry 0
>>  el0_error_naked:
>>  	mrs	x1, esr_el1
>> +	suspend_irq_prio_masking tmp=x0
>>  	enable_dbg
>>  	mov	x0, sp
>>  	bl	do_serror
>> @@ -950,6 +968,7 @@ work_pending:
>>   */
>>  ret_to_user:
>>  	disable_daif
>> +	suspend_irq_prio_masking tmp=x0
>>  	ldr	x1, [tsk, #TSK_TI_FLAGS]
>>  	and	x2, x1, #_TIF_WORK_MASK
>>  	cbnz	x2, work_pending
>> @@ -966,6 +985,7 @@ ENDPROC(ret_to_user)
>>   */
>>  	.align	6
>>  el0_svc:
>> +	suspend_irq_prio_masking tmp=x1
>>  	mov	x0, sp
>>  	bl	el0_svc_handler
>>  	b	ret_to_user
>> diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c
>> index 3767fb2..f5bae97 100644
>> --- a/arch/arm64/kernel/process.c
>> +++ b/arch/arm64/kernel/process.c
>> @@ -94,7 +94,7 @@ static void __cpu_do_idle_irqprio(void)
>>  	 * be raised.
>>  	 */
>>  	pmr = gic_read_pmr();
>> -	gic_write_pmr(GIC_PRIO_IRQON);
>> +	gic_write_pmr(GIC_PRIO_IRQON | GIC_PRIO_IGNORE_PMR);
>>
>>  	__cpu_do_idle();
>>
>> diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c
>> index 824de70..2a6d0dd1 100644
>> --- a/arch/arm64/kernel/smp.c
>> +++ b/arch/arm64/kernel/smp.c
>> @@ -192,11 +192,13 @@ static void init_gic_priority_masking(void)
>>
>>  	WARN_ON(!(cpuflags & PSR_I_BIT));
>>
>> -	gic_write_pmr(GIC_PRIO_IRQOFF);
>> -
>>  	/* We can only unmask PSR.I if we can take aborts */
>> -	if (!(cpuflags & PSR_A_BIT))
>> +	if (!(cpuflags & PSR_A_BIT)) {
>> +		gic_write_pmr(GIC_PRIO_IRQOFF);
>>  		write_sysreg(cpuflags & ~PSR_I_BIT, daif);
>> +	} else {
>> +		gic_write_pmr(GIC_PRIO_IRQON | GIC_PRIO_IGNORE_PMR);
>> +	}
>>  }
>>
>>  /*
>> diff --git a/arch/arm64/kvm/hyp/switch.c b/arch/arm64/kvm/hyp/switch.c
>> index 3563fe6..083e319 100644
>> --- a/arch/arm64/kvm/hyp/switch.c
>> +++ b/arch/arm64/kvm/hyp/switch.c
>> @@ -533,7 +533,7 @@ int __hyp_text __kvm_vcpu_run_nvhe(struct kvm_vcpu *vcpu)
>>  	 * Naturally, we want to avoid this.
>>  	 */
>>  	if (system_uses_irq_prio_masking()) {
>> -		gic_write_pmr(GIC_PRIO_IRQON);
>> +		gic_write_pmr(GIC_PRIO_IRQON | GIC_PRIO_IGNORE_PMR);
>>  		dsb(sy);
>>  	}
>>
>> --
>> 1.9.1
>>
> 
> Overall, I like the way it looks, and my comments are more cosmetic than
> anything else.
> 

Thanks,
Robin Murphy May 14, 2019, 12:01 p.m. UTC | #3
On 14/05/2019 10:25, Julien Thierry wrote:
[...]
>>> +static inline int arch_irqs_disabled_flags(unsigned long flags)
>>> +{
>>> +	int res;
>>> +
>>> +	asm volatile(ALTERNATIVE(
>>> +		"and	%w0, %w1, #" __stringify(PSR_I_BIT) "\n"
>>> +		"nop",
>>> +		"cmp	%w1, #" __stringify(GIC_PRIO_IRQON) "\n"
>>> +		"cset	%w0, ne",
>>> +		ARM64_HAS_IRQ_PRIO_MASKING)
>>> +		: "=&r" (res)
>>> +		: "r" ((int) flags)
>>> +		: "memory");
>>
>> I wonder if this should have "cc" as part of the clobber list.
> 
> Does "cc" have any special semantics on arm64? All I can find is that
> in the general case it indicates that the asm is modifying the "flags" register.
> 
> Is your suggestion only for the PMR case? Or is it something that we
> should add regardless of PMR?
> The latter makes sense to me, but for the former, I fail to understand
> why this should affect only PMR.

The PMR case really ought to have a cc clobber, because who knows 
what this may end up inlined into, and compilers can get pretty 
aggressive with instruction scheduling in ways which leave a live value 
in CPSR across sizeable chunks of other code. It's true that the non-PMR 
case doesn't need it, but the surrounding code still needs to be 
generated to accommodate both possible versions of the alternative. From 
the look of the rest of the patch, the existing pseudo-NMI code has this 
bug in a few places.
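
Concretely, that would just mean extending the clobber list, something
like (sketch):

	asm volatile(ALTERNATIVE(
		"and	%w0, %w1, #" __stringify(PSR_I_BIT) "\n"
		"nop",
		"cmp	%w1, #" __stringify(GIC_PRIO_IRQON) "\n"
		"cset	%w0, ne",
		ARM64_HAS_IRQ_PRIO_MASKING)
		: "=&r" (res)
		: "r" ((int) flags)
		: "cc", "memory");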

Technically you could omit it when ARM64_PSEUDO_NMI is configured out 
entirely, but at that point you may as well omit the whole alternative 
as well. It's probably not worth the bother unless it proves to have a 
significant impact on codegen overall. On which note the memory clobber 
also seems superfluous either way :/

That said, now that I've been looking at it for this long, if the aim is 
just to create a zero/nonzero value then couldn't the PMR case just be 
"eor %w0, %w1, #GIC_PRIO_IRQON" and avoid the need for clobbers at all?

Robin.
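
For illustration, the variant suggested above would look something like
this (an untested sketch; a single instruction per branch, so no "cc" or
"memory" clobber is needed):

	asm volatile(ALTERNATIVE(
		"and	%w0, %w1, #" __stringify(PSR_I_BIT),
		"eor	%w0, %w1, #" __stringify(GIC_PRIO_IRQON),
		ARM64_HAS_IRQ_PRIO_MASKING)
		: "=r" (res)
		: "r" ((int) flags));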
Julien Thierry May 14, 2019, 1:08 p.m. UTC | #4
On 14/05/2019 13:01, Robin Murphy wrote:
> On 14/05/2019 10:25, Julien Thierry wrote:
> [...]
>>>> +static inline int arch_irqs_disabled_flags(unsigned long flags)
>>>> +{
>>>> +    int res;
>>>> +
>>>> +    asm volatile(ALTERNATIVE(
>>>> +        "and    %w0, %w1, #" __stringify(PSR_I_BIT) "\n"
>>>> +        "nop",
>>>> +        "cmp    %w1, #" __stringify(GIC_PRIO_IRQON) "\n"
>>>> +        "cset    %w0, ne",
>>>> +        ARM64_HAS_IRQ_PRIO_MASKING)
>>>> +        : "=&r" (res)
>>>> +        : "r" ((int) flags)
>>>> +        : "memory");
>>>
>>> I wonder if this should have "cc" as part of the clobber list.
>>
>> Does "cc" have any special semantics on arm64? All I can find is that
>> in the general case it indicates that the asm is modifying the "flags"
>> register.
>>
>> Is your suggestion only for the PMR case? Or is it something that we
>> should add regardless of PMR?
>> The latter makes sense to me, but for the former, I fail to understand
>> why this should affect only PMR.
> 
> The PMR case really ought to have a cc clobber, because who knows
> what this may end up inlined into, and compilers can get pretty
> aggressive with instruction scheduling in ways which leave a live value
> in CPSR across sizeable chunks of other code. It's true that the non-PMR
> case doesn't need it, but the surrounding code still needs to be
> generated to accommodate both possible versions of the alternative. From
> the look of the rest of the patch, the existing pseudo-NMI code has this
> bug in a few places.
> 
> Technically you could omit it when ARM64_PSEUDO_NMI is configured out
> entirely, but at that point you may as well omit the whole alternative
> as well. It's probably not worth the bother unless it proves to have a
> significant impact on codegen overall. On which note the memory clobber
> also seems superfluous either way :/
> 

Right, I see. I misunderstood what was meant by "cc" indicating that the
assembly modified the flags.

Due to the context I interpreted it as irqflags whereas it concerns the
condition flags (hence the 'c' I presume...). It all makes more sense now.

> That said, now that I've been looking at it for this long, if the aim is
> just to create a zero/nonzero value then couldn't the PMR case just be
> "eor %w0, %w1, #GIC_PRIO_IRQON" and avoid the need for clobbers at all?
> 

Yes, definitely seems like it would be better! I'll take that
suggestion, thanks.

Cheers,

Patch

diff --git a/arch/arm64/include/asm/arch_gicv3.h b/arch/arm64/include/asm/arch_gicv3.h
index 14b41dd..3102c9a 100644
--- a/arch/arm64/include/asm/arch_gicv3.h
+++ b/arch/arm64/include/asm/arch_gicv3.h
@@ -163,7 +163,9 @@  static inline bool gic_prio_masking_enabled(void)

 static inline void gic_pmr_mask_irqs(void)
 {
-	BUILD_BUG_ON(GICD_INT_DEF_PRI <= GIC_PRIO_IRQOFF);
+	BUILD_BUG_ON(GICD_INT_DEF_PRI < (GIC_PRIO_IRQOFF |
+					 GIC_PRIO_IGNORE_PMR));
+	BUILD_BUG_ON(GICD_INT_DEF_PRI >= GIC_PRIO_IRQON);
 	gic_write_pmr(GIC_PRIO_IRQOFF);
 }

diff --git a/arch/arm64/include/asm/assembler.h b/arch/arm64/include/asm/assembler.h
index c5308d0..601154d 100644
--- a/arch/arm64/include/asm/assembler.h
+++ b/arch/arm64/include/asm/assembler.h
@@ -62,6 +62,15 @@ 
 	msr	daifclr, #(8 | 4 | 1)
 	.endm

+	.macro	suspend_irq_prio_masking, tmp:req
+#ifdef CONFIG_ARM64_PSEUDO_NMI
+	alternative_if ARM64_HAS_IRQ_PRIO_MASKING
+	mov	\tmp, #(GIC_PRIO_IRQON | GIC_PRIO_IGNORE_PMR)
+	msr_s	SYS_ICC_PMR_EL1, \tmp
+	alternative_else_nop_endif
+#endif
+	.endm
+
 /*
  * Save/restore interrupts.
  */
diff --git a/arch/arm64/include/asm/daifflags.h b/arch/arm64/include/asm/daifflags.h
index db452aa..a32ece9 100644
--- a/arch/arm64/include/asm/daifflags.h
+++ b/arch/arm64/include/asm/daifflags.h
@@ -18,6 +18,7 @@ 

 #include <linux/irqflags.h>

+#include <asm/arch_gicv3.h>
 #include <asm/cpufeature.h>

 #define DAIF_PROCCTX		0
@@ -32,6 +33,11 @@  static inline void local_daif_mask(void)
 		:
 		:
 		: "memory");
+
+	/* Don't really care for a dsb here, we don't intend to enable IRQs */
+	if (system_uses_irq_prio_masking())
+		gic_write_pmr(GIC_PRIO_IRQON | GIC_PRIO_IGNORE_PMR);
+
 	trace_hardirqs_off();
 }

@@ -43,7 +49,7 @@  static inline unsigned long local_daif_save(void)

 	if (system_uses_irq_prio_masking()) {
 		/* If IRQs are masked with PMR, reflect it in the flags */
-		if (read_sysreg_s(SYS_ICC_PMR_EL1) <= GIC_PRIO_IRQOFF)
+		if (read_sysreg_s(SYS_ICC_PMR_EL1) != GIC_PRIO_IRQON)
 			flags |= PSR_I_BIT;
 	}

@@ -59,14 +65,16 @@  static inline void local_daif_restore(unsigned long flags)
 	if (!irq_disabled) {
 		trace_hardirqs_on();

-		if (system_uses_irq_prio_masking())
-			arch_local_irq_enable();
-	} else if (!(flags & PSR_A_BIT)) {
+		if (system_uses_irq_prio_masking()) {
+			gic_write_pmr(GIC_PRIO_IRQON);
+			dsb(sy);
+		}
+	} else if (system_uses_irq_prio_masking()) {
 		/*
 		 * If interrupts are disabled but we can take
 		 * asynchronous errors, we can take NMIs
 		 */
-		if (system_uses_irq_prio_masking()) {
+		if (!(flags & PSR_A_BIT)) {
 			flags &= ~PSR_I_BIT;
 			/*
 			 * There has been concern that the write to daif
@@ -87,7 +95,9 @@  static inline void local_daif_restore(unsigned long flags)
 			 *
 			 * So we don't need additional synchronization here.
 			 */
-			arch_local_irq_disable();
+			gic_write_pmr(GIC_PRIO_IRQOFF);
+		} else {
+			gic_write_pmr(GIC_PRIO_IRQON | GIC_PRIO_IGNORE_PMR);
 		}
 	}

diff --git a/arch/arm64/include/asm/irqflags.h b/arch/arm64/include/asm/irqflags.h
index 43d8366..516cdfc 100644
--- a/arch/arm64/include/asm/irqflags.h
+++ b/arch/arm64/include/asm/irqflags.h
@@ -67,43 +67,48 @@  static inline void arch_local_irq_disable(void)
  */
 static inline unsigned long arch_local_save_flags(void)
 {
-	unsigned long daif_bits;
 	unsigned long flags;

-	daif_bits = read_sysreg(daif);
-
-	/*
-	 * The asm is logically equivalent to:
-	 *
-	 * if (system_uses_irq_prio_masking())
-	 *	flags = (daif_bits & PSR_I_BIT) ?
-	 *			GIC_PRIO_IRQOFF :
-	 *			read_sysreg_s(SYS_ICC_PMR_EL1);
-	 * else
-	 *	flags = daif_bits;
-	 */
 	asm volatile(ALTERNATIVE(
-			"mov	%0, %1\n"
-			"nop\n"
-			"nop",
-			"mrs_s	%0, " __stringify(SYS_ICC_PMR_EL1) "\n"
-			"ands	%1, %1, " __stringify(PSR_I_BIT) "\n"
-			"csel	%0, %0, %2, eq",
-			ARM64_HAS_IRQ_PRIO_MASKING)
-		: "=&r" (flags), "+r" (daif_bits)
-		: "r" ((unsigned long) GIC_PRIO_IRQOFF)
+		"mrs	%0, daif",
+		"mrs_s	%0, " __stringify(SYS_ICC_PMR_EL1) "\n",
+		ARM64_HAS_IRQ_PRIO_MASKING)
+		: "=&r" (flags)
+		:
 		: "memory");

 	return flags;
 }

+static inline int arch_irqs_disabled_flags(unsigned long flags)
+{
+	int res;
+
+	asm volatile(ALTERNATIVE(
+		"and	%w0, %w1, #" __stringify(PSR_I_BIT) "\n"
+		"nop",
+		"cmp	%w1, #" __stringify(GIC_PRIO_IRQON) "\n"
+		"cset	%w0, ne",
+		ARM64_HAS_IRQ_PRIO_MASKING)
+		: "=&r" (res)
+		: "r" ((int) flags)
+		: "memory");
+
+	return res;
+}
+
 static inline unsigned long arch_local_irq_save(void)
 {
 	unsigned long flags;

 	flags = arch_local_save_flags();

-	arch_local_irq_disable();
+	/*
+	 * There are too many states with IRQs disabled, just keep the current
+	 * state if interrupts are already disabled/masked.
+	 */
+	if (!arch_irqs_disabled_flags(flags))
+		arch_local_irq_disable();

 	return flags;
 }
@@ -119,26 +124,10 @@  static inline void arch_local_irq_restore(unsigned long flags)
 			"msr_s	" __stringify(SYS_ICC_PMR_EL1) ", %0\n"
 			"dsb	sy",
 			ARM64_HAS_IRQ_PRIO_MASKING)
-		: "+r" (flags)
 		:
+		: "r" (flags)
 		: "memory");
 }

-static inline int arch_irqs_disabled_flags(unsigned long flags)
-{
-	int res;
-
-	asm volatile(ALTERNATIVE(
-			"and	%w0, %w1, #" __stringify(PSR_I_BIT) "\n"
-			"nop",
-			"cmp	%w1, #" __stringify(GIC_PRIO_IRQOFF) "\n"
-			"cset	%w0, ls",
-			ARM64_HAS_IRQ_PRIO_MASKING)
-		: "=&r" (res)
-		: "r" ((int) flags)
-		: "memory");
-
-	return res;
-}
 #endif
 #endif
diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index a01fe087..0ec398c 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -532,9 +532,11 @@  static inline void kvm_arm_vhe_guest_enter(void)
 	 * will not signal the CPU of interrupts of lower priority, and the
 	 * only way to get out will be via guest exceptions.
 	 * Naturally, we want to avoid this.
+	 *
+	 * local_daif_mask() already sets IGNORE_PMR, we just need a
+	 * dsb to ensure the redistributor forwards EL2 IRQs to the CPU.
 	 */
 	if (system_uses_irq_prio_masking()) {
-		gic_write_pmr(GIC_PRIO_IRQON);
 		dsb(sy);
 	}
 }
diff --git a/arch/arm64/include/asm/ptrace.h b/arch/arm64/include/asm/ptrace.h
index ec60174..7861a5d 100644
--- a/arch/arm64/include/asm/ptrace.h
+++ b/arch/arm64/include/asm/ptrace.h
@@ -35,9 +35,15 @@ 
  * means masking more IRQs (or at least that the same IRQs remain masked).
  *
  * To mask interrupts, we clear the most significant bit of PMR.
+ *
+ * Some code sections either automatically switch back to PSR.I or explicitly
+ * require priority masking not to be used. If bit GIC_PRIO_IGNORE_PMR is
+ * included in the priority mask, it indicates that PSR.I should be set and
+ * that interrupt disabling temporarily does not rely on IRQ priorities.
  */
-#define GIC_PRIO_IRQON		0xf0
-#define GIC_PRIO_IRQOFF		(GIC_PRIO_IRQON & ~0x80)
+#define GIC_PRIO_IRQON			0xc0
+#define GIC_PRIO_IRQOFF			(GIC_PRIO_IRQON & ~0x80)
+#define GIC_PRIO_IGNORE_PMR		(1 << 4)

 /* Additional SPSR bits not exposed in the UABI */
 #define PSR_IL_BIT		(1 << 20)
diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S
index 00c1f21..e6df781 100644
--- a/arch/arm64/kernel/entry.S
+++ b/arch/arm64/kernel/entry.S
@@ -258,6 +258,7 @@  alternative_else_nop_endif
 	/*
 	 * Registers that may be useful after this macro is invoked:
 	 *
+	 * x20 - ICC_PMR_EL1
 	 * x21 - aborted SP
 	 * x22 - aborted PC
 	 * x23 - aborted PSTATE
@@ -434,6 +435,16 @@  alternative_endif
 	.endm
 #endif

+
+	.macro irq_entry_ignore_pmr, pmr:req, tmp:req
+#ifdef CONFIG_ARM64_PSEUDO_NMI
+	alternative_if ARM64_HAS_IRQ_PRIO_MASKING
+	orr	\tmp, \pmr, #GIC_PRIO_IGNORE_PMR
+	msr_s	SYS_ICC_PMR_EL1, \tmp
+	alternative_else_nop_endif
+#endif
+	.endm
+
 	.text

 /*
@@ -612,6 +623,7 @@  el1_dbg:
 	cmp	x24, #ESR_ELx_EC_BRK64		// if BRK64
 	cinc	x24, x24, eq			// set bit '0'
 	tbz	x24, #0, el1_inv		// EL1 only
+	suspend_irq_prio_masking tmp=x21
 	mrs	x0, far_el1
 	mov	x2, sp				// struct pt_regs
 	bl	do_debug_exception
@@ -629,12 +641,10 @@  ENDPROC(el1_sync)
 	.align	6
 el1_irq:
 	kernel_entry 1
+	irq_entry_ignore_pmr pmr=x20, tmp=x0
 	enable_da_f

 #ifdef CONFIG_ARM64_PSEUDO_NMI
-alternative_if ARM64_HAS_IRQ_PRIO_MASKING
-	ldr	x20, [sp, #S_PMR_SAVE]
-alternative_else_nop_endif
 	test_irqs_unmasked	res=x0, pmr=x20
 	cbz	x0, 1f
 	bl	asm_nmi_enter
@@ -664,8 +674,9 @@  alternative_else_nop_endif

 #ifdef CONFIG_ARM64_PSEUDO_NMI
 	/*
-	 * if IRQs were disabled when we received the interrupt, we have an NMI
-	 * and we are not re-enabling interrupt upon eret. Skip tracing.
+	 * When using IRQ priority masking, we can get spurious interrupts while
+	 * PMR is set to GIC_PRIO_IRQOFF. An NMI might also have occurred in a
+	 * section with interrupts disabled. Skip tracing in those cases.
 	 */
 	test_irqs_unmasked	res=x0, pmr=x20
 	cbz	x0, 1f
@@ -794,6 +805,7 @@  el0_ia:
 	 * Instruction abort handling
 	 */
 	mrs	x26, far_el1
+	suspend_irq_prio_masking tmp=x0
 	enable_da_f
 #ifdef CONFIG_TRACE_IRQFLAGS
 	bl	trace_hardirqs_off
@@ -839,6 +851,7 @@  el0_sp_pc:
 	 * Stack or PC alignment exception handling
 	 */
 	mrs	x26, far_el1
+	suspend_irq_prio_masking tmp=x0
 	enable_da_f
 #ifdef CONFIG_TRACE_IRQFLAGS
 	bl	trace_hardirqs_off
@@ -873,6 +886,7 @@  el0_dbg:
 	 * Debug exception handling
 	 */
 	tbnz	x24, #0, el0_inv		// EL0 only
+	suspend_irq_prio_masking tmp=x0
 	mrs	x0, far_el1
 	mov	x1, x25
 	mov	x2, sp
@@ -894,7 +908,9 @@  ENDPROC(el0_sync)
 el0_irq:
 	kernel_entry 0
 el0_irq_naked:
+	irq_entry_ignore_pmr pmr=x20, tmp=x0
 	enable_da_f
+
 #ifdef CONFIG_TRACE_IRQFLAGS
 	bl	trace_hardirqs_off
 #endif
@@ -916,6 +932,7 @@  ENDPROC(el0_irq)
 el1_error:
 	kernel_entry 1
 	mrs	x1, esr_el1
+	suspend_irq_prio_masking tmp=x0
 	enable_dbg
 	mov	x0, sp
 	bl	do_serror
@@ -926,6 +943,7 @@  el0_error:
 	kernel_entry 0
 el0_error_naked:
 	mrs	x1, esr_el1
+	suspend_irq_prio_masking tmp=x0
 	enable_dbg
 	mov	x0, sp
 	bl	do_serror
@@ -950,6 +968,7 @@  work_pending:
  */
 ret_to_user:
 	disable_daif
+	suspend_irq_prio_masking tmp=x0
 	ldr	x1, [tsk, #TSK_TI_FLAGS]
 	and	x2, x1, #_TIF_WORK_MASK
 	cbnz	x2, work_pending
@@ -966,6 +985,7 @@  ENDPROC(ret_to_user)
  */
 	.align	6
 el0_svc:
+	suspend_irq_prio_masking tmp=x1
 	mov	x0, sp
 	bl	el0_svc_handler
 	b	ret_to_user
diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c
index 3767fb2..f5bae97 100644
--- a/arch/arm64/kernel/process.c
+++ b/arch/arm64/kernel/process.c
@@ -94,7 +94,7 @@  static void __cpu_do_idle_irqprio(void)
 	 * be raised.
 	 */
 	pmr = gic_read_pmr();
-	gic_write_pmr(GIC_PRIO_IRQON);
+	gic_write_pmr(GIC_PRIO_IRQON | GIC_PRIO_IGNORE_PMR);

 	__cpu_do_idle();

diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c
index 824de70..2a6d0dd1 100644
--- a/arch/arm64/kernel/smp.c
+++ b/arch/arm64/kernel/smp.c
@@ -192,11 +192,13 @@  static void init_gic_priority_masking(void)

 	WARN_ON(!(cpuflags & PSR_I_BIT));

-	gic_write_pmr(GIC_PRIO_IRQOFF);
-
 	/* We can only unmask PSR.I if we can take aborts */
-	if (!(cpuflags & PSR_A_BIT))
+	if (!(cpuflags & PSR_A_BIT)) {
+		gic_write_pmr(GIC_PRIO_IRQOFF);
 		write_sysreg(cpuflags & ~PSR_I_BIT, daif);
+	} else {
+		gic_write_pmr(GIC_PRIO_IRQON | GIC_PRIO_IGNORE_PMR);
+	}
 }

 /*
diff --git a/arch/arm64/kvm/hyp/switch.c b/arch/arm64/kvm/hyp/switch.c
index 3563fe6..083e319 100644
--- a/arch/arm64/kvm/hyp/switch.c
+++ b/arch/arm64/kvm/hyp/switch.c
@@ -533,7 +533,7 @@  int __hyp_text __kvm_vcpu_run_nvhe(struct kvm_vcpu *vcpu)
 	 * Naturally, we want to avoid this.
 	 */
 	if (system_uses_irq_prio_masking()) {
-		gic_write_pmr(GIC_PRIO_IRQON);
+		gic_write_pmr(GIC_PRIO_IRQON | GIC_PRIO_IGNORE_PMR);
 		dsb(sy);
 	}