
[09/21] MIPS: KVM: Fix timer race modifying guest CP0_Cause

Message ID 1398439204-26171-10-git-send-email-james.hogan@imgtec.com (mailing list archive)
State New, archived

Commit Message

James Hogan April 25, 2014, 3:19 p.m. UTC
The hrtimer callback for guest timer timeouts sets the guest's
CP0_Cause.TI bit to indicate to the guest that a timer interrupt is
pending; however, no mutual exclusion is implemented to prevent this
from occurring while the guest's CP0_Cause register is being
read-modify-written elsewhere.

When this occurs, the setting of the CP0_Cause.TI bit is undone, so the
guest misses the timer interrupt and doesn't reprogram the CP0_Compare
register for the next timeout. Currently another timer interrupt will be
triggered within another 10ms anyway due to the way timers are
emulated, but once the MIPS timer emulation is fixed this would result
in Linux guest time standing still and the guest scheduler not being
invoked until the guest CP0_Count has wrapped around again, which at
100MHz takes just under 43 seconds.

Currently this is the only asynchronous modification of guest registers,
so it is fixed by adjusting the implementations of the
kvm_set_c0_guest_cause(), kvm_clear_c0_guest_cause(), and
kvm_change_c0_guest_cause() macros, which are used for modifying the
guest CP0_Cause register, to use ll/sc to ensure atomic modification.
This should work in both UP and SMP cases without requiring interrupts
to be disabled.

Signed-off-by: James Hogan <james.hogan@imgtec.com>
Cc: Paolo Bonzini <pbonzini@redhat.com>
Cc: Gleb Natapov <gleb@kernel.org>
Cc: kvm@vger.kernel.org
Cc: Ralf Baechle <ralf@linux-mips.org>
Cc: linux-mips@linux-mips.org
Cc: Sanjay Lal <sanjayl@kymasys.com>
---
 arch/mips/include/asm/kvm_host.h | 71 ++++++++++++++++++++++++++++++++++++----
 1 file changed, 65 insertions(+), 6 deletions(-)
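
[Editor's illustration, not part of the patch.] The race described in
the commit message is a classic lost update: the plain |= and &= macros
compile to a separate load, modify, and store of the guest CP0_Cause
word, so a TI bit set by the hrtimer callback between the load and the
store is silently overwritten. A minimal C sketch, with hypothetical
names:

/*
 * Editor's sketch (hypothetical names): the lost-update race on guest
 * CP0_Cause before this patch. Both sides are plain C read-modify-writes
 * with no mutual exclusion between them.
 */
static unsigned long guest_cause;	/* stands in for cop0->reg[MIPS_CP0_CAUSE][0] */

void vcpu_context(void)			/* e.g. emulating a guest write to Cause */
{
	unsigned long tmp = guest_cause;	/* (1) load */
	tmp &= ~0xff00UL;			/* (2) modify: clear the IP bits */
	/* hrtimer callback fires here: guest_cause |= 1UL << 30 (Cause.TI) */
	guest_cause = tmp;			/* (3) store: the TI bit is lost */
}

(On the 43-second figure: CP0_Count is a 32-bit counter, and 2^32 cycles
at 100MHz is about 42.9 seconds.)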

Comments

David Daney April 25, 2014, 4:55 p.m. UTC | #1
On 04/25/2014 08:19 AM, James Hogan wrote:
> The hrtimer callback for guest timer timeouts sets the guest's
> CP0_Cause.TI bit to indicate to the guest that a timer interrupt is
> pending; however, no mutual exclusion is implemented to prevent this
> from occurring while the guest's CP0_Cause register is being
> read-modify-written elsewhere.
>
> When this occurs, the setting of the CP0_Cause.TI bit is undone, so the
> guest misses the timer interrupt and doesn't reprogram the CP0_Compare
> register for the next timeout. Currently another timer interrupt will be
> triggered within another 10ms anyway due to the way timers are
> emulated, but once the MIPS timer emulation is fixed this would result
> in Linux guest time standing still and the guest scheduler not being
> invoked until the guest CP0_Count has wrapped around again, which at
> 100MHz takes just under 43 seconds.
>
> Currently this is the only asynchronous modification of guest registers,
> so it is fixed by adjusting the implementations of the
> kvm_set_c0_guest_cause(), kvm_clear_c0_guest_cause(), and
> kvm_change_c0_guest_cause() macros, which are used for modifying the
> guest CP0_Cause register, to use ll/sc to ensure atomic modification.
> This should work in both UP and SMP cases without requiring interrupts
> to be disabled.
>
> Signed-off-by: James Hogan <james.hogan@imgtec.com>
> Cc: Paolo Bonzini <pbonzini@redhat.com>
> Cc: Gleb Natapov <gleb@kernel.org>
> Cc: kvm@vger.kernel.org
> Cc: Ralf Baechle <ralf@linux-mips.org>
> Cc: linux-mips@linux-mips.org
> Cc: Sanjay Lal <sanjayl@kymasys.com>

NACK, I don't like the names of these functions.  They initially 
confused me too much...

> ---
>   arch/mips/include/asm/kvm_host.h | 71 ++++++++++++++++++++++++++++++++++++----
>   1 file changed, 65 insertions(+), 6 deletions(-)
>
> diff --git a/arch/mips/include/asm/kvm_host.h b/arch/mips/include/asm/kvm_host.h
> index 3eedcb3015e5..90e1c0005ff4 100644
> --- a/arch/mips/include/asm/kvm_host.h
> +++ b/arch/mips/include/asm/kvm_host.h
> @@ -481,15 +481,74 @@ struct kvm_vcpu_arch {
>   #define kvm_read_c0_guest_errorepc(cop0)	(cop0->reg[MIPS_CP0_ERROR_PC][0])
>   #define kvm_write_c0_guest_errorepc(cop0, val)	(cop0->reg[MIPS_CP0_ERROR_PC][0] = (val))
>
> +/*
> + * Some of the guest registers may be modified asynchronously (e.g. from a
> + * hrtimer callback in hard irq context) and therefore need stronger atomicity
> + * guarantees than other registers.
> + */
> +
> +static inline void _kvm_atomic_set_c0_guest_reg(unsigned long *reg,
> +						unsigned long val)

The name of this function is too unclear.

It should be _kvm_atomic_or_c0_guest_reg, or 
_kvm_atomic_setbits_c0_guest_reg(unsigned long *reg, unsigned long mask)

> +{
> +	unsigned long temp;
> +	do {
> +		__asm__ __volatile__(
> +		"	.set	mips3				\n"
> +		"	" __LL "%0, %1				\n"
> +		"	or	%0, %2				\n"
> +		"	" __SC	"%0, %1				\n"
> +		"	.set	mips0				\n"
> +		: "=&r" (temp), "+m" (*reg)
> +		: "r" (val));
> +	} while (unlikely(!temp));
> +}
> +
> +static inline void _kvm_atomic_clear_c0_guest_reg(unsigned long *reg,
> +						  unsigned long val)

Same here.

Perhaps _kvm_atomic_clearbits_c0_guest_reg(unsigned long *reg, unsigned 
long mask)

> +{
> +	unsigned long temp;
> +	do {
> +		__asm__ __volatile__(
> +		"	.set	mips3				\n"
> +		"	" __LL "%0, %1				\n"
> +		"	and	%0, %2				\n"
> +		"	" __SC	"%0, %1				\n"
> +		"	.set	mips0				\n"
> +		: "=&r" (temp), "+m" (*reg)
> +		: "r" (~val));
> +	} while (unlikely(!temp));
> +}
> +
> +static inline void _kvm_atomic_change_c0_guest_reg(unsigned long *reg,
> +						   unsigned long change,
> +						   unsigned long val)

Same here.

Perhaps

_kvm_atomic_setbits_c0_guest_reg(unsigned long *reg,
				unsigned long mask,
				unsigned long bits)

> +{
> +	unsigned long temp;
> +	do {
> +		__asm__ __volatile__(
> +		"	.set	mips3				\n"
> +		"	" __LL "%0, %1				\n"
> +		"	and	%0, %2				\n"
> +		"	or	%0, %3				\n"
> +		"	" __SC	"%0, %1				\n"
> +		"	.set	mips0				\n"
> +		: "=&r" (temp), "+m" (*reg)
> +		: "r" (~change), "r" (val & change));
> +	} while (unlikely(!temp));
> +}
> +
>   #define kvm_set_c0_guest_status(cop0, val)	(cop0->reg[MIPS_CP0_STATUS][0] |= (val))
>   #define kvm_clear_c0_guest_status(cop0, val)	(cop0->reg[MIPS_CP0_STATUS][0] &= ~(val))
> -#define kvm_set_c0_guest_cause(cop0, val)	(cop0->reg[MIPS_CP0_CAUSE][0] |= (val))
> -#define kvm_clear_c0_guest_cause(cop0, val)	(cop0->reg[MIPS_CP0_CAUSE][0] &= ~(val))
> +
> +/* Cause can be modified asynchronously from hardirq hrtimer callback */
> +#define kvm_set_c0_guest_cause(cop0, val)				\
> +	_kvm_atomic_set_c0_guest_reg(&cop0->reg[MIPS_CP0_CAUSE][0], val)
> +#define kvm_clear_c0_guest_cause(cop0, val)				\
> +	_kvm_atomic_clear_c0_guest_reg(&cop0->reg[MIPS_CP0_CAUSE][0], val)
>   #define kvm_change_c0_guest_cause(cop0, change, val)			\
> -{									\
> -	kvm_clear_c0_guest_cause(cop0, change);				\
> -	kvm_set_c0_guest_cause(cop0, ((val) & (change)));		\
> -}
> +	_kvm_atomic_change_c0_guest_reg(&cop0->reg[MIPS_CP0_CAUSE][0],	\
> +					change, val)
> +
>   #define kvm_set_c0_guest_ebase(cop0, val)	(cop0->reg[MIPS_CP0_PRID][1] |= (val))
>   #define kvm_clear_c0_guest_ebase(cop0, val)	(cop0->reg[MIPS_CP0_PRID][1] &= ~(val))
>   #define kvm_change_c0_guest_ebase(cop0, change, val)			\
>

James Hogan April 25, 2014, 8:42 p.m. UTC | #2
Hi David,

On Friday 25 April 2014 09:55:47 David Daney wrote:
> On 04/25/2014 08:19 AM, James Hogan wrote:
> > The hrtimer callback for guest timer timeouts sets the guest's
> > CP0_Cause.TI bit to indicate to the guest that a timer interrupt is
> > pending; however, no mutual exclusion is implemented to prevent this
> > from occurring while the guest's CP0_Cause register is being
> > read-modify-written elsewhere.
> > 
> > When this occurs, the setting of the CP0_Cause.TI bit is undone, so the
> > guest misses the timer interrupt and doesn't reprogram the CP0_Compare
> > register for the next timeout. Currently another timer interrupt will be
> > triggered within another 10ms anyway due to the way timers are
> > emulated, but once the MIPS timer emulation is fixed this would result
> > in Linux guest time standing still and the guest scheduler not being
> > invoked until the guest CP0_Count has wrapped around again, which at
> > 100MHz takes just under 43 seconds.
> > 
> > Currently this is the only asynchronous modification of guest registers,
> > so it is fixed by adjusting the implementations of the
> > kvm_set_c0_guest_cause(), kvm_clear_c0_guest_cause(), and
> > kvm_change_c0_guest_cause() macros, which are used for modifying the
> > guest CP0_Cause register, to use ll/sc to ensure atomic modification.
> > This should work in both UP and SMP cases without requiring interrupts
> > to be disabled.
> > 
> > Signed-off-by: James Hogan <james.hogan@imgtec.com>
> > Cc: Paolo Bonzini <pbonzini@redhat.com>
> > Cc: Gleb Natapov <gleb@kernel.org>
> > Cc: kvm@vger.kernel.org
> > Cc: Ralf Baechle <ralf@linux-mips.org>
> > Cc: linux-mips@linux-mips.org
> > Cc: Sanjay Lal <sanjayl@kymasys.com>
> 
> NACK, I don't like the names of these functions.  They initially
> confused me too much...

It's just being consistent with all the other *set*, *clear*, and *change* 
macros in kvm_host.h, asm/mipsregs.h, and the 229 users of those macros across 
the arch. I see no reason to confuse things further by being inconsistent.

Cheers
James
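
[Editor's note, not part of the thread.] For readers outside arch/mips:
the convention James refers to gives the three verbs fixed meanings.
set_X(val) does X |= val, clear_X(val) does X &= ~val, and
change_X(change, val) does X = (X & ~change) | (val & change), i.e. it
replaces only the bits selected by change. A usage sketch under that
assumption, with a hypothetical helper name:

#include <asm/kvm_host.h>	/* kvm_change_c0_guest_cause(), struct mips_coproc */

/*
 * Editor's sketch: replace only the IP bits (15..8) of guest Cause,
 * leaving TI and everything else untouched. With this patch the whole
 * masked update becomes a single atomic ll/sc sequence.
 */
static void set_guest_ip_bits(struct mips_coproc *cop0, unsigned long ip)
{
	kvm_change_c0_guest_cause(cop0, 0xff00, ip & 0xff00);
}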


Patch

diff --git a/arch/mips/include/asm/kvm_host.h b/arch/mips/include/asm/kvm_host.h
index 3eedcb3015e5..90e1c0005ff4 100644
--- a/arch/mips/include/asm/kvm_host.h
+++ b/arch/mips/include/asm/kvm_host.h
@@ -481,15 +481,74 @@ struct kvm_vcpu_arch {
 #define kvm_read_c0_guest_errorepc(cop0)	(cop0->reg[MIPS_CP0_ERROR_PC][0])
 #define kvm_write_c0_guest_errorepc(cop0, val)	(cop0->reg[MIPS_CP0_ERROR_PC][0] = (val))
 
+/*
+ * Some of the guest registers may be modified asynchronously (e.g. from a
+ * hrtimer callback in hard irq context) and therefore need stronger atomicity
+ * guarantees than other registers.
+ */
+
+static inline void _kvm_atomic_set_c0_guest_reg(unsigned long *reg,
+						unsigned long val)
+{
+	unsigned long temp;
+	do {
+		__asm__ __volatile__(
+		"	.set	mips3				\n"
+		"	" __LL "%0, %1				\n"
+		"	or	%0, %2				\n"
+		"	" __SC	"%0, %1				\n"
+		"	.set	mips0				\n"
+		: "=&r" (temp), "+m" (*reg)
+		: "r" (val));
+	} while (unlikely(!temp));
+}
+
+static inline void _kvm_atomic_clear_c0_guest_reg(unsigned long *reg,
+						  unsigned long val)
+{
+	unsigned long temp;
+	do {
+		__asm__ __volatile__(
+		"	.set	mips3				\n"
+		"	" __LL "%0, %1				\n"
+		"	and	%0, %2				\n"
+		"	" __SC	"%0, %1				\n"
+		"	.set	mips0				\n"
+		: "=&r" (temp), "+m" (*reg)
+		: "r" (~val));
+	} while (unlikely(!temp));
+}
+
+static inline void _kvm_atomic_change_c0_guest_reg(unsigned long *reg,
+						   unsigned long change,
+						   unsigned long val)
+{
+	unsigned long temp;
+	do {
+		__asm__ __volatile__(
+		"	.set	mips3				\n"
+		"	" __LL "%0, %1				\n"
+		"	and	%0, %2				\n"
+		"	or	%0, %3				\n"
+		"	" __SC	"%0, %1				\n"
+		"	.set	mips0				\n"
+		: "=&r" (temp), "+m" (*reg)
+		: "r" (~change), "r" (val & change));
+	} while (unlikely(!temp));
+}
+
 #define kvm_set_c0_guest_status(cop0, val)	(cop0->reg[MIPS_CP0_STATUS][0] |= (val))
 #define kvm_clear_c0_guest_status(cop0, val)	(cop0->reg[MIPS_CP0_STATUS][0] &= ~(val))
-#define kvm_set_c0_guest_cause(cop0, val)	(cop0->reg[MIPS_CP0_CAUSE][0] |= (val))
-#define kvm_clear_c0_guest_cause(cop0, val)	(cop0->reg[MIPS_CP0_CAUSE][0] &= ~(val))
+
+/* Cause can be modified asynchronously from hardirq hrtimer callback */
+#define kvm_set_c0_guest_cause(cop0, val)				\
+	_kvm_atomic_set_c0_guest_reg(&cop0->reg[MIPS_CP0_CAUSE][0], val)
+#define kvm_clear_c0_guest_cause(cop0, val)				\
+	_kvm_atomic_clear_c0_guest_reg(&cop0->reg[MIPS_CP0_CAUSE][0], val)
 #define kvm_change_c0_guest_cause(cop0, change, val)			\
-{									\
-	kvm_clear_c0_guest_cause(cop0, change);				\
-	kvm_set_c0_guest_cause(cop0, ((val) & (change)));		\
-}
+	_kvm_atomic_change_c0_guest_reg(&cop0->reg[MIPS_CP0_CAUSE][0],	\
+					change, val)
+
 #define kvm_set_c0_guest_ebase(cop0, val)	(cop0->reg[MIPS_CP0_PRID][1] |= (val))
 #define kvm_clear_c0_guest_ebase(cop0, val)	(cop0->reg[MIPS_CP0_PRID][1] &= ~(val))
 #define kvm_change_c0_guest_ebase(cop0, change, val)			\
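
[Editor's aside, not part of the patch.] For anyone unfamiliar with MIPS
ll/sc: ll (load-linked) reads the word, and sc (store-conditional)
stores it back only if no other write to that location has intervened,
leaving 1 in its source register on success and 0 on failure; hence the
while (unlikely(!temp)) retry loops above. Semantically, each helper is
a compare-and-swap retry loop. A portable sketch of the change variant
using GCC's __atomic builtins, with a hypothetical helper name:

#include <stdbool.h>

/*
 * Editor's sketch of what _kvm_atomic_change_c0_guest_reg() computes:
 * atomically apply new = (old & ~change) | (val & change) to *reg,
 * retrying until the update is not interleaved with another write.
 */
static inline void atomic_change_bits(unsigned long *reg,
				      unsigned long change,
				      unsigned long val)
{
	unsigned long old, new;

	do {
		old = __atomic_load_n(reg, __ATOMIC_RELAXED);
		new = (old & ~change) | (val & change);
	} while (!__atomic_compare_exchange_n(reg, &old, new, true,
					      __ATOMIC_RELAXED,
					      __ATOMIC_RELAXED));
}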