@@ -522,16 +522,6 @@ static void __init ms_hyperv_init_platform(void)
if (efi_enabled(EFI_BOOT))
x86_platform.get_nmi_reason = hv_get_nmi_reason;
- /*
- * Hyper-V VMs have a PIT emulation quirk such that zeroing the
- * counter register during PIT shutdown restarts the PIT. So it
- * continues to interrupt @18.2 HZ. Setting i8253_clear_counter
- * to false tells pit_shutdown() not to zero the counter so that
- * the PIT really is shutdown. Generation 2 VMs don't have a PIT,
- * and setting this value has no effect.
- */
- i8253_clear_counter_on_shutdown = false;
-
#if IS_ENABLED(CONFIG_HYPERV)
if ((hv_get_isolation_type() == HV_ISOLATION_TYPE_VBS) ||
ms_hyperv.paravisor_present)
@@ -20,13 +20,6 @@
DEFINE_RAW_SPINLOCK(i8253_lock);
EXPORT_SYMBOL(i8253_lock);
-/*
- * Handle PIT quirk in pit_shutdown() where zeroing the counter register
- * restarts the PIT, negating the shutdown. On platforms with the quirk,
- * platform specific code can set this to false.
- */
-bool i8253_clear_counter_on_shutdown __ro_after_init = true;
-
#ifdef CONFIG_CLKSRC_I8253
/*
* Since the PIT overflows every tick, its not very useful
@@ -112,12 +105,33 @@ void clockevent_i8253_disable(void)
{
raw_spin_lock(&i8253_lock);
+ /*
+ * Writing the MODE register should stop the counter, according to
+ * the datasheet. This appears to work on real hardware (well, on
+ * modern Intel and AMD boxes; I didn't dig the Pegasos out of the
+ * shed).
+ *
+ * However, some virtual implementations differ, and the MODE change
+ * doesn't have any effect until either the counter is written (KVM
+ * in-kernel PIT) or the next interrupt (QEMU). And in those cases,
+ * it may not stop the *count*, only the interrupts. Although in
+ * the virt case, that probably doesn't matter, as the value of the
+ * counter will only be calculated on demand if the guest reads it;
+ * it's the interrupts which cause steal time.
+ *
+ * Hyper-V apparently has a bug where even in mode 0, the IRQ keeps
+ * firing repeatedly if the counter is running. But it *does* do the
+ * right thing when the MODE register is written.
+ *
+ * So: write the MODE and then load the counter, which ensures that
+ * the IRQ is stopped on those buggy virt implementations. And then
+ * write the MODE again, which is the right way to stop it.
+ */
outb_p(0x30, PIT_MODE);
+ outb_p(0, PIT_CH0);
+ outb_p(0, PIT_CH0);
- if (i8253_clear_counter_on_shutdown) {
- outb_p(0, PIT_CH0);
- outb_p(0, PIT_CH0);
- }
+ outb_p(0x30, PIT_MODE);
raw_spin_unlock(&i8253_lock);
}
@@ -21,7 +21,6 @@
#define PIT_LATCH ((PIT_TICK_RATE + HZ/2) / HZ)
extern raw_spinlock_t i8253_lock;
-extern bool i8253_clear_counter_on_shutdown;
extern struct clock_event_device i8253_clockevent;
extern void clockevent_i8253_init(bool oneshot);
extern void clockevent_i8253_disable(void);