From patchwork Mon Jul 6 01:55:15 2009
X-Patchwork-Submitter: Marcelo Tosatti
X-Patchwork-Id: 34296
Message-Id: <20090706015812.786509491@localhost.localdomain>
References: <20090706015511.923596553@localhost.localdomain>
User-Agent: quilt/0.46-1
Date: Sun, 05 Jul 2009 22:55:15 -0300
From: Marcelo Tosatti
To: kvm@vger.kernel.org
Cc: Marcelo Tosatti
Subject: [patch 4/8] KVM: x86: replace hrtimer based timer emulation
Content-Disposition: inline; filename=kvm-timer-rework

Replace hrtimer-based timer emulation with host timebase (ktime_t)
comparisons on guest entry. This avoids host load when guests are
scheduled out, removes a spinlock acquisition on entry (i8254.c's
inject_lock), and makes future improvements easier.
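
The core idea: each emulated timer only records when its count was loaded
and its period; the next expiry is computed on demand and compared against
the host clock on guest entry, instead of keeping an hrtimer armed while the
guest may be scheduled out. A minimal userspace sketch of that comparison
logic follows (illustrative only; the sketch_* names and the CLOCK_MONOTONIC
analogy are not part of the patch):

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <time.h>

struct sketch_timer {
	struct timespec load_time;	/* host time when the count was loaded */
	uint64_t period_ns;		/* timer period in nanoseconds */
	uint64_t acked_events;		/* periodic ticks already acknowledged */
	bool periodic;
};

static uint64_t ts_to_ns(const struct timespec *ts)
{
	return (uint64_t)ts->tv_sec * 1000000000ull + (uint64_t)ts->tv_nsec;
}

/* Next expiry in host time; mirrors the patch's kvm_timer_next_event(). */
static uint64_t sketch_next_event(const struct sketch_timer *t)
{
	uint64_t base = ts_to_ns(&t->load_time);

	if (!t->periodic)
		return base + t->period_ns;
	/* periodic: one full period past the last acknowledged tick */
	return base + (t->acked_events + 1) * t->period_ns;
}

/* The check done on "guest entry": no hrtimer has to fire for this. */
static bool sketch_timer_pending(const struct sketch_timer *t)
{
	struct timespec now;

	clock_gettime(CLOCK_MONOTONIC, &now);
	return ts_to_ns(&now) >= sketch_next_event(t);
}

int main(void)
{
	struct sketch_timer t = { .period_ns = 1000000, .periodic = true };

	clock_gettime(CLOCK_MONOTONIC, &t.load_time);
	printf("pending now: %d\n", sketch_timer_pending(&t));
	return 0;
}

A guest-entry check then reduces to comparing this precomputed expiry against
ktime_get(), which is what the new kvm_cpu_has_pending_timer() below does.
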
Signed-off-by: Marcelo Tosatti

Index: kvm-new/arch/x86/kvm/x86.c
===================================================================
--- kvm-new.orig/arch/x86/kvm/x86.c
+++ kvm-new/arch/x86/kvm/x86.c
@@ -3461,8 +3461,6 @@ static int vcpu_enter_guest(struct kvm_v
 		goto out;

 	if (vcpu->requests) {
-		if (test_and_clear_bit(KVM_REQ_MIGRATE_TIMER, &vcpu->requests))
-			__kvm_migrate_timers(vcpu);
 		if (test_and_clear_bit(KVM_REQ_KVMCLOCK_UPDATE, &vcpu->requests))
 			kvm_write_guest_time(vcpu);
 		if (test_and_clear_bit(KVM_REQ_MMU_SYNC, &vcpu->requests))
@@ -3482,6 +3480,9 @@ static int vcpu_enter_guest(struct kvm_v
 		}
 	}

+
+	kvm_inject_pending_timer_irqs(vcpu);
+
 	preempt_disable();

 	kvm_x86_ops->prepare_guest_switch(vcpu);
@@ -3499,6 +3500,8 @@ static int vcpu_enter_guest(struct kvm_v
 		goto out;
 	}

+	kvm_vcpu_arm_exit(vcpu);
+
 	if (vcpu->arch.exception.pending)
 		__queue_exception(vcpu);
 	else
@@ -3564,6 +3567,8 @@ static int vcpu_enter_guest(struct kvm_v

 	preempt_enable();

+	kvm_vcpu_cleanup_timer(vcpu);
+
 	down_read(&vcpu->kvm->slots_lock);

 	/*
@@ -3627,10 +3632,6 @@ static int __vcpu_run(struct kvm_vcpu *v
 		if (r <= 0)
 			break;

-		clear_bit(KVM_REQ_PENDING_TIMER, &vcpu->requests);
-		if (kvm_cpu_has_pending_timer(vcpu))
-			kvm_inject_pending_timer_irqs(vcpu);
-
 		if (dm_request_for_irq_injection(vcpu, kvm_run)) {
 			r = -EINTR;
 			kvm_run->exit_reason = KVM_EXIT_INTR;
@@ -4579,6 +4580,8 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu
 	if (r < 0)
 		goto free_vcpu;

+	kvm_vcpu_init_armed_exit(vcpu);
+
 	if (kvm->arch.vpit && kvm_vcpu_is_bsp(vcpu))
 		kvm_timer_vcpu_bind(&kvm->arch.vpit->pit_state.pit_timer, vcpu);

Index: kvm-new/virt/kvm/kvm_main.c
===================================================================
--- kvm-new.orig/virt/kvm/kvm_main.c
+++ kvm-new/virt/kvm/kvm_main.c
@@ -1656,11 +1656,19 @@ void mark_page_dirty(struct kvm *kvm, gf
 	}
 }

+#ifndef KVM_ARCH_HAVE_TIMER_EVENT
+ktime_t kvm_vcpu_next_timer_event(struct kvm_vcpu *vcpu)
+{
+	return (ktime_t) { .tv64 = KTIME_MAX };
+}
+#endif
+
 /*
  * The vCPU has executed a HLT instruction with in-kernel mode enabled.
  */
 void kvm_vcpu_block(struct kvm_vcpu *vcpu)
 {
+	ktime_t expires;
 	DEFINE_WAIT(wait);

 	for (;;) {
@@ -1677,8 +1685,9 @@ void kvm_vcpu_block(struct kvm_vcp
 		if (signal_pending(current))
 			break;

+		expires = kvm_vcpu_next_timer_event(vcpu);
 		vcpu_put(vcpu);
-		schedule();
+		schedule_hrtimeout(&expires, HRTIMER_MODE_ABS);
 		vcpu_load(vcpu);
 	}

Index: kvm-new/arch/x86/kvm/i8254.c
===================================================================
--- kvm-new.orig/arch/x86/kvm/i8254.c
+++ kvm-new/arch/x86/kvm/i8254.c
@@ -224,15 +224,6 @@ static void pit_latch_status(struct kvm
 	}
 }

-int pit_has_pending_timer(struct kvm_vcpu *vcpu)
-{
-	struct kvm_pit *pit = vcpu->kvm->arch.vpit;
-
-	if (pit && kvm_vcpu_is_bsp(vcpu) && pit->pit_state.irq_ack)
-		return kvm_timer_has_pending(&pit->pit_state.pit_timer);
-	return 0;
-}
-
 static void kvm_pit_ack_irq(struct kvm_irq_ack_notifier *kian)
 {
 	struct kvm_kpit_state *ps = container_of(kian, struct kvm_kpit_state,
@@ -548,6 +539,36 @@ static const struct kvm_io_device_ops sp
 	.write = speaker_ioport_write,
 };

+static void pit_inject(struct kvm_timer *ktimer)
+{
+	int i;
+	struct kvm_vcpu *vcpu;
+	struct kvm *kvm = ktimer->kvm;
+
+	mutex_lock(&kvm->irq_lock);
+	kvm_set_irq(kvm, kvm->arch.vpit->irq_source_id, 0, 1);
+	kvm_set_irq(kvm, kvm->arch.vpit->irq_source_id, 0, 0);
+	mutex_unlock(&kvm->irq_lock);
+
+	/*
+	 * Provides NMI watchdog support via Virtual Wire mode.
+	 * The route is: PIT -> PIC -> LVT0 in NMI mode.
+	 *
+	 * Note: Our Virtual Wire implementation is simplified, only
+	 * propagating PIT interrupts to all VCPUs when they have set
+	 * LVT0 to NMI delivery. Other PIC interrupts are just sent to
+	 * VCPU0, and only if its LVT0 is in EXTINT mode.
+	 */
+	if (kvm->arch.vapics_in_nmi_mode > 0)
+		kvm_for_each_vcpu(i, vcpu, kvm)
+			kvm_apic_nmi_wd_deliver(vcpu);
+}
+
+struct kvm_timer_ops kpit_ops = {
+	.inject = pit_inject,
+	.name = "pit",
+};
+
 /* Caller must have writers lock on slots_lock */
 struct kvm_pit *kvm_create_pit(struct kvm *kvm, u32 flags)
 {
@@ -573,7 +594,7 @@ struct kvm_pit *kvm_create_pit(struct kv
 	pit_state = &pit->pit_state;
 	pit_state->pit = pit;

-	kvm_timer_init(kvm, &pit_state->pit_timer);
+	kvm_timer_init(kvm, &pit_state->pit_timer, &kpit_ops);

 	pit_state->irq_ack_notifier.gsi = 0;
 	pit_state->irq_ack_notifier.irq_acked = kvm_pit_ack_irq;
@@ -610,50 +631,3 @@ void kvm_free_pit(struct kvm *kvm)
 	}
 }

-static void __inject_pit_timer_intr(struct kvm *kvm)
-{
-	struct kvm_vcpu *vcpu;
-	int i;
-
-	mutex_lock(&kvm->irq_lock);
-	kvm_set_irq(kvm, kvm->arch.vpit->irq_source_id, 0, 1);
-	kvm_set_irq(kvm, kvm->arch.vpit->irq_source_id, 0, 0);
-	mutex_unlock(&kvm->irq_lock);
-
-	/*
-	 * Provides NMI watchdog support via Virtual Wire mode.
-	 * The route is: PIT -> PIC -> LVT0 in NMI mode.
-	 *
-	 * Note: Our Virtual Wire implementation is simplified, only
-	 * propagating PIT interrupts to all VCPUs when they have set
-	 * LVT0 to NMI delivery. Other PIC interrupts are just sent to
-	 * VCPU0, and only if its LVT0 is in EXTINT mode.
-	 */
-	if (kvm->arch.vapics_in_nmi_mode > 0)
-		kvm_for_each_vcpu(i, vcpu, kvm)
-			kvm_apic_nmi_wd_deliver(vcpu);
-}
-
-void kvm_inject_pit_timer_irqs(struct kvm_vcpu *vcpu)
-{
-	struct kvm_pit *pit = vcpu->kvm->arch.vpit;
-	struct kvm *kvm = vcpu->kvm;
-	struct kvm_kpit_state *ps;
-
-	if (vcpu && pit) {
-		int inject = 0;
-		ps = &pit->pit_state;
-
-		/* Try to inject pending interrupts when
-		 * last one has been acked.
-		 */
-		spin_lock(&ps->inject_lock);
-		if (kvm_timer_has_pending(&ps->pit_timer) && ps->irq_ack) {
-			ps->irq_ack = 0;
-			inject = 1;
-		}
-		spin_unlock(&ps->inject_lock);
-		if (inject)
-			__inject_pit_timer_intr(kvm);
-	}
-}
Index: kvm-new/arch/x86/kvm/lapic.c
===================================================================
--- kvm-new.orig/arch/x86/kvm/lapic.c
+++ kvm-new/arch/x86/kvm/lapic.c
@@ -875,16 +875,6 @@ int kvm_lapic_enabled(struct kvm_vcpu *v
  *----------------------------------------------------------------------
  */

-int apic_has_pending_timer(struct kvm_vcpu *vcpu)
-{
-	struct kvm_lapic *lapic = vcpu->arch.apic;
-
-	if (lapic && apic_enabled(lapic) && apic_lvt_enabled(lapic, APIC_LVTT))
-		return kvm_timer_has_pending(&lapic->lapic_timer);
-
-	return 0;
-}
-
 static int kvm_apic_local_deliver(struct kvm_lapic *apic, int lvt_type)
 {
 	u32 reg = apic_get_reg(apic, lvt_type);
@@ -912,6 +902,20 @@ static const struct kvm_io_device_ops ap
 	.write = apic_mmio_write,
 };

+void inject_lapic_timer(struct kvm_timer *ktimer)
+{
+	struct kvm_vcpu *vcpu = ktimer->vcpu;
+	struct kvm_lapic *apic = vcpu->arch.apic;
+
+	if (apic)
+		kvm_apic_local_deliver(apic, APIC_LVTT);
+}
+
+struct kvm_timer_ops lapic_timer_ops = {
+	.inject = inject_lapic_timer,
+	.name = "lapic",
+};
+
 int kvm_create_lapic(struct kvm_vcpu *vcpu)
 {
 	struct kvm_lapic *apic;
@@ -935,7 +939,7 @@ int kvm_create_lapic(struct kvm_vc
 	memset(apic->regs, 0, PAGE_SIZE);
 	apic->vcpu = vcpu;

-	kvm_timer_init(vcpu->kvm, &apic->lapic_timer);
+	kvm_timer_init(vcpu->kvm, &apic->lapic_timer, &lapic_timer_ops);
 	kvm_timer_vcpu_bind(&apic->lapic_timer, vcpu);

 	apic->base_address = APIC_DEFAULT_PHYS_BASE;
@@ -982,14 +986,6 @@ int kvm_apic_accept_pic_intr(struct kvm_
 	return r;
 }

-void kvm_inject_apic_timer_irqs(struct kvm_vcpu *vcpu)
-{
-	struct kvm_lapic *apic = vcpu->arch.apic;
-
-	if (apic && kvm_timer_has_pending(&apic->lapic_timer))
-		kvm_apic_local_deliver(apic, APIC_LVTT);
-}
-
 int kvm_get_apic_interrupt(struct kvm_vcpu *vcpu)
 {
 	int vector = kvm_apic_has_interrupt(vcpu);
Index: kvm-new/arch/x86/include/asm/kvm_host.h
===================================================================
--- kvm-new.orig/arch/x86/include/asm/kvm_host.h
+++ kvm-new/arch/x86/include/asm/kvm_host.h
@@ -377,6 +377,7 @@ struct kvm_vcpu_arch {
 	u64 *mce_banks;

 	struct list_head timers;
+	struct hrtimer exit_timer;
 };

 struct kvm_mem_alias {
@@ -800,4 +801,7 @@ int kvm_unmap_hva(struct kvm *kvm, unsig
 int kvm_age_hva(struct kvm *kvm, unsigned long hva);
 int cpuid_maxphyaddr(struct kvm_vcpu *vcpu);

+#define KVM_ARCH_HAVE_TIMER_EVENT
+ktime_t kvm_vcpu_next_timer_event(struct kvm_vcpu *vcpu);
+
 #endif /* _ASM_X86_KVM_HOST_H */
Index: kvm-new/arch/x86/kvm/irq.c
===================================================================
--- kvm-new.orig/arch/x86/kvm/irq.c
+++ kvm-new/arch/x86/kvm/irq.c
@@ -26,18 +26,19 @@
 #include "i8254.h"
 #include "x86.h"

-/*
- * check if there are pending timer events
- * to be processed.
- */
 int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu)
 {
-	int ret;
+	ktime_t now, expires;

-	ret = pit_has_pending_timer(vcpu);
-	ret |= apic_has_pending_timer(vcpu);
+	expires = kvm_vcpu_next_timer_event(vcpu);
+	now = ktime_get();
+	if (expires.tv64 <= now.tv64) {
+		if (kvm_arch_interrupt_allowed(vcpu))
+			set_bit(KVM_REQ_UNHALT, &vcpu->requests);
+		return 1;
+	}

-	return ret;
+	return 0;
 }
 EXPORT_SYMBOL(kvm_cpu_has_pending_timer);

@@ -86,36 +87,3 @@ int kvm_cpu_get_interrupt(struct kvm_vcp
 }
 EXPORT_SYMBOL_GPL(kvm_cpu_get_interrupt);

-void kvm_inject_pending_timer_irqs(struct kvm_vcpu *vcpu)
-{
-	kvm_inject_apic_timer_irqs(vcpu);
-	kvm_inject_pit_timer_irqs(vcpu);
-	/* TODO: PIT, RTC etc. */
-}
-EXPORT_SYMBOL_GPL(kvm_inject_pending_timer_irqs);
-
-static void __kvm_migrate_apic_timer(struct kvm_vcpu *vcpu)
-{
-	struct kvm_lapic *apic = vcpu->arch.apic;
-
-	if (!apic)
-		return;
-
-	kvm_migrate_timer(&apic->lapic_timer);
-}
-
-static void __kvm_migrate_pit_timer(struct kvm_vcpu *vcpu)
-{
-	struct kvm_pit *pit = vcpu->kvm->arch.vpit;
-
-	if (!kvm_vcpu_is_bsp(vcpu) || !pit)
-		return;
-
-	kvm_migrate_timer(&pit->pit_state.pit_timer);
-}
-
-void __kvm_migrate_timers(struct kvm_vcpu *vcpu)
-{
-	__kvm_migrate_apic_timer(vcpu);
-	__kvm_migrate_pit_timer(vcpu);
-}
Index: kvm-new/arch/x86/kvm/svm.c
===================================================================
--- kvm-new.orig/arch/x86/kvm/svm.c
+++ kvm-new/arch/x86/kvm/svm.c
@@ -738,7 +738,6 @@ static void svm_vcpu_load(struct kvm_vcp
 		delta = vcpu->arch.host_tsc - tsc_this;
 		svm->vmcb->control.tsc_offset += delta;
 		vcpu->cpu = cpu;
-		kvm_migrate_timers(vcpu);
 	}

 	for (i = 0; i < NR_HOST_SAVE_USER_MSRS; i++)
Index: kvm-new/arch/x86/kvm/vmx.c
===================================================================
--- kvm-new.orig/arch/x86/kvm/vmx.c
+++ kvm-new/arch/x86/kvm/vmx.c
@@ -703,7 +703,6 @@ static void vmx_vcpu_load(struct kvm_vcp

 	if (vcpu->cpu != cpu) {
 		vcpu_clear(vmx);
-		kvm_migrate_timers(vcpu);
 		vpid_sync_vcpu_all(vmx);
 		local_irq_disable();
 		list_add(&vmx->local_vcpus_link,
Index: kvm-new/arch/x86/kvm/kvm_timer.h
===================================================================
--- kvm-new.orig/arch/x86/kvm/kvm_timer.h
+++ kvm-new/arch/x86/kvm/kvm_timer.h
@@ -1,26 +1,41 @@
+struct kvm_timer_ops;
 struct kvm_timer {
-	struct hrtimer timer;
-	s64 period;		/* unit: ns */
-	atomic_t pending;	/* accumulated triggered timers */
+	ktime_t count_load_time;
+	ktime_t inject_time;
+	u64 period;		/* unit: ns */
+	u64 acked_events;
+
+	bool can_inject;
 	bool reinject;
 	bool periodic;
+
 	struct kvm *kvm;
 	struct kvm_vcpu *vcpu;
 	struct list_head vcpu_timer;
+	struct kvm_timer_ops *ops;
 };

-void kvm_timer_init(struct kvm *kvm, struct kvm_timer *ktimer);
+struct kvm_timer_ops {
+	void (*inject)(struct kvm_timer *);
+	char *name;
+};
+
+void kvm_timer_init(struct kvm *kvm, struct kvm_timer *ktimer,
+		    struct kvm_timer_ops *ops);
+void kvm_timer_vcpu_bind(struct kvm_timer *ktimer, struct kvm_vcpu *vcpu);
 void kvm_timer_start(struct kvm_timer *ktimer, u64 interval, bool periodic);
 void kvm_timer_cancel(struct kvm_timer *ktimer);
-void kvm_timer_vcpu_bind(struct kvm_timer *ktimer, struct kvm_vcpu *vcpu);
-
 int kvm_timer_has_pending(struct kvm_timer *ktimer);
 void kvm_timer_ack(struct kvm_timer *ktimer);
 void kvm_timer_reset(struct kvm_timer *ktimer);
 void kvm_migrate_timer(struct kvm_timer *ktimer);
+void kvm_vcpu_init_armed_exit(struct kvm_vcpu *vcpu);

-ktime_t kvm_timer_remaining(struct kvm_timer *ktimer);
+void kvm_vcpu_arm_exit(struct kvm_vcpu *vcpu);
+void kvm_vcpu_cleanup_timer(struct kvm_vcpu *vcpu);
+ktime_t kvm_timer_next_event(struct kvm_timer *ktimer);
+ktime_t kvm_timer_remaining(struct kvm_timer *ktimer);
Index: kvm-new/arch/x86/kvm/timer.c
===================================================================
--- kvm-new.orig/arch/x86/kvm/timer.c
+++ kvm-new/arch/x86/kvm/timer.c
@@ -1,107 +1,176 @@
+/*
+ *
+ * Copyright (C) 2009 Red Hat, Inc.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2. See
+ * the COPYING file in the top-level directory.
+ *
+ */
+
 #include
 #include
 #include
 #include
 #include "kvm_timer.h"

-static int __kvm_timer_fn(struct kvm_vcpu *vcpu, struct kvm_timer *ktimer)
-{
-	int restart_timer = 0;
-	wait_queue_head_t *q = &vcpu->wq;
-
-	/*
-	 * There is a race window between reading and incrementing, but we do
-	 * not care about potentially loosing timer events in the !reinject
-	 * case anyway.
-	 */
-	if (ktimer->reinject || !atomic_read(&ktimer->pending)) {
-		atomic_inc(&ktimer->pending);
-		/* FIXME: this code should not know anything about vcpus */
-		set_bit(KVM_REQ_PENDING_TIMER, &vcpu->requests);
-	}
-
-	if (waitqueue_active(q))
-		wake_up_interruptible(q);
-
-	if (ktimer->periodic) {
-		hrtimer_add_expires_ns(&ktimer->timer, ktimer->period);
-		restart_timer = 1;
-	}
-
-	return restart_timer;
-}
-
-static enum hrtimer_restart kvm_timer_fn(struct hrtimer *data)
-{
-	int restart_timer;
-	struct kvm_vcpu *vcpu;
-	struct kvm_timer *ktimer = container_of(data, struct kvm_timer, timer);
-
-	vcpu = ktimer->vcpu;
-	if (!vcpu)
-		return HRTIMER_NORESTART;
-
-	restart_timer = __kvm_timer_fn(vcpu, ktimer);
-	if (restart_timer)
-		return HRTIMER_RESTART;
-	else
-		return HRTIMER_NORESTART;
-}

-void kvm_timer_init(struct kvm *kvm, struct kvm_timer *ktimer)
+void kvm_timer_init(struct kvm *kvm, struct kvm_timer *ktimer,
+		    struct kvm_timer_ops *ops)
 {
 	ktimer->kvm = kvm;
-	hrtimer_init(&ktimer->timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
-	ktimer->timer.function = kvm_timer_fn;
 	INIT_LIST_HEAD(&ktimer->vcpu_timer);
+	ktimer->ops = ops;
+	ktimer->can_inject = false;
 }

 void kvm_timer_vcpu_bind(struct kvm_timer *ktimer, struct kvm_vcpu *vcpu)
 {
 	ktimer->vcpu = vcpu;
-	list_add(&ktimer->vcpu_timer, &vcpu->arch.timers);
 }

 void kvm_timer_start(struct kvm_timer *ktimer, u64 interval, bool periodic)
 {
-	hrtimer_cancel(&ktimer->timer);
-	atomic_set(&ktimer->pending, 0);
 	ktimer->periodic = periodic;
 	ktimer->period = interval;
-	hrtimer_start(&ktimer->timer, ktime_add_ns(ktime_get(), interval),
-		      HRTIMER_MODE_ABS);
+	ktimer->count_load_time = ktime_get();
+	ktimer->acked_events = 0;
+	ktimer->can_inject = true;
+
+	WARN_ON(interval == 0);
+
+	list_add(&ktimer->vcpu_timer, &ktimer->vcpu->arch.timers);
 }

 void kvm_timer_cancel(struct kvm_timer *ktimer)
 {
-	hrtimer_cancel(&ktimer->timer);
-	atomic_set(&ktimer->pending, 0);
+	if (!list_empty(&ktimer->vcpu_timer))
+		list_del_init(&ktimer->vcpu_timer);
 }

-int kvm_timer_has_pending(struct kvm_timer *ktimer)
+void kvm_timer_reset(struct kvm_timer *ktimer)
 {
-	return atomic_read(&ktimer->pending);
+	ktimer->can_inject = true;
 }

 void kvm_timer_ack(struct kvm_timer *ktimer)
 {
-	if (atomic_dec_return(&ktimer->pending) < 0)
-		atomic_inc(&ktimer->pending);
+	ktimer->acked_events++;
+	ktimer->can_inject = true;
 }

-void kvm_timer_reset(struct kvm_timer *ktimer)
+static ktime_t periodic_timer_next_event(struct kvm_timer *ktimer)
 {
-	atomic_set(&ktimer->pending, 0);
+	ktime_t last_acked_event;
+
+	last_acked_event = ktime_add_ns(ktimer->count_load_time,
+				ktimer->acked_events * ktimer->period);
+
+	return ktime_add_ns(last_acked_event, ktimer->period);
 }

-void kvm_migrate_timer(struct kvm_timer *ktimer)
+ktime_t kvm_timer_next_event(struct kvm_timer *ktimer)
 {
-	if (hrtimer_cancel(&ktimer->timer))
-		hrtimer_start_expires(&ktimer->timer, HRTIMER_MODE_ABS);
+	if (!ktimer->periodic)
+		return ktime_add_ns(ktimer->count_load_time, ktimer->period);
+	else
+		return periodic_timer_next_event(ktimer);
 }

 ktime_t kvm_timer_remaining(struct kvm_timer *ktimer)
 {
-	return hrtimer_expires_remaining(&ktimer->timer);
+	ktime_t now = ktime_get();
+
+	return ktime_sub(kvm_timer_next_event(ktimer), now);
 }

+struct kvm_timer *kvm_vcpu_injectable_timer_event(struct kvm_vcpu *vcpu)
+{
+	struct kvm_timer *ktimer, *ktimer_expire = NULL;
+	ktime_t expires = { .tv64 = KTIME_MAX };
+
+	list_for_each_entry(ktimer, &vcpu->arch.timers, vcpu_timer) {
+		ktime_t this_expires = { .tv64 = KTIME_MAX };
+
+		if (ktimer->can_inject)
+			this_expires = kvm_timer_next_event(ktimer);
+
+		if (this_expires.tv64 < expires.tv64) {
+			expires = this_expires;
+			ktimer_expire = ktimer;
+		}
+	}
+
+	return ktimer_expire;
+}
+
+/*
+ * when the next vcpu timer expires, in host timebase.
+ */
+ktime_t kvm_vcpu_next_timer_event(struct kvm_vcpu *vcpu)
+{
+	ktime_t expires = { .tv64 = KTIME_MAX };
+	struct kvm_timer *ktimer = kvm_vcpu_injectable_timer_event(vcpu);
+
+	if (!ktimer)
+		return expires;
+
+	return kvm_timer_next_event(ktimer);
+}
+
+void kvm_inject_pending_timer_irqs(struct kvm_vcpu *vcpu)
+{
+	struct kvm_timer *ktimer, *n;
+	ktime_t now = ktime_get();
+
+	list_for_each_entry_safe(ktimer, n, &vcpu->arch.timers, vcpu_timer) {
+		ktime_t expire;
+
+		if (!ktimer->can_inject)
+			continue;
+
+		expire = kvm_timer_next_event(ktimer);
+		if (ktime_to_ns(now) < ktime_to_ns(expire))
+			continue;
+
+		ktimer->can_inject = false;
+		ktimer->ops->inject(ktimer);
+		if (!ktimer->periodic)
+			list_del_init(&ktimer->vcpu_timer);
+	}
+}
+
+/* arm/disarm exit */
+
+static enum hrtimer_restart kvm_timer_fn(struct hrtimer *data)
+{
+	return HRTIMER_NORESTART;
+}
+
+void kvm_vcpu_init_armed_exit(struct kvm_vcpu *vcpu)
+{
+	hrtimer_init(&vcpu->arch.exit_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
+	vcpu->arch.exit_timer.function = kvm_timer_fn;
+}
+
+void kvm_vcpu_arm_exit(struct kvm_vcpu *vcpu)
+{
+	ktime_t expire;
+	ktime_t now;
+	struct kvm_timer *ktimer = kvm_vcpu_injectable_timer_event(vcpu);
+
+	if (!ktimer)
+		return;
+
+	now = ktime_get();
+	expire = kvm_timer_next_event(ktimer);
+
+	if (expire.tv64 != KTIME_MAX)
+		hrtimer_start(&vcpu->arch.exit_timer, expire, HRTIMER_MODE_ABS);
+}
+
+void kvm_vcpu_cleanup_timer(struct kvm_vcpu *vcpu)
+{
+	hrtimer_cancel(&vcpu->arch.exit_timer);
+}
+
+
Index: kvm-new/arch/x86/kvm/irq.h
===================================================================
--- kvm-new.orig/arch/x86/kvm/irq.h
+++ kvm-new/arch/x86/kvm/irq.h
@@ -94,9 +94,5 @@ void kvm_pic_reset(struct kvm_kpic_state
 void kvm_inject_pending_timer_irqs(struct kvm_vcpu *vcpu);
 void kvm_inject_apic_timer_irqs(struct kvm_vcpu *vcpu);
 void kvm_apic_nmi_wd_deliver(struct kvm_vcpu *vcpu);
-void __kvm_migrate_timers(struct kvm_vcpu *vcpu);
-
-int pit_has_pending_timer(struct kvm_vcpu *vcpu);
-int apic_has_pending_timer(struct kvm_vcpu *vcpu);

 #endif
Index: kvm-new/include/linux/kvm_host.h
===================================================================
--- kvm-new.orig/include/linux/kvm_host.h
+++ kvm-new/include/linux/kvm_host.h
@@ -30,15 +30,14 @@
  * vcpu->requests bit members
  */
 #define KVM_REQ_TLB_FLUSH          0
-#define KVM_REQ_MIGRATE_TIMER      1
-#define KVM_REQ_REPORT_TPR_ACCESS  2
-#define KVM_REQ_MMU_RELOAD         3
-#define KVM_REQ_TRIPLE_FAULT       4
-#define KVM_REQ_PENDING_TIMER      5
-#define KVM_REQ_UNHALT             6
-#define KVM_REQ_MMU_SYNC           7
-#define KVM_REQ_KVMCLOCK_UPDATE    8
-#define KVM_REQ_KICK               9
+#define KVM_REQ_REPORT_TPR_ACCESS  1
+#define KVM_REQ_MMU_RELOAD         2
+#define KVM_REQ_TRIPLE_FAULT       3
+#define KVM_REQ_PENDING_TIMER      4
+#define KVM_REQ_UNHALT             5
+#define KVM_REQ_MMU_SYNC           6
+#define KVM_REQ_KVMCLOCK_UPDATE    7
+#define KVM_REQ_KICK               8

 #define KVM_USERSPACE_IRQ_SOURCE_ID 0

@@ -469,11 +468,6 @@ static inline hpa_t pfn_to_hpa(pfn_t pfn
 	return (hpa_t)pfn << PAGE_SHIFT;
 }

-static inline void kvm_migrate_timers(struct kvm_vcpu *vcpu)
-{
-	set_bit(KVM_REQ_MIGRATE_TIMER, &vcpu->requests);
-}
-
 enum kvm_stat_kind {
 	KVM_STAT_VM,
 	KVM_STAT_VCPU,