[timekeeping,32/35] Entry conditions for TSC trapping

Message ID 1282291669-25709-33-git-send-email-zamsden@redhat.com

Commit Message

Zachary Amsden Aug. 20, 2010, 8:07 a.m. UTC

Patch

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 64569b0..950537c 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -413,6 +413,8 @@  struct kvm_arch {
 	u32 virtual_tsc_khz;
 	u32 virtual_tsc_mult;
 	s8 virtual_tsc_shift;
+	s64 tsc_bump;
+	s64 last_tsc_bump_ns;
 
 	struct kvm_xen_hvm_config xen_hvm_config;
 
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 33cb0f0..86f182a 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -917,13 +917,48 @@  static void kvm_arch_set_tsc_khz(struct kvm *kvm, u32 this_tsc_khz)
 
 static u64 compute_guest_tsc(struct kvm_vcpu *vcpu, s64 kernel_ns)
 {
+	struct kvm_arch *arch = &vcpu->kvm->arch;
 	u64 tsc = pvclock_scale_delta(kernel_ns-vcpu->arch.last_tsc_nsec,
-				      vcpu->kvm->arch.virtual_tsc_mult,
-				      vcpu->kvm->arch.virtual_tsc_shift);
+				      arch->virtual_tsc_mult,
+				      arch->virtual_tsc_shift);
 	tsc += vcpu->arch.last_tsc_write;
+	if (unlikely(arch->tsc_bump)) {
+		s64 bump;
+
+		/*
+	 * Ugh.  There was a TSC bump.  See how many cycles have
+	 * elapsed since the last read, take them off the bump, but
+		 * ensure TSC advances by at least one.  We're serialized
+		 * by the TSC write lock until the bump is gone.
+		 */
+		spin_lock(&arch->tsc_write_lock);
+		bump = pvclock_scale_delta(kernel_ns - arch->last_tsc_bump_ns,
+					   arch->virtual_tsc_mult,
+					   arch->virtual_tsc_shift);
+		bump = arch->tsc_bump - bump + 1;
+		if (bump < 0) {
+			pr_debug("kvm: vpu%d zeroed TSC bump\n", vcpu->vcpu_id);
+			bump = 0;
+		}
+		arch->tsc_bump = bump;
+		arch->last_tsc_bump_ns = kernel_ns;
+		spin_unlock(&arch->tsc_write_lock);
+
+		tsc += bump;
+	}
 	return tsc;
 }
 
+static void bump_guest_tsc(struct kvm_vcpu *vcpu, s64 bump, s64 kernel_ns)
+{
+	struct kvm *kvm = vcpu->kvm;
+	spin_lock(&kvm->arch.tsc_write_lock);
+	kvm->arch.tsc_bump += bump;
+	kvm->arch.last_tsc_bump_ns = kernel_ns;
+	spin_unlock(&kvm->arch.tsc_write_lock);
+	pr_debug("kvm: vcpu%d bumped TSC by %lld\n", vcpu->vcpu_id, bump);
+}
+
 void kvm_write_tsc(struct kvm_vcpu *vcpu, u64 data)
 {
 	struct kvm *kvm = vcpu->kvm;
@@ -996,7 +1031,7 @@  static int kvm_guest_time_update(struct kvm_vcpu *v)
 	void *shared_kaddr;
 	unsigned long this_tsc_khz;
 	s64 kernel_ns, max_kernel_ns;
-	u64 tsc_timestamp;
+	u64 tsc_timestamp, tsc;
 	bool kvmclock = (vcpu->time_page != NULL);
 	bool catchup = !kvmclock;
 
@@ -1035,7 +1070,7 @@  static int kvm_guest_time_update(struct kvm_vcpu *v)
 	}
 
 	if (catchup) {
-		u64 tsc = compute_guest_tsc(v, kernel_ns);
+		tsc = compute_guest_tsc(v, kernel_ns);
 		if (tsc > tsc_timestamp)
 			kvm_x86_ops->adjust_tsc_offset(v, tsc-tsc_timestamp);
 	}
@@ -1048,8 +1083,21 @@  static int kvm_guest_time_update(struct kvm_vcpu *v)
 	if (!kvmclock) {
 		/* Now, see if we need to switch into trap mode */
 		if ((vcpu->tsc_mode == TSC_MODE_TRAP || vcpu->tsc_overrun) &&
-		    !vcpu->tsc_trapping)
+		    !vcpu->tsc_trapping) {
+			/*
+			 * Check for the (hopefully) unlikely event of the
+			 * computed virtual TSC being before the TSC we were
+			 * passing through in hardware.  This can happen if
+			 * the kernel has miscomputed tsc_khz, we miss an
+			 * overrun condition, or via bad SMP calibration.
+			 * If this is the case, we must add a bump to the
+			 * virtual TSC; this suck.
+			 * virtual TSC; this sucks.
+			 */
+			if (unlikely(tsc < vcpu->last_guest_tsc))
+				bump_guest_tsc(v, vcpu->last_guest_tsc - tsc,
+					       kernel_ns);
 			kvm_x86_ops->set_tsc_trap(v, 1);
+		}
 
 		/* If we're falling behind and not trapping, re-trigger */
 		if (!vcpu->tsc_trapping &&
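
For reference, below is a small standalone sketch (not part of the patch) of the catch-up arithmetic that compute_guest_tsc() and bump_guest_tsc() implement: elapsed guest cycles are derived from elapsed nanoseconds with a mult/shift pair, and an outstanding bump is paid down by the cycles that have elapsed since it was recorded, while the reported TSC still advances by at least one per read. The scale_delta() helper, the struct layout, and the 1-cycle-per-nanosecond mult/shift values are simplifying assumptions for illustration; they stand in for pvclock_scale_delta() and the real kvm_arch/vcpu state rather than reproducing them.

/*
 * Illustrative user-space model of the TSC bump decay above; the scaling
 * math is deliberately simplified and is not the kernel's pvclock code.
 */
#include <stdint.h>
#include <stdio.h>

/* Simplified stand-in for pvclock_scale_delta(): ns -> cycles via mult/shift. */
static uint64_t scale_delta(int64_t delta_ns, uint32_t mult, int8_t shift)
{
	__int128 v = (__int128)delta_ns * mult;

	return (uint64_t)(shift < 0 ? v >> -shift : v << shift);
}

struct guest_tsc_state {
	uint64_t last_tsc_write;	/* TSC value at the last guest TSC write */
	int64_t  last_tsc_nsec;		/* kernel time of that write */
	uint32_t mult;			/* stand-in for virtual_tsc_mult */
	int8_t   shift;			/* stand-in for virtual_tsc_shift */
	int64_t  bump;			/* outstanding forward correction */
	int64_t  last_bump_ns;		/* when the bump was last updated */
};

/* Mirrors compute_guest_tsc(): base virtual TSC plus any decaying bump. */
static uint64_t guest_tsc_read(struct guest_tsc_state *s, int64_t kernel_ns)
{
	uint64_t tsc = s->last_tsc_write +
		scale_delta(kernel_ns - s->last_tsc_nsec, s->mult, s->shift);

	if (s->bump) {
		/*
		 * Cycles elapsed since the bump was recorded pay it off,
		 * but the guest must still see the TSC advance by >= 1.
		 */
		int64_t paid = scale_delta(kernel_ns - s->last_bump_ns,
					   s->mult, s->shift);
		int64_t bump = s->bump - paid + 1;

		if (bump < 0)
			bump = 0;
		s->bump = bump;
		s->last_bump_ns = kernel_ns;
		tsc += bump;
	}
	return tsc;
}

int main(void)
{
	/* 1 cycle per ns; the hardware TSC was seen 500 cycles ahead at t=0. */
	struct guest_tsc_state s = {
		.last_tsc_write = 1000000, .last_tsc_nsec = 0,
		.mult = 1, .shift = 0,
		.bump = 500, .last_bump_ns = 0,
	};

	for (int64_t ns = 100; ns <= 700; ns += 200) {
		uint64_t tsc = guest_tsc_read(&s, ns);

		printf("t=%4lldns  guest TSC=%llu  (bump left %lld)\n",
		       (long long)ns, (unsigned long long)tsc,
		       (long long)s.bump);
	}
	return 0;
}

With these example numbers the guest TSC creeps forward by one cycle per read while the 500-cycle bump drains (1000501, 1000502, 1000503), then returns to the nominal rate once the bump reaches zero (1000700), without ever moving backwards.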