@@ -386,6 +386,7 @@ struct kvm_vcpu_arch {
u64 last_kernel_ns;
u64 last_tsc_nsec;
u64 last_tsc_write;
+ u64 tsc_offset_adjustment;
bool tsc_catchup;
bool nmi_pending;
@@ -2042,12 +2042,20 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
}
kvm_x86_ops->vcpu_load(vcpu, cpu);
+
+ /* Apply any externally detected TSC adjustments (due to suspend) */
+ if (unlikely(vcpu->arch.tsc_offset_adjustment)) {
+ kvm_x86_ops->adjust_tsc_offset(vcpu,
+ vcpu->arch.tsc_offset_adjustment);
+ vcpu->arch.tsc_offset_adjustment = 0;
+ kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu);
+ }
if (unlikely(vcpu->cpu != cpu) || check_tsc_unstable()) {
/* Make sure TSC doesn't go backwards */
s64 tsc_delta = !vcpu->arch.last_host_tsc ? 0 :
native_read_tsc() - vcpu->arch.last_host_tsc;
if (tsc_delta < 0)
- mark_tsc_unstable("KVM discovered backwards TSC");
+ WARN_ON(!check_tsc_unstable());
if (check_tsc_unstable()) {
kvm_x86_ops->adjust_tsc_offset(vcpu, -tsc_delta);
vcpu->arch.tsc_catchup = 1;
@@ -5778,14 +5786,60 @@ int kvm_arch_hardware_enable(void *garbage)
{
struct kvm *kvm;
struct kvm_vcpu *vcpu;
- int i;
+ int i, ret;
+ u64 local_tsc;
+ u64 max_tsc = 0;
+ bool stable, backwards_tsc = false;
kvm_shared_msr_cpu_online();
- list_for_each_entry(kvm, &vm_list, vm_list)
- kvm_for_each_vcpu(i, vcpu, kvm)
- if (vcpu->cpu == smp_processor_id())
+ local_tsc = native_read_tsc();
+ stable = !check_tsc_unstable();
+ ret = kvm_x86_ops->hardware_enable(garbage);
+ if (ret)
+ return ret;
+
+ list_for_each_entry(kvm, &vm_list, vm_list) {
+ kvm_for_each_vcpu(i, vcpu, kvm) {
+ if (!stable && vcpu->cpu == smp_processor_id())
kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu);
- return kvm_x86_ops->hardware_enable(garbage);
+ if (stable && vcpu->arch.last_host_tsc > local_tsc) {
+ backwards_tsc = true;
+ if (vcpu->arch.last_host_tsc > max_tsc)
+ max_tsc = vcpu->arch.last_host_tsc;
+ }
+ }
+ }
+
+ /*
+ * Sometimes, even reliable TSCs go backwards. This happens
+ * on platforms that reset the TSC during suspend or hibernate
+ * but maintain synchronization across CPUs. We must compensate.
+ * Unfortunately, we can't bring the TSCs fully up to date
+ * with real time. The reason is that we aren't yet far
+ * enough into CPU bringup that we know how much real time
+ * has actually elapsed; our helper function, get_kernel_ns(),
+ * will be using boot variables that haven't been updated yet.
+ * We simply find the maximum observed TSC above, then record
+ * the adjustment to TSC in each VCPU. When the VCPU later
+ * gets loaded, the adjustment will be applied. Note that we
+ * accumulate adjustments, in case multiple suspend cycles
+ * happen before the VCPU gets a chance to run again.
+ *
+ * Note that unreliable TSCs are already compensated for by the
+ * logic in vcpu_load, which puts the TSC into catchup mode.
+ * This catches all VCPUs up to real time, but cannot
+ * guarantee synchronization.
+ */
+ if (backwards_tsc) {
+ u64 delta_cyc = max_tsc - local_tsc;
+ list_for_each_entry(kvm, &vm_list, vm_list)
+ kvm_for_each_vcpu(i, vcpu, kvm) {
+ vcpu->arch.tsc_offset_adjustment += delta_cyc;
+ vcpu->arch.last_host_tsc = 0;
+ }
+ }
+
+ return 0;
}
void kvm_arch_hardware_disable(void *garbage)
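
To make the interaction between the two hunks above concrete, here is a small
standalone sketch of the accumulate-and-apply scheme. It is a simplified
userspace model under assumed names, not kernel code: fake_vcpu,
resume_with_tsc_reset() and vcpu_load_apply() are hypothetical stand-ins for
vcpu->arch, the backwards-TSC scan in kvm_arch_hardware_enable(), and the
adjustment folded in via kvm_x86_ops->adjust_tsc_offset() at the top of
kvm_arch_vcpu_load().

/*
 * Simplified userspace model of the scheme above. All names here
 * (fake_vcpu, resume_with_tsc_reset, vcpu_load_apply) are hypothetical;
 * a plain tsc_offset field stands in for kvm_x86_ops->adjust_tsc_offset().
 */
#include <stdio.h>
#include <stdint.h>

struct fake_vcpu {
	uint64_t last_host_tsc;		/* host TSC when the VCPU was last put */
	uint64_t tsc_offset_adjustment;	/* pending adjustment, applied at load */
	uint64_t tsc_offset;		/* guest TSC = host TSC + tsc_offset */
};

/* Models the backwards-TSC scan in kvm_arch_hardware_enable(). */
static void resume_with_tsc_reset(struct fake_vcpu *vcpus, int n,
				  uint64_t local_tsc)
{
	uint64_t max_tsc = 0;
	int i, backwards = 0;

	for (i = 0; i < n; i++) {
		if (vcpus[i].last_host_tsc > local_tsc) {
			backwards = 1;
			if (vcpus[i].last_host_tsc > max_tsc)
				max_tsc = vcpus[i].last_host_tsc;
		}
	}
	if (backwards) {
		uint64_t delta_cyc = max_tsc - local_tsc;

		for (i = 0; i < n; i++) {
			/* Accumulate across repeated suspend cycles. */
			vcpus[i].tsc_offset_adjustment += delta_cyc;
			vcpus[i].last_host_tsc = 0;
		}
	}
}

/* Models the new block at the top of kvm_arch_vcpu_load(). */
static void vcpu_load_apply(struct fake_vcpu *vcpu)
{
	if (vcpu->tsc_offset_adjustment) {
		vcpu->tsc_offset += vcpu->tsc_offset_adjustment;
		vcpu->tsc_offset_adjustment = 0;
	}
}

int main(void)
{
	struct fake_vcpu vcpu = { .last_host_tsc = 1000000 };

	/* Two suspend cycles; the host TSC restarts near zero each time. */
	resume_with_tsc_reset(&vcpu, 1, 100);	/* adjustment += 999900 */
	vcpu.last_host_tsc = 5000;		/* VCPU ran and was put again */
	resume_with_tsc_reset(&vcpu, 1, 200);	/* adjustment += 4800 */

	vcpu_load_apply(&vcpu);
	printf("accumulated offset: %llu cycles\n",
	       (unsigned long long)vcpu.tsc_offset);	/* prints 1004700 */
	return 0;
}

Running this prints 1004700 cycles, the sum of the two per-resume deltas
(999900 + 4800), which is what the delta_cyc accumulation above produces; the
real patch additionally requests KVM_REQ_CLOCK_UPDATE so the guest clock is
refreshed after the offset changes.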