Message ID | 1386502419-26614-2-git-send-email-vrozenfe@redhat.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Il 08/12/2013 12:33, Vadim Rozenfeld ha scritto: > Signed-off: Peter Lieven <pl@dlh.net> > Signed-off: Gleb Natapov <gleb@redhat.com> > Signed-off: Vadim Rozenfeld <vrozenfe@redhat.com> > > v1 -> v2 > 1. mark TSC page dirty as suggested by > Eric Northup <digitaleric@google.com> and Gleb > 2. disable local irq when calling get_kernel_ns, > as it was done by Peter Lieven <pl@dlhnet.de> > 3. move check for TSC page enable from second patch > to this one. > > --- > arch/x86/include/asm/kvm_host.h | 2 ++ > arch/x86/include/uapi/asm/hyperv.h | 13 +++++++++++++ > arch/x86/kvm/x86.c | 39 +++++++++++++++++++++++++++++++++++++- > include/uapi/linux/kvm.h | 1 + > 4 files changed, 54 insertions(+), 1 deletion(-) > > diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h > index ae5d783..2fd0753 100644 > --- a/arch/x86/include/asm/kvm_host.h > +++ b/arch/x86/include/asm/kvm_host.h > @@ -605,6 +605,8 @@ struct kvm_arch { > /* fields used by HYPER-V emulation */ > u64 hv_guest_os_id; > u64 hv_hypercall; > + u64 hv_ref_count; > + u64 hv_tsc_page; > > #ifdef CONFIG_KVM_MMU_AUDIT > int audit_point; > diff --git a/arch/x86/include/uapi/asm/hyperv.h b/arch/x86/include/uapi/asm/hyperv.h > index b8f1c01..462efe7 100644 > --- a/arch/x86/include/uapi/asm/hyperv.h > +++ b/arch/x86/include/uapi/asm/hyperv.h > @@ -28,6 +28,9 @@ > /* Partition Reference Counter (HV_X64_MSR_TIME_REF_COUNT) available*/ > #define HV_X64_MSR_TIME_REF_COUNT_AVAILABLE (1 << 1) > > +/* A partition's reference time stamp counter (TSC) page */ > +#define HV_X64_MSR_REFERENCE_TSC 0x40000021 > + > /* > * There is a single feature flag that signifies the presence of the MSR > * that can be used to retrieve both the local APIC Timer frequency as > @@ -198,6 +201,9 @@ > #define HV_X64_MSR_APIC_ASSIST_PAGE_ADDRESS_MASK \ > (~((1ull << HV_X64_MSR_APIC_ASSIST_PAGE_ADDRESS_SHIFT) - 1)) > > +#define HV_X64_MSR_TSC_REFERENCE_ENABLE 0x00000001 > +#define HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT 12 > + > #define HV_PROCESSOR_POWER_STATE_C0 0 > #define HV_PROCESSOR_POWER_STATE_C1 1 > #define HV_PROCESSOR_POWER_STATE_C2 2 > @@ -210,4 +216,11 @@ > #define HV_STATUS_INVALID_ALIGNMENT 4 > #define HV_STATUS_INSUFFICIENT_BUFFERS 19 > > +typedef struct _HV_REFERENCE_TSC_PAGE { > + __u32 tsc_sequence; > + __u32 res1; > + __u64 tsc_scale; > + __s64 tsc_offset; > +} HV_REFERENCE_TSC_PAGE, *PHV_REFERENCE_TSC_PAGE; > + > #endif > diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c > index 21ef1ba..5e4e495a 100644 > --- a/arch/x86/kvm/x86.c > +++ b/arch/x86/kvm/x86.c > @@ -840,7 +840,7 @@ EXPORT_SYMBOL_GPL(kvm_rdpmc); > static u32 msrs_to_save[] = { > MSR_KVM_SYSTEM_TIME, MSR_KVM_WALL_CLOCK, > MSR_KVM_SYSTEM_TIME_NEW, MSR_KVM_WALL_CLOCK_NEW, > - HV_X64_MSR_GUEST_OS_ID, HV_X64_MSR_HYPERCALL, > + HV_X64_MSR_GUEST_OS_ID, HV_X64_MSR_HYPERCALL, HV_X64_MSR_TIME_REF_COUNT, You need to bump KVM_SAVE_MSRS_BEGIN. > HV_X64_MSR_APIC_ASSIST_PAGE, MSR_KVM_ASYNC_PF_EN, MSR_KVM_STEAL_TIME, > MSR_KVM_PV_EOI_EN, > MSR_IA32_SYSENTER_CS, MSR_IA32_SYSENTER_ESP, MSR_IA32_SYSENTER_EIP, > @@ -1826,6 +1826,8 @@ static bool kvm_hv_msr_partition_wide(u32 msr) > switch (msr) { > case HV_X64_MSR_GUEST_OS_ID: > case HV_X64_MSR_HYPERCALL: > + case HV_X64_MSR_REFERENCE_TSC: > + case HV_X64_MSR_TIME_REF_COUNT: > r = true; > break; > } > @@ -1865,6 +1867,29 @@ static int set_msr_hyperv_pw(struct kvm_vcpu *vcpu, u32 msr, u64 data) > if (__copy_to_user((void __user *)addr, instructions, 4)) > return 1; > kvm->arch.hv_hypercall = data; > + local_irq_disable(); > + kvm->arch.hv_ref_count = get_kernel_ns() + kvm->arch.kvmclock_offset; > + local_irq_enable(); Please add a patch that moves these four lines from KVM_GET_CLOCK and KVM_SET_CLOCK local_irq_disable(); now_ns = get_kernel_ns(); delta = user_ns.clock - now_ns; local_irq_enable(); kvm->arch.kvmclock_offset = delta; kvm_gen_update_masterclock(kvm); local_irq_disable(); now_ns = get_kernel_ns(); user_ns.clock = kvm->arch.kvmclock_offset + now_ns; local_irq_enable(); For example u64 kvm_get_clock_ns(struct kvm *) and void kvm_set_clock_ns(struct kvm *, u64). You can then use the kvm_get_clock_ns function in this patch. > + break; > + } > + case HV_X64_MSR_REFERENCE_TSC: { > + u64 gfn; > + unsigned long addr; > + HV_REFERENCE_TSC_PAGE tsc_ref; > + tsc_ref.tsc_sequence = 0; Please zero it with memset. You're leaking values from the stack to the guest. > + if (!(data & HV_X64_MSR_TSC_REFERENCE_ENABLE)) { > + kvm->arch.hv_tsc_page = data; > + break; > + } > + gfn = data >> HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT; > + addr = gfn_to_hva(kvm, data >> > + HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT); > + if (kvm_is_error_hva(addr)) > + return 1; > + if (__copy_to_user((void __user *)addr, &tsc_ref, sizeof(tsc_ref))) > + return 1; > + mark_page_dirty(kvm, gfn); > + kvm->arch.hv_tsc_page = data; > break; > } > default: > @@ -2291,6 +2316,17 @@ static int get_msr_hyperv_pw(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata) > case HV_X64_MSR_HYPERCALL: > data = kvm->arch.hv_hypercall; > break; > + case HV_X64_MSR_TIME_REF_COUNT: { > + u64 now_ns; > + local_irq_disable(); > + now_ns = get_kernel_ns() + kvm->arch.kvmclock_offset; > + data = div_u64(now_ns - kvm->arch.hv_ref_count, 100); > + local_irq_enable(); Another possible user of kvm_get_clock_ns. The patch should be good with these changes. Paolo > + break; > + } > + case HV_X64_MSR_REFERENCE_TSC: > + data = kvm->arch.hv_tsc_page; > + break; > default: > vcpu_unimpl(vcpu, "Hyper-V unhandled rdmsr: 0x%x\n", msr); > return 1; > @@ -2605,6 +2641,7 @@ int kvm_dev_ioctl_check_extension(long ext) > case KVM_CAP_ASSIGN_DEV_IRQ: > case KVM_CAP_PCI_2_3: > #endif > + case KVM_CAP_HYPERV_TIME: > r = 1; > break; > case KVM_CAP_COALESCED_MMIO: > diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h > index 902f124..686c1ca 100644 > --- a/include/uapi/linux/kvm.h > +++ b/include/uapi/linux/kvm.h > @@ -674,6 +674,7 @@ struct kvm_ppc_smmu_info { > #define KVM_CAP_ARM_EL1_32BIT 93 > #define KVM_CAP_SPAPR_MULTITCE 94 > #define KVM_CAP_EXT_EMUL_CPUID 95 > +#define KVM_CAP_HYPERV_TIME 96 > > #ifdef KVM_CAP_IRQ_ROUTING > > -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Mon, 2013-12-09 at 15:23 +0100, Paolo Bonzini wrote: > Il 08/12/2013 12:33, Vadim Rozenfeld ha scritto: > > Signed-off: Peter Lieven <pl@dlh.net> > > Signed-off: Gleb Natapov <gleb@redhat.com> > > Signed-off: Vadim Rozenfeld <vrozenfe@redhat.com> > > > > v1 -> v2 > > 1. mark TSC page dirty as suggested by > > Eric Northup <digitaleric@google.com> and Gleb > > 2. disable local irq when calling get_kernel_ns, > > as it was done by Peter Lieven <pl@dlhnet.de> > > 3. move check for TSC page enable from second patch > > to this one. > > > > --- > > arch/x86/include/asm/kvm_host.h | 2 ++ > > arch/x86/include/uapi/asm/hyperv.h | 13 +++++++++++++ > > arch/x86/kvm/x86.c | 39 +++++++++++++++++++++++++++++++++++++- > > include/uapi/linux/kvm.h | 1 + > > 4 files changed, 54 insertions(+), 1 deletion(-) > > > > diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h > > index ae5d783..2fd0753 100644 > > --- a/arch/x86/include/asm/kvm_host.h > > +++ b/arch/x86/include/asm/kvm_host.h > > @@ -605,6 +605,8 @@ struct kvm_arch { > > /* fields used by HYPER-V emulation */ > > u64 hv_guest_os_id; > > u64 hv_hypercall; > > + u64 hv_ref_count; > > + u64 hv_tsc_page; > > > > #ifdef CONFIG_KVM_MMU_AUDIT > > int audit_point; > > diff --git a/arch/x86/include/uapi/asm/hyperv.h b/arch/x86/include/uapi/asm/hyperv.h > > index b8f1c01..462efe7 100644 > > --- a/arch/x86/include/uapi/asm/hyperv.h > > +++ b/arch/x86/include/uapi/asm/hyperv.h > > @@ -28,6 +28,9 @@ > > /* Partition Reference Counter (HV_X64_MSR_TIME_REF_COUNT) available*/ > > #define HV_X64_MSR_TIME_REF_COUNT_AVAILABLE (1 << 1) > > > > +/* A partition's reference time stamp counter (TSC) page */ > > +#define HV_X64_MSR_REFERENCE_TSC 0x40000021 > > + > > /* > > * There is a single feature flag that signifies the presence of the MSR > > * that can be used to retrieve both the local APIC Timer frequency as > > @@ -198,6 +201,9 @@ > > #define HV_X64_MSR_APIC_ASSIST_PAGE_ADDRESS_MASK \ > > (~((1ull << HV_X64_MSR_APIC_ASSIST_PAGE_ADDRESS_SHIFT) - 1)) > > > > +#define HV_X64_MSR_TSC_REFERENCE_ENABLE 0x00000001 > > +#define HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT 12 > > + > > #define HV_PROCESSOR_POWER_STATE_C0 0 > > #define HV_PROCESSOR_POWER_STATE_C1 1 > > #define HV_PROCESSOR_POWER_STATE_C2 2 > > @@ -210,4 +216,11 @@ > > #define HV_STATUS_INVALID_ALIGNMENT 4 > > #define HV_STATUS_INSUFFICIENT_BUFFERS 19 > > > > +typedef struct _HV_REFERENCE_TSC_PAGE { > > + __u32 tsc_sequence; > > + __u32 res1; > > + __u64 tsc_scale; > > + __s64 tsc_offset; > > +} HV_REFERENCE_TSC_PAGE, *PHV_REFERENCE_TSC_PAGE; > > + > > #endif > > diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c > > index 21ef1ba..5e4e495a 100644 > > --- a/arch/x86/kvm/x86.c > > +++ b/arch/x86/kvm/x86.c > > @@ -840,7 +840,7 @@ EXPORT_SYMBOL_GPL(kvm_rdpmc); > > static u32 msrs_to_save[] = { > > MSR_KVM_SYSTEM_TIME, MSR_KVM_WALL_CLOCK, > > MSR_KVM_SYSTEM_TIME_NEW, MSR_KVM_WALL_CLOCK_NEW, > > - HV_X64_MSR_GUEST_OS_ID, HV_X64_MSR_HYPERCALL, > > + HV_X64_MSR_GUEST_OS_ID, HV_X64_MSR_HYPERCALL, HV_X64_MSR_TIME_REF_COUNT, > > You need to bump KVM_SAVE_MSRS_BEGIN. > > > HV_X64_MSR_APIC_ASSIST_PAGE, MSR_KVM_ASYNC_PF_EN, MSR_KVM_STEAL_TIME, > > MSR_KVM_PV_EOI_EN, > > MSR_IA32_SYSENTER_CS, MSR_IA32_SYSENTER_ESP, MSR_IA32_SYSENTER_EIP, > > @@ -1826,6 +1826,8 @@ static bool kvm_hv_msr_partition_wide(u32 msr) > > switch (msr) { > > case HV_X64_MSR_GUEST_OS_ID: > > case HV_X64_MSR_HYPERCALL: > > + case HV_X64_MSR_REFERENCE_TSC: > > + case HV_X64_MSR_TIME_REF_COUNT: > > r = true; > > break; > > } > > @@ -1865,6 +1867,29 @@ static int set_msr_hyperv_pw(struct kvm_vcpu *vcpu, u32 msr, u64 data) > > if (__copy_to_user((void __user *)addr, instructions, 4)) > > return 1; > > kvm->arch.hv_hypercall = data; > > + local_irq_disable(); > > + kvm->arch.hv_ref_count = get_kernel_ns() + kvm->arch.kvmclock_offset; > > + local_irq_enable(); > > Please add a patch that moves these four lines from KVM_GET_CLOCK and > KVM_SET_CLOCK > > local_irq_disable(); > now_ns = get_kernel_ns(); > delta = user_ns.clock - now_ns; > local_irq_enable(); > kvm->arch.kvmclock_offset = delta; > kvm_gen_update_masterclock(kvm); > > local_irq_disable(); > now_ns = get_kernel_ns(); > user_ns.clock = kvm->arch.kvmclock_offset + now_ns; > local_irq_enable(); > > For example u64 kvm_get_clock_ns(struct kvm *) and void > kvm_set_clock_ns(struct kvm *, u64). You can then use the > kvm_get_clock_ns function in this patch. > OK. > > + break; > > + } > > + case HV_X64_MSR_REFERENCE_TSC: { > > + u64 gfn; > > + unsigned long addr; > > + HV_REFERENCE_TSC_PAGE tsc_ref; > > + tsc_ref.tsc_sequence = 0; > > Please zero it with memset. You're leaking values from the stack to the > guest. I can do it, but it is probably not necessary, mostly because guest allocates one page from nonpaged pool and uses it exclusively for vTSC purpose only, and host only interested in three values, located at the beginning of this page. > > > + if (!(data & HV_X64_MSR_TSC_REFERENCE_ENABLE)) { > > + kvm->arch.hv_tsc_page = data; > > + break; > > + } > > + gfn = data >> HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT; > > + addr = gfn_to_hva(kvm, data >> > > + HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT); > > + if (kvm_is_error_hva(addr)) > > + return 1; > > + if (__copy_to_user((void __user *)addr, &tsc_ref, sizeof(tsc_ref))) > > + return 1; > > + mark_page_dirty(kvm, gfn); > > + kvm->arch.hv_tsc_page = data; > > break; > > } > > default: > > @@ -2291,6 +2316,17 @@ static int get_msr_hyperv_pw(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata) > > case HV_X64_MSR_HYPERCALL: > > data = kvm->arch.hv_hypercall; > > break; > > + case HV_X64_MSR_TIME_REF_COUNT: { > > + u64 now_ns; > > + local_irq_disable(); > > + now_ns = get_kernel_ns() + kvm->arch.kvmclock_offset; > > + data = div_u64(now_ns - kvm->arch.hv_ref_count, 100); > > + local_irq_enable(); > > Another possible user of kvm_get_clock_ns. > > The patch should be good with these changes. Thanks, Vadim. > > Paolo > > > + break; > > + } > > + case HV_X64_MSR_REFERENCE_TSC: > > + data = kvm->arch.hv_tsc_page; > > + break; > > default: > > vcpu_unimpl(vcpu, "Hyper-V unhandled rdmsr: 0x%x\n", msr); > > return 1; > > @@ -2605,6 +2641,7 @@ int kvm_dev_ioctl_check_extension(long ext) > > case KVM_CAP_ASSIGN_DEV_IRQ: > > case KVM_CAP_PCI_2_3: > > #endif > > + case KVM_CAP_HYPERV_TIME: > > r = 1; > > break; > > case KVM_CAP_COALESCED_MMIO: > > diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h > > index 902f124..686c1ca 100644 > > --- a/include/uapi/linux/kvm.h > > +++ b/include/uapi/linux/kvm.h > > @@ -674,6 +674,7 @@ struct kvm_ppc_smmu_info { > > #define KVM_CAP_ARM_EL1_32BIT 93 > > #define KVM_CAP_SPAPR_MULTITCE 94 > > #define KVM_CAP_EXT_EMUL_CPUID 95 > > +#define KVM_CAP_HYPERV_TIME 96 > > > > #ifdef KVM_CAP_IRQ_ROUTING > > > > > -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Sun, Dec 08, 2013 at 10:33:38PM +1100, Vadim Rozenfeld wrote: > Signed-off: Peter Lieven <pl@dlh.net> > Signed-off: Gleb Natapov <gleb@redhat.com> > Signed-off: Vadim Rozenfeld <vrozenfe@redhat.com> > > v1 -> v2 > 1. mark TSC page dirty as suggested by > Eric Northup <digitaleric@google.com> and Gleb > 2. disable local irq when calling get_kernel_ns, > as it was done by Peter Lieven <pl@dlhnet.de> > 3. move check for TSC page enable from second patch > to this one. > > --- > arch/x86/include/asm/kvm_host.h | 2 ++ > arch/x86/include/uapi/asm/hyperv.h | 13 +++++++++++++ > arch/x86/kvm/x86.c | 39 +++++++++++++++++++++++++++++++++++++- > include/uapi/linux/kvm.h | 1 + > 4 files changed, 54 insertions(+), 1 deletion(-) > > diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h > index ae5d783..2fd0753 100644 > --- a/arch/x86/include/asm/kvm_host.h > +++ b/arch/x86/include/asm/kvm_host.h > @@ -605,6 +605,8 @@ struct kvm_arch { > /* fields used by HYPER-V emulation */ > u64 hv_guest_os_id; > u64 hv_hypercall; > + u64 hv_ref_count; > + u64 hv_tsc_page; > > #ifdef CONFIG_KVM_MMU_AUDIT > int audit_point; > diff --git a/arch/x86/include/uapi/asm/hyperv.h b/arch/x86/include/uapi/asm/hyperv.h > index b8f1c01..462efe7 100644 > --- a/arch/x86/include/uapi/asm/hyperv.h > +++ b/arch/x86/include/uapi/asm/hyperv.h > @@ -28,6 +28,9 @@ > /* Partition Reference Counter (HV_X64_MSR_TIME_REF_COUNT) available*/ > #define HV_X64_MSR_TIME_REF_COUNT_AVAILABLE (1 << 1) > > +/* A partition's reference time stamp counter (TSC) page */ > +#define HV_X64_MSR_REFERENCE_TSC 0x40000021 > + > /* > * There is a single feature flag that signifies the presence of the MSR > * that can be used to retrieve both the local APIC Timer frequency as > @@ -198,6 +201,9 @@ > #define HV_X64_MSR_APIC_ASSIST_PAGE_ADDRESS_MASK \ > (~((1ull << HV_X64_MSR_APIC_ASSIST_PAGE_ADDRESS_SHIFT) - 1)) > > +#define HV_X64_MSR_TSC_REFERENCE_ENABLE 0x00000001 > +#define HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT 12 > + > #define HV_PROCESSOR_POWER_STATE_C0 0 > #define HV_PROCESSOR_POWER_STATE_C1 1 > #define HV_PROCESSOR_POWER_STATE_C2 2 > @@ -210,4 +216,11 @@ > #define HV_STATUS_INVALID_ALIGNMENT 4 > #define HV_STATUS_INSUFFICIENT_BUFFERS 19 > > +typedef struct _HV_REFERENCE_TSC_PAGE { > + __u32 tsc_sequence; > + __u32 res1; > + __u64 tsc_scale; > + __s64 tsc_offset; > +} HV_REFERENCE_TSC_PAGE, *PHV_REFERENCE_TSC_PAGE; > + > #endif > diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c > index 21ef1ba..5e4e495a 100644 > --- a/arch/x86/kvm/x86.c > +++ b/arch/x86/kvm/x86.c > @@ -840,7 +840,7 @@ EXPORT_SYMBOL_GPL(kvm_rdpmc); > static u32 msrs_to_save[] = { > MSR_KVM_SYSTEM_TIME, MSR_KVM_WALL_CLOCK, > MSR_KVM_SYSTEM_TIME_NEW, MSR_KVM_WALL_CLOCK_NEW, > - HV_X64_MSR_GUEST_OS_ID, HV_X64_MSR_HYPERCALL, > + HV_X64_MSR_GUEST_OS_ID, HV_X64_MSR_HYPERCALL, HV_X64_MSR_TIME_REF_COUNT, > HV_X64_MSR_APIC_ASSIST_PAGE, MSR_KVM_ASYNC_PF_EN, MSR_KVM_STEAL_TIME, > MSR_KVM_PV_EOI_EN, > MSR_IA32_SYSENTER_CS, MSR_IA32_SYSENTER_ESP, MSR_IA32_SYSENTER_EIP, > @@ -1826,6 +1826,8 @@ static bool kvm_hv_msr_partition_wide(u32 msr) > switch (msr) { > case HV_X64_MSR_GUEST_OS_ID: > case HV_X64_MSR_HYPERCALL: > + case HV_X64_MSR_REFERENCE_TSC: > + case HV_X64_MSR_TIME_REF_COUNT: > r = true; > break; > } > @@ -1865,6 +1867,29 @@ static int set_msr_hyperv_pw(struct kvm_vcpu *vcpu, u32 msr, u64 data) > if (__copy_to_user((void __user *)addr, instructions, 4)) > return 1; > kvm->arch.hv_hypercall = data; > + local_irq_disable(); > + kvm->arch.hv_ref_count = get_kernel_ns() + kvm->arch.kvmclock_offset; > + local_irq_enable(); Where does the docs say that HV_X64_MSR_HYPERCALL is the where the clock starts counting? No need to store kvmclock_offset in hv_ref_count? (moreover the name is weird, better name would be "hv_ref_start_time". > + break; > + } > + case HV_X64_MSR_REFERENCE_TSC: { > + u64 gfn; > + unsigned long addr; > + HV_REFERENCE_TSC_PAGE tsc_ref; > + tsc_ref.tsc_sequence = 0; > + if (!(data & HV_X64_MSR_TSC_REFERENCE_ENABLE)) { > + kvm->arch.hv_tsc_page = data; > + break; > + } > + gfn = data >> HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT; > + addr = gfn_to_hva(kvm, data >> > + HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT); > + if (kvm_is_error_hva(addr)) > + return 1; > + if (__copy_to_user((void __user *)addr, &tsc_ref, sizeof(tsc_ref))) > + return 1; > + mark_page_dirty(kvm, gfn); > + kvm->arch.hv_tsc_page = data; > break; > } > default: > @@ -2291,6 +2316,17 @@ static int get_msr_hyperv_pw(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata) > case HV_X64_MSR_HYPERCALL: > data = kvm->arch.hv_hypercall; > break; > + case HV_X64_MSR_TIME_REF_COUNT: { > + u64 now_ns; > + local_irq_disable(); > + now_ns = get_kernel_ns() + kvm->arch.kvmclock_offset; > + data = div_u64(now_ns - kvm->arch.hv_ref_count, 100); > + local_irq_enable(); No need for irq disable/enable pairs. > + break; > + } > + case HV_X64_MSR_REFERENCE_TSC: > + data = kvm->arch.hv_tsc_page; > + break; Does it return non-zero data even if not enabled? > default: > vcpu_unimpl(vcpu, "Hyper-V unhandled rdmsr: 0x%x\n", msr); > return 1; > @@ -2605,6 +2641,7 @@ int kvm_dev_ioctl_check_extension(long ext) > case KVM_CAP_ASSIGN_DEV_IRQ: > case KVM_CAP_PCI_2_3: > #endif > + case KVM_CAP_HYPERV_TIME: > r = 1; > break; > case KVM_CAP_COALESCED_MMIO: > diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h > index 902f124..686c1ca 100644 > --- a/include/uapi/linux/kvm.h > +++ b/include/uapi/linux/kvm.h > @@ -674,6 +674,7 @@ struct kvm_ppc_smmu_info { > #define KVM_CAP_ARM_EL1_32BIT 93 > #define KVM_CAP_SPAPR_MULTITCE 94 > #define KVM_CAP_EXT_EMUL_CPUID 95 > +#define KVM_CAP_HYPERV_TIME 96 > > #ifdef KVM_CAP_IRQ_ROUTING > > -- > 1.8.1.4 -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Wed, Dec 11, 2013 at 04:53:05PM -0200, Marcelo Tosatti wrote: > On Sun, Dec 08, 2013 at 10:33:38PM +1100, Vadim Rozenfeld wrote: > > Signed-off: Peter Lieven <pl@dlh.net> > > Signed-off: Gleb Natapov <gleb@redhat.com> > > Signed-off: Vadim Rozenfeld <vrozenfe@redhat.com> > > > > v1 -> v2 > > 1. mark TSC page dirty as suggested by > > Eric Northup <digitaleric@google.com> and Gleb > > 2. disable local irq when calling get_kernel_ns, > > as it was done by Peter Lieven <pl@dlhnet.de> > > 3. move check for TSC page enable from second patch > > to this one. > > > > --- > > arch/x86/include/asm/kvm_host.h | 2 ++ > > arch/x86/include/uapi/asm/hyperv.h | 13 +++++++++++++ > > arch/x86/kvm/x86.c | 39 +++++++++++++++++++++++++++++++++++++- > > include/uapi/linux/kvm.h | 1 + > > 4 files changed, 54 insertions(+), 1 deletion(-) > > > > diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h > > index ae5d783..2fd0753 100644 > > --- a/arch/x86/include/asm/kvm_host.h > > +++ b/arch/x86/include/asm/kvm_host.h > > @@ -605,6 +605,8 @@ struct kvm_arch { > > /* fields used by HYPER-V emulation */ > > u64 hv_guest_os_id; > > u64 hv_hypercall; > > + u64 hv_ref_count; > > + u64 hv_tsc_page; > > > > #ifdef CONFIG_KVM_MMU_AUDIT > > int audit_point; > > diff --git a/arch/x86/include/uapi/asm/hyperv.h b/arch/x86/include/uapi/asm/hyperv.h > > index b8f1c01..462efe7 100644 > > --- a/arch/x86/include/uapi/asm/hyperv.h > > +++ b/arch/x86/include/uapi/asm/hyperv.h > > @@ -28,6 +28,9 @@ > > /* Partition Reference Counter (HV_X64_MSR_TIME_REF_COUNT) available*/ > > #define HV_X64_MSR_TIME_REF_COUNT_AVAILABLE (1 << 1) > > > > +/* A partition's reference time stamp counter (TSC) page */ > > +#define HV_X64_MSR_REFERENCE_TSC 0x40000021 > > + > > /* > > * There is a single feature flag that signifies the presence of the MSR > > * that can be used to retrieve both the local APIC Timer frequency as > > @@ -198,6 +201,9 @@ > > #define HV_X64_MSR_APIC_ASSIST_PAGE_ADDRESS_MASK \ > > (~((1ull << HV_X64_MSR_APIC_ASSIST_PAGE_ADDRESS_SHIFT) - 1)) > > > > +#define HV_X64_MSR_TSC_REFERENCE_ENABLE 0x00000001 > > +#define HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT 12 > > + > > #define HV_PROCESSOR_POWER_STATE_C0 0 > > #define HV_PROCESSOR_POWER_STATE_C1 1 > > #define HV_PROCESSOR_POWER_STATE_C2 2 > > @@ -210,4 +216,11 @@ > > #define HV_STATUS_INVALID_ALIGNMENT 4 > > #define HV_STATUS_INSUFFICIENT_BUFFERS 19 > > > > +typedef struct _HV_REFERENCE_TSC_PAGE { > > + __u32 tsc_sequence; > > + __u32 res1; > > + __u64 tsc_scale; > > + __s64 tsc_offset; > > +} HV_REFERENCE_TSC_PAGE, *PHV_REFERENCE_TSC_PAGE; > > + > > #endif > > diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c > > index 21ef1ba..5e4e495a 100644 > > --- a/arch/x86/kvm/x86.c > > +++ b/arch/x86/kvm/x86.c > > @@ -840,7 +840,7 @@ EXPORT_SYMBOL_GPL(kvm_rdpmc); > > static u32 msrs_to_save[] = { > > MSR_KVM_SYSTEM_TIME, MSR_KVM_WALL_CLOCK, > > MSR_KVM_SYSTEM_TIME_NEW, MSR_KVM_WALL_CLOCK_NEW, > > - HV_X64_MSR_GUEST_OS_ID, HV_X64_MSR_HYPERCALL, > > + HV_X64_MSR_GUEST_OS_ID, HV_X64_MSR_HYPERCALL, HV_X64_MSR_TIME_REF_COUNT, > > HV_X64_MSR_APIC_ASSIST_PAGE, MSR_KVM_ASYNC_PF_EN, MSR_KVM_STEAL_TIME, > > MSR_KVM_PV_EOI_EN, > > MSR_IA32_SYSENTER_CS, MSR_IA32_SYSENTER_ESP, MSR_IA32_SYSENTER_EIP, > > @@ -1826,6 +1826,8 @@ static bool kvm_hv_msr_partition_wide(u32 msr) > > switch (msr) { > > case HV_X64_MSR_GUEST_OS_ID: > > case HV_X64_MSR_HYPERCALL: > > + case HV_X64_MSR_REFERENCE_TSC: > > + case HV_X64_MSR_TIME_REF_COUNT: > > r = true; > > break; > > } > > @@ -1865,6 +1867,29 @@ static int set_msr_hyperv_pw(struct kvm_vcpu *vcpu, u32 msr, u64 data) > > if (__copy_to_user((void __user *)addr, instructions, 4)) > > return 1; > > kvm->arch.hv_hypercall = data; > > + local_irq_disable(); > > + kvm->arch.hv_ref_count = get_kernel_ns() + kvm->arch.kvmclock_offset; > > + local_irq_enable(); > > Where does the docs say that HV_X64_MSR_HYPERCALL is the where the clock > starts counting? > > No need to store kvmclock_offset in hv_ref_count? (moreover > the name is weird, better name would be "hv_ref_start_time". Just add kvmclock_offset when reading the values (otherwise you have a "stale copy" of kvmclock_offset in hv_ref_count). -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Il 11/12/2013 19:59, Marcelo Tosatti ha scritto: > > Where does the docs say that HV_X64_MSR_HYPERCALL is the where the clock > > starts counting? > > > > No need to store kvmclock_offset in hv_ref_count? (moreover > > the name is weird, better name would be "hv_ref_start_time". > > Just add kvmclock_offset when reading the values (otherwise you have a > "stale copy" of kvmclock_offset in hv_ref_count). As mentioned in my review, v4 will probably read kvmclock to provide the reference time counter. So all the complexity of kvmclock_offset will be hidden behind a function. Paolo -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Am 11.12.2013 19:53, schrieb Marcelo Tosatti: > On Sun, Dec 08, 2013 at 10:33:38PM +1100, Vadim Rozenfeld wrote: >> Signed-off: Peter Lieven <pl@dlh.net> >> Signed-off: Gleb Natapov <gleb@redhat.com> >> Signed-off: Vadim Rozenfeld <vrozenfe@redhat.com> >> >> v1 -> v2 >> 1. mark TSC page dirty as suggested by >> Eric Northup <digitaleric@google.com> and Gleb >> 2. disable local irq when calling get_kernel_ns, >> as it was done by Peter Lieven <pl@dlhnet.de> >> 3. move check for TSC page enable from second patch >> to this one. >> >> --- >> arch/x86/include/asm/kvm_host.h | 2 ++ >> arch/x86/include/uapi/asm/hyperv.h | 13 +++++++++++++ >> arch/x86/kvm/x86.c | 39 +++++++++++++++++++++++++++++++++++++- >> include/uapi/linux/kvm.h | 1 + >> 4 files changed, 54 insertions(+), 1 deletion(-) >> >> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h >> index ae5d783..2fd0753 100644 >> --- a/arch/x86/include/asm/kvm_host.h >> +++ b/arch/x86/include/asm/kvm_host.h >> @@ -605,6 +605,8 @@ struct kvm_arch { >> /* fields used by HYPER-V emulation */ >> u64 hv_guest_os_id; >> u64 hv_hypercall; >> + u64 hv_ref_count; >> + u64 hv_tsc_page; >> >> #ifdef CONFIG_KVM_MMU_AUDIT >> int audit_point; >> diff --git a/arch/x86/include/uapi/asm/hyperv.h b/arch/x86/include/uapi/asm/hyperv.h >> index b8f1c01..462efe7 100644 >> --- a/arch/x86/include/uapi/asm/hyperv.h >> +++ b/arch/x86/include/uapi/asm/hyperv.h >> @@ -28,6 +28,9 @@ >> /* Partition Reference Counter (HV_X64_MSR_TIME_REF_COUNT) available*/ >> #define HV_X64_MSR_TIME_REF_COUNT_AVAILABLE (1 << 1) >> >> +/* A partition's reference time stamp counter (TSC) page */ >> +#define HV_X64_MSR_REFERENCE_TSC 0x40000021 >> + >> /* >> * There is a single feature flag that signifies the presence of the MSR >> * that can be used to retrieve both the local APIC Timer frequency as >> @@ -198,6 +201,9 @@ >> #define HV_X64_MSR_APIC_ASSIST_PAGE_ADDRESS_MASK \ >> (~((1ull << HV_X64_MSR_APIC_ASSIST_PAGE_ADDRESS_SHIFT) - 1)) >> >> +#define HV_X64_MSR_TSC_REFERENCE_ENABLE 0x00000001 >> +#define HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT 12 >> + >> #define HV_PROCESSOR_POWER_STATE_C0 0 >> #define HV_PROCESSOR_POWER_STATE_C1 1 >> #define HV_PROCESSOR_POWER_STATE_C2 2 >> @@ -210,4 +216,11 @@ >> #define HV_STATUS_INVALID_ALIGNMENT 4 >> #define HV_STATUS_INSUFFICIENT_BUFFERS 19 >> >> +typedef struct _HV_REFERENCE_TSC_PAGE { >> + __u32 tsc_sequence; >> + __u32 res1; >> + __u64 tsc_scale; >> + __s64 tsc_offset; >> +} HV_REFERENCE_TSC_PAGE, *PHV_REFERENCE_TSC_PAGE; >> + >> #endif >> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c >> index 21ef1ba..5e4e495a 100644 >> --- a/arch/x86/kvm/x86.c >> +++ b/arch/x86/kvm/x86.c >> @@ -840,7 +840,7 @@ EXPORT_SYMBOL_GPL(kvm_rdpmc); >> static u32 msrs_to_save[] = { >> MSR_KVM_SYSTEM_TIME, MSR_KVM_WALL_CLOCK, >> MSR_KVM_SYSTEM_TIME_NEW, MSR_KVM_WALL_CLOCK_NEW, >> - HV_X64_MSR_GUEST_OS_ID, HV_X64_MSR_HYPERCALL, >> + HV_X64_MSR_GUEST_OS_ID, HV_X64_MSR_HYPERCALL, HV_X64_MSR_TIME_REF_COUNT, >> HV_X64_MSR_APIC_ASSIST_PAGE, MSR_KVM_ASYNC_PF_EN, MSR_KVM_STEAL_TIME, >> MSR_KVM_PV_EOI_EN, >> MSR_IA32_SYSENTER_CS, MSR_IA32_SYSENTER_ESP, MSR_IA32_SYSENTER_EIP, >> @@ -1826,6 +1826,8 @@ static bool kvm_hv_msr_partition_wide(u32 msr) >> switch (msr) { >> case HV_X64_MSR_GUEST_OS_ID: >> case HV_X64_MSR_HYPERCALL: >> + case HV_X64_MSR_REFERENCE_TSC: >> + case HV_X64_MSR_TIME_REF_COUNT: >> r = true; >> break; >> } >> @@ -1865,6 +1867,29 @@ static int set_msr_hyperv_pw(struct kvm_vcpu *vcpu, u32 msr, u64 data) >> if (__copy_to_user((void __user *)addr, instructions, 4)) >> return 1; >> kvm->arch.hv_hypercall = data; >> + local_irq_disable(); >> + kvm->arch.hv_ref_count = get_kernel_ns() + kvm->arch.kvmclock_offset; >> + local_irq_enable(); > > Where does the docs say that HV_X64_MSR_HYPERCALL is the where the clock > starts counting? > > No need to store kvmclock_offset in hv_ref_count? (moreover > the name is weird, better name would be "hv_ref_start_time". > >> + break; >> + } >> + case HV_X64_MSR_REFERENCE_TSC: { >> + u64 gfn; >> + unsigned long addr; >> + HV_REFERENCE_TSC_PAGE tsc_ref; >> + tsc_ref.tsc_sequence = 0; >> + if (!(data & HV_X64_MSR_TSC_REFERENCE_ENABLE)) { >> + kvm->arch.hv_tsc_page = data; >> + break; >> + } >> + gfn = data >> HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT; >> + addr = gfn_to_hva(kvm, data >> >> + HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT); >> + if (kvm_is_error_hva(addr)) >> + return 1; >> + if (__copy_to_user((void __user *)addr, &tsc_ref, sizeof(tsc_ref))) >> + return 1; >> + mark_page_dirty(kvm, gfn); >> + kvm->arch.hv_tsc_page = data; >> break; >> } >> default: >> @@ -2291,6 +2316,17 @@ static int get_msr_hyperv_pw(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata) >> case HV_X64_MSR_HYPERCALL: >> data = kvm->arch.hv_hypercall; >> break; >> + case HV_X64_MSR_TIME_REF_COUNT: { >> + u64 now_ns; >> + local_irq_disable(); >> + now_ns = get_kernel_ns() + kvm->arch.kvmclock_offset; >> + data = div_u64(now_ns - kvm->arch.hv_ref_count, 100); >> + local_irq_enable(); > > No need for irq disable/enable pairs. KVM_GET_CLOCK / KVM_SET_CLOCK do the irq disable/enable pairs. What is right? Peter -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Thu, Jan 02, 2014 at 02:15:48PM +0100, Peter Lieven wrote: > Am 11.12.2013 19:53, schrieb Marcelo Tosatti: > > On Sun, Dec 08, 2013 at 10:33:38PM +1100, Vadim Rozenfeld wrote: > >> Signed-off: Peter Lieven <pl@dlh.net> > >> Signed-off: Gleb Natapov <gleb@redhat.com> > >> Signed-off: Vadim Rozenfeld <vrozenfe@redhat.com> > >> > >> v1 -> v2 > >> 1. mark TSC page dirty as suggested by > >> Eric Northup <digitaleric@google.com> and Gleb > >> 2. disable local irq when calling get_kernel_ns, > >> as it was done by Peter Lieven <pl@dlhnet.de> > >> 3. move check for TSC page enable from second patch > >> to this one. > >> > >> --- > >> arch/x86/include/asm/kvm_host.h | 2 ++ > >> arch/x86/include/uapi/asm/hyperv.h | 13 +++++++++++++ > >> arch/x86/kvm/x86.c | 39 +++++++++++++++++++++++++++++++++++++- > >> include/uapi/linux/kvm.h | 1 + > >> 4 files changed, 54 insertions(+), 1 deletion(-) > >> > >> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h > >> index ae5d783..2fd0753 100644 > >> --- a/arch/x86/include/asm/kvm_host.h > >> +++ b/arch/x86/include/asm/kvm_host.h > >> @@ -605,6 +605,8 @@ struct kvm_arch { > >> /* fields used by HYPER-V emulation */ > >> u64 hv_guest_os_id; > >> u64 hv_hypercall; > >> + u64 hv_ref_count; > >> + u64 hv_tsc_page; > >> > >> #ifdef CONFIG_KVM_MMU_AUDIT > >> int audit_point; > >> diff --git a/arch/x86/include/uapi/asm/hyperv.h b/arch/x86/include/uapi/asm/hyperv.h > >> index b8f1c01..462efe7 100644 > >> --- a/arch/x86/include/uapi/asm/hyperv.h > >> +++ b/arch/x86/include/uapi/asm/hyperv.h > >> @@ -28,6 +28,9 @@ > >> /* Partition Reference Counter (HV_X64_MSR_TIME_REF_COUNT) available*/ > >> #define HV_X64_MSR_TIME_REF_COUNT_AVAILABLE (1 << 1) > >> > >> +/* A partition's reference time stamp counter (TSC) page */ > >> +#define HV_X64_MSR_REFERENCE_TSC 0x40000021 > >> + > >> /* > >> * There is a single feature flag that signifies the presence of the MSR > >> * that can be used to retrieve both the local APIC Timer frequency as > >> @@ -198,6 +201,9 @@ > >> #define HV_X64_MSR_APIC_ASSIST_PAGE_ADDRESS_MASK \ > >> (~((1ull << HV_X64_MSR_APIC_ASSIST_PAGE_ADDRESS_SHIFT) - 1)) > >> > >> +#define HV_X64_MSR_TSC_REFERENCE_ENABLE 0x00000001 > >> +#define HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT 12 > >> + > >> #define HV_PROCESSOR_POWER_STATE_C0 0 > >> #define HV_PROCESSOR_POWER_STATE_C1 1 > >> #define HV_PROCESSOR_POWER_STATE_C2 2 > >> @@ -210,4 +216,11 @@ > >> #define HV_STATUS_INVALID_ALIGNMENT 4 > >> #define HV_STATUS_INSUFFICIENT_BUFFERS 19 > >> > >> +typedef struct _HV_REFERENCE_TSC_PAGE { > >> + __u32 tsc_sequence; > >> + __u32 res1; > >> + __u64 tsc_scale; > >> + __s64 tsc_offset; > >> +} HV_REFERENCE_TSC_PAGE, *PHV_REFERENCE_TSC_PAGE; > >> + > >> #endif > >> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c > >> index 21ef1ba..5e4e495a 100644 > >> --- a/arch/x86/kvm/x86.c > >> +++ b/arch/x86/kvm/x86.c > >> @@ -840,7 +840,7 @@ EXPORT_SYMBOL_GPL(kvm_rdpmc); > >> static u32 msrs_to_save[] = { > >> MSR_KVM_SYSTEM_TIME, MSR_KVM_WALL_CLOCK, > >> MSR_KVM_SYSTEM_TIME_NEW, MSR_KVM_WALL_CLOCK_NEW, > >> - HV_X64_MSR_GUEST_OS_ID, HV_X64_MSR_HYPERCALL, > >> + HV_X64_MSR_GUEST_OS_ID, HV_X64_MSR_HYPERCALL, HV_X64_MSR_TIME_REF_COUNT, > >> HV_X64_MSR_APIC_ASSIST_PAGE, MSR_KVM_ASYNC_PF_EN, MSR_KVM_STEAL_TIME, > >> MSR_KVM_PV_EOI_EN, > >> MSR_IA32_SYSENTER_CS, MSR_IA32_SYSENTER_ESP, MSR_IA32_SYSENTER_EIP, > >> @@ -1826,6 +1826,8 @@ static bool kvm_hv_msr_partition_wide(u32 msr) > >> switch (msr) { > >> case HV_X64_MSR_GUEST_OS_ID: > >> case HV_X64_MSR_HYPERCALL: > >> + case HV_X64_MSR_REFERENCE_TSC: > >> + case HV_X64_MSR_TIME_REF_COUNT: > >> r = true; > >> break; > >> } > >> @@ -1865,6 +1867,29 @@ static int set_msr_hyperv_pw(struct kvm_vcpu *vcpu, u32 msr, u64 data) > >> if (__copy_to_user((void __user *)addr, instructions, 4)) > >> return 1; > >> kvm->arch.hv_hypercall = data; > >> + local_irq_disable(); > >> + kvm->arch.hv_ref_count = get_kernel_ns() + kvm->arch.kvmclock_offset; > >> + local_irq_enable(); > > > > Where does the docs say that HV_X64_MSR_HYPERCALL is the where the clock > > starts counting? > > > > No need to store kvmclock_offset in hv_ref_count? (moreover > > the name is weird, better name would be "hv_ref_start_time". > > > >> + break; > >> + } > >> + case HV_X64_MSR_REFERENCE_TSC: { > >> + u64 gfn; > >> + unsigned long addr; > >> + HV_REFERENCE_TSC_PAGE tsc_ref; > >> + tsc_ref.tsc_sequence = 0; > >> + if (!(data & HV_X64_MSR_TSC_REFERENCE_ENABLE)) { > >> + kvm->arch.hv_tsc_page = data; > >> + break; > >> + } > >> + gfn = data >> HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT; > >> + addr = gfn_to_hva(kvm, data >> > >> + HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT); > >> + if (kvm_is_error_hva(addr)) > >> + return 1; > >> + if (__copy_to_user((void __user *)addr, &tsc_ref, sizeof(tsc_ref))) > >> + return 1; > >> + mark_page_dirty(kvm, gfn); > >> + kvm->arch.hv_tsc_page = data; > >> break; > >> } > >> default: > >> @@ -2291,6 +2316,17 @@ static int get_msr_hyperv_pw(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata) > >> case HV_X64_MSR_HYPERCALL: > >> data = kvm->arch.hv_hypercall; > >> break; > >> + case HV_X64_MSR_TIME_REF_COUNT: { > >> + u64 now_ns; > >> + local_irq_disable(); > >> + now_ns = get_kernel_ns() + kvm->arch.kvmclock_offset; > >> + data = div_u64(now_ns - kvm->arch.hv_ref_count, 100); > >> + local_irq_enable(); > > > > No need for irq disable/enable pairs. > > KVM_GET_CLOCK / KVM_SET_CLOCK do the irq disable/enable pairs. What is right? > > Peter local_irq_disable(); now_ns = get_kernel_ns(); delta = user_ns.clock - now_ns; local_irq_enable(); Not using irq disable/enable pairs. The subtraction is not dependant on any particular time. local_irq_disable(); now_ns = get_kernel_ns(); local_irq_enable(); delta = user_ns.clock - now_ns; Any interrupt that would affect the value of get_kernel_ns(), would have a similar effect before the interrupts are disabled. So the disable/enable pair achieves nothing in practice. It was copied from kvm_guest_time_update. -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Am 02.01.2014 14:57, schrieb Marcelo Tosatti: > On Thu, Jan 02, 2014 at 02:15:48PM +0100, Peter Lieven wrote: >> Am 11.12.2013 19:53, schrieb Marcelo Tosatti: >>> On Sun, Dec 08, 2013 at 10:33:38PM +1100, Vadim Rozenfeld wrote: >>>> Signed-off: Peter Lieven <pl@dlh.net> >>>> Signed-off: Gleb Natapov <gleb@redhat.com> >>>> Signed-off: Vadim Rozenfeld <vrozenfe@redhat.com> >>>> >>>> v1 -> v2 >>>> 1. mark TSC page dirty as suggested by >>>> Eric Northup <digitaleric@google.com> and Gleb >>>> 2. disable local irq when calling get_kernel_ns, >>>> as it was done by Peter Lieven <pl@dlhnet.de> >>>> 3. move check for TSC page enable from second patch >>>> to this one. >>>> >>>> --- >>>> arch/x86/include/asm/kvm_host.h | 2 ++ >>>> arch/x86/include/uapi/asm/hyperv.h | 13 +++++++++++++ >>>> arch/x86/kvm/x86.c | 39 +++++++++++++++++++++++++++++++++++++- >>>> include/uapi/linux/kvm.h | 1 + >>>> 4 files changed, 54 insertions(+), 1 deletion(-) >>>> >>>> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h >>>> index ae5d783..2fd0753 100644 >>>> --- a/arch/x86/include/asm/kvm_host.h >>>> +++ b/arch/x86/include/asm/kvm_host.h >>>> @@ -605,6 +605,8 @@ struct kvm_arch { >>>> /* fields used by HYPER-V emulation */ >>>> u64 hv_guest_os_id; >>>> u64 hv_hypercall; >>>> + u64 hv_ref_count; >>>> + u64 hv_tsc_page; >>>> >>>> #ifdef CONFIG_KVM_MMU_AUDIT >>>> int audit_point; >>>> diff --git a/arch/x86/include/uapi/asm/hyperv.h b/arch/x86/include/uapi/asm/hyperv.h >>>> index b8f1c01..462efe7 100644 >>>> --- a/arch/x86/include/uapi/asm/hyperv.h >>>> +++ b/arch/x86/include/uapi/asm/hyperv.h >>>> @@ -28,6 +28,9 @@ >>>> /* Partition Reference Counter (HV_X64_MSR_TIME_REF_COUNT) available*/ >>>> #define HV_X64_MSR_TIME_REF_COUNT_AVAILABLE (1 << 1) >>>> >>>> +/* A partition's reference time stamp counter (TSC) page */ >>>> +#define HV_X64_MSR_REFERENCE_TSC 0x40000021 >>>> + >>>> /* >>>> * There is a single feature flag that signifies the presence of the MSR >>>> * that can be used to retrieve both the local APIC Timer frequency as >>>> @@ -198,6 +201,9 @@ >>>> #define HV_X64_MSR_APIC_ASSIST_PAGE_ADDRESS_MASK \ >>>> (~((1ull << HV_X64_MSR_APIC_ASSIST_PAGE_ADDRESS_SHIFT) - 1)) >>>> >>>> +#define HV_X64_MSR_TSC_REFERENCE_ENABLE 0x00000001 >>>> +#define HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT 12 >>>> + >>>> #define HV_PROCESSOR_POWER_STATE_C0 0 >>>> #define HV_PROCESSOR_POWER_STATE_C1 1 >>>> #define HV_PROCESSOR_POWER_STATE_C2 2 >>>> @@ -210,4 +216,11 @@ >>>> #define HV_STATUS_INVALID_ALIGNMENT 4 >>>> #define HV_STATUS_INSUFFICIENT_BUFFERS 19 >>>> >>>> +typedef struct _HV_REFERENCE_TSC_PAGE { >>>> + __u32 tsc_sequence; >>>> + __u32 res1; >>>> + __u64 tsc_scale; >>>> + __s64 tsc_offset; >>>> +} HV_REFERENCE_TSC_PAGE, *PHV_REFERENCE_TSC_PAGE; >>>> + >>>> #endif >>>> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c >>>> index 21ef1ba..5e4e495a 100644 >>>> --- a/arch/x86/kvm/x86.c >>>> +++ b/arch/x86/kvm/x86.c >>>> @@ -840,7 +840,7 @@ EXPORT_SYMBOL_GPL(kvm_rdpmc); >>>> static u32 msrs_to_save[] = { >>>> MSR_KVM_SYSTEM_TIME, MSR_KVM_WALL_CLOCK, >>>> MSR_KVM_SYSTEM_TIME_NEW, MSR_KVM_WALL_CLOCK_NEW, >>>> - HV_X64_MSR_GUEST_OS_ID, HV_X64_MSR_HYPERCALL, >>>> + HV_X64_MSR_GUEST_OS_ID, HV_X64_MSR_HYPERCALL, HV_X64_MSR_TIME_REF_COUNT, >>>> HV_X64_MSR_APIC_ASSIST_PAGE, MSR_KVM_ASYNC_PF_EN, MSR_KVM_STEAL_TIME, >>>> MSR_KVM_PV_EOI_EN, >>>> MSR_IA32_SYSENTER_CS, MSR_IA32_SYSENTER_ESP, MSR_IA32_SYSENTER_EIP, >>>> @@ -1826,6 +1826,8 @@ static bool kvm_hv_msr_partition_wide(u32 msr) >>>> switch (msr) { >>>> case HV_X64_MSR_GUEST_OS_ID: >>>> case HV_X64_MSR_HYPERCALL: >>>> + case HV_X64_MSR_REFERENCE_TSC: >>>> + case HV_X64_MSR_TIME_REF_COUNT: >>>> r = true; >>>> break; >>>> } >>>> @@ -1865,6 +1867,29 @@ static int set_msr_hyperv_pw(struct kvm_vcpu *vcpu, u32 msr, u64 data) >>>> if (__copy_to_user((void __user *)addr, instructions, 4)) >>>> return 1; >>>> kvm->arch.hv_hypercall = data; >>>> + local_irq_disable(); >>>> + kvm->arch.hv_ref_count = get_kernel_ns() + kvm->arch.kvmclock_offset; >>>> + local_irq_enable(); >>> >>> Where does the docs say that HV_X64_MSR_HYPERCALL is the where the clock >>> starts counting? >>> >>> No need to store kvmclock_offset in hv_ref_count? (moreover >>> the name is weird, better name would be "hv_ref_start_time". >>> >>>> + break; >>>> + } >>>> + case HV_X64_MSR_REFERENCE_TSC: { >>>> + u64 gfn; >>>> + unsigned long addr; >>>> + HV_REFERENCE_TSC_PAGE tsc_ref; >>>> + tsc_ref.tsc_sequence = 0; >>>> + if (!(data & HV_X64_MSR_TSC_REFERENCE_ENABLE)) { >>>> + kvm->arch.hv_tsc_page = data; >>>> + break; >>>> + } >>>> + gfn = data >> HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT; >>>> + addr = gfn_to_hva(kvm, data >> >>>> + HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT); >>>> + if (kvm_is_error_hva(addr)) >>>> + return 1; >>>> + if (__copy_to_user((void __user *)addr, &tsc_ref, sizeof(tsc_ref))) >>>> + return 1; >>>> + mark_page_dirty(kvm, gfn); >>>> + kvm->arch.hv_tsc_page = data; >>>> break; >>>> } >>>> default: >>>> @@ -2291,6 +2316,17 @@ static int get_msr_hyperv_pw(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata) >>>> case HV_X64_MSR_HYPERCALL: >>>> data = kvm->arch.hv_hypercall; >>>> break; >>>> + case HV_X64_MSR_TIME_REF_COUNT: { >>>> + u64 now_ns; >>>> + local_irq_disable(); >>>> + now_ns = get_kernel_ns() + kvm->arch.kvmclock_offset; >>>> + data = div_u64(now_ns - kvm->arch.hv_ref_count, 100); >>>> + local_irq_enable(); >>> >>> No need for irq disable/enable pairs. >> >> KVM_GET_CLOCK / KVM_SET_CLOCK do the irq disable/enable pairs. What is right? >> >> Peter > > > local_irq_disable(); > now_ns = get_kernel_ns(); > delta = user_ns.clock - now_ns; > local_irq_enable(); > > Not using irq disable/enable pairs. The subtraction is not dependant on > any particular time. > > local_irq_disable(); > now_ns = get_kernel_ns(); > local_irq_enable(); > delta = user_ns.clock - now_ns; > > Any interrupt that would affect the value of get_kernel_ns(), would > have a similar effect before the interrupts are disabled. So the > disable/enable pair achieves nothing in practice. It was copied from > kvm_guest_time_update. > > Thanks for clarifying. What about get_kernel_ns() can't this be interrupted? Peter -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Am 11.12.2013 19:59, schrieb Marcelo Tosatti: > On Wed, Dec 11, 2013 at 04:53:05PM -0200, Marcelo Tosatti wrote: >> On Sun, Dec 08, 2013 at 10:33:38PM +1100, Vadim Rozenfeld wrote: >>> Signed-off: Peter Lieven <pl@dlh.net> >>> Signed-off: Gleb Natapov <gleb@redhat.com> >>> Signed-off: Vadim Rozenfeld <vrozenfe@redhat.com> >>> >>> v1 -> v2 >>> 1. mark TSC page dirty as suggested by >>> Eric Northup <digitaleric@google.com> and Gleb >>> 2. disable local irq when calling get_kernel_ns, >>> as it was done by Peter Lieven <pl@dlhnet.de> >>> 3. move check for TSC page enable from second patch >>> to this one. >>> >>> --- >>> arch/x86/include/asm/kvm_host.h | 2 ++ >>> arch/x86/include/uapi/asm/hyperv.h | 13 +++++++++++++ >>> arch/x86/kvm/x86.c | 39 +++++++++++++++++++++++++++++++++++++- >>> include/uapi/linux/kvm.h | 1 + >>> 4 files changed, 54 insertions(+), 1 deletion(-) >>> >>> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h >>> index ae5d783..2fd0753 100644 >>> --- a/arch/x86/include/asm/kvm_host.h >>> +++ b/arch/x86/include/asm/kvm_host.h >>> @@ -605,6 +605,8 @@ struct kvm_arch { >>> /* fields used by HYPER-V emulation */ >>> u64 hv_guest_os_id; >>> u64 hv_hypercall; >>> + u64 hv_ref_count; >>> + u64 hv_tsc_page; >>> >>> #ifdef CONFIG_KVM_MMU_AUDIT >>> int audit_point; >>> diff --git a/arch/x86/include/uapi/asm/hyperv.h b/arch/x86/include/uapi/asm/hyperv.h >>> index b8f1c01..462efe7 100644 >>> --- a/arch/x86/include/uapi/asm/hyperv.h >>> +++ b/arch/x86/include/uapi/asm/hyperv.h >>> @@ -28,6 +28,9 @@ >>> /* Partition Reference Counter (HV_X64_MSR_TIME_REF_COUNT) available*/ >>> #define HV_X64_MSR_TIME_REF_COUNT_AVAILABLE (1 << 1) >>> >>> +/* A partition's reference time stamp counter (TSC) page */ >>> +#define HV_X64_MSR_REFERENCE_TSC 0x40000021 >>> + >>> /* >>> * There is a single feature flag that signifies the presence of the MSR >>> * that can be used to retrieve both the local APIC Timer frequency as >>> @@ -198,6 +201,9 @@ >>> #define HV_X64_MSR_APIC_ASSIST_PAGE_ADDRESS_MASK \ >>> (~((1ull << HV_X64_MSR_APIC_ASSIST_PAGE_ADDRESS_SHIFT) - 1)) >>> >>> +#define HV_X64_MSR_TSC_REFERENCE_ENABLE 0x00000001 >>> +#define HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT 12 >>> + >>> #define HV_PROCESSOR_POWER_STATE_C0 0 >>> #define HV_PROCESSOR_POWER_STATE_C1 1 >>> #define HV_PROCESSOR_POWER_STATE_C2 2 >>> @@ -210,4 +216,11 @@ >>> #define HV_STATUS_INVALID_ALIGNMENT 4 >>> #define HV_STATUS_INSUFFICIENT_BUFFERS 19 >>> >>> +typedef struct _HV_REFERENCE_TSC_PAGE { >>> + __u32 tsc_sequence; >>> + __u32 res1; >>> + __u64 tsc_scale; >>> + __s64 tsc_offset; >>> +} HV_REFERENCE_TSC_PAGE, *PHV_REFERENCE_TSC_PAGE; >>> + >>> #endif >>> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c >>> index 21ef1ba..5e4e495a 100644 >>> --- a/arch/x86/kvm/x86.c >>> +++ b/arch/x86/kvm/x86.c >>> @@ -840,7 +840,7 @@ EXPORT_SYMBOL_GPL(kvm_rdpmc); >>> static u32 msrs_to_save[] = { >>> MSR_KVM_SYSTEM_TIME, MSR_KVM_WALL_CLOCK, >>> MSR_KVM_SYSTEM_TIME_NEW, MSR_KVM_WALL_CLOCK_NEW, >>> - HV_X64_MSR_GUEST_OS_ID, HV_X64_MSR_HYPERCALL, >>> + HV_X64_MSR_GUEST_OS_ID, HV_X64_MSR_HYPERCALL, HV_X64_MSR_TIME_REF_COUNT, >>> HV_X64_MSR_APIC_ASSIST_PAGE, MSR_KVM_ASYNC_PF_EN, MSR_KVM_STEAL_TIME, >>> MSR_KVM_PV_EOI_EN, >>> MSR_IA32_SYSENTER_CS, MSR_IA32_SYSENTER_ESP, MSR_IA32_SYSENTER_EIP, >>> @@ -1826,6 +1826,8 @@ static bool kvm_hv_msr_partition_wide(u32 msr) >>> switch (msr) { >>> case HV_X64_MSR_GUEST_OS_ID: >>> case HV_X64_MSR_HYPERCALL: >>> + case HV_X64_MSR_REFERENCE_TSC: >>> + case HV_X64_MSR_TIME_REF_COUNT: >>> r = true; >>> break; >>> } >>> @@ -1865,6 +1867,29 @@ static int set_msr_hyperv_pw(struct kvm_vcpu *vcpu, u32 msr, u64 data) >>> if (__copy_to_user((void __user *)addr, instructions, 4)) >>> return 1; >>> kvm->arch.hv_hypercall = data; >>> + local_irq_disable(); >>> + kvm->arch.hv_ref_count = get_kernel_ns() + kvm->arch.kvmclock_offset; >>> + local_irq_enable(); >> >> Where does the docs say that HV_X64_MSR_HYPERCALL is the where the clock >> starts counting? >> >> No need to store kvmclock_offset in hv_ref_count? (moreover >> the name is weird, better name would be "hv_ref_start_time". > > Just add kvmclock_offset when reading the values (otherwise you have a > "stale copy" of kvmclock_offset in hv_ref_count). > After some experiments I think we do no need kvm->arch.hv_ref_count at all. I was debugging some weird clockjump issues and I think the problem is that after live migration kvm->arch.hv_ref_count is initialized to 0. Depending on the uptime of the vServer when the hypercall was set up this can lead to series jumps. So I would suggest to completely drop kvm->arch.hv_ref_count. And use simply this in get_msr_hyperv_pw(). case HV_X64_MSR_TIME_REF_COUNT: { data = div_u64(get_kernel_ns() + kvm->arch.kvmclock_offset, 100); break; } It seems that get_kernel_ns() + kvm->arch.kvmclock_offset is exactly the vServer uptime in nanoseconds which starts counting at 0. Peter -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Thu, Jan 02, 2014 at 05:08:07PM +0100, Peter Lieven wrote: > > Not using irq disable/enable pairs. The subtraction is not dependant on > > any particular time. > > > > local_irq_disable(); > > now_ns = get_kernel_ns(); > > local_irq_enable(); > > delta = user_ns.clock - now_ns; > > > > Any interrupt that would affect the value of get_kernel_ns(), would > > have a similar effect before the interrupts are disabled. So the > > disable/enable pair achieves nothing in practice. It was copied from > > kvm_guest_time_update. > > > > > > Thanks for clarifying. What about get_kernel_ns() can't this be interrupted? > > Peter It can handle interrupts. Disabling interrupts there is used when a {rdtsc, system_time=get_kernel_ns()} tuple is wanted, to reduce the physical time difference between execution of rdtsc and get_kernel_ns(). -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Thu, 2014-01-02 at 17:52 +0100, Peter Lieven wrote: > Am 11.12.2013 19:59, schrieb Marcelo Tosatti: > > On Wed, Dec 11, 2013 at 04:53:05PM -0200, Marcelo Tosatti wrote: > >> On Sun, Dec 08, 2013 at 10:33:38PM +1100, Vadim Rozenfeld wrote: > >>> Signed-off: Peter Lieven <pl@dlh.net> > >>> Signed-off: Gleb Natapov <gleb@redhat.com> > >>> Signed-off: Vadim Rozenfeld <vrozenfe@redhat.com> > >>> > >>> v1 -> v2 > >>> 1. mark TSC page dirty as suggested by > >>> Eric Northup <digitaleric@google.com> and Gleb > >>> 2. disable local irq when calling get_kernel_ns, > >>> as it was done by Peter Lieven <pl@dlhnet.de> > >>> 3. move check for TSC page enable from second patch > >>> to this one. > >>> > >>> --- > >>> arch/x86/include/asm/kvm_host.h | 2 ++ > >>> arch/x86/include/uapi/asm/hyperv.h | 13 +++++++++++++ > >>> arch/x86/kvm/x86.c | 39 +++++++++++++++++++++++++++++++++++++- > >>> include/uapi/linux/kvm.h | 1 + > >>> 4 files changed, 54 insertions(+), 1 deletion(-) > >>> > >>> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h > >>> index ae5d783..2fd0753 100644 > >>> --- a/arch/x86/include/asm/kvm_host.h > >>> +++ b/arch/x86/include/asm/kvm_host.h > >>> @@ -605,6 +605,8 @@ struct kvm_arch { > >>> /* fields used by HYPER-V emulation */ > >>> u64 hv_guest_os_id; > >>> u64 hv_hypercall; > >>> + u64 hv_ref_count; > >>> + u64 hv_tsc_page; > >>> > >>> #ifdef CONFIG_KVM_MMU_AUDIT > >>> int audit_point; > >>> diff --git a/arch/x86/include/uapi/asm/hyperv.h b/arch/x86/include/uapi/asm/hyperv.h > >>> index b8f1c01..462efe7 100644 > >>> --- a/arch/x86/include/uapi/asm/hyperv.h > >>> +++ b/arch/x86/include/uapi/asm/hyperv.h > >>> @@ -28,6 +28,9 @@ > >>> /* Partition Reference Counter (HV_X64_MSR_TIME_REF_COUNT) available*/ > >>> #define HV_X64_MSR_TIME_REF_COUNT_AVAILABLE (1 << 1) > >>> > >>> +/* A partition's reference time stamp counter (TSC) page */ > >>> +#define HV_X64_MSR_REFERENCE_TSC 0x40000021 > >>> + > >>> /* > >>> * There is a single feature flag that signifies the presence of the MSR > >>> * that can be used to retrieve both the local APIC Timer frequency as > >>> @@ -198,6 +201,9 @@ > >>> #define HV_X64_MSR_APIC_ASSIST_PAGE_ADDRESS_MASK \ > >>> (~((1ull << HV_X64_MSR_APIC_ASSIST_PAGE_ADDRESS_SHIFT) - 1)) > >>> > >>> +#define HV_X64_MSR_TSC_REFERENCE_ENABLE 0x00000001 > >>> +#define HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT 12 > >>> + > >>> #define HV_PROCESSOR_POWER_STATE_C0 0 > >>> #define HV_PROCESSOR_POWER_STATE_C1 1 > >>> #define HV_PROCESSOR_POWER_STATE_C2 2 > >>> @@ -210,4 +216,11 @@ > >>> #define HV_STATUS_INVALID_ALIGNMENT 4 > >>> #define HV_STATUS_INSUFFICIENT_BUFFERS 19 > >>> > >>> +typedef struct _HV_REFERENCE_TSC_PAGE { > >>> + __u32 tsc_sequence; > >>> + __u32 res1; > >>> + __u64 tsc_scale; > >>> + __s64 tsc_offset; > >>> +} HV_REFERENCE_TSC_PAGE, *PHV_REFERENCE_TSC_PAGE; > >>> + > >>> #endif > >>> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c > >>> index 21ef1ba..5e4e495a 100644 > >>> --- a/arch/x86/kvm/x86.c > >>> +++ b/arch/x86/kvm/x86.c > >>> @@ -840,7 +840,7 @@ EXPORT_SYMBOL_GPL(kvm_rdpmc); > >>> static u32 msrs_to_save[] = { > >>> MSR_KVM_SYSTEM_TIME, MSR_KVM_WALL_CLOCK, > >>> MSR_KVM_SYSTEM_TIME_NEW, MSR_KVM_WALL_CLOCK_NEW, > >>> - HV_X64_MSR_GUEST_OS_ID, HV_X64_MSR_HYPERCALL, > >>> + HV_X64_MSR_GUEST_OS_ID, HV_X64_MSR_HYPERCALL, HV_X64_MSR_TIME_REF_COUNT, > >>> HV_X64_MSR_APIC_ASSIST_PAGE, MSR_KVM_ASYNC_PF_EN, MSR_KVM_STEAL_TIME, > >>> MSR_KVM_PV_EOI_EN, > >>> MSR_IA32_SYSENTER_CS, MSR_IA32_SYSENTER_ESP, MSR_IA32_SYSENTER_EIP, > >>> @@ -1826,6 +1826,8 @@ static bool kvm_hv_msr_partition_wide(u32 msr) > >>> switch (msr) { > >>> case HV_X64_MSR_GUEST_OS_ID: > >>> case HV_X64_MSR_HYPERCALL: > >>> + case HV_X64_MSR_REFERENCE_TSC: > >>> + case HV_X64_MSR_TIME_REF_COUNT: > >>> r = true; > >>> break; > >>> } > >>> @@ -1865,6 +1867,29 @@ static int set_msr_hyperv_pw(struct kvm_vcpu *vcpu, u32 msr, u64 data) > >>> if (__copy_to_user((void __user *)addr, instructions, 4)) > >>> return 1; > >>> kvm->arch.hv_hypercall = data; > >>> + local_irq_disable(); > >>> + kvm->arch.hv_ref_count = get_kernel_ns() + kvm->arch.kvmclock_offset; > >>> + local_irq_enable(); > >> > >> Where does the docs say that HV_X64_MSR_HYPERCALL is the where the clock > >> starts counting? > >> > >> No need to store kvmclock_offset in hv_ref_count? (moreover > >> the name is weird, better name would be "hv_ref_start_time". > > > > Just add kvmclock_offset when reading the values (otherwise you have a > > "stale copy" of kvmclock_offset in hv_ref_count). > > > > After some experiments I think we do no need kvm->arch.hv_ref_count at all. > > I was debugging some weird clockjump issues and I think the problem is that after live migration > kvm->arch.hv_ref_count is initialized to 0. Depending on the uptime of the vServer when the > hypercall was set up this can lead to series jumps. > > So I would suggest to completely drop kvm->arch.hv_ref_count. > > And use simply this in get_msr_hyperv_pw(). > > case HV_X64_MSR_TIME_REF_COUNT: { > data = div_u64(get_kernel_ns() + kvm->arch.kvmclock_offset, 100); > break; > } > Agreed. It should work as long as we rely on kvmclock_offset. Vadim. > It seems that get_kernel_ns() + kvm->arch.kvmclock_offset is exactly the vServer uptime > in nanoseconds which starts counting at 0. > > Peter > > -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Am 07.01.2014 10:36, schrieb Vadim Rozenfeld: > On Thu, 2014-01-02 at 17:52 +0100, Peter Lieven wrote: >> Am 11.12.2013 19:59, schrieb Marcelo Tosatti: >>> On Wed, Dec 11, 2013 at 04:53:05PM -0200, Marcelo Tosatti wrote: >>>> On Sun, Dec 08, 2013 at 10:33:38PM +1100, Vadim Rozenfeld wrote: >>>>> Signed-off: Peter Lieven <pl@dlh.net> >>>>> Signed-off: Gleb Natapov <gleb@redhat.com> >>>>> Signed-off: Vadim Rozenfeld <vrozenfe@redhat.com> >>>>> >>>>> v1 -> v2 >>>>> 1. mark TSC page dirty as suggested by >>>>> Eric Northup <digitaleric@google.com> and Gleb >>>>> 2. disable local irq when calling get_kernel_ns, >>>>> as it was done by Peter Lieven <pl@dlhnet.de> >>>>> 3. move check for TSC page enable from second patch >>>>> to this one. >>>>> >>>>> --- >>>>> arch/x86/include/asm/kvm_host.h | 2 ++ >>>>> arch/x86/include/uapi/asm/hyperv.h | 13 +++++++++++++ >>>>> arch/x86/kvm/x86.c | 39 +++++++++++++++++++++++++++++++++++++- >>>>> include/uapi/linux/kvm.h | 1 + >>>>> 4 files changed, 54 insertions(+), 1 deletion(-) >>>>> >>>>> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h >>>>> index ae5d783..2fd0753 100644 >>>>> --- a/arch/x86/include/asm/kvm_host.h >>>>> +++ b/arch/x86/include/asm/kvm_host.h >>>>> @@ -605,6 +605,8 @@ struct kvm_arch { >>>>> /* fields used by HYPER-V emulation */ >>>>> u64 hv_guest_os_id; >>>>> u64 hv_hypercall; >>>>> + u64 hv_ref_count; >>>>> + u64 hv_tsc_page; >>>>> >>>>> #ifdef CONFIG_KVM_MMU_AUDIT >>>>> int audit_point; >>>>> diff --git a/arch/x86/include/uapi/asm/hyperv.h b/arch/x86/include/uapi/asm/hyperv.h >>>>> index b8f1c01..462efe7 100644 >>>>> --- a/arch/x86/include/uapi/asm/hyperv.h >>>>> +++ b/arch/x86/include/uapi/asm/hyperv.h >>>>> @@ -28,6 +28,9 @@ >>>>> /* Partition Reference Counter (HV_X64_MSR_TIME_REF_COUNT) available*/ >>>>> #define HV_X64_MSR_TIME_REF_COUNT_AVAILABLE (1 << 1) >>>>> >>>>> +/* A partition's reference time stamp counter (TSC) page */ >>>>> +#define HV_X64_MSR_REFERENCE_TSC 0x40000021 >>>>> + >>>>> /* >>>>> * There is a single feature flag that signifies the presence of the MSR >>>>> * that can be used to retrieve both the local APIC Timer frequency as >>>>> @@ -198,6 +201,9 @@ >>>>> #define HV_X64_MSR_APIC_ASSIST_PAGE_ADDRESS_MASK \ >>>>> (~((1ull << HV_X64_MSR_APIC_ASSIST_PAGE_ADDRESS_SHIFT) - 1)) >>>>> >>>>> +#define HV_X64_MSR_TSC_REFERENCE_ENABLE 0x00000001 >>>>> +#define HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT 12 >>>>> + >>>>> #define HV_PROCESSOR_POWER_STATE_C0 0 >>>>> #define HV_PROCESSOR_POWER_STATE_C1 1 >>>>> #define HV_PROCESSOR_POWER_STATE_C2 2 >>>>> @@ -210,4 +216,11 @@ >>>>> #define HV_STATUS_INVALID_ALIGNMENT 4 >>>>> #define HV_STATUS_INSUFFICIENT_BUFFERS 19 >>>>> >>>>> +typedef struct _HV_REFERENCE_TSC_PAGE { >>>>> + __u32 tsc_sequence; >>>>> + __u32 res1; >>>>> + __u64 tsc_scale; >>>>> + __s64 tsc_offset; >>>>> +} HV_REFERENCE_TSC_PAGE, *PHV_REFERENCE_TSC_PAGE; >>>>> + >>>>> #endif >>>>> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c >>>>> index 21ef1ba..5e4e495a 100644 >>>>> --- a/arch/x86/kvm/x86.c >>>>> +++ b/arch/x86/kvm/x86.c >>>>> @@ -840,7 +840,7 @@ EXPORT_SYMBOL_GPL(kvm_rdpmc); >>>>> static u32 msrs_to_save[] = { >>>>> MSR_KVM_SYSTEM_TIME, MSR_KVM_WALL_CLOCK, >>>>> MSR_KVM_SYSTEM_TIME_NEW, MSR_KVM_WALL_CLOCK_NEW, >>>>> - HV_X64_MSR_GUEST_OS_ID, HV_X64_MSR_HYPERCALL, >>>>> + HV_X64_MSR_GUEST_OS_ID, HV_X64_MSR_HYPERCALL, HV_X64_MSR_TIME_REF_COUNT, >>>>> HV_X64_MSR_APIC_ASSIST_PAGE, MSR_KVM_ASYNC_PF_EN, MSR_KVM_STEAL_TIME, >>>>> MSR_KVM_PV_EOI_EN, >>>>> MSR_IA32_SYSENTER_CS, MSR_IA32_SYSENTER_ESP, MSR_IA32_SYSENTER_EIP, >>>>> @@ -1826,6 +1826,8 @@ static bool kvm_hv_msr_partition_wide(u32 msr) >>>>> switch (msr) { >>>>> case HV_X64_MSR_GUEST_OS_ID: >>>>> case HV_X64_MSR_HYPERCALL: >>>>> + case HV_X64_MSR_REFERENCE_TSC: >>>>> + case HV_X64_MSR_TIME_REF_COUNT: >>>>> r = true; >>>>> break; >>>>> } >>>>> @@ -1865,6 +1867,29 @@ static int set_msr_hyperv_pw(struct kvm_vcpu *vcpu, u32 msr, u64 data) >>>>> if (__copy_to_user((void __user *)addr, instructions, 4)) >>>>> return 1; >>>>> kvm->arch.hv_hypercall = data; >>>>> + local_irq_disable(); >>>>> + kvm->arch.hv_ref_count = get_kernel_ns() + kvm->arch.kvmclock_offset; >>>>> + local_irq_enable(); >>>> >>>> Where does the docs say that HV_X64_MSR_HYPERCALL is the where the clock >>>> starts counting? >>>> >>>> No need to store kvmclock_offset in hv_ref_count? (moreover >>>> the name is weird, better name would be "hv_ref_start_time". >>> >>> Just add kvmclock_offset when reading the values (otherwise you have a >>> "stale copy" of kvmclock_offset in hv_ref_count). >>> >> >> After some experiments I think we do no need kvm->arch.hv_ref_count at all. >> >> I was debugging some weird clockjump issues and I think the problem is that after live migration >> kvm->arch.hv_ref_count is initialized to 0. Depending on the uptime of the vServer when the >> hypercall was set up this can lead to series jumps. >> >> So I would suggest to completely drop kvm->arch.hv_ref_count. >> >> And use simply this in get_msr_hyperv_pw(). >> >> case HV_X64_MSR_TIME_REF_COUNT: { >> data = div_u64(get_kernel_ns() + kvm->arch.kvmclock_offset, 100); >> break; >> } >> > > Agreed. It should work as long as we rely on kvmclock_offset. > Vadim. I think we can rely on kvmclock_offset. Had you had a chance to do further testing during the weekend? Peter -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Tue, 2014-01-07 at 18:52 +0100, Peter Lieven wrote: > Am 07.01.2014 10:36, schrieb Vadim Rozenfeld: > > On Thu, 2014-01-02 at 17:52 +0100, Peter Lieven wrote: > >> Am 11.12.2013 19:59, schrieb Marcelo Tosatti: > >>> On Wed, Dec 11, 2013 at 04:53:05PM -0200, Marcelo Tosatti wrote: > >>>> On Sun, Dec 08, 2013 at 10:33:38PM +1100, Vadim Rozenfeld wrote: > >>>>> Signed-off: Peter Lieven <pl@dlh.net> > >>>>> Signed-off: Gleb Natapov <gleb@redhat.com> > >>>>> Signed-off: Vadim Rozenfeld <vrozenfe@redhat.com> > >>>>> > >>>>> v1 -> v2 > >>>>> 1. mark TSC page dirty as suggested by > >>>>> Eric Northup <digitaleric@google.com> and Gleb > >>>>> 2. disable local irq when calling get_kernel_ns, > >>>>> as it was done by Peter Lieven <pl@dlhnet.de> > >>>>> 3. move check for TSC page enable from second patch > >>>>> to this one. > >>>>> > >>>>> --- > >>>>> arch/x86/include/asm/kvm_host.h | 2 ++ > >>>>> arch/x86/include/uapi/asm/hyperv.h | 13 +++++++++++++ > >>>>> arch/x86/kvm/x86.c | 39 +++++++++++++++++++++++++++++++++++++- > >>>>> include/uapi/linux/kvm.h | 1 + > >>>>> 4 files changed, 54 insertions(+), 1 deletion(-) > >>>>> > >>>>> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h > >>>>> index ae5d783..2fd0753 100644 > >>>>> --- a/arch/x86/include/asm/kvm_host.h > >>>>> +++ b/arch/x86/include/asm/kvm_host.h > >>>>> @@ -605,6 +605,8 @@ struct kvm_arch { > >>>>> /* fields used by HYPER-V emulation */ > >>>>> u64 hv_guest_os_id; > >>>>> u64 hv_hypercall; > >>>>> + u64 hv_ref_count; > >>>>> + u64 hv_tsc_page; > >>>>> > >>>>> #ifdef CONFIG_KVM_MMU_AUDIT > >>>>> int audit_point; > >>>>> diff --git a/arch/x86/include/uapi/asm/hyperv.h b/arch/x86/include/uapi/asm/hyperv.h > >>>>> index b8f1c01..462efe7 100644 > >>>>> --- a/arch/x86/include/uapi/asm/hyperv.h > >>>>> +++ b/arch/x86/include/uapi/asm/hyperv.h > >>>>> @@ -28,6 +28,9 @@ > >>>>> /* Partition Reference Counter (HV_X64_MSR_TIME_REF_COUNT) available*/ > >>>>> #define HV_X64_MSR_TIME_REF_COUNT_AVAILABLE (1 << 1) > >>>>> > >>>>> +/* A partition's reference time stamp counter (TSC) page */ > >>>>> +#define HV_X64_MSR_REFERENCE_TSC 0x40000021 > >>>>> + > >>>>> /* > >>>>> * There is a single feature flag that signifies the presence of the MSR > >>>>> * that can be used to retrieve both the local APIC Timer frequency as > >>>>> @@ -198,6 +201,9 @@ > >>>>> #define HV_X64_MSR_APIC_ASSIST_PAGE_ADDRESS_MASK \ > >>>>> (~((1ull << HV_X64_MSR_APIC_ASSIST_PAGE_ADDRESS_SHIFT) - 1)) > >>>>> > >>>>> +#define HV_X64_MSR_TSC_REFERENCE_ENABLE 0x00000001 > >>>>> +#define HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT 12 > >>>>> + > >>>>> #define HV_PROCESSOR_POWER_STATE_C0 0 > >>>>> #define HV_PROCESSOR_POWER_STATE_C1 1 > >>>>> #define HV_PROCESSOR_POWER_STATE_C2 2 > >>>>> @@ -210,4 +216,11 @@ > >>>>> #define HV_STATUS_INVALID_ALIGNMENT 4 > >>>>> #define HV_STATUS_INSUFFICIENT_BUFFERS 19 > >>>>> > >>>>> +typedef struct _HV_REFERENCE_TSC_PAGE { > >>>>> + __u32 tsc_sequence; > >>>>> + __u32 res1; > >>>>> + __u64 tsc_scale; > >>>>> + __s64 tsc_offset; > >>>>> +} HV_REFERENCE_TSC_PAGE, *PHV_REFERENCE_TSC_PAGE; > >>>>> + > >>>>> #endif > >>>>> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c > >>>>> index 21ef1ba..5e4e495a 100644 > >>>>> --- a/arch/x86/kvm/x86.c > >>>>> +++ b/arch/x86/kvm/x86.c > >>>>> @@ -840,7 +840,7 @@ EXPORT_SYMBOL_GPL(kvm_rdpmc); > >>>>> static u32 msrs_to_save[] = { > >>>>> MSR_KVM_SYSTEM_TIME, MSR_KVM_WALL_CLOCK, > >>>>> MSR_KVM_SYSTEM_TIME_NEW, MSR_KVM_WALL_CLOCK_NEW, > >>>>> - HV_X64_MSR_GUEST_OS_ID, HV_X64_MSR_HYPERCALL, > >>>>> + HV_X64_MSR_GUEST_OS_ID, HV_X64_MSR_HYPERCALL, HV_X64_MSR_TIME_REF_COUNT, > >>>>> HV_X64_MSR_APIC_ASSIST_PAGE, MSR_KVM_ASYNC_PF_EN, MSR_KVM_STEAL_TIME, > >>>>> MSR_KVM_PV_EOI_EN, > >>>>> MSR_IA32_SYSENTER_CS, MSR_IA32_SYSENTER_ESP, MSR_IA32_SYSENTER_EIP, > >>>>> @@ -1826,6 +1826,8 @@ static bool kvm_hv_msr_partition_wide(u32 msr) > >>>>> switch (msr) { > >>>>> case HV_X64_MSR_GUEST_OS_ID: > >>>>> case HV_X64_MSR_HYPERCALL: > >>>>> + case HV_X64_MSR_REFERENCE_TSC: > >>>>> + case HV_X64_MSR_TIME_REF_COUNT: > >>>>> r = true; > >>>>> break; > >>>>> } > >>>>> @@ -1865,6 +1867,29 @@ static int set_msr_hyperv_pw(struct kvm_vcpu *vcpu, u32 msr, u64 data) > >>>>> if (__copy_to_user((void __user *)addr, instructions, 4)) > >>>>> return 1; > >>>>> kvm->arch.hv_hypercall = data; > >>>>> + local_irq_disable(); > >>>>> + kvm->arch.hv_ref_count = get_kernel_ns() + kvm->arch.kvmclock_offset; > >>>>> + local_irq_enable(); > >>>> > >>>> Where does the docs say that HV_X64_MSR_HYPERCALL is the where the clock > >>>> starts counting? > >>>> > >>>> No need to store kvmclock_offset in hv_ref_count? (moreover > >>>> the name is weird, better name would be "hv_ref_start_time". > >>> > >>> Just add kvmclock_offset when reading the values (otherwise you have a > >>> "stale copy" of kvmclock_offset in hv_ref_count). > >>> > >> > >> After some experiments I think we do no need kvm->arch.hv_ref_count at all. > >> > >> I was debugging some weird clockjump issues and I think the problem is that after live migration > >> kvm->arch.hv_ref_count is initialized to 0. Depending on the uptime of the vServer when the > >> hypercall was set up this can lead to series jumps. > >> > >> So I would suggest to completely drop kvm->arch.hv_ref_count. > >> > >> And use simply this in get_msr_hyperv_pw(). > >> > >> case HV_X64_MSR_TIME_REF_COUNT: { > >> data = div_u64(get_kernel_ns() + kvm->arch.kvmclock_offset, 100); > >> break; > >> } > >> > > > > Agreed. It should work as long as we rely on kvmclock_offset. > > Vadim. > > I think we can rely on kvmclock_offset. Had you had a chance to do further testing > during the weekend? Yes, and still testing. There is still some inconsistency in the value returned by QueryPerformanceCounter during migration. Vadim. > > Peter > -- > To unsubscribe from this list: send the line "unsubscribe kvm" in > the body of a message to majordomo@vger.kernel.org > More majordomo info at http://vger.kernel.org/majordomo-info.html -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On 08.01.2014 10:40, Vadim Rozenfeld wrote: > On Tue, 2014-01-07 at 18:52 +0100, Peter Lieven wrote: >> Am 07.01.2014 10:36, schrieb Vadim Rozenfeld: >>> On Thu, 2014-01-02 at 17:52 +0100, Peter Lieven wrote: >>>> Am 11.12.2013 19:59, schrieb Marcelo Tosatti: >>>>> On Wed, Dec 11, 2013 at 04:53:05PM -0200, Marcelo Tosatti wrote: >>>>>> On Sun, Dec 08, 2013 at 10:33:38PM +1100, Vadim Rozenfeld wrote: >>>>>>> Signed-off: Peter Lieven <pl@dlh.net> >>>>>>> Signed-off: Gleb Natapov <gleb@redhat.com> >>>>>>> Signed-off: Vadim Rozenfeld <vrozenfe@redhat.com> >>>>>>> >>>>>>> v1 -> v2 >>>>>>> 1. mark TSC page dirty as suggested by >>>>>>> Eric Northup <digitaleric@google.com> and Gleb >>>>>>> 2. disable local irq when calling get_kernel_ns, >>>>>>> as it was done by Peter Lieven <pl@dlhnet.de> >>>>>>> 3. move check for TSC page enable from second patch >>>>>>> to this one. >>>>>>> >>>>>>> --- >>>>>>> arch/x86/include/asm/kvm_host.h | 2 ++ >>>>>>> arch/x86/include/uapi/asm/hyperv.h | 13 +++++++++++++ >>>>>>> arch/x86/kvm/x86.c | 39 +++++++++++++++++++++++++++++++++++++- >>>>>>> include/uapi/linux/kvm.h | 1 + >>>>>>> 4 files changed, 54 insertions(+), 1 deletion(-) >>>>>>> >>>>>>> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h >>>>>>> index ae5d783..2fd0753 100644 >>>>>>> --- a/arch/x86/include/asm/kvm_host.h >>>>>>> +++ b/arch/x86/include/asm/kvm_host.h >>>>>>> @@ -605,6 +605,8 @@ struct kvm_arch { >>>>>>> /* fields used by HYPER-V emulation */ >>>>>>> u64 hv_guest_os_id; >>>>>>> u64 hv_hypercall; >>>>>>> + u64 hv_ref_count; >>>>>>> + u64 hv_tsc_page; >>>>>>> >>>>>>> #ifdef CONFIG_KVM_MMU_AUDIT >>>>>>> int audit_point; >>>>>>> diff --git a/arch/x86/include/uapi/asm/hyperv.h b/arch/x86/include/uapi/asm/hyperv.h >>>>>>> index b8f1c01..462efe7 100644 >>>>>>> --- a/arch/x86/include/uapi/asm/hyperv.h >>>>>>> +++ b/arch/x86/include/uapi/asm/hyperv.h >>>>>>> @@ -28,6 +28,9 @@ >>>>>>> /* Partition Reference Counter (HV_X64_MSR_TIME_REF_COUNT) available*/ >>>>>>> #define HV_X64_MSR_TIME_REF_COUNT_AVAILABLE (1 << 1) >>>>>>> >>>>>>> +/* A partition's reference time stamp counter (TSC) page */ >>>>>>> +#define HV_X64_MSR_REFERENCE_TSC 0x40000021 >>>>>>> + >>>>>>> /* >>>>>>> * There is a single feature flag that signifies the presence of the MSR >>>>>>> * that can be used to retrieve both the local APIC Timer frequency as >>>>>>> @@ -198,6 +201,9 @@ >>>>>>> #define HV_X64_MSR_APIC_ASSIST_PAGE_ADDRESS_MASK \ >>>>>>> (~((1ull << HV_X64_MSR_APIC_ASSIST_PAGE_ADDRESS_SHIFT) - 1)) >>>>>>> >>>>>>> +#define HV_X64_MSR_TSC_REFERENCE_ENABLE 0x00000001 >>>>>>> +#define HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT 12 >>>>>>> + >>>>>>> #define HV_PROCESSOR_POWER_STATE_C0 0 >>>>>>> #define HV_PROCESSOR_POWER_STATE_C1 1 >>>>>>> #define HV_PROCESSOR_POWER_STATE_C2 2 >>>>>>> @@ -210,4 +216,11 @@ >>>>>>> #define HV_STATUS_INVALID_ALIGNMENT 4 >>>>>>> #define HV_STATUS_INSUFFICIENT_BUFFERS 19 >>>>>>> >>>>>>> +typedef struct _HV_REFERENCE_TSC_PAGE { >>>>>>> + __u32 tsc_sequence; >>>>>>> + __u32 res1; >>>>>>> + __u64 tsc_scale; >>>>>>> + __s64 tsc_offset; >>>>>>> +} HV_REFERENCE_TSC_PAGE, *PHV_REFERENCE_TSC_PAGE; >>>>>>> + >>>>>>> #endif >>>>>>> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c >>>>>>> index 21ef1ba..5e4e495a 100644 >>>>>>> --- a/arch/x86/kvm/x86.c >>>>>>> +++ b/arch/x86/kvm/x86.c >>>>>>> @@ -840,7 +840,7 @@ EXPORT_SYMBOL_GPL(kvm_rdpmc); >>>>>>> static u32 msrs_to_save[] = { >>>>>>> MSR_KVM_SYSTEM_TIME, MSR_KVM_WALL_CLOCK, >>>>>>> MSR_KVM_SYSTEM_TIME_NEW, MSR_KVM_WALL_CLOCK_NEW, >>>>>>> - HV_X64_MSR_GUEST_OS_ID, HV_X64_MSR_HYPERCALL, >>>>>>> + HV_X64_MSR_GUEST_OS_ID, HV_X64_MSR_HYPERCALL, HV_X64_MSR_TIME_REF_COUNT, >>>>>>> HV_X64_MSR_APIC_ASSIST_PAGE, MSR_KVM_ASYNC_PF_EN, MSR_KVM_STEAL_TIME, >>>>>>> MSR_KVM_PV_EOI_EN, >>>>>>> MSR_IA32_SYSENTER_CS, MSR_IA32_SYSENTER_ESP, MSR_IA32_SYSENTER_EIP, >>>>>>> @@ -1826,6 +1826,8 @@ static bool kvm_hv_msr_partition_wide(u32 msr) >>>>>>> switch (msr) { >>>>>>> case HV_X64_MSR_GUEST_OS_ID: >>>>>>> case HV_X64_MSR_HYPERCALL: >>>>>>> + case HV_X64_MSR_REFERENCE_TSC: >>>>>>> + case HV_X64_MSR_TIME_REF_COUNT: >>>>>>> r = true; >>>>>>> break; >>>>>>> } >>>>>>> @@ -1865,6 +1867,29 @@ static int set_msr_hyperv_pw(struct kvm_vcpu *vcpu, u32 msr, u64 data) >>>>>>> if (__copy_to_user((void __user *)addr, instructions, 4)) >>>>>>> return 1; >>>>>>> kvm->arch.hv_hypercall = data; >>>>>>> + local_irq_disable(); >>>>>>> + kvm->arch.hv_ref_count = get_kernel_ns() + kvm->arch.kvmclock_offset; >>>>>>> + local_irq_enable(); >>>>>> >>>>>> Where does the docs say that HV_X64_MSR_HYPERCALL is the where the clock >>>>>> starts counting? >>>>>> >>>>>> No need to store kvmclock_offset in hv_ref_count? (moreover >>>>>> the name is weird, better name would be "hv_ref_start_time". >>>>> >>>>> Just add kvmclock_offset when reading the values (otherwise you have a >>>>> "stale copy" of kvmclock_offset in hv_ref_count). >>>>> >>>> >>>> After some experiments I think we do no need kvm->arch.hv_ref_count at all. >>>> >>>> I was debugging some weird clockjump issues and I think the problem is that after live migration >>>> kvm->arch.hv_ref_count is initialized to 0. Depending on the uptime of the vServer when the >>>> hypercall was set up this can lead to series jumps. >>>> >>>> So I would suggest to completely drop kvm->arch.hv_ref_count. >>>> >>>> And use simply this in get_msr_hyperv_pw(). >>>> >>>> case HV_X64_MSR_TIME_REF_COUNT: { >>>> data = div_u64(get_kernel_ns() + kvm->arch.kvmclock_offset, 100); >>>> break; >>>> } >>>> >>> >>> Agreed. It should work as long as we rely on kvmclock_offset. >>> Vadim. >> >> I think we can rely on kvmclock_offset. Had you had a chance to do further testing >> during the weekend? > > Yes, and still testing. > > There is still some inconsistency in the value returned by > QueryPerformanceCounter during migration. With my proposal? Peter -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Wed, 2014-01-08 at 11:15 +0100, Peter Lieven wrote: > On 08.01.2014 10:40, Vadim Rozenfeld wrote: > > On Tue, 2014-01-07 at 18:52 +0100, Peter Lieven wrote: > >> Am 07.01.2014 10:36, schrieb Vadim Rozenfeld: > >>> On Thu, 2014-01-02 at 17:52 +0100, Peter Lieven wrote: > >>>> Am 11.12.2013 19:59, schrieb Marcelo Tosatti: > >>>>> On Wed, Dec 11, 2013 at 04:53:05PM -0200, Marcelo Tosatti wrote: > >>>>>> On Sun, Dec 08, 2013 at 10:33:38PM +1100, Vadim Rozenfeld wrote: > >>>>>>> Signed-off: Peter Lieven <pl@dlh.net> > >>>>>>> Signed-off: Gleb Natapov <gleb@redhat.com> > >>>>>>> Signed-off: Vadim Rozenfeld <vrozenfe@redhat.com> > >>>>>>> > >>>>>>> v1 -> v2 > >>>>>>> 1. mark TSC page dirty as suggested by > >>>>>>> Eric Northup <digitaleric@google.com> and Gleb > >>>>>>> 2. disable local irq when calling get_kernel_ns, > >>>>>>> as it was done by Peter Lieven <pl@dlhnet.de> > >>>>>>> 3. move check for TSC page enable from second patch > >>>>>>> to this one. > >>>>>>> > >>>>>>> --- > >>>>>>> arch/x86/include/asm/kvm_host.h | 2 ++ > >>>>>>> arch/x86/include/uapi/asm/hyperv.h | 13 +++++++++++++ > >>>>>>> arch/x86/kvm/x86.c | 39 +++++++++++++++++++++++++++++++++++++- > >>>>>>> include/uapi/linux/kvm.h | 1 + > >>>>>>> 4 files changed, 54 insertions(+), 1 deletion(-) > >>>>>>> > >>>>>>> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h > >>>>>>> index ae5d783..2fd0753 100644 > >>>>>>> --- a/arch/x86/include/asm/kvm_host.h > >>>>>>> +++ b/arch/x86/include/asm/kvm_host.h > >>>>>>> @@ -605,6 +605,8 @@ struct kvm_arch { > >>>>>>> /* fields used by HYPER-V emulation */ > >>>>>>> u64 hv_guest_os_id; > >>>>>>> u64 hv_hypercall; > >>>>>>> + u64 hv_ref_count; > >>>>>>> + u64 hv_tsc_page; > >>>>>>> > >>>>>>> #ifdef CONFIG_KVM_MMU_AUDIT > >>>>>>> int audit_point; > >>>>>>> diff --git a/arch/x86/include/uapi/asm/hyperv.h b/arch/x86/include/uapi/asm/hyperv.h > >>>>>>> index b8f1c01..462efe7 100644 > >>>>>>> --- a/arch/x86/include/uapi/asm/hyperv.h > >>>>>>> +++ b/arch/x86/include/uapi/asm/hyperv.h > >>>>>>> @@ -28,6 +28,9 @@ > >>>>>>> /* Partition Reference Counter (HV_X64_MSR_TIME_REF_COUNT) available*/ > >>>>>>> #define HV_X64_MSR_TIME_REF_COUNT_AVAILABLE (1 << 1) > >>>>>>> > >>>>>>> +/* A partition's reference time stamp counter (TSC) page */ > >>>>>>> +#define HV_X64_MSR_REFERENCE_TSC 0x40000021 > >>>>>>> + > >>>>>>> /* > >>>>>>> * There is a single feature flag that signifies the presence of the MSR > >>>>>>> * that can be used to retrieve both the local APIC Timer frequency as > >>>>>>> @@ -198,6 +201,9 @@ > >>>>>>> #define HV_X64_MSR_APIC_ASSIST_PAGE_ADDRESS_MASK \ > >>>>>>> (~((1ull << HV_X64_MSR_APIC_ASSIST_PAGE_ADDRESS_SHIFT) - 1)) > >>>>>>> > >>>>>>> +#define HV_X64_MSR_TSC_REFERENCE_ENABLE 0x00000001 > >>>>>>> +#define HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT 12 > >>>>>>> + > >>>>>>> #define HV_PROCESSOR_POWER_STATE_C0 0 > >>>>>>> #define HV_PROCESSOR_POWER_STATE_C1 1 > >>>>>>> #define HV_PROCESSOR_POWER_STATE_C2 2 > >>>>>>> @@ -210,4 +216,11 @@ > >>>>>>> #define HV_STATUS_INVALID_ALIGNMENT 4 > >>>>>>> #define HV_STATUS_INSUFFICIENT_BUFFERS 19 > >>>>>>> > >>>>>>> +typedef struct _HV_REFERENCE_TSC_PAGE { > >>>>>>> + __u32 tsc_sequence; > >>>>>>> + __u32 res1; > >>>>>>> + __u64 tsc_scale; > >>>>>>> + __s64 tsc_offset; > >>>>>>> +} HV_REFERENCE_TSC_PAGE, *PHV_REFERENCE_TSC_PAGE; > >>>>>>> + > >>>>>>> #endif > >>>>>>> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c > >>>>>>> index 21ef1ba..5e4e495a 100644 > >>>>>>> --- a/arch/x86/kvm/x86.c > >>>>>>> +++ b/arch/x86/kvm/x86.c > >>>>>>> @@ -840,7 +840,7 @@ EXPORT_SYMBOL_GPL(kvm_rdpmc); > >>>>>>> static u32 msrs_to_save[] = { > >>>>>>> MSR_KVM_SYSTEM_TIME, MSR_KVM_WALL_CLOCK, > >>>>>>> MSR_KVM_SYSTEM_TIME_NEW, MSR_KVM_WALL_CLOCK_NEW, > >>>>>>> - HV_X64_MSR_GUEST_OS_ID, HV_X64_MSR_HYPERCALL, > >>>>>>> + HV_X64_MSR_GUEST_OS_ID, HV_X64_MSR_HYPERCALL, HV_X64_MSR_TIME_REF_COUNT, > >>>>>>> HV_X64_MSR_APIC_ASSIST_PAGE, MSR_KVM_ASYNC_PF_EN, MSR_KVM_STEAL_TIME, > >>>>>>> MSR_KVM_PV_EOI_EN, > >>>>>>> MSR_IA32_SYSENTER_CS, MSR_IA32_SYSENTER_ESP, MSR_IA32_SYSENTER_EIP, > >>>>>>> @@ -1826,6 +1826,8 @@ static bool kvm_hv_msr_partition_wide(u32 msr) > >>>>>>> switch (msr) { > >>>>>>> case HV_X64_MSR_GUEST_OS_ID: > >>>>>>> case HV_X64_MSR_HYPERCALL: > >>>>>>> + case HV_X64_MSR_REFERENCE_TSC: > >>>>>>> + case HV_X64_MSR_TIME_REF_COUNT: > >>>>>>> r = true; > >>>>>>> break; > >>>>>>> } > >>>>>>> @@ -1865,6 +1867,29 @@ static int set_msr_hyperv_pw(struct kvm_vcpu *vcpu, u32 msr, u64 data) > >>>>>>> if (__copy_to_user((void __user *)addr, instructions, 4)) > >>>>>>> return 1; > >>>>>>> kvm->arch.hv_hypercall = data; > >>>>>>> + local_irq_disable(); > >>>>>>> + kvm->arch.hv_ref_count = get_kernel_ns() + kvm->arch.kvmclock_offset; > >>>>>>> + local_irq_enable(); > >>>>>> > >>>>>> Where does the docs say that HV_X64_MSR_HYPERCALL is the where the clock > >>>>>> starts counting? > >>>>>> > >>>>>> No need to store kvmclock_offset in hv_ref_count? (moreover > >>>>>> the name is weird, better name would be "hv_ref_start_time". > >>>>> > >>>>> Just add kvmclock_offset when reading the values (otherwise you have a > >>>>> "stale copy" of kvmclock_offset in hv_ref_count). > >>>>> > >>>> > >>>> After some experiments I think we do no need kvm->arch.hv_ref_count at all. > >>>> > >>>> I was debugging some weird clockjump issues and I think the problem is that after live migration > >>>> kvm->arch.hv_ref_count is initialized to 0. Depending on the uptime of the vServer when the > >>>> hypercall was set up this can lead to series jumps. > >>>> > >>>> So I would suggest to completely drop kvm->arch.hv_ref_count. > >>>> > >>>> And use simply this in get_msr_hyperv_pw(). > >>>> > >>>> case HV_X64_MSR_TIME_REF_COUNT: { > >>>> data = div_u64(get_kernel_ns() + kvm->arch.kvmclock_offset, 100); > >>>> break; > >>>> } > >>>> > >>> > >>> Agreed. It should work as long as we rely on kvmclock_offset. > >>> Vadim. > >> > >> I think we can rely on kvmclock_offset. Had you had a chance to do further testing > >> during the weekend? > > > > Yes, and still testing. > > > > There is still some inconsistency in the value returned by > > QueryPerformanceCounter during migration. > > With my proposal? > Right. Please see below QTC vs. NTP time stamp before and after migration: C:\timedrift\objchk_win7_x86\i386>timedrift.exe quantum = 15 QPC NTP Delta 1311 1307 4 2621 2618 3 3932 3931 1 ....... ....... 38018 38014 4 Unable to wait for NTP reply from the SNTP server, GetLastError returns 7279088 142210 40113 102097 143458 41357 102101 ....... ....... 156500 54615 101885 NTP time changed from 38014 to 40113 ((40113 - 38014) / 100 = 21 mSec) while QPC jumped from 38018 to 142210 ((142210 - 38014) / 100 = 1042 mSec) Vadim. > Peter > -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On 08.01.2014 11:44, Vadim Rozenfeld wrote: > On Wed, 2014-01-08 at 11:15 +0100, Peter Lieven wrote: >> On 08.01.2014 10:40, Vadim Rozenfeld wrote: >>> On Tue, 2014-01-07 at 18:52 +0100, Peter Lieven wrote: >>>> Am 07.01.2014 10:36, schrieb Vadim Rozenfeld: >>>>> On Thu, 2014-01-02 at 17:52 +0100, Peter Lieven wrote: >>>>>> Am 11.12.2013 19:59, schrieb Marcelo Tosatti: >>>>>>> On Wed, Dec 11, 2013 at 04:53:05PM -0200, Marcelo Tosatti wrote: >>>>>>>> On Sun, Dec 08, 2013 at 10:33:38PM +1100, Vadim Rozenfeld wrote: >>>>>>>>> Signed-off: Peter Lieven <pl@dlh.net> >>>>>>>>> Signed-off: Gleb Natapov <gleb@redhat.com> >>>>>>>>> Signed-off: Vadim Rozenfeld <vrozenfe@redhat.com> >>>>>>>>> >>>>>>>>> v1 -> v2 >>>>>>>>> 1. mark TSC page dirty as suggested by >>>>>>>>> Eric Northup <digitaleric@google.com> and Gleb >>>>>>>>> 2. disable local irq when calling get_kernel_ns, >>>>>>>>> as it was done by Peter Lieven <pl@dlhnet.de> >>>>>>>>> 3. move check for TSC page enable from second patch >>>>>>>>> to this one. >>>>>>>>> >>>>>>>>> --- >>>>>>>>> arch/x86/include/asm/kvm_host.h | 2 ++ >>>>>>>>> arch/x86/include/uapi/asm/hyperv.h | 13 +++++++++++++ >>>>>>>>> arch/x86/kvm/x86.c | 39 +++++++++++++++++++++++++++++++++++++- >>>>>>>>> include/uapi/linux/kvm.h | 1 + >>>>>>>>> 4 files changed, 54 insertions(+), 1 deletion(-) >>>>>>>>> >>>>>>>>> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h >>>>>>>>> index ae5d783..2fd0753 100644 >>>>>>>>> --- a/arch/x86/include/asm/kvm_host.h >>>>>>>>> +++ b/arch/x86/include/asm/kvm_host.h >>>>>>>>> @@ -605,6 +605,8 @@ struct kvm_arch { >>>>>>>>> /* fields used by HYPER-V emulation */ >>>>>>>>> u64 hv_guest_os_id; >>>>>>>>> u64 hv_hypercall; >>>>>>>>> + u64 hv_ref_count; >>>>>>>>> + u64 hv_tsc_page; >>>>>>>>> >>>>>>>>> #ifdef CONFIG_KVM_MMU_AUDIT >>>>>>>>> int audit_point; >>>>>>>>> diff --git a/arch/x86/include/uapi/asm/hyperv.h b/arch/x86/include/uapi/asm/hyperv.h >>>>>>>>> index b8f1c01..462efe7 100644 >>>>>>>>> --- a/arch/x86/include/uapi/asm/hyperv.h >>>>>>>>> +++ b/arch/x86/include/uapi/asm/hyperv.h >>>>>>>>> @@ -28,6 +28,9 @@ >>>>>>>>> /* Partition Reference Counter (HV_X64_MSR_TIME_REF_COUNT) available*/ >>>>>>>>> #define HV_X64_MSR_TIME_REF_COUNT_AVAILABLE (1 << 1) >>>>>>>>> >>>>>>>>> +/* A partition's reference time stamp counter (TSC) page */ >>>>>>>>> +#define HV_X64_MSR_REFERENCE_TSC 0x40000021 >>>>>>>>> + >>>>>>>>> /* >>>>>>>>> * There is a single feature flag that signifies the presence of the MSR >>>>>>>>> * that can be used to retrieve both the local APIC Timer frequency as >>>>>>>>> @@ -198,6 +201,9 @@ >>>>>>>>> #define HV_X64_MSR_APIC_ASSIST_PAGE_ADDRESS_MASK \ >>>>>>>>> (~((1ull << HV_X64_MSR_APIC_ASSIST_PAGE_ADDRESS_SHIFT) - 1)) >>>>>>>>> >>>>>>>>> +#define HV_X64_MSR_TSC_REFERENCE_ENABLE 0x00000001 >>>>>>>>> +#define HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT 12 >>>>>>>>> + >>>>>>>>> #define HV_PROCESSOR_POWER_STATE_C0 0 >>>>>>>>> #define HV_PROCESSOR_POWER_STATE_C1 1 >>>>>>>>> #define HV_PROCESSOR_POWER_STATE_C2 2 >>>>>>>>> @@ -210,4 +216,11 @@ >>>>>>>>> #define HV_STATUS_INVALID_ALIGNMENT 4 >>>>>>>>> #define HV_STATUS_INSUFFICIENT_BUFFERS 19 >>>>>>>>> >>>>>>>>> +typedef struct _HV_REFERENCE_TSC_PAGE { >>>>>>>>> + __u32 tsc_sequence; >>>>>>>>> + __u32 res1; >>>>>>>>> + __u64 tsc_scale; >>>>>>>>> + __s64 tsc_offset; >>>>>>>>> +} HV_REFERENCE_TSC_PAGE, *PHV_REFERENCE_TSC_PAGE; >>>>>>>>> + >>>>>>>>> #endif >>>>>>>>> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c >>>>>>>>> index 21ef1ba..5e4e495a 100644 >>>>>>>>> --- a/arch/x86/kvm/x86.c >>>>>>>>> +++ b/arch/x86/kvm/x86.c >>>>>>>>> @@ -840,7 +840,7 @@ EXPORT_SYMBOL_GPL(kvm_rdpmc); >>>>>>>>> static u32 msrs_to_save[] = { >>>>>>>>> MSR_KVM_SYSTEM_TIME, MSR_KVM_WALL_CLOCK, >>>>>>>>> MSR_KVM_SYSTEM_TIME_NEW, MSR_KVM_WALL_CLOCK_NEW, >>>>>>>>> - HV_X64_MSR_GUEST_OS_ID, HV_X64_MSR_HYPERCALL, >>>>>>>>> + HV_X64_MSR_GUEST_OS_ID, HV_X64_MSR_HYPERCALL, HV_X64_MSR_TIME_REF_COUNT, >>>>>>>>> HV_X64_MSR_APIC_ASSIST_PAGE, MSR_KVM_ASYNC_PF_EN, MSR_KVM_STEAL_TIME, >>>>>>>>> MSR_KVM_PV_EOI_EN, >>>>>>>>> MSR_IA32_SYSENTER_CS, MSR_IA32_SYSENTER_ESP, MSR_IA32_SYSENTER_EIP, >>>>>>>>> @@ -1826,6 +1826,8 @@ static bool kvm_hv_msr_partition_wide(u32 msr) >>>>>>>>> switch (msr) { >>>>>>>>> case HV_X64_MSR_GUEST_OS_ID: >>>>>>>>> case HV_X64_MSR_HYPERCALL: >>>>>>>>> + case HV_X64_MSR_REFERENCE_TSC: >>>>>>>>> + case HV_X64_MSR_TIME_REF_COUNT: >>>>>>>>> r = true; >>>>>>>>> break; >>>>>>>>> } >>>>>>>>> @@ -1865,6 +1867,29 @@ static int set_msr_hyperv_pw(struct kvm_vcpu *vcpu, u32 msr, u64 data) >>>>>>>>> if (__copy_to_user((void __user *)addr, instructions, 4)) >>>>>>>>> return 1; >>>>>>>>> kvm->arch.hv_hypercall = data; >>>>>>>>> + local_irq_disable(); >>>>>>>>> + kvm->arch.hv_ref_count = get_kernel_ns() + kvm->arch.kvmclock_offset; >>>>>>>>> + local_irq_enable(); >>>>>>>> >>>>>>>> Where does the docs say that HV_X64_MSR_HYPERCALL is the where the clock >>>>>>>> starts counting? >>>>>>>> >>>>>>>> No need to store kvmclock_offset in hv_ref_count? (moreover >>>>>>>> the name is weird, better name would be "hv_ref_start_time". >>>>>>> >>>>>>> Just add kvmclock_offset when reading the values (otherwise you have a >>>>>>> "stale copy" of kvmclock_offset in hv_ref_count). >>>>>>> >>>>>> >>>>>> After some experiments I think we do no need kvm->arch.hv_ref_count at all. >>>>>> >>>>>> I was debugging some weird clockjump issues and I think the problem is that after live migration >>>>>> kvm->arch.hv_ref_count is initialized to 0. Depending on the uptime of the vServer when the >>>>>> hypercall was set up this can lead to series jumps. >>>>>> >>>>>> So I would suggest to completely drop kvm->arch.hv_ref_count. >>>>>> >>>>>> And use simply this in get_msr_hyperv_pw(). >>>>>> >>>>>> case HV_X64_MSR_TIME_REF_COUNT: { >>>>>> data = div_u64(get_kernel_ns() + kvm->arch.kvmclock_offset, 100); >>>>>> break; >>>>>> } >>>>>> >>>>> >>>>> Agreed. It should work as long as we rely on kvmclock_offset. >>>>> Vadim. >>>> >>>> I think we can rely on kvmclock_offset. Had you had a chance to do further testing >>>> during the weekend? >>> >>> Yes, and still testing. >>> >>> There is still some inconsistency in the value returned by >>> QueryPerformanceCounter during migration. >> >> With my proposal? >> > > Right. > Please see below QTC vs. NTP time stamp > before and after migration: > > C:\timedrift\objchk_win7_x86\i386>timedrift.exe > quantum = 15 > QPC NTP Delta > 1311 1307 4 > 2621 2618 3 > 3932 3931 1 > ....... > ....... > 38018 38014 4 > Unable to wait for NTP reply from the SNTP server, GetLastError returns > 7279088 > 142210 40113 102097 > 143458 41357 102101 > ....... > ....... > 156500 54615 101885 > > NTP time changed from 38014 to 40113 ((40113 - 38014) / 100 = 21 mSec) > while QPC jumped from 38018 to 142210 ((142210 - 38014) / 100 = 1042 > mSec) How long was the downtime when you migrated the vServer? I have done similar tests and have not seen this... Can you show the relevant parts of your patch. Remember I use the REFERENCE_COUTNER not iTSC. Peter -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Wed, 2014-01-08 at 12:48 +0100, Peter Lieven wrote: > On 08.01.2014 11:44, Vadim Rozenfeld wrote: > > On Wed, 2014-01-08 at 11:15 +0100, Peter Lieven wrote: > >> On 08.01.2014 10:40, Vadim Rozenfeld wrote: > >>> On Tue, 2014-01-07 at 18:52 +0100, Peter Lieven wrote: > >>>> Am 07.01.2014 10:36, schrieb Vadim Rozenfeld: > >>>>> On Thu, 2014-01-02 at 17:52 +0100, Peter Lieven wrote: > >>>>>> Am 11.12.2013 19:59, schrieb Marcelo Tosatti: > >>>>>>> On Wed, Dec 11, 2013 at 04:53:05PM -0200, Marcelo Tosatti wrote: > >>>>>>>> On Sun, Dec 08, 2013 at 10:33:38PM +1100, Vadim Rozenfeld wrote: > >>>>>>>>> Signed-off: Peter Lieven <pl@dlh.net> > >>>>>>>>> Signed-off: Gleb Natapov <gleb@redhat.com> > >>>>>>>>> Signed-off: Vadim Rozenfeld <vrozenfe@redhat.com> > >>>>>>>>> > >>>>>>>>> v1 -> v2 > >>>>>>>>> 1. mark TSC page dirty as suggested by > >>>>>>>>> Eric Northup <digitaleric@google.com> and Gleb > >>>>>>>>> 2. disable local irq when calling get_kernel_ns, > >>>>>>>>> as it was done by Peter Lieven <pl@dlhnet.de> > >>>>>>>>> 3. move check for TSC page enable from second patch > >>>>>>>>> to this one. > >>>>>>>>> > >>>>>>>>> --- > >>>>>>>>> arch/x86/include/asm/kvm_host.h | 2 ++ > >>>>>>>>> arch/x86/include/uapi/asm/hyperv.h | 13 +++++++++++++ > >>>>>>>>> arch/x86/kvm/x86.c | 39 +++++++++++++++++++++++++++++++++++++- > >>>>>>>>> include/uapi/linux/kvm.h | 1 + > >>>>>>>>> 4 files changed, 54 insertions(+), 1 deletion(-) > >>>>>>>>> > >>>>>>>>> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h > >>>>>>>>> index ae5d783..2fd0753 100644 > >>>>>>>>> --- a/arch/x86/include/asm/kvm_host.h > >>>>>>>>> +++ b/arch/x86/include/asm/kvm_host.h > >>>>>>>>> @@ -605,6 +605,8 @@ struct kvm_arch { > >>>>>>>>> /* fields used by HYPER-V emulation */ > >>>>>>>>> u64 hv_guest_os_id; > >>>>>>>>> u64 hv_hypercall; > >>>>>>>>> + u64 hv_ref_count; > >>>>>>>>> + u64 hv_tsc_page; > >>>>>>>>> > >>>>>>>>> #ifdef CONFIG_KVM_MMU_AUDIT > >>>>>>>>> int audit_point; > >>>>>>>>> diff --git a/arch/x86/include/uapi/asm/hyperv.h b/arch/x86/include/uapi/asm/hyperv.h > >>>>>>>>> index b8f1c01..462efe7 100644 > >>>>>>>>> --- a/arch/x86/include/uapi/asm/hyperv.h > >>>>>>>>> +++ b/arch/x86/include/uapi/asm/hyperv.h > >>>>>>>>> @@ -28,6 +28,9 @@ > >>>>>>>>> /* Partition Reference Counter (HV_X64_MSR_TIME_REF_COUNT) available*/ > >>>>>>>>> #define HV_X64_MSR_TIME_REF_COUNT_AVAILABLE (1 << 1) > >>>>>>>>> > >>>>>>>>> +/* A partition's reference time stamp counter (TSC) page */ > >>>>>>>>> +#define HV_X64_MSR_REFERENCE_TSC 0x40000021 > >>>>>>>>> + > >>>>>>>>> /* > >>>>>>>>> * There is a single feature flag that signifies the presence of the MSR > >>>>>>>>> * that can be used to retrieve both the local APIC Timer frequency as > >>>>>>>>> @@ -198,6 +201,9 @@ > >>>>>>>>> #define HV_X64_MSR_APIC_ASSIST_PAGE_ADDRESS_MASK \ > >>>>>>>>> (~((1ull << HV_X64_MSR_APIC_ASSIST_PAGE_ADDRESS_SHIFT) - 1)) > >>>>>>>>> > >>>>>>>>> +#define HV_X64_MSR_TSC_REFERENCE_ENABLE 0x00000001 > >>>>>>>>> +#define HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT 12 > >>>>>>>>> + > >>>>>>>>> #define HV_PROCESSOR_POWER_STATE_C0 0 > >>>>>>>>> #define HV_PROCESSOR_POWER_STATE_C1 1 > >>>>>>>>> #define HV_PROCESSOR_POWER_STATE_C2 2 > >>>>>>>>> @@ -210,4 +216,11 @@ > >>>>>>>>> #define HV_STATUS_INVALID_ALIGNMENT 4 > >>>>>>>>> #define HV_STATUS_INSUFFICIENT_BUFFERS 19 > >>>>>>>>> > >>>>>>>>> +typedef struct _HV_REFERENCE_TSC_PAGE { > >>>>>>>>> + __u32 tsc_sequence; > >>>>>>>>> + __u32 res1; > >>>>>>>>> + __u64 tsc_scale; > >>>>>>>>> + __s64 tsc_offset; > >>>>>>>>> +} HV_REFERENCE_TSC_PAGE, *PHV_REFERENCE_TSC_PAGE; > >>>>>>>>> + > >>>>>>>>> #endif > >>>>>>>>> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c > >>>>>>>>> index 21ef1ba..5e4e495a 100644 > >>>>>>>>> --- a/arch/x86/kvm/x86.c > >>>>>>>>> +++ b/arch/x86/kvm/x86.c > >>>>>>>>> @@ -840,7 +840,7 @@ EXPORT_SYMBOL_GPL(kvm_rdpmc); > >>>>>>>>> static u32 msrs_to_save[] = { > >>>>>>>>> MSR_KVM_SYSTEM_TIME, MSR_KVM_WALL_CLOCK, > >>>>>>>>> MSR_KVM_SYSTEM_TIME_NEW, MSR_KVM_WALL_CLOCK_NEW, > >>>>>>>>> - HV_X64_MSR_GUEST_OS_ID, HV_X64_MSR_HYPERCALL, > >>>>>>>>> + HV_X64_MSR_GUEST_OS_ID, HV_X64_MSR_HYPERCALL, HV_X64_MSR_TIME_REF_COUNT, > >>>>>>>>> HV_X64_MSR_APIC_ASSIST_PAGE, MSR_KVM_ASYNC_PF_EN, MSR_KVM_STEAL_TIME, > >>>>>>>>> MSR_KVM_PV_EOI_EN, > >>>>>>>>> MSR_IA32_SYSENTER_CS, MSR_IA32_SYSENTER_ESP, MSR_IA32_SYSENTER_EIP, > >>>>>>>>> @@ -1826,6 +1826,8 @@ static bool kvm_hv_msr_partition_wide(u32 msr) > >>>>>>>>> switch (msr) { > >>>>>>>>> case HV_X64_MSR_GUEST_OS_ID: > >>>>>>>>> case HV_X64_MSR_HYPERCALL: > >>>>>>>>> + case HV_X64_MSR_REFERENCE_TSC: > >>>>>>>>> + case HV_X64_MSR_TIME_REF_COUNT: > >>>>>>>>> r = true; > >>>>>>>>> break; > >>>>>>>>> } > >>>>>>>>> @@ -1865,6 +1867,29 @@ static int set_msr_hyperv_pw(struct kvm_vcpu *vcpu, u32 msr, u64 data) > >>>>>>>>> if (__copy_to_user((void __user *)addr, instructions, 4)) > >>>>>>>>> return 1; > >>>>>>>>> kvm->arch.hv_hypercall = data; > >>>>>>>>> + local_irq_disable(); > >>>>>>>>> + kvm->arch.hv_ref_count = get_kernel_ns() + kvm->arch.kvmclock_offset; > >>>>>>>>> + local_irq_enable(); > >>>>>>>> > >>>>>>>> Where does the docs say that HV_X64_MSR_HYPERCALL is the where the clock > >>>>>>>> starts counting? > >>>>>>>> > >>>>>>>> No need to store kvmclock_offset in hv_ref_count? (moreover > >>>>>>>> the name is weird, better name would be "hv_ref_start_time". > >>>>>>> > >>>>>>> Just add kvmclock_offset when reading the values (otherwise you have a > >>>>>>> "stale copy" of kvmclock_offset in hv_ref_count). > >>>>>>> > >>>>>> > >>>>>> After some experiments I think we do no need kvm->arch.hv_ref_count at all. > >>>>>> > >>>>>> I was debugging some weird clockjump issues and I think the problem is that after live migration > >>>>>> kvm->arch.hv_ref_count is initialized to 0. Depending on the uptime of the vServer when the > >>>>>> hypercall was set up this can lead to series jumps. > >>>>>> > >>>>>> So I would suggest to completely drop kvm->arch.hv_ref_count. > >>>>>> > >>>>>> And use simply this in get_msr_hyperv_pw(). > >>>>>> > >>>>>> case HV_X64_MSR_TIME_REF_COUNT: { > >>>>>> data = div_u64(get_kernel_ns() + kvm->arch.kvmclock_offset, 100); > >>>>>> break; > >>>>>> } > >>>>>> > >>>>> > >>>>> Agreed. It should work as long as we rely on kvmclock_offset. > >>>>> Vadim. > >>>> > >>>> I think we can rely on kvmclock_offset. Had you had a chance to do further testing > >>>> during the weekend? > >>> > >>> Yes, and still testing. > >>> > >>> There is still some inconsistency in the value returned by > >>> QueryPerformanceCounter during migration. > >> > >> With my proposal? > >> > > > > Right. > > Please see below QTC vs. NTP time stamp > > before and after migration: > > > > C:\timedrift\objchk_win7_x86\i386>timedrift.exe > > quantum = 15 > > QPC NTP Delta > > 1311 1307 4 > > 2621 2618 3 > > 3932 3931 1 > > ....... > > ....... > > 38018 38014 4 > > Unable to wait for NTP reply from the SNTP server, GetLastError returns > > 7279088 > > 142210 40113 102097 > > 143458 41357 102101 > > ....... > > ....... > > 156500 54615 101885 > > > > NTP time changed from 38014 to 40113 ((40113 - 38014) / 100 = 21 mSec) > > while QPC jumped from 38018 to 142210 ((142210 - 38014) / 100 = 1042 > > mSec) > > How long was the downtime when you migrated the vServer? I use Win7-32 for testing. > > I have done similar tests and have not seen this... > Can you show the relevant parts of your patch. Remember I use the REFERENCE_COUTNER not iTSC. Yes, I also test it for reference counter. case HV_X64_MSR_TIME_REF_COUNT: { // u64 now_ns; // local_irq_disable(); // now_ns = get_kernel_ns() + kvm->arch.kvmclock_offset; // data = div_u64(now_ns - kvm->arch.hv_ref_count, 100); // local_irq_enable(); data = div_u64(get_kernel_ns() + kvm->arch.kvmclock_offset, 100); break; } > > Peter > -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On 08.01.2014 13:12, Vadim Rozenfeld wrote: > On Wed, 2014-01-08 at 12:48 +0100, Peter Lieven wrote: >> On 08.01.2014 11:44, Vadim Rozenfeld wrote: >>> On Wed, 2014-01-08 at 11:15 +0100, Peter Lieven wrote: >>>> On 08.01.2014 10:40, Vadim Rozenfeld wrote: >>>>> On Tue, 2014-01-07 at 18:52 +0100, Peter Lieven wrote: >>>>>> Am 07.01.2014 10:36, schrieb Vadim Rozenfeld: >>>>>>> On Thu, 2014-01-02 at 17:52 +0100, Peter Lieven wrote: >>>>>>>> Am 11.12.2013 19:59, schrieb Marcelo Tosatti: >>>>>>>>> On Wed, Dec 11, 2013 at 04:53:05PM -0200, Marcelo Tosatti wrote: >>>>>>>>>> On Sun, Dec 08, 2013 at 10:33:38PM +1100, Vadim Rozenfeld wrote: >>>>>>>>>>> Signed-off: Peter Lieven <pl@dlh.net> >>>>>>>>>>> Signed-off: Gleb Natapov <gleb@redhat.com> >>>>>>>>>>> Signed-off: Vadim Rozenfeld <vrozenfe@redhat.com> >>>>>>>>>>> >>>>>>>>>>> v1 -> v2 >>>>>>>>>>> 1. mark TSC page dirty as suggested by >>>>>>>>>>> Eric Northup <digitaleric@google.com> and Gleb >>>>>>>>>>> 2. disable local irq when calling get_kernel_ns, >>>>>>>>>>> as it was done by Peter Lieven <pl@dlhnet.de> >>>>>>>>>>> 3. move check for TSC page enable from second patch >>>>>>>>>>> to this one. >>>>>>>>>>> >>>>>>>>>>> --- >>>>>>>>>>> arch/x86/include/asm/kvm_host.h | 2 ++ >>>>>>>>>>> arch/x86/include/uapi/asm/hyperv.h | 13 +++++++++++++ >>>>>>>>>>> arch/x86/kvm/x86.c | 39 +++++++++++++++++++++++++++++++++++++- >>>>>>>>>>> include/uapi/linux/kvm.h | 1 + >>>>>>>>>>> 4 files changed, 54 insertions(+), 1 deletion(-) >>>>>>>>>>> >>>>>>>>>>> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h >>>>>>>>>>> index ae5d783..2fd0753 100644 >>>>>>>>>>> --- a/arch/x86/include/asm/kvm_host.h >>>>>>>>>>> +++ b/arch/x86/include/asm/kvm_host.h >>>>>>>>>>> @@ -605,6 +605,8 @@ struct kvm_arch { >>>>>>>>>>> /* fields used by HYPER-V emulation */ >>>>>>>>>>> u64 hv_guest_os_id; >>>>>>>>>>> u64 hv_hypercall; >>>>>>>>>>> + u64 hv_ref_count; >>>>>>>>>>> + u64 hv_tsc_page; >>>>>>>>>>> >>>>>>>>>>> #ifdef CONFIG_KVM_MMU_AUDIT >>>>>>>>>>> int audit_point; >>>>>>>>>>> diff --git a/arch/x86/include/uapi/asm/hyperv.h b/arch/x86/include/uapi/asm/hyperv.h >>>>>>>>>>> index b8f1c01..462efe7 100644 >>>>>>>>>>> --- a/arch/x86/include/uapi/asm/hyperv.h >>>>>>>>>>> +++ b/arch/x86/include/uapi/asm/hyperv.h >>>>>>>>>>> @@ -28,6 +28,9 @@ >>>>>>>>>>> /* Partition Reference Counter (HV_X64_MSR_TIME_REF_COUNT) available*/ >>>>>>>>>>> #define HV_X64_MSR_TIME_REF_COUNT_AVAILABLE (1 << 1) >>>>>>>>>>> >>>>>>>>>>> +/* A partition's reference time stamp counter (TSC) page */ >>>>>>>>>>> +#define HV_X64_MSR_REFERENCE_TSC 0x40000021 >>>>>>>>>>> + >>>>>>>>>>> /* >>>>>>>>>>> * There is a single feature flag that signifies the presence of the MSR >>>>>>>>>>> * that can be used to retrieve both the local APIC Timer frequency as >>>>>>>>>>> @@ -198,6 +201,9 @@ >>>>>>>>>>> #define HV_X64_MSR_APIC_ASSIST_PAGE_ADDRESS_MASK \ >>>>>>>>>>> (~((1ull << HV_X64_MSR_APIC_ASSIST_PAGE_ADDRESS_SHIFT) - 1)) >>>>>>>>>>> >>>>>>>>>>> +#define HV_X64_MSR_TSC_REFERENCE_ENABLE 0x00000001 >>>>>>>>>>> +#define HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT 12 >>>>>>>>>>> + >>>>>>>>>>> #define HV_PROCESSOR_POWER_STATE_C0 0 >>>>>>>>>>> #define HV_PROCESSOR_POWER_STATE_C1 1 >>>>>>>>>>> #define HV_PROCESSOR_POWER_STATE_C2 2 >>>>>>>>>>> @@ -210,4 +216,11 @@ >>>>>>>>>>> #define HV_STATUS_INVALID_ALIGNMENT 4 >>>>>>>>>>> #define HV_STATUS_INSUFFICIENT_BUFFERS 19 >>>>>>>>>>> >>>>>>>>>>> +typedef struct _HV_REFERENCE_TSC_PAGE { >>>>>>>>>>> + __u32 tsc_sequence; >>>>>>>>>>> + __u32 res1; >>>>>>>>>>> + __u64 tsc_scale; >>>>>>>>>>> + __s64 tsc_offset; >>>>>>>>>>> +} HV_REFERENCE_TSC_PAGE, *PHV_REFERENCE_TSC_PAGE; >>>>>>>>>>> + >>>>>>>>>>> #endif >>>>>>>>>>> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c >>>>>>>>>>> index 21ef1ba..5e4e495a 100644 >>>>>>>>>>> --- a/arch/x86/kvm/x86.c >>>>>>>>>>> +++ b/arch/x86/kvm/x86.c >>>>>>>>>>> @@ -840,7 +840,7 @@ EXPORT_SYMBOL_GPL(kvm_rdpmc); >>>>>>>>>>> static u32 msrs_to_save[] = { >>>>>>>>>>> MSR_KVM_SYSTEM_TIME, MSR_KVM_WALL_CLOCK, >>>>>>>>>>> MSR_KVM_SYSTEM_TIME_NEW, MSR_KVM_WALL_CLOCK_NEW, >>>>>>>>>>> - HV_X64_MSR_GUEST_OS_ID, HV_X64_MSR_HYPERCALL, >>>>>>>>>>> + HV_X64_MSR_GUEST_OS_ID, HV_X64_MSR_HYPERCALL, HV_X64_MSR_TIME_REF_COUNT, >>>>>>>>>>> HV_X64_MSR_APIC_ASSIST_PAGE, MSR_KVM_ASYNC_PF_EN, MSR_KVM_STEAL_TIME, >>>>>>>>>>> MSR_KVM_PV_EOI_EN, >>>>>>>>>>> MSR_IA32_SYSENTER_CS, MSR_IA32_SYSENTER_ESP, MSR_IA32_SYSENTER_EIP, >>>>>>>>>>> @@ -1826,6 +1826,8 @@ static bool kvm_hv_msr_partition_wide(u32 msr) >>>>>>>>>>> switch (msr) { >>>>>>>>>>> case HV_X64_MSR_GUEST_OS_ID: >>>>>>>>>>> case HV_X64_MSR_HYPERCALL: >>>>>>>>>>> + case HV_X64_MSR_REFERENCE_TSC: >>>>>>>>>>> + case HV_X64_MSR_TIME_REF_COUNT: >>>>>>>>>>> r = true; >>>>>>>>>>> break; >>>>>>>>>>> } >>>>>>>>>>> @@ -1865,6 +1867,29 @@ static int set_msr_hyperv_pw(struct kvm_vcpu *vcpu, u32 msr, u64 data) >>>>>>>>>>> if (__copy_to_user((void __user *)addr, instructions, 4)) >>>>>>>>>>> return 1; >>>>>>>>>>> kvm->arch.hv_hypercall = data; >>>>>>>>>>> + local_irq_disable(); >>>>>>>>>>> + kvm->arch.hv_ref_count = get_kernel_ns() + kvm->arch.kvmclock_offset; >>>>>>>>>>> + local_irq_enable(); >>>>>>>>>> >>>>>>>>>> Where does the docs say that HV_X64_MSR_HYPERCALL is the where the clock >>>>>>>>>> starts counting? >>>>>>>>>> >>>>>>>>>> No need to store kvmclock_offset in hv_ref_count? (moreover >>>>>>>>>> the name is weird, better name would be "hv_ref_start_time". >>>>>>>>> >>>>>>>>> Just add kvmclock_offset when reading the values (otherwise you have a >>>>>>>>> "stale copy" of kvmclock_offset in hv_ref_count). >>>>>>>>> >>>>>>>> >>>>>>>> After some experiments I think we do no need kvm->arch.hv_ref_count at all. >>>>>>>> >>>>>>>> I was debugging some weird clockjump issues and I think the problem is that after live migration >>>>>>>> kvm->arch.hv_ref_count is initialized to 0. Depending on the uptime of the vServer when the >>>>>>>> hypercall was set up this can lead to series jumps. >>>>>>>> >>>>>>>> So I would suggest to completely drop kvm->arch.hv_ref_count. >>>>>>>> >>>>>>>> And use simply this in get_msr_hyperv_pw(). >>>>>>>> >>>>>>>> case HV_X64_MSR_TIME_REF_COUNT: { >>>>>>>> data = div_u64(get_kernel_ns() + kvm->arch.kvmclock_offset, 100); >>>>>>>> break; >>>>>>>> } >>>>>>>> >>>>>>> >>>>>>> Agreed. It should work as long as we rely on kvmclock_offset. >>>>>>> Vadim. >>>>>> >>>>>> I think we can rely on kvmclock_offset. Had you had a chance to do further testing >>>>>> during the weekend? >>>>> >>>>> Yes, and still testing. >>>>> >>>>> There is still some inconsistency in the value returned by >>>>> QueryPerformanceCounter during migration. >>>> >>>> With my proposal? >>>> >>> >>> Right. >>> Please see below QTC vs. NTP time stamp >>> before and after migration: >>> >>> C:\timedrift\objchk_win7_x86\i386>timedrift.exe >>> quantum = 15 >>> QPC NTP Delta >>> 1311 1307 4 >>> 2621 2618 3 >>> 3932 3931 1 >>> ....... >>> ....... >>> 38018 38014 4 >>> Unable to wait for NTP reply from the SNTP server, GetLastError returns >>> 7279088 >>> 142210 40113 102097 >>> 143458 41357 102101 >>> ....... >>> ....... >>> 156500 54615 101885 >>> >>> NTP time changed from 38014 to 40113 ((40113 - 38014) / 100 = 21 mSec) >>> while QPC jumped from 38018 to 142210 ((142210 - 38014) / 100 = 1042 >>> mSec) >> >> How long was the downtime when you migrated the vServer? > I use Win7-32 for testing. >> >> I have done similar tests and have not seen this... >> Can you show the relevant parts of your patch. Remember I use the REFERENCE_COUTNER not iTSC. > Yes, I also test it for reference counter. Can you use these parameters for testing: -cpu qemu64,hv_relaxed,hv_vapic,hv_spinlocks=4096,hv_refcnt -rtc base=localtime -vga std -usb -usbdevice tablet -no-hpet If you look in the HMP at "info migrate" after the migration what does the downtime info say? Peter -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Wed, 2014-01-08 at 15:54 +0100, Peter Lieven wrote: > On 08.01.2014 13:12, Vadim Rozenfeld wrote: > > On Wed, 2014-01-08 at 12:48 +0100, Peter Lieven wrote: > >> On 08.01.2014 11:44, Vadim Rozenfeld wrote: > >>> On Wed, 2014-01-08 at 11:15 +0100, Peter Lieven wrote: > >>>> On 08.01.2014 10:40, Vadim Rozenfeld wrote: > >>>>> On Tue, 2014-01-07 at 18:52 +0100, Peter Lieven wrote: > >>>>>> Am 07.01.2014 10:36, schrieb Vadim Rozenfeld: > >>>>>>> On Thu, 2014-01-02 at 17:52 +0100, Peter Lieven wrote: > >>>>>>>> Am 11.12.2013 19:59, schrieb Marcelo Tosatti: > >>>>>>>>> On Wed, Dec 11, 2013 at 04:53:05PM -0200, Marcelo Tosatti wrote: > >>>>>>>>>> On Sun, Dec 08, 2013 at 10:33:38PM +1100, Vadim Rozenfeld wrote: > >>>>>>>>>>> Signed-off: Peter Lieven <pl@dlh.net> > >>>>>>>>>>> Signed-off: Gleb Natapov <gleb@redhat.com> > >>>>>>>>>>> Signed-off: Vadim Rozenfeld <vrozenfe@redhat.com> > >>>>>>>>>>> > >>>>>>>>>>> v1 -> v2 > >>>>>>>>>>> 1. mark TSC page dirty as suggested by > >>>>>>>>>>> Eric Northup <digitaleric@google.com> and Gleb > >>>>>>>>>>> 2. disable local irq when calling get_kernel_ns, > >>>>>>>>>>> as it was done by Peter Lieven <pl@dlhnet.de> > >>>>>>>>>>> 3. move check for TSC page enable from second patch > >>>>>>>>>>> to this one. > >>>>>>>>>>> > >>>>>>>>>>> --- > >>>>>>>>>>> arch/x86/include/asm/kvm_host.h | 2 ++ > >>>>>>>>>>> arch/x86/include/uapi/asm/hyperv.h | 13 +++++++++++++ > >>>>>>>>>>> arch/x86/kvm/x86.c | 39 +++++++++++++++++++++++++++++++++++++- > >>>>>>>>>>> include/uapi/linux/kvm.h | 1 + > >>>>>>>>>>> 4 files changed, 54 insertions(+), 1 deletion(-) > >>>>>>>>>>> > >>>>>>>>>>> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h > >>>>>>>>>>> index ae5d783..2fd0753 100644 > >>>>>>>>>>> --- a/arch/x86/include/asm/kvm_host.h > >>>>>>>>>>> +++ b/arch/x86/include/asm/kvm_host.h > >>>>>>>>>>> @@ -605,6 +605,8 @@ struct kvm_arch { > >>>>>>>>>>> /* fields used by HYPER-V emulation */ > >>>>>>>>>>> u64 hv_guest_os_id; > >>>>>>>>>>> u64 hv_hypercall; > >>>>>>>>>>> + u64 hv_ref_count; > >>>>>>>>>>> + u64 hv_tsc_page; > >>>>>>>>>>> > >>>>>>>>>>> #ifdef CONFIG_KVM_MMU_AUDIT > >>>>>>>>>>> int audit_point; > >>>>>>>>>>> diff --git a/arch/x86/include/uapi/asm/hyperv.h b/arch/x86/include/uapi/asm/hyperv.h > >>>>>>>>>>> index b8f1c01..462efe7 100644 > >>>>>>>>>>> --- a/arch/x86/include/uapi/asm/hyperv.h > >>>>>>>>>>> +++ b/arch/x86/include/uapi/asm/hyperv.h > >>>>>>>>>>> @@ -28,6 +28,9 @@ > >>>>>>>>>>> /* Partition Reference Counter (HV_X64_MSR_TIME_REF_COUNT) available*/ > >>>>>>>>>>> #define HV_X64_MSR_TIME_REF_COUNT_AVAILABLE (1 << 1) > >>>>>>>>>>> > >>>>>>>>>>> +/* A partition's reference time stamp counter (TSC) page */ > >>>>>>>>>>> +#define HV_X64_MSR_REFERENCE_TSC 0x40000021 > >>>>>>>>>>> + > >>>>>>>>>>> /* > >>>>>>>>>>> * There is a single feature flag that signifies the presence of the MSR > >>>>>>>>>>> * that can be used to retrieve both the local APIC Timer frequency as > >>>>>>>>>>> @@ -198,6 +201,9 @@ > >>>>>>>>>>> #define HV_X64_MSR_APIC_ASSIST_PAGE_ADDRESS_MASK \ > >>>>>>>>>>> (~((1ull << HV_X64_MSR_APIC_ASSIST_PAGE_ADDRESS_SHIFT) - 1)) > >>>>>>>>>>> > >>>>>>>>>>> +#define HV_X64_MSR_TSC_REFERENCE_ENABLE 0x00000001 > >>>>>>>>>>> +#define HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT 12 > >>>>>>>>>>> + > >>>>>>>>>>> #define HV_PROCESSOR_POWER_STATE_C0 0 > >>>>>>>>>>> #define HV_PROCESSOR_POWER_STATE_C1 1 > >>>>>>>>>>> #define HV_PROCESSOR_POWER_STATE_C2 2 > >>>>>>>>>>> @@ -210,4 +216,11 @@ > >>>>>>>>>>> #define HV_STATUS_INVALID_ALIGNMENT 4 > >>>>>>>>>>> #define HV_STATUS_INSUFFICIENT_BUFFERS 19 > >>>>>>>>>>> > >>>>>>>>>>> +typedef struct _HV_REFERENCE_TSC_PAGE { > >>>>>>>>>>> + __u32 tsc_sequence; > >>>>>>>>>>> + __u32 res1; > >>>>>>>>>>> + __u64 tsc_scale; > >>>>>>>>>>> + __s64 tsc_offset; > >>>>>>>>>>> +} HV_REFERENCE_TSC_PAGE, *PHV_REFERENCE_TSC_PAGE; > >>>>>>>>>>> + > >>>>>>>>>>> #endif > >>>>>>>>>>> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c > >>>>>>>>>>> index 21ef1ba..5e4e495a 100644 > >>>>>>>>>>> --- a/arch/x86/kvm/x86.c > >>>>>>>>>>> +++ b/arch/x86/kvm/x86.c > >>>>>>>>>>> @@ -840,7 +840,7 @@ EXPORT_SYMBOL_GPL(kvm_rdpmc); > >>>>>>>>>>> static u32 msrs_to_save[] = { > >>>>>>>>>>> MSR_KVM_SYSTEM_TIME, MSR_KVM_WALL_CLOCK, > >>>>>>>>>>> MSR_KVM_SYSTEM_TIME_NEW, MSR_KVM_WALL_CLOCK_NEW, > >>>>>>>>>>> - HV_X64_MSR_GUEST_OS_ID, HV_X64_MSR_HYPERCALL, > >>>>>>>>>>> + HV_X64_MSR_GUEST_OS_ID, HV_X64_MSR_HYPERCALL, HV_X64_MSR_TIME_REF_COUNT, > >>>>>>>>>>> HV_X64_MSR_APIC_ASSIST_PAGE, MSR_KVM_ASYNC_PF_EN, MSR_KVM_STEAL_TIME, > >>>>>>>>>>> MSR_KVM_PV_EOI_EN, > >>>>>>>>>>> MSR_IA32_SYSENTER_CS, MSR_IA32_SYSENTER_ESP, MSR_IA32_SYSENTER_EIP, > >>>>>>>>>>> @@ -1826,6 +1826,8 @@ static bool kvm_hv_msr_partition_wide(u32 msr) > >>>>>>>>>>> switch (msr) { > >>>>>>>>>>> case HV_X64_MSR_GUEST_OS_ID: > >>>>>>>>>>> case HV_X64_MSR_HYPERCALL: > >>>>>>>>>>> + case HV_X64_MSR_REFERENCE_TSC: > >>>>>>>>>>> + case HV_X64_MSR_TIME_REF_COUNT: > >>>>>>>>>>> r = true; > >>>>>>>>>>> break; > >>>>>>>>>>> } > >>>>>>>>>>> @@ -1865,6 +1867,29 @@ static int set_msr_hyperv_pw(struct kvm_vcpu *vcpu, u32 msr, u64 data) > >>>>>>>>>>> if (__copy_to_user((void __user *)addr, instructions, 4)) > >>>>>>>>>>> return 1; > >>>>>>>>>>> kvm->arch.hv_hypercall = data; > >>>>>>>>>>> + local_irq_disable(); > >>>>>>>>>>> + kvm->arch.hv_ref_count = get_kernel_ns() + kvm->arch.kvmclock_offset; > >>>>>>>>>>> + local_irq_enable(); > >>>>>>>>>> > >>>>>>>>>> Where does the docs say that HV_X64_MSR_HYPERCALL is the where the clock > >>>>>>>>>> starts counting? > >>>>>>>>>> > >>>>>>>>>> No need to store kvmclock_offset in hv_ref_count? (moreover > >>>>>>>>>> the name is weird, better name would be "hv_ref_start_time". > >>>>>>>>> > >>>>>>>>> Just add kvmclock_offset when reading the values (otherwise you have a > >>>>>>>>> "stale copy" of kvmclock_offset in hv_ref_count). > >>>>>>>>> > >>>>>>>> > >>>>>>>> After some experiments I think we do no need kvm->arch.hv_ref_count at all. > >>>>>>>> > >>>>>>>> I was debugging some weird clockjump issues and I think the problem is that after live migration > >>>>>>>> kvm->arch.hv_ref_count is initialized to 0. Depending on the uptime of the vServer when the > >>>>>>>> hypercall was set up this can lead to series jumps. > >>>>>>>> > >>>>>>>> So I would suggest to completely drop kvm->arch.hv_ref_count. > >>>>>>>> > >>>>>>>> And use simply this in get_msr_hyperv_pw(). > >>>>>>>> > >>>>>>>> case HV_X64_MSR_TIME_REF_COUNT: { > >>>>>>>> data = div_u64(get_kernel_ns() + kvm->arch.kvmclock_offset, 100); > >>>>>>>> break; > >>>>>>>> } > >>>>>>>> > >>>>>>> > >>>>>>> Agreed. It should work as long as we rely on kvmclock_offset. > >>>>>>> Vadim. > >>>>>> > >>>>>> I think we can rely on kvmclock_offset. Had you had a chance to do further testing > >>>>>> during the weekend? > >>>>> > >>>>> Yes, and still testing. > >>>>> > >>>>> There is still some inconsistency in the value returned by > >>>>> QueryPerformanceCounter during migration. > >>>> > >>>> With my proposal? > >>>> > >>> > >>> Right. > >>> Please see below QTC vs. NTP time stamp > >>> before and after migration: > >>> > >>> C:\timedrift\objchk_win7_x86\i386>timedrift.exe > >>> quantum = 15 > >>> QPC NTP Delta > >>> 1311 1307 4 > >>> 2621 2618 3 > >>> 3932 3931 1 > >>> ....... > >>> ....... > >>> 38018 38014 4 > >>> Unable to wait for NTP reply from the SNTP server, GetLastError returns > >>> 7279088 > >>> 142210 40113 102097 > >>> 143458 41357 102101 > >>> ....... > >>> ....... > >>> 156500 54615 101885 > >>> > >>> NTP time changed from 38014 to 40113 ((40113 - 38014) / 100 = 21 mSec) > >>> while QPC jumped from 38018 to 142210 ((142210 - 38014) / 100 = 1042 > >>> mSec) > >> > >> How long was the downtime when you migrated the vServer? > > I use Win7-32 for testing. > >> > >> I have done similar tests and have not seen this... > >> Can you show the relevant parts of your patch. Remember I use the REFERENCE_COUTNER not iTSC. > > Yes, I also test it for reference counter. > > Can you use these parameters for testing: > > -cpu qemu64,hv_relaxed,hv_vapic,hv_spinlocks=4096,hv_refcnt > -rtc base=localtime -vga std -usb -usbdevice tablet -no-hpet My settings are: -cpu qemu64, +x2apic,family=0xf,hv_vapic,hv_spinlocks=0xfff,hv_relaxed,hv_tsc -m 1G -smp 4 > > If you look in the HMP at "info migrate" after the migration what does the downtime info say? downtime: 4 milliseconds > > Peter > > -- > To unsubscribe from this list: send the line "unsubscribe kvm" in > the body of a message to majordomo@vger.kernel.org > More majordomo info at http://vger.kernel.org/majordomo-info.html -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Am 08.01.2014 21:08, schrieb Vadim Rozenfeld: > On Wed, 2014-01-08 at 15:54 +0100, Peter Lieven wrote: >> On 08.01.2014 13:12, Vadim Rozenfeld wrote: >>> On Wed, 2014-01-08 at 12:48 +0100, Peter Lieven wrote: >>>> On 08.01.2014 11:44, Vadim Rozenfeld wrote: >>>>> On Wed, 2014-01-08 at 11:15 +0100, Peter Lieven wrote: >>>>>> On 08.01.2014 10:40, Vadim Rozenfeld wrote: >>>>>>> On Tue, 2014-01-07 at 18:52 +0100, Peter Lieven wrote: >>>>>>>> Am 07.01.2014 10:36, schrieb Vadim Rozenfeld: >>>>>>>>> On Thu, 2014-01-02 at 17:52 +0100, Peter Lieven wrote: >>>>>>>>>> Am 11.12.2013 19:59, schrieb Marcelo Tosatti: >>>>>>>>>>> On Wed, Dec 11, 2013 at 04:53:05PM -0200, Marcelo Tosatti wrote: >>>>>>>>>>>> On Sun, Dec 08, 2013 at 10:33:38PM +1100, Vadim Rozenfeld wrote: >>>>>>>>>>>>> Signed-off: Peter Lieven <pl@dlh.net> >>>>>>>>>>>>> Signed-off: Gleb Natapov <gleb@redhat.com> >>>>>>>>>>>>> Signed-off: Vadim Rozenfeld <vrozenfe@redhat.com> >>>>>>>>>>>>> >>>>>>>>>>>>> v1 -> v2 >>>>>>>>>>>>> 1. mark TSC page dirty as suggested by >>>>>>>>>>>>> Eric Northup <digitaleric@google.com> and Gleb >>>>>>>>>>>>> 2. disable local irq when calling get_kernel_ns, >>>>>>>>>>>>> as it was done by Peter Lieven <pl@dlhnet.de> >>>>>>>>>>>>> 3. move check for TSC page enable from second patch >>>>>>>>>>>>> to this one. >>>>>>>>>>>>> >>>>>>>>>>>>> --- >>>>>>>>>>>>> arch/x86/include/asm/kvm_host.h | 2 ++ >>>>>>>>>>>>> arch/x86/include/uapi/asm/hyperv.h | 13 +++++++++++++ >>>>>>>>>>>>> arch/x86/kvm/x86.c | 39 +++++++++++++++++++++++++++++++++++++- >>>>>>>>>>>>> include/uapi/linux/kvm.h | 1 + >>>>>>>>>>>>> 4 files changed, 54 insertions(+), 1 deletion(-) >>>>>>>>>>>>> >>>>>>>>>>>>> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h >>>>>>>>>>>>> index ae5d783..2fd0753 100644 >>>>>>>>>>>>> --- a/arch/x86/include/asm/kvm_host.h >>>>>>>>>>>>> +++ b/arch/x86/include/asm/kvm_host.h >>>>>>>>>>>>> @@ -605,6 +605,8 @@ struct kvm_arch { >>>>>>>>>>>>> /* fields used by HYPER-V emulation */ >>>>>>>>>>>>> u64 hv_guest_os_id; >>>>>>>>>>>>> u64 hv_hypercall; >>>>>>>>>>>>> + u64 hv_ref_count; >>>>>>>>>>>>> + u64 hv_tsc_page; >>>>>>>>>>>>> >>>>>>>>>>>>> #ifdef CONFIG_KVM_MMU_AUDIT >>>>>>>>>>>>> int audit_point; >>>>>>>>>>>>> diff --git a/arch/x86/include/uapi/asm/hyperv.h b/arch/x86/include/uapi/asm/hyperv.h >>>>>>>>>>>>> index b8f1c01..462efe7 100644 >>>>>>>>>>>>> --- a/arch/x86/include/uapi/asm/hyperv.h >>>>>>>>>>>>> +++ b/arch/x86/include/uapi/asm/hyperv.h >>>>>>>>>>>>> @@ -28,6 +28,9 @@ >>>>>>>>>>>>> /* Partition Reference Counter (HV_X64_MSR_TIME_REF_COUNT) available*/ >>>>>>>>>>>>> #define HV_X64_MSR_TIME_REF_COUNT_AVAILABLE (1 << 1) >>>>>>>>>>>>> >>>>>>>>>>>>> +/* A partition's reference time stamp counter (TSC) page */ >>>>>>>>>>>>> +#define HV_X64_MSR_REFERENCE_TSC 0x40000021 >>>>>>>>>>>>> + >>>>>>>>>>>>> /* >>>>>>>>>>>>> * There is a single feature flag that signifies the presence of the MSR >>>>>>>>>>>>> * that can be used to retrieve both the local APIC Timer frequency as >>>>>>>>>>>>> @@ -198,6 +201,9 @@ >>>>>>>>>>>>> #define HV_X64_MSR_APIC_ASSIST_PAGE_ADDRESS_MASK \ >>>>>>>>>>>>> (~((1ull << HV_X64_MSR_APIC_ASSIST_PAGE_ADDRESS_SHIFT) - 1)) >>>>>>>>>>>>> >>>>>>>>>>>>> +#define HV_X64_MSR_TSC_REFERENCE_ENABLE 0x00000001 >>>>>>>>>>>>> +#define HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT 12 >>>>>>>>>>>>> + >>>>>>>>>>>>> #define HV_PROCESSOR_POWER_STATE_C0 0 >>>>>>>>>>>>> #define HV_PROCESSOR_POWER_STATE_C1 1 >>>>>>>>>>>>> #define HV_PROCESSOR_POWER_STATE_C2 2 >>>>>>>>>>>>> @@ -210,4 +216,11 @@ >>>>>>>>>>>>> #define HV_STATUS_INVALID_ALIGNMENT 4 >>>>>>>>>>>>> #define HV_STATUS_INSUFFICIENT_BUFFERS 19 >>>>>>>>>>>>> >>>>>>>>>>>>> +typedef struct _HV_REFERENCE_TSC_PAGE { >>>>>>>>>>>>> + __u32 tsc_sequence; >>>>>>>>>>>>> + __u32 res1; >>>>>>>>>>>>> + __u64 tsc_scale; >>>>>>>>>>>>> + __s64 tsc_offset; >>>>>>>>>>>>> +} HV_REFERENCE_TSC_PAGE, *PHV_REFERENCE_TSC_PAGE; >>>>>>>>>>>>> + >>>>>>>>>>>>> #endif >>>>>>>>>>>>> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c >>>>>>>>>>>>> index 21ef1ba..5e4e495a 100644 >>>>>>>>>>>>> --- a/arch/x86/kvm/x86.c >>>>>>>>>>>>> +++ b/arch/x86/kvm/x86.c >>>>>>>>>>>>> @@ -840,7 +840,7 @@ EXPORT_SYMBOL_GPL(kvm_rdpmc); >>>>>>>>>>>>> static u32 msrs_to_save[] = { >>>>>>>>>>>>> MSR_KVM_SYSTEM_TIME, MSR_KVM_WALL_CLOCK, >>>>>>>>>>>>> MSR_KVM_SYSTEM_TIME_NEW, MSR_KVM_WALL_CLOCK_NEW, >>>>>>>>>>>>> - HV_X64_MSR_GUEST_OS_ID, HV_X64_MSR_HYPERCALL, >>>>>>>>>>>>> + HV_X64_MSR_GUEST_OS_ID, HV_X64_MSR_HYPERCALL, HV_X64_MSR_TIME_REF_COUNT, >>>>>>>>>>>>> HV_X64_MSR_APIC_ASSIST_PAGE, MSR_KVM_ASYNC_PF_EN, MSR_KVM_STEAL_TIME, >>>>>>>>>>>>> MSR_KVM_PV_EOI_EN, >>>>>>>>>>>>> MSR_IA32_SYSENTER_CS, MSR_IA32_SYSENTER_ESP, MSR_IA32_SYSENTER_EIP, >>>>>>>>>>>>> @@ -1826,6 +1826,8 @@ static bool kvm_hv_msr_partition_wide(u32 msr) >>>>>>>>>>>>> switch (msr) { >>>>>>>>>>>>> case HV_X64_MSR_GUEST_OS_ID: >>>>>>>>>>>>> case HV_X64_MSR_HYPERCALL: >>>>>>>>>>>>> + case HV_X64_MSR_REFERENCE_TSC: >>>>>>>>>>>>> + case HV_X64_MSR_TIME_REF_COUNT: >>>>>>>>>>>>> r = true; >>>>>>>>>>>>> break; >>>>>>>>>>>>> } >>>>>>>>>>>>> @@ -1865,6 +1867,29 @@ static int set_msr_hyperv_pw(struct kvm_vcpu *vcpu, u32 msr, u64 data) >>>>>>>>>>>>> if (__copy_to_user((void __user *)addr, instructions, 4)) >>>>>>>>>>>>> return 1; >>>>>>>>>>>>> kvm->arch.hv_hypercall = data; >>>>>>>>>>>>> + local_irq_disable(); >>>>>>>>>>>>> + kvm->arch.hv_ref_count = get_kernel_ns() + kvm->arch.kvmclock_offset; >>>>>>>>>>>>> + local_irq_enable(); >>>>>>>>>>>> >>>>>>>>>>>> Where does the docs say that HV_X64_MSR_HYPERCALL is the where the clock >>>>>>>>>>>> starts counting? >>>>>>>>>>>> >>>>>>>>>>>> No need to store kvmclock_offset in hv_ref_count? (moreover >>>>>>>>>>>> the name is weird, better name would be "hv_ref_start_time". >>>>>>>>>>> >>>>>>>>>>> Just add kvmclock_offset when reading the values (otherwise you have a >>>>>>>>>>> "stale copy" of kvmclock_offset in hv_ref_count). >>>>>>>>>>> >>>>>>>>>> >>>>>>>>>> After some experiments I think we do no need kvm->arch.hv_ref_count at all. >>>>>>>>>> >>>>>>>>>> I was debugging some weird clockjump issues and I think the problem is that after live migration >>>>>>>>>> kvm->arch.hv_ref_count is initialized to 0. Depending on the uptime of the vServer when the >>>>>>>>>> hypercall was set up this can lead to series jumps. >>>>>>>>>> >>>>>>>>>> So I would suggest to completely drop kvm->arch.hv_ref_count. >>>>>>>>>> >>>>>>>>>> And use simply this in get_msr_hyperv_pw(). >>>>>>>>>> >>>>>>>>>> case HV_X64_MSR_TIME_REF_COUNT: { >>>>>>>>>> data = div_u64(get_kernel_ns() + kvm->arch.kvmclock_offset, 100); >>>>>>>>>> break; >>>>>>>>>> } >>>>>>>>>> >>>>>>>>> >>>>>>>>> Agreed. It should work as long as we rely on kvmclock_offset. >>>>>>>>> Vadim. >>>>>>>> >>>>>>>> I think we can rely on kvmclock_offset. Had you had a chance to do further testing >>>>>>>> during the weekend? >>>>>>> >>>>>>> Yes, and still testing. >>>>>>> >>>>>>> There is still some inconsistency in the value returned by >>>>>>> QueryPerformanceCounter during migration. >>>>>> >>>>>> With my proposal? >>>>>> >>>>> >>>>> Right. >>>>> Please see below QTC vs. NTP time stamp >>>>> before and after migration: >>>>> >>>>> C:\timedrift\objchk_win7_x86\i386>timedrift.exe >>>>> quantum = 15 >>>>> QPC NTP Delta >>>>> 1311 1307 4 >>>>> 2621 2618 3 >>>>> 3932 3931 1 >>>>> ....... >>>>> ....... >>>>> 38018 38014 4 >>>>> Unable to wait for NTP reply from the SNTP server, GetLastError returns >>>>> 7279088 >>>>> 142210 40113 102097 >>>>> 143458 41357 102101 >>>>> ....... >>>>> ....... >>>>> 156500 54615 101885 >>>>> >>>>> NTP time changed from 38014 to 40113 ((40113 - 38014) / 100 = 21 mSec) >>>>> while QPC jumped from 38018 to 142210 ((142210 - 38014) / 100 = 1042 >>>>> mSec) >>>> >>>> How long was the downtime when you migrated the vServer? >>> I use Win7-32 for testing. >>>> >>>> I have done similar tests and have not seen this... >>>> Can you show the relevant parts of your patch. Remember I use the REFERENCE_COUTNER not iTSC. >>> Yes, I also test it for reference counter. >> >> Can you use these parameters for testing: >> >> -cpu qemu64,hv_relaxed,hv_vapic,hv_spinlocks=4096,hv_refcnt >> -rtc base=localtime -vga std -usb -usbdevice tablet -no-hpet > > My settings are: > -cpu qemu64, > +x2apic,family=0xf,hv_vapic,hv_spinlocks=0xfff,hv_relaxed,hv_tsc -m 1G > -smp 4 do you also see the jump if you leave the hv_tsc out and just use hv_refcnt? Peter -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Wed, 2014-01-08 at 23:20 +0100, Peter Lieven wrote: > Am 08.01.2014 21:08, schrieb Vadim Rozenfeld: > > On Wed, 2014-01-08 at 15:54 +0100, Peter Lieven wrote: > >> On 08.01.2014 13:12, Vadim Rozenfeld wrote: > >>> On Wed, 2014-01-08 at 12:48 +0100, Peter Lieven wrote: > >>>> On 08.01.2014 11:44, Vadim Rozenfeld wrote: > >>>>> On Wed, 2014-01-08 at 11:15 +0100, Peter Lieven wrote: > >>>>>> On 08.01.2014 10:40, Vadim Rozenfeld wrote: > >>>>>>> On Tue, 2014-01-07 at 18:52 +0100, Peter Lieven wrote: > >>>>>>>> Am 07.01.2014 10:36, schrieb Vadim Rozenfeld: > >>>>>>>>> On Thu, 2014-01-02 at 17:52 +0100, Peter Lieven wrote: > >>>>>>>>>> Am 11.12.2013 19:59, schrieb Marcelo Tosatti: > >>>>>>>>>>> On Wed, Dec 11, 2013 at 04:53:05PM -0200, Marcelo Tosatti wrote: > >>>>>>>>>>>> On Sun, Dec 08, 2013 at 10:33:38PM +1100, Vadim Rozenfeld wrote: > >>>>>>>>>>>>> Signed-off: Peter Lieven <pl@dlh.net> > >>>>>>>>>>>>> Signed-off: Gleb Natapov <gleb@redhat.com> > >>>>>>>>>>>>> Signed-off: Vadim Rozenfeld <vrozenfe@redhat.com> > >>>>>>>>>>>>> > >>>>>>>>>>>>> v1 -> v2 > >>>>>>>>>>>>> 1. mark TSC page dirty as suggested by > >>>>>>>>>>>>> Eric Northup <digitaleric@google.com> and Gleb > >>>>>>>>>>>>> 2. disable local irq when calling get_kernel_ns, > >>>>>>>>>>>>> as it was done by Peter Lieven <pl@dlhnet.de> > >>>>>>>>>>>>> 3. move check for TSC page enable from second patch > >>>>>>>>>>>>> to this one. > >>>>>>>>>>>>> > >>>>>>>>>>>>> --- > >>>>>>>>>>>>> arch/x86/include/asm/kvm_host.h | 2 ++ > >>>>>>>>>>>>> arch/x86/include/uapi/asm/hyperv.h | 13 +++++++++++++ > >>>>>>>>>>>>> arch/x86/kvm/x86.c | 39 +++++++++++++++++++++++++++++++++++++- > >>>>>>>>>>>>> include/uapi/linux/kvm.h | 1 + > >>>>>>>>>>>>> 4 files changed, 54 insertions(+), 1 deletion(-) > >>>>>>>>>>>>> > >>>>>>>>>>>>> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h > >>>>>>>>>>>>> index ae5d783..2fd0753 100644 > >>>>>>>>>>>>> --- a/arch/x86/include/asm/kvm_host.h > >>>>>>>>>>>>> +++ b/arch/x86/include/asm/kvm_host.h > >>>>>>>>>>>>> @@ -605,6 +605,8 @@ struct kvm_arch { > >>>>>>>>>>>>> /* fields used by HYPER-V emulation */ > >>>>>>>>>>>>> u64 hv_guest_os_id; > >>>>>>>>>>>>> u64 hv_hypercall; > >>>>>>>>>>>>> + u64 hv_ref_count; > >>>>>>>>>>>>> + u64 hv_tsc_page; > >>>>>>>>>>>>> > >>>>>>>>>>>>> #ifdef CONFIG_KVM_MMU_AUDIT > >>>>>>>>>>>>> int audit_point; > >>>>>>>>>>>>> diff --git a/arch/x86/include/uapi/asm/hyperv.h b/arch/x86/include/uapi/asm/hyperv.h > >>>>>>>>>>>>> index b8f1c01..462efe7 100644 > >>>>>>>>>>>>> --- a/arch/x86/include/uapi/asm/hyperv.h > >>>>>>>>>>>>> +++ b/arch/x86/include/uapi/asm/hyperv.h > >>>>>>>>>>>>> @@ -28,6 +28,9 @@ > >>>>>>>>>>>>> /* Partition Reference Counter (HV_X64_MSR_TIME_REF_COUNT) available*/ > >>>>>>>>>>>>> #define HV_X64_MSR_TIME_REF_COUNT_AVAILABLE (1 << 1) > >>>>>>>>>>>>> > >>>>>>>>>>>>> +/* A partition's reference time stamp counter (TSC) page */ > >>>>>>>>>>>>> +#define HV_X64_MSR_REFERENCE_TSC 0x40000021 > >>>>>>>>>>>>> + > >>>>>>>>>>>>> /* > >>>>>>>>>>>>> * There is a single feature flag that signifies the presence of the MSR > >>>>>>>>>>>>> * that can be used to retrieve both the local APIC Timer frequency as > >>>>>>>>>>>>> @@ -198,6 +201,9 @@ > >>>>>>>>>>>>> #define HV_X64_MSR_APIC_ASSIST_PAGE_ADDRESS_MASK \ > >>>>>>>>>>>>> (~((1ull << HV_X64_MSR_APIC_ASSIST_PAGE_ADDRESS_SHIFT) - 1)) > >>>>>>>>>>>>> > >>>>>>>>>>>>> +#define HV_X64_MSR_TSC_REFERENCE_ENABLE 0x00000001 > >>>>>>>>>>>>> +#define HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT 12 > >>>>>>>>>>>>> + > >>>>>>>>>>>>> #define HV_PROCESSOR_POWER_STATE_C0 0 > >>>>>>>>>>>>> #define HV_PROCESSOR_POWER_STATE_C1 1 > >>>>>>>>>>>>> #define HV_PROCESSOR_POWER_STATE_C2 2 > >>>>>>>>>>>>> @@ -210,4 +216,11 @@ > >>>>>>>>>>>>> #define HV_STATUS_INVALID_ALIGNMENT 4 > >>>>>>>>>>>>> #define HV_STATUS_INSUFFICIENT_BUFFERS 19 > >>>>>>>>>>>>> > >>>>>>>>>>>>> +typedef struct _HV_REFERENCE_TSC_PAGE { > >>>>>>>>>>>>> + __u32 tsc_sequence; > >>>>>>>>>>>>> + __u32 res1; > >>>>>>>>>>>>> + __u64 tsc_scale; > >>>>>>>>>>>>> + __s64 tsc_offset; > >>>>>>>>>>>>> +} HV_REFERENCE_TSC_PAGE, *PHV_REFERENCE_TSC_PAGE; > >>>>>>>>>>>>> + > >>>>>>>>>>>>> #endif > >>>>>>>>>>>>> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c > >>>>>>>>>>>>> index 21ef1ba..5e4e495a 100644 > >>>>>>>>>>>>> --- a/arch/x86/kvm/x86.c > >>>>>>>>>>>>> +++ b/arch/x86/kvm/x86.c > >>>>>>>>>>>>> @@ -840,7 +840,7 @@ EXPORT_SYMBOL_GPL(kvm_rdpmc); > >>>>>>>>>>>>> static u32 msrs_to_save[] = { > >>>>>>>>>>>>> MSR_KVM_SYSTEM_TIME, MSR_KVM_WALL_CLOCK, > >>>>>>>>>>>>> MSR_KVM_SYSTEM_TIME_NEW, MSR_KVM_WALL_CLOCK_NEW, > >>>>>>>>>>>>> - HV_X64_MSR_GUEST_OS_ID, HV_X64_MSR_HYPERCALL, > >>>>>>>>>>>>> + HV_X64_MSR_GUEST_OS_ID, HV_X64_MSR_HYPERCALL, HV_X64_MSR_TIME_REF_COUNT, > >>>>>>>>>>>>> HV_X64_MSR_APIC_ASSIST_PAGE, MSR_KVM_ASYNC_PF_EN, MSR_KVM_STEAL_TIME, > >>>>>>>>>>>>> MSR_KVM_PV_EOI_EN, > >>>>>>>>>>>>> MSR_IA32_SYSENTER_CS, MSR_IA32_SYSENTER_ESP, MSR_IA32_SYSENTER_EIP, > >>>>>>>>>>>>> @@ -1826,6 +1826,8 @@ static bool kvm_hv_msr_partition_wide(u32 msr) > >>>>>>>>>>>>> switch (msr) { > >>>>>>>>>>>>> case HV_X64_MSR_GUEST_OS_ID: > >>>>>>>>>>>>> case HV_X64_MSR_HYPERCALL: > >>>>>>>>>>>>> + case HV_X64_MSR_REFERENCE_TSC: > >>>>>>>>>>>>> + case HV_X64_MSR_TIME_REF_COUNT: > >>>>>>>>>>>>> r = true; > >>>>>>>>>>>>> break; > >>>>>>>>>>>>> } > >>>>>>>>>>>>> @@ -1865,6 +1867,29 @@ static int set_msr_hyperv_pw(struct kvm_vcpu *vcpu, u32 msr, u64 data) > >>>>>>>>>>>>> if (__copy_to_user((void __user *)addr, instructions, 4)) > >>>>>>>>>>>>> return 1; > >>>>>>>>>>>>> kvm->arch.hv_hypercall = data; > >>>>>>>>>>>>> + local_irq_disable(); > >>>>>>>>>>>>> + kvm->arch.hv_ref_count = get_kernel_ns() + kvm->arch.kvmclock_offset; > >>>>>>>>>>>>> + local_irq_enable(); > >>>>>>>>>>>> > >>>>>>>>>>>> Where does the docs say that HV_X64_MSR_HYPERCALL is the where the clock > >>>>>>>>>>>> starts counting? > >>>>>>>>>>>> > >>>>>>>>>>>> No need to store kvmclock_offset in hv_ref_count? (moreover > >>>>>>>>>>>> the name is weird, better name would be "hv_ref_start_time". > >>>>>>>>>>> > >>>>>>>>>>> Just add kvmclock_offset when reading the values (otherwise you have a > >>>>>>>>>>> "stale copy" of kvmclock_offset in hv_ref_count). > >>>>>>>>>>> > >>>>>>>>>> > >>>>>>>>>> After some experiments I think we do no need kvm->arch.hv_ref_count at all. > >>>>>>>>>> > >>>>>>>>>> I was debugging some weird clockjump issues and I think the problem is that after live migration > >>>>>>>>>> kvm->arch.hv_ref_count is initialized to 0. Depending on the uptime of the vServer when the > >>>>>>>>>> hypercall was set up this can lead to series jumps. > >>>>>>>>>> > >>>>>>>>>> So I would suggest to completely drop kvm->arch.hv_ref_count. > >>>>>>>>>> > >>>>>>>>>> And use simply this in get_msr_hyperv_pw(). > >>>>>>>>>> > >>>>>>>>>> case HV_X64_MSR_TIME_REF_COUNT: { > >>>>>>>>>> data = div_u64(get_kernel_ns() + kvm->arch.kvmclock_offset, 100); > >>>>>>>>>> break; > >>>>>>>>>> } > >>>>>>>>>> > >>>>>>>>> > >>>>>>>>> Agreed. It should work as long as we rely on kvmclock_offset. > >>>>>>>>> Vadim. > >>>>>>>> > >>>>>>>> I think we can rely on kvmclock_offset. Had you had a chance to do further testing > >>>>>>>> during the weekend? > >>>>>>> > >>>>>>> Yes, and still testing. > >>>>>>> > >>>>>>> There is still some inconsistency in the value returned by > >>>>>>> QueryPerformanceCounter during migration. > >>>>>> > >>>>>> With my proposal? > >>>>>> > >>>>> > >>>>> Right. > >>>>> Please see below QTC vs. NTP time stamp > >>>>> before and after migration: > >>>>> > >>>>> C:\timedrift\objchk_win7_x86\i386>timedrift.exe > >>>>> quantum = 15 > >>>>> QPC NTP Delta > >>>>> 1311 1307 4 > >>>>> 2621 2618 3 > >>>>> 3932 3931 1 > >>>>> ....... > >>>>> ....... > >>>>> 38018 38014 4 > >>>>> Unable to wait for NTP reply from the SNTP server, GetLastError returns > >>>>> 7279088 > >>>>> 142210 40113 102097 > >>>>> 143458 41357 102101 > >>>>> ....... > >>>>> ....... > >>>>> 156500 54615 101885 > >>>>> > >>>>> NTP time changed from 38014 to 40113 ((40113 - 38014) / 100 = 21 mSec) > >>>>> while QPC jumped from 38018 to 142210 ((142210 - 38014) / 100 = 1042 > >>>>> mSec) > >>>> > >>>> How long was the downtime when you migrated the vServer? > >>> I use Win7-32 for testing. > >>>> > >>>> I have done similar tests and have not seen this... > >>>> Can you show the relevant parts of your patch. Remember I use the REFERENCE_COUTNER not iTSC. > >>> Yes, I also test it for reference counter. > >> > >> Can you use these parameters for testing: > >> > >> -cpu qemu64,hv_relaxed,hv_vapic,hv_spinlocks=4096,hv_refcnt > >> -rtc base=localtime -vga std -usb -usbdevice tablet -no-hpet > > > > My settings are: > > -cpu qemu64, > > +x2apic,family=0xf,hv_vapic,hv_spinlocks=0xfff,hv_relaxed,hv_tsc -m 1G > > -smp 4 > > do you also see the jump if you leave the hv_tsc out and just use hv_refcnt? Yes, almost the same results. I will try to re-check it on a different machine tomorrow. Best regards, Vadim. > > Peter > -- > To unsubscribe from this list: send the line "unsubscribe kvm" in > the body of a message to majordomo@vger.kernel.org > More majordomo info at http://vger.kernel.org/majordomo-info.html -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Wed, 2014-01-08 at 23:20 +0100, Peter Lieven wrote: > Am 08.01.2014 21:08, schrieb Vadim Rozenfeld: > > On Wed, 2014-01-08 at 15:54 +0100, Peter Lieven wrote: > >> On 08.01.2014 13:12, Vadim Rozenfeld wrote: > >>> On Wed, 2014-01-08 at 12:48 +0100, Peter Lieven wrote: > >>>> On 08.01.2014 11:44, Vadim Rozenfeld wrote: > >>>>> On Wed, 2014-01-08 at 11:15 +0100, Peter Lieven wrote: > >>>>>> On 08.01.2014 10:40, Vadim Rozenfeld wrote: > >>>>>>> On Tue, 2014-01-07 at 18:52 +0100, Peter Lieven wrote: > >>>>>>>> Am 07.01.2014 10:36, schrieb Vadim Rozenfeld: > >>>>>>>>> On Thu, 2014-01-02 at 17:52 +0100, Peter Lieven wrote: > >>>>>>>>>> Am 11.12.2013 19:59, schrieb Marcelo Tosatti: > >>>>>>>>>>> On Wed, Dec 11, 2013 at 04:53:05PM -0200, Marcelo Tosatti wrote: > >>>>>>>>>>>> On Sun, Dec 08, 2013 at 10:33:38PM +1100, Vadim Rozenfeld wrote: > >>>>>>>>>>>>> Signed-off: Peter Lieven <pl@dlh.net> > >>>>>>>>>>>>> Signed-off: Gleb Natapov <gleb@redhat.com> > >>>>>>>>>>>>> Signed-off: Vadim Rozenfeld <vrozenfe@redhat.com> > >>>>>>>>>>>>> > >>>>>>>>>>>>> v1 -> v2 > >>>>>>>>>>>>> 1. mark TSC page dirty as suggested by > >>>>>>>>>>>>> Eric Northup <digitaleric@google.com> and Gleb > >>>>>>>>>>>>> 2. disable local irq when calling get_kernel_ns, > >>>>>>>>>>>>> as it was done by Peter Lieven <pl@dlhnet.de> > >>>>>>>>>>>>> 3. move check for TSC page enable from second patch > >>>>>>>>>>>>> to this one. > >>>>>>>>>>>>> > >>>>>>>>>>>>> --- > >>>>>>>>>>>>> arch/x86/include/asm/kvm_host.h | 2 ++ > >>>>>>>>>>>>> arch/x86/include/uapi/asm/hyperv.h | 13 +++++++++++++ > >>>>>>>>>>>>> arch/x86/kvm/x86.c | 39 +++++++++++++++++++++++++++++++++++++- > >>>>>>>>>>>>> include/uapi/linux/kvm.h | 1 + > >>>>>>>>>>>>> 4 files changed, 54 insertions(+), 1 deletion(-) > >>>>>>>>>>>>> > >>>>>>>>>>>>> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h > >>>>>>>>>>>>> index ae5d783..2fd0753 100644 > >>>>>>>>>>>>> --- a/arch/x86/include/asm/kvm_host.h > >>>>>>>>>>>>> +++ b/arch/x86/include/asm/kvm_host.h > >>>>>>>>>>>>> @@ -605,6 +605,8 @@ struct kvm_arch { > >>>>>>>>>>>>> /* fields used by HYPER-V emulation */ > >>>>>>>>>>>>> u64 hv_guest_os_id; > >>>>>>>>>>>>> u64 hv_hypercall; > >>>>>>>>>>>>> + u64 hv_ref_count; > >>>>>>>>>>>>> + u64 hv_tsc_page; > >>>>>>>>>>>>> > >>>>>>>>>>>>> #ifdef CONFIG_KVM_MMU_AUDIT > >>>>>>>>>>>>> int audit_point; > >>>>>>>>>>>>> diff --git a/arch/x86/include/uapi/asm/hyperv.h b/arch/x86/include/uapi/asm/hyperv.h > >>>>>>>>>>>>> index b8f1c01..462efe7 100644 > >>>>>>>>>>>>> --- a/arch/x86/include/uapi/asm/hyperv.h > >>>>>>>>>>>>> +++ b/arch/x86/include/uapi/asm/hyperv.h > >>>>>>>>>>>>> @@ -28,6 +28,9 @@ > >>>>>>>>>>>>> /* Partition Reference Counter (HV_X64_MSR_TIME_REF_COUNT) available*/ > >>>>>>>>>>>>> #define HV_X64_MSR_TIME_REF_COUNT_AVAILABLE (1 << 1) > >>>>>>>>>>>>> > >>>>>>>>>>>>> +/* A partition's reference time stamp counter (TSC) page */ > >>>>>>>>>>>>> +#define HV_X64_MSR_REFERENCE_TSC 0x40000021 > >>>>>>>>>>>>> + > >>>>>>>>>>>>> /* > >>>>>>>>>>>>> * There is a single feature flag that signifies the presence of the MSR > >>>>>>>>>>>>> * that can be used to retrieve both the local APIC Timer frequency as > >>>>>>>>>>>>> @@ -198,6 +201,9 @@ > >>>>>>>>>>>>> #define HV_X64_MSR_APIC_ASSIST_PAGE_ADDRESS_MASK \ > >>>>>>>>>>>>> (~((1ull << HV_X64_MSR_APIC_ASSIST_PAGE_ADDRESS_SHIFT) - 1)) > >>>>>>>>>>>>> > >>>>>>>>>>>>> +#define HV_X64_MSR_TSC_REFERENCE_ENABLE 0x00000001 > >>>>>>>>>>>>> +#define HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT 12 > >>>>>>>>>>>>> + > >>>>>>>>>>>>> #define HV_PROCESSOR_POWER_STATE_C0 0 > >>>>>>>>>>>>> #define HV_PROCESSOR_POWER_STATE_C1 1 > >>>>>>>>>>>>> #define HV_PROCESSOR_POWER_STATE_C2 2 > >>>>>>>>>>>>> @@ -210,4 +216,11 @@ > >>>>>>>>>>>>> #define HV_STATUS_INVALID_ALIGNMENT 4 > >>>>>>>>>>>>> #define HV_STATUS_INSUFFICIENT_BUFFERS 19 > >>>>>>>>>>>>> > >>>>>>>>>>>>> +typedef struct _HV_REFERENCE_TSC_PAGE { > >>>>>>>>>>>>> + __u32 tsc_sequence; > >>>>>>>>>>>>> + __u32 res1; > >>>>>>>>>>>>> + __u64 tsc_scale; > >>>>>>>>>>>>> + __s64 tsc_offset; > >>>>>>>>>>>>> +} HV_REFERENCE_TSC_PAGE, *PHV_REFERENCE_TSC_PAGE; > >>>>>>>>>>>>> + > >>>>>>>>>>>>> #endif > >>>>>>>>>>>>> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c > >>>>>>>>>>>>> index 21ef1ba..5e4e495a 100644 > >>>>>>>>>>>>> --- a/arch/x86/kvm/x86.c > >>>>>>>>>>>>> +++ b/arch/x86/kvm/x86.c > >>>>>>>>>>>>> @@ -840,7 +840,7 @@ EXPORT_SYMBOL_GPL(kvm_rdpmc); > >>>>>>>>>>>>> static u32 msrs_to_save[] = { > >>>>>>>>>>>>> MSR_KVM_SYSTEM_TIME, MSR_KVM_WALL_CLOCK, > >>>>>>>>>>>>> MSR_KVM_SYSTEM_TIME_NEW, MSR_KVM_WALL_CLOCK_NEW, > >>>>>>>>>>>>> - HV_X64_MSR_GUEST_OS_ID, HV_X64_MSR_HYPERCALL, > >>>>>>>>>>>>> + HV_X64_MSR_GUEST_OS_ID, HV_X64_MSR_HYPERCALL, HV_X64_MSR_TIME_REF_COUNT, > >>>>>>>>>>>>> HV_X64_MSR_APIC_ASSIST_PAGE, MSR_KVM_ASYNC_PF_EN, MSR_KVM_STEAL_TIME, > >>>>>>>>>>>>> MSR_KVM_PV_EOI_EN, > >>>>>>>>>>>>> MSR_IA32_SYSENTER_CS, MSR_IA32_SYSENTER_ESP, MSR_IA32_SYSENTER_EIP, > >>>>>>>>>>>>> @@ -1826,6 +1826,8 @@ static bool kvm_hv_msr_partition_wide(u32 msr) > >>>>>>>>>>>>> switch (msr) { > >>>>>>>>>>>>> case HV_X64_MSR_GUEST_OS_ID: > >>>>>>>>>>>>> case HV_X64_MSR_HYPERCALL: > >>>>>>>>>>>>> + case HV_X64_MSR_REFERENCE_TSC: > >>>>>>>>>>>>> + case HV_X64_MSR_TIME_REF_COUNT: > >>>>>>>>>>>>> r = true; > >>>>>>>>>>>>> break; > >>>>>>>>>>>>> } > >>>>>>>>>>>>> @@ -1865,6 +1867,29 @@ static int set_msr_hyperv_pw(struct kvm_vcpu *vcpu, u32 msr, u64 data) > >>>>>>>>>>>>> if (__copy_to_user((void __user *)addr, instructions, 4)) > >>>>>>>>>>>>> return 1; > >>>>>>>>>>>>> kvm->arch.hv_hypercall = data; > >>>>>>>>>>>>> + local_irq_disable(); > >>>>>>>>>>>>> + kvm->arch.hv_ref_count = get_kernel_ns() + kvm->arch.kvmclock_offset; > >>>>>>>>>>>>> + local_irq_enable(); > >>>>>>>>>>>> > >>>>>>>>>>>> Where does the docs say that HV_X64_MSR_HYPERCALL is the where the clock > >>>>>>>>>>>> starts counting? > >>>>>>>>>>>> > >>>>>>>>>>>> No need to store kvmclock_offset in hv_ref_count? (moreover > >>>>>>>>>>>> the name is weird, better name would be "hv_ref_start_time". > >>>>>>>>>>> > >>>>>>>>>>> Just add kvmclock_offset when reading the values (otherwise you have a > >>>>>>>>>>> "stale copy" of kvmclock_offset in hv_ref_count). > >>>>>>>>>>> > >>>>>>>>>> > >>>>>>>>>> After some experiments I think we do no need kvm->arch.hv_ref_count at all. > >>>>>>>>>> > >>>>>>>>>> I was debugging some weird clockjump issues and I think the problem is that after live migration > >>>>>>>>>> kvm->arch.hv_ref_count is initialized to 0. Depending on the uptime of the vServer when the > >>>>>>>>>> hypercall was set up this can lead to series jumps. > >>>>>>>>>> > >>>>>>>>>> So I would suggest to completely drop kvm->arch.hv_ref_count. > >>>>>>>>>> > >>>>>>>>>> And use simply this in get_msr_hyperv_pw(). > >>>>>>>>>> > >>>>>>>>>> case HV_X64_MSR_TIME_REF_COUNT: { > >>>>>>>>>> data = div_u64(get_kernel_ns() + kvm->arch.kvmclock_offset, 100); > >>>>>>>>>> break; > >>>>>>>>>> } > >>>>>>>>>> > >>>>>>>>> > >>>>>>>>> Agreed. It should work as long as we rely on kvmclock_offset. > >>>>>>>>> Vadim. > >>>>>>>> > >>>>>>>> I think we can rely on kvmclock_offset. Had you had a chance to do further testing > >>>>>>>> during the weekend? > >>>>>>> > >>>>>>> Yes, and still testing. It seems to be working fine. However, I would to keep hv_ref_count as is for the following reason: " A partition's reference time counter is initialized to zero when the partition is created." http://msdn.microsoft.com/en-us/library/windows/hardware/ff542637% 28v=vs.85%29.aspx Technical, hypercall page initialization event (HV_X64_MSR_HYPERCALL MSR write handler) can be treated as such event. In this case hv_ref_count is no more than just an offset, which should be subtracted from the current time ( get_kernel_ns() + kvm->arch.kvmclock_offset ) when handling HV_X64_MSR_TIME_REF_COUNT MSR read request. Best regards, Vadim. > >>>>>>> > >>>>>>> There is still some inconsistency in the value returned by > >>>>>>> QueryPerformanceCounter during migration. > >>>>>> > >>>>>> With my proposal? > >>>>>> > >>>>> > >>>>> Right. > >>>>> Please see below QTC vs. NTP time stamp > >>>>> before and after migration: > >>>>> > >>>>> C:\timedrift\objchk_win7_x86\i386>timedrift.exe > >>>>> quantum = 15 > >>>>> QPC NTP Delta > >>>>> 1311 1307 4 > >>>>> 2621 2618 3 > >>>>> 3932 3931 1 > >>>>> ....... > >>>>> ....... > >>>>> 38018 38014 4 > >>>>> Unable to wait for NTP reply from the SNTP server, GetLastError returns > >>>>> 7279088 > >>>>> 142210 40113 102097 > >>>>> 143458 41357 102101 > >>>>> ....... > >>>>> ....... > >>>>> 156500 54615 101885 > >>>>> > >>>>> NTP time changed from 38014 to 40113 ((40113 - 38014) / 100 = 21 mSec) > >>>>> while QPC jumped from 38018 to 142210 ((142210 - 38014) / 100 = 1042 > >>>>> mSec) > >>>> > >>>> How long was the downtime when you migrated the vServer? > >>> I use Win7-32 for testing. > >>>> > >>>> I have done similar tests and have not seen this... > >>>> Can you show the relevant parts of your patch. Remember I use the REFERENCE_COUTNER not iTSC. > >>> Yes, I also test it for reference counter. > >> > >> Can you use these parameters for testing: > >> > >> -cpu qemu64,hv_relaxed,hv_vapic,hv_spinlocks=4096,hv_refcnt > >> -rtc base=localtime -vga std -usb -usbdevice tablet -no-hpet > > > > My settings are: > > -cpu qemu64, > > +x2apic,family=0xf,hv_vapic,hv_spinlocks=0xfff,hv_relaxed,hv_tsc -m 1G > > -smp 4 > > do you also see the jump if you leave the hv_tsc out and just use hv_refcnt? > > Peter -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Am 12.01.2014 um 13:08 schrieb Vadim Rozenfeld <vrozenfe@redhat.com>: > On Wed, 2014-01-08 at 23:20 +0100, Peter Lieven wrote: >> Am 08.01.2014 21:08, schrieb Vadim Rozenfeld: >>> On Wed, 2014-01-08 at 15:54 +0100, Peter Lieven wrote: >>>> On 08.01.2014 13:12, Vadim Rozenfeld wrote: >>>>> On Wed, 2014-01-08 at 12:48 +0100, Peter Lieven wrote: >>>>>> On 08.01.2014 11:44, Vadim Rozenfeld wrote: >>>>>>> On Wed, 2014-01-08 at 11:15 +0100, Peter Lieven wrote: >>>>>>>> On 08.01.2014 10:40, Vadim Rozenfeld wrote: >>>>>>>>> On Tue, 2014-01-07 at 18:52 +0100, Peter Lieven wrote: >>>>>>>>>> Am 07.01.2014 10:36, schrieb Vadim Rozenfeld: >>>>>>>>>>> On Thu, 2014-01-02 at 17:52 +0100, Peter Lieven wrote: >>>>>>>>>>>> Am 11.12.2013 19:59, schrieb Marcelo Tosatti: >>>>>>>>>>>>> On Wed, Dec 11, 2013 at 04:53:05PM -0200, Marcelo Tosatti wrote: >>>>>>>>>>>>>> On Sun, Dec 08, 2013 at 10:33:38PM +1100, Vadim Rozenfeld wrote: >>>>>>>>>>>>>>> Signed-off: Peter Lieven <pl@dlh.net> >>>>>>>>>>>>>>> Signed-off: Gleb Natapov <gleb@redhat.com> >>>>>>>>>>>>>>> Signed-off: Vadim Rozenfeld <vrozenfe@redhat.com> >>>>>>>>>>>>>>> >>>>>>>>>>>>>>> v1 -> v2 >>>>>>>>>>>>>>> 1. mark TSC page dirty as suggested by >>>>>>>>>>>>>>> Eric Northup <digitaleric@google.com> and Gleb >>>>>>>>>>>>>>> 2. disable local irq when calling get_kernel_ns, >>>>>>>>>>>>>>> as it was done by Peter Lieven <pl@dlhnet.de> >>>>>>>>>>>>>>> 3. move check for TSC page enable from second patch >>>>>>>>>>>>>>> to this one. >>>>>>>>>>>>>>> >>>>>>>>>>>>>>> --- >>>>>>>>>>>>>>> arch/x86/include/asm/kvm_host.h | 2 ++ >>>>>>>>>>>>>>> arch/x86/include/uapi/asm/hyperv.h | 13 +++++++++++++ >>>>>>>>>>>>>>> arch/x86/kvm/x86.c | 39 +++++++++++++++++++++++++++++++++++++- >>>>>>>>>>>>>>> include/uapi/linux/kvm.h | 1 + >>>>>>>>>>>>>>> 4 files changed, 54 insertions(+), 1 deletion(-) >>>>>>>>>>>>>>> >>>>>>>>>>>>>>> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h >>>>>>>>>>>>>>> index ae5d783..2fd0753 100644 >>>>>>>>>>>>>>> --- a/arch/x86/include/asm/kvm_host.h >>>>>>>>>>>>>>> +++ b/arch/x86/include/asm/kvm_host.h >>>>>>>>>>>>>>> @@ -605,6 +605,8 @@ struct kvm_arch { >>>>>>>>>>>>>>> /* fields used by HYPER-V emulation */ >>>>>>>>>>>>>>> u64 hv_guest_os_id; >>>>>>>>>>>>>>> u64 hv_hypercall; >>>>>>>>>>>>>>> + u64 hv_ref_count; >>>>>>>>>>>>>>> + u64 hv_tsc_page; >>>>>>>>>>>>>>> >>>>>>>>>>>>>>> #ifdef CONFIG_KVM_MMU_AUDIT >>>>>>>>>>>>>>> int audit_point; >>>>>>>>>>>>>>> diff --git a/arch/x86/include/uapi/asm/hyperv.h b/arch/x86/include/uapi/asm/hyperv.h >>>>>>>>>>>>>>> index b8f1c01..462efe7 100644 >>>>>>>>>>>>>>> --- a/arch/x86/include/uapi/asm/hyperv.h >>>>>>>>>>>>>>> +++ b/arch/x86/include/uapi/asm/hyperv.h >>>>>>>>>>>>>>> @@ -28,6 +28,9 @@ >>>>>>>>>>>>>>> /* Partition Reference Counter (HV_X64_MSR_TIME_REF_COUNT) available*/ >>>>>>>>>>>>>>> #define HV_X64_MSR_TIME_REF_COUNT_AVAILABLE (1 << 1) >>>>>>>>>>>>>>> >>>>>>>>>>>>>>> +/* A partition's reference time stamp counter (TSC) page */ >>>>>>>>>>>>>>> +#define HV_X64_MSR_REFERENCE_TSC 0x40000021 >>>>>>>>>>>>>>> + >>>>>>>>>>>>>>> /* >>>>>>>>>>>>>>> * There is a single feature flag that signifies the presence of the MSR >>>>>>>>>>>>>>> * that can be used to retrieve both the local APIC Timer frequency as >>>>>>>>>>>>>>> @@ -198,6 +201,9 @@ >>>>>>>>>>>>>>> #define HV_X64_MSR_APIC_ASSIST_PAGE_ADDRESS_MASK \ >>>>>>>>>>>>>>> (~((1ull << HV_X64_MSR_APIC_ASSIST_PAGE_ADDRESS_SHIFT) - 1)) >>>>>>>>>>>>>>> >>>>>>>>>>>>>>> +#define HV_X64_MSR_TSC_REFERENCE_ENABLE 0x00000001 >>>>>>>>>>>>>>> +#define HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT 12 >>>>>>>>>>>>>>> + >>>>>>>>>>>>>>> #define HV_PROCESSOR_POWER_STATE_C0 0 >>>>>>>>>>>>>>> #define HV_PROCESSOR_POWER_STATE_C1 1 >>>>>>>>>>>>>>> #define HV_PROCESSOR_POWER_STATE_C2 2 >>>>>>>>>>>>>>> @@ -210,4 +216,11 @@ >>>>>>>>>>>>>>> #define HV_STATUS_INVALID_ALIGNMENT 4 >>>>>>>>>>>>>>> #define HV_STATUS_INSUFFICIENT_BUFFERS 19 >>>>>>>>>>>>>>> >>>>>>>>>>>>>>> +typedef struct _HV_REFERENCE_TSC_PAGE { >>>>>>>>>>>>>>> + __u32 tsc_sequence; >>>>>>>>>>>>>>> + __u32 res1; >>>>>>>>>>>>>>> + __u64 tsc_scale; >>>>>>>>>>>>>>> + __s64 tsc_offset; >>>>>>>>>>>>>>> +} HV_REFERENCE_TSC_PAGE, *PHV_REFERENCE_TSC_PAGE; >>>>>>>>>>>>>>> + >>>>>>>>>>>>>>> #endif >>>>>>>>>>>>>>> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c >>>>>>>>>>>>>>> index 21ef1ba..5e4e495a 100644 >>>>>>>>>>>>>>> --- a/arch/x86/kvm/x86.c >>>>>>>>>>>>>>> +++ b/arch/x86/kvm/x86.c >>>>>>>>>>>>>>> @@ -840,7 +840,7 @@ EXPORT_SYMBOL_GPL(kvm_rdpmc); >>>>>>>>>>>>>>> static u32 msrs_to_save[] = { >>>>>>>>>>>>>>> MSR_KVM_SYSTEM_TIME, MSR_KVM_WALL_CLOCK, >>>>>>>>>>>>>>> MSR_KVM_SYSTEM_TIME_NEW, MSR_KVM_WALL_CLOCK_NEW, >>>>>>>>>>>>>>> - HV_X64_MSR_GUEST_OS_ID, HV_X64_MSR_HYPERCALL, >>>>>>>>>>>>>>> + HV_X64_MSR_GUEST_OS_ID, HV_X64_MSR_HYPERCALL, HV_X64_MSR_TIME_REF_COUNT, >>>>>>>>>>>>>>> HV_X64_MSR_APIC_ASSIST_PAGE, MSR_KVM_ASYNC_PF_EN, MSR_KVM_STEAL_TIME, >>>>>>>>>>>>>>> MSR_KVM_PV_EOI_EN, >>>>>>>>>>>>>>> MSR_IA32_SYSENTER_CS, MSR_IA32_SYSENTER_ESP, MSR_IA32_SYSENTER_EIP, >>>>>>>>>>>>>>> @@ -1826,6 +1826,8 @@ static bool kvm_hv_msr_partition_wide(u32 msr) >>>>>>>>>>>>>>> switch (msr) { >>>>>>>>>>>>>>> case HV_X64_MSR_GUEST_OS_ID: >>>>>>>>>>>>>>> case HV_X64_MSR_HYPERCALL: >>>>>>>>>>>>>>> + case HV_X64_MSR_REFERENCE_TSC: >>>>>>>>>>>>>>> + case HV_X64_MSR_TIME_REF_COUNT: >>>>>>>>>>>>>>> r = true; >>>>>>>>>>>>>>> break; >>>>>>>>>>>>>>> } >>>>>>>>>>>>>>> @@ -1865,6 +1867,29 @@ static int set_msr_hyperv_pw(struct kvm_vcpu *vcpu, u32 msr, u64 data) >>>>>>>>>>>>>>> if (__copy_to_user((void __user *)addr, instructions, 4)) >>>>>>>>>>>>>>> return 1; >>>>>>>>>>>>>>> kvm->arch.hv_hypercall = data; >>>>>>>>>>>>>>> + local_irq_disable(); >>>>>>>>>>>>>>> + kvm->arch.hv_ref_count = get_kernel_ns() + kvm->arch.kvmclock_offset; >>>>>>>>>>>>>>> + local_irq_enable(); >>>>>>>>>>>>>> >>>>>>>>>>>>>> Where does the docs say that HV_X64_MSR_HYPERCALL is the where the clock >>>>>>>>>>>>>> starts counting? >>>>>>>>>>>>>> >>>>>>>>>>>>>> No need to store kvmclock_offset in hv_ref_count? (moreover >>>>>>>>>>>>>> the name is weird, better name would be "hv_ref_start_time". >>>>>>>>>>>>> >>>>>>>>>>>>> Just add kvmclock_offset when reading the values (otherwise you have a >>>>>>>>>>>>> "stale copy" of kvmclock_offset in hv_ref_count). >>>>>>>>>>>>> >>>>>>>>>>>> >>>>>>>>>>>> After some experiments I think we do no need kvm->arch.hv_ref_count at all. >>>>>>>>>>>> >>>>>>>>>>>> I was debugging some weird clockjump issues and I think the problem is that after live migration >>>>>>>>>>>> kvm->arch.hv_ref_count is initialized to 0. Depending on the uptime of the vServer when the >>>>>>>>>>>> hypercall was set up this can lead to series jumps. >>>>>>>>>>>> >>>>>>>>>>>> So I would suggest to completely drop kvm->arch.hv_ref_count. >>>>>>>>>>>> >>>>>>>>>>>> And use simply this in get_msr_hyperv_pw(). >>>>>>>>>>>> >>>>>>>>>>>> case HV_X64_MSR_TIME_REF_COUNT: { >>>>>>>>>>>> data = div_u64(get_kernel_ns() + kvm->arch.kvmclock_offset, 100); >>>>>>>>>>>> break; >>>>>>>>>>>> } >>>>>>>>>>>> >>>>>>>>>>> >>>>>>>>>>> Agreed. It should work as long as we rely on kvmclock_offset. >>>>>>>>>>> Vadim. >>>>>>>>>> >>>>>>>>>> I think we can rely on kvmclock_offset. Had you had a chance to do further testing >>>>>>>>>> during the weekend? >>>>>>>>> >>>>>>>>> Yes, and still testing. > > It seems to be working fine. However, I would to keep hv_ref_count as is > for the following reason: > " A partition's reference time counter is initialized to zero when the > partition is created." > http://msdn.microsoft.com/en-us/library/windows/hardware/ff542637% > 28v=vs.85%29.aspx > > Technical, hypercall page initialization event (HV_X64_MSR_HYPERCALL MSR > write handler) can be treated as such event. In this case hv_ref_count > is no more than just an offset, which should be subtracted from the > current time ( get_kernel_ns() + kvm->arch.kvmclock_offset ) when > handling HV_X64_MSR_TIME_REF_COUNT MSR read request. my 2 cents: a) if you treat the start of the vm as partition create event get_kernel_ns() + kvm->arch.kvmclock_offset is exactly the vm life time in nanoseconds. b) afaik currently the value of hv_ref_count is not restored after a live migration on the destination. this needs to be implemented for proper function. if we go for dropping this offset completely we can drop the requirement of restoring hv_ref_count as well. Peter > > Best regards, > Vadim. > >>>>>>>>> >>>>>>>>> There is still some inconsistency in the value returned by >>>>>>>>> QueryPerformanceCounter during migration. >>>>>>>> >>>>>>>> With my proposal? >>>>>>>> >>>>>>> >>>>>>> Right. >>>>>>> Please see below QTC vs. NTP time stamp >>>>>>> before and after migration: >>>>>>> >>>>>>> C:\timedrift\objchk_win7_x86\i386>timedrift.exe >>>>>>> quantum = 15 >>>>>>> QPC NTP Delta >>>>>>> 1311 1307 4 >>>>>>> 2621 2618 3 >>>>>>> 3932 3931 1 >>>>>>> ....... >>>>>>> ....... >>>>>>> 38018 38014 4 >>>>>>> Unable to wait for NTP reply from the SNTP server, GetLastError returns >>>>>>> 7279088 >>>>>>> 142210 40113 102097 >>>>>>> 143458 41357 102101 >>>>>>> ....... >>>>>>> ....... >>>>>>> 156500 54615 101885 >>>>>>> >>>>>>> NTP time changed from 38014 to 40113 ((40113 - 38014) / 100 = 21 mSec) >>>>>>> while QPC jumped from 38018 to 142210 ((142210 - 38014) / 100 = 1042 >>>>>>> mSec) >>>>>> >>>>>> How long was the downtime when you migrated the vServer? >>>>> I use Win7-32 for testing. >>>>>> >>>>>> I have done similar tests and have not seen this... >>>>>> Can you show the relevant parts of your patch. Remember I use the REFERENCE_COUTNER not iTSC. >>>>> Yes, I also test it for reference counter. >>>> >>>> Can you use these parameters for testing: >>>> >>>> -cpu qemu64,hv_relaxed,hv_vapic,hv_spinlocks=4096,hv_refcnt >>>> -rtc base=localtime -vga std -usb -usbdevice tablet -no-hpet >>> >>> My settings are: >>> -cpu qemu64, >>> +x2apic,family=0xf,hv_vapic,hv_spinlocks=0xfff,hv_relaxed,hv_tsc -m 1G >>> -smp 4 >> >> do you also see the jump if you leave the hv_tsc out and just use hv_refcnt? >> >> Peter > > -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Wed, 2013-12-11 at 16:53 -0200, Marcelo Tosatti wrote: > On Sun, Dec 08, 2013 at 10:33:38PM +1100, Vadim Rozenfeld wrote: > > Signed-off: Peter Lieven <pl@dlh.net> > > Signed-off: Gleb Natapov <gleb@redhat.com> > > Signed-off: Vadim Rozenfeld <vrozenfe@redhat.com> > > > > v1 -> v2 > > 1. mark TSC page dirty as suggested by > > Eric Northup <digitaleric@google.com> and Gleb > > 2. disable local irq when calling get_kernel_ns, > > as it was done by Peter Lieven <pl@dlhnet.de> > > 3. move check for TSC page enable from second patch > > to this one. > > > > --- > > arch/x86/include/asm/kvm_host.h | 2 ++ > > arch/x86/include/uapi/asm/hyperv.h | 13 +++++++++++++ > > arch/x86/kvm/x86.c | 39 +++++++++++++++++++++++++++++++++++++- > > include/uapi/linux/kvm.h | 1 + > > 4 files changed, 54 insertions(+), 1 deletion(-) > > > > diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h > > index ae5d783..2fd0753 100644 > > --- a/arch/x86/include/asm/kvm_host.h > > +++ b/arch/x86/include/asm/kvm_host.h > > @@ -605,6 +605,8 @@ struct kvm_arch { > > /* fields used by HYPER-V emulation */ > > u64 hv_guest_os_id; > > u64 hv_hypercall; > > + u64 hv_ref_count; > > + u64 hv_tsc_page; > > > > #ifdef CONFIG_KVM_MMU_AUDIT > > int audit_point; > > diff --git a/arch/x86/include/uapi/asm/hyperv.h b/arch/x86/include/uapi/asm/hyperv.h > > index b8f1c01..462efe7 100644 > > --- a/arch/x86/include/uapi/asm/hyperv.h > > +++ b/arch/x86/include/uapi/asm/hyperv.h > > @@ -28,6 +28,9 @@ > > /* Partition Reference Counter (HV_X64_MSR_TIME_REF_COUNT) available*/ > > #define HV_X64_MSR_TIME_REF_COUNT_AVAILABLE (1 << 1) > > > > +/* A partition's reference time stamp counter (TSC) page */ > > +#define HV_X64_MSR_REFERENCE_TSC 0x40000021 > > + > > /* > > * There is a single feature flag that signifies the presence of the MSR > > * that can be used to retrieve both the local APIC Timer frequency as > > @@ -198,6 +201,9 @@ > > #define HV_X64_MSR_APIC_ASSIST_PAGE_ADDRESS_MASK \ > > (~((1ull << HV_X64_MSR_APIC_ASSIST_PAGE_ADDRESS_SHIFT) - 1)) > > > > +#define HV_X64_MSR_TSC_REFERENCE_ENABLE 0x00000001 > > +#define HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT 12 > > + > > #define HV_PROCESSOR_POWER_STATE_C0 0 > > #define HV_PROCESSOR_POWER_STATE_C1 1 > > #define HV_PROCESSOR_POWER_STATE_C2 2 > > @@ -210,4 +216,11 @@ > > #define HV_STATUS_INVALID_ALIGNMENT 4 > > #define HV_STATUS_INSUFFICIENT_BUFFERS 19 > > > > +typedef struct _HV_REFERENCE_TSC_PAGE { > > + __u32 tsc_sequence; > > + __u32 res1; > > + __u64 tsc_scale; > > + __s64 tsc_offset; > > +} HV_REFERENCE_TSC_PAGE, *PHV_REFERENCE_TSC_PAGE; > > + > > #endif > > diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c > > index 21ef1ba..5e4e495a 100644 > > --- a/arch/x86/kvm/x86.c > > +++ b/arch/x86/kvm/x86.c > > @@ -840,7 +840,7 @@ EXPORT_SYMBOL_GPL(kvm_rdpmc); > > static u32 msrs_to_save[] = { > > MSR_KVM_SYSTEM_TIME, MSR_KVM_WALL_CLOCK, > > MSR_KVM_SYSTEM_TIME_NEW, MSR_KVM_WALL_CLOCK_NEW, > > - HV_X64_MSR_GUEST_OS_ID, HV_X64_MSR_HYPERCALL, > > + HV_X64_MSR_GUEST_OS_ID, HV_X64_MSR_HYPERCALL, HV_X64_MSR_TIME_REF_COUNT, > > HV_X64_MSR_APIC_ASSIST_PAGE, MSR_KVM_ASYNC_PF_EN, MSR_KVM_STEAL_TIME, > > MSR_KVM_PV_EOI_EN, > > MSR_IA32_SYSENTER_CS, MSR_IA32_SYSENTER_ESP, MSR_IA32_SYSENTER_EIP, > > @@ -1826,6 +1826,8 @@ static bool kvm_hv_msr_partition_wide(u32 msr) > > switch (msr) { > > case HV_X64_MSR_GUEST_OS_ID: > > case HV_X64_MSR_HYPERCALL: > > + case HV_X64_MSR_REFERENCE_TSC: > > + case HV_X64_MSR_TIME_REF_COUNT: > > r = true; > > break; > > } > > @@ -1865,6 +1867,29 @@ static int set_msr_hyperv_pw(struct kvm_vcpu *vcpu, u32 msr, u64 data) > > if (__copy_to_user((void __user *)addr, instructions, 4)) > > return 1; > > kvm->arch.hv_hypercall = data; > > + local_irq_disable(); > > + kvm->arch.hv_ref_count = get_kernel_ns() + kvm->arch.kvmclock_offset; > > + local_irq_enable(); > > Where does the docs say that HV_X64_MSR_HYPERCALL is the where the clock > starts counting? Sorry for delay in reply Access to HV_X64_MSR_HYPERCALL happens during execution KiSystemStartup function code, which I think can be treated as a partition create time. > > No need to store kvmclock_offset in hv_ref_count? (moreover > the name is weird, better name would be "hv_ref_start_time". > > > + break; > > + } > > + case HV_X64_MSR_REFERENCE_TSC: { > > + u64 gfn; > > + unsigned long addr; > > + HV_REFERENCE_TSC_PAGE tsc_ref; > > + tsc_ref.tsc_sequence = 0; > > + if (!(data & HV_X64_MSR_TSC_REFERENCE_ENABLE)) { > > + kvm->arch.hv_tsc_page = data; > > + break; > > + } > > + gfn = data >> HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT; > > + addr = gfn_to_hva(kvm, data >> > > + HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT); > > + if (kvm_is_error_hva(addr)) > > + return 1; > > + if (__copy_to_user((void __user *)addr, &tsc_ref, sizeof(tsc_ref))) > > + return 1; > > + mark_page_dirty(kvm, gfn); > > + kvm->arch.hv_tsc_page = data; > > break; > > } > > default: > > @@ -2291,6 +2316,17 @@ static int get_msr_hyperv_pw(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata) > > case HV_X64_MSR_HYPERCALL: > > data = kvm->arch.hv_hypercall; > > break; > > + case HV_X64_MSR_TIME_REF_COUNT: { > > + u64 now_ns; > > + local_irq_disable(); > > + now_ns = get_kernel_ns() + kvm->arch.kvmclock_offset; > > + data = div_u64(now_ns - kvm->arch.hv_ref_count, 100); > > + local_irq_enable(); > > No need for irq disable/enable pairs. > > > + break; > > + } > > + case HV_X64_MSR_REFERENCE_TSC: > > + data = kvm->arch.hv_tsc_page; > > + break; > > Does it return non-zero data even if not enabled? kernel reads HV_X64_MSR_REFERENCE_TSC MSR only once during hyperv initialization and checks checks enable bit > > > default: > > vcpu_unimpl(vcpu, "Hyper-V unhandled rdmsr: 0x%x\n", msr); > > return 1; > > @@ -2605,6 +2641,7 @@ int kvm_dev_ioctl_check_extension(long ext) > > case KVM_CAP_ASSIGN_DEV_IRQ: > > case KVM_CAP_PCI_2_3: > > #endif > > + case KVM_CAP_HYPERV_TIME: > > r = 1; > > break; > > case KVM_CAP_COALESCED_MMIO: > > diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h > > index 902f124..686c1ca 100644 > > --- a/include/uapi/linux/kvm.h > > +++ b/include/uapi/linux/kvm.h > > @@ -674,6 +674,7 @@ struct kvm_ppc_smmu_info { > > #define KVM_CAP_ARM_EL1_32BIT 93 > > #define KVM_CAP_SPAPR_MULTITCE 94 > > #define KVM_CAP_EXT_EMUL_CPUID 95 > > +#define KVM_CAP_HYPERV_TIME 96 > > > > #ifdef KVM_CAP_IRQ_ROUTING > > > > -- > > 1.8.1.4 > -- > To unsubscribe from this list: send the line "unsubscribe kvm" in > the body of a message to majordomo@vger.kernel.org > More majordomo info at http://vger.kernel.org/majordomo-info.html -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index ae5d783..2fd0753 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -605,6 +605,8 @@ struct kvm_arch { /* fields used by HYPER-V emulation */ u64 hv_guest_os_id; u64 hv_hypercall; + u64 hv_ref_count; + u64 hv_tsc_page; #ifdef CONFIG_KVM_MMU_AUDIT int audit_point; diff --git a/arch/x86/include/uapi/asm/hyperv.h b/arch/x86/include/uapi/asm/hyperv.h index b8f1c01..462efe7 100644 --- a/arch/x86/include/uapi/asm/hyperv.h +++ b/arch/x86/include/uapi/asm/hyperv.h @@ -28,6 +28,9 @@ /* Partition Reference Counter (HV_X64_MSR_TIME_REF_COUNT) available*/ #define HV_X64_MSR_TIME_REF_COUNT_AVAILABLE (1 << 1) +/* A partition's reference time stamp counter (TSC) page */ +#define HV_X64_MSR_REFERENCE_TSC 0x40000021 + /* * There is a single feature flag that signifies the presence of the MSR * that can be used to retrieve both the local APIC Timer frequency as @@ -198,6 +201,9 @@ #define HV_X64_MSR_APIC_ASSIST_PAGE_ADDRESS_MASK \ (~((1ull << HV_X64_MSR_APIC_ASSIST_PAGE_ADDRESS_SHIFT) - 1)) +#define HV_X64_MSR_TSC_REFERENCE_ENABLE 0x00000001 +#define HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT 12 + #define HV_PROCESSOR_POWER_STATE_C0 0 #define HV_PROCESSOR_POWER_STATE_C1 1 #define HV_PROCESSOR_POWER_STATE_C2 2 @@ -210,4 +216,11 @@ #define HV_STATUS_INVALID_ALIGNMENT 4 #define HV_STATUS_INSUFFICIENT_BUFFERS 19 +typedef struct _HV_REFERENCE_TSC_PAGE { + __u32 tsc_sequence; + __u32 res1; + __u64 tsc_scale; + __s64 tsc_offset; +} HV_REFERENCE_TSC_PAGE, *PHV_REFERENCE_TSC_PAGE; + #endif diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 21ef1ba..5e4e495a 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -840,7 +840,7 @@ EXPORT_SYMBOL_GPL(kvm_rdpmc); static u32 msrs_to_save[] = { MSR_KVM_SYSTEM_TIME, MSR_KVM_WALL_CLOCK, MSR_KVM_SYSTEM_TIME_NEW, MSR_KVM_WALL_CLOCK_NEW, - HV_X64_MSR_GUEST_OS_ID, HV_X64_MSR_HYPERCALL, + HV_X64_MSR_GUEST_OS_ID, HV_X64_MSR_HYPERCALL, HV_X64_MSR_TIME_REF_COUNT, HV_X64_MSR_APIC_ASSIST_PAGE, MSR_KVM_ASYNC_PF_EN, MSR_KVM_STEAL_TIME, MSR_KVM_PV_EOI_EN, MSR_IA32_SYSENTER_CS, MSR_IA32_SYSENTER_ESP, MSR_IA32_SYSENTER_EIP, @@ -1826,6 +1826,8 @@ static bool kvm_hv_msr_partition_wide(u32 msr) switch (msr) { case HV_X64_MSR_GUEST_OS_ID: case HV_X64_MSR_HYPERCALL: + case HV_X64_MSR_REFERENCE_TSC: + case HV_X64_MSR_TIME_REF_COUNT: r = true; break; } @@ -1865,6 +1867,29 @@ static int set_msr_hyperv_pw(struct kvm_vcpu *vcpu, u32 msr, u64 data) if (__copy_to_user((void __user *)addr, instructions, 4)) return 1; kvm->arch.hv_hypercall = data; + local_irq_disable(); + kvm->arch.hv_ref_count = get_kernel_ns() + kvm->arch.kvmclock_offset; + local_irq_enable(); + break; + } + case HV_X64_MSR_REFERENCE_TSC: { + u64 gfn; + unsigned long addr; + HV_REFERENCE_TSC_PAGE tsc_ref; + tsc_ref.tsc_sequence = 0; + if (!(data & HV_X64_MSR_TSC_REFERENCE_ENABLE)) { + kvm->arch.hv_tsc_page = data; + break; + } + gfn = data >> HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT; + addr = gfn_to_hva(kvm, data >> + HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT); + if (kvm_is_error_hva(addr)) + return 1; + if (__copy_to_user((void __user *)addr, &tsc_ref, sizeof(tsc_ref))) + return 1; + mark_page_dirty(kvm, gfn); + kvm->arch.hv_tsc_page = data; break; } default: @@ -2291,6 +2316,17 @@ static int get_msr_hyperv_pw(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata) case HV_X64_MSR_HYPERCALL: data = kvm->arch.hv_hypercall; break; + case HV_X64_MSR_TIME_REF_COUNT: { + u64 now_ns; + local_irq_disable(); + now_ns = get_kernel_ns() + kvm->arch.kvmclock_offset; + data = div_u64(now_ns - kvm->arch.hv_ref_count, 100); + local_irq_enable(); + break; + } + case HV_X64_MSR_REFERENCE_TSC: + data = kvm->arch.hv_tsc_page; + break; default: vcpu_unimpl(vcpu, "Hyper-V unhandled rdmsr: 0x%x\n", msr); return 1; @@ -2605,6 +2641,7 @@ int kvm_dev_ioctl_check_extension(long ext) case KVM_CAP_ASSIGN_DEV_IRQ: case KVM_CAP_PCI_2_3: #endif + case KVM_CAP_HYPERV_TIME: r = 1; break; case KVM_CAP_COALESCED_MMIO: diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 902f124..686c1ca 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -674,6 +674,7 @@ struct kvm_ppc_smmu_info { #define KVM_CAP_ARM_EL1_32BIT 93 #define KVM_CAP_SPAPR_MULTITCE 94 #define KVM_CAP_EXT_EMUL_CPUID 95 +#define KVM_CAP_HYPERV_TIME 96 #ifdef KVM_CAP_IRQ_ROUTING