diff mbox

[RFC,v3,1/2] add support for Hyper-V reference time counter

Message ID 1386502419-26614-2-git-send-email-vrozenfe@redhat.com (mailing list archive)
State New, archived
Headers show

Commit Message

Vadim Rozenfeld Dec. 8, 2013, 11:33 a.m. UTC
Signed-off: Peter Lieven <pl@dlh.net>
Signed-off: Gleb Natapov <gleb@redhat.com>
Signed-off: Vadim Rozenfeld <vrozenfe@redhat.com>

v1 -> v2
1. mark TSC page dirty as suggested by 
    Eric Northup <digitaleric@google.com> and Gleb
2. disable local irq when calling get_kernel_ns, 
    as it was done by Peter Lieven <pl@dlhnet.de>
3. move check for TSC page enable from second patch
    to this one.

---
 arch/x86/include/asm/kvm_host.h    |  2 ++
 arch/x86/include/uapi/asm/hyperv.h | 13 +++++++++++++
 arch/x86/kvm/x86.c                 | 39 +++++++++++++++++++++++++++++++++++++-
 include/uapi/linux/kvm.h           |  1 +
 4 files changed, 54 insertions(+), 1 deletion(-)

Comments

Paolo Bonzini Dec. 9, 2013, 2:23 p.m. UTC | #1
Il 08/12/2013 12:33, Vadim Rozenfeld ha scritto:
> Signed-off: Peter Lieven <pl@dlh.net>
> Signed-off: Gleb Natapov <gleb@redhat.com>
> Signed-off: Vadim Rozenfeld <vrozenfe@redhat.com>
> 
> v1 -> v2
> 1. mark TSC page dirty as suggested by 
>     Eric Northup <digitaleric@google.com> and Gleb
> 2. disable local irq when calling get_kernel_ns, 
>     as it was done by Peter Lieven <pl@dlhnet.de>
> 3. move check for TSC page enable from second patch
>     to this one.
> 
> ---
>  arch/x86/include/asm/kvm_host.h    |  2 ++
>  arch/x86/include/uapi/asm/hyperv.h | 13 +++++++++++++
>  arch/x86/kvm/x86.c                 | 39 +++++++++++++++++++++++++++++++++++++-
>  include/uapi/linux/kvm.h           |  1 +
>  4 files changed, 54 insertions(+), 1 deletion(-)
> 
> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> index ae5d783..2fd0753 100644
> --- a/arch/x86/include/asm/kvm_host.h
> +++ b/arch/x86/include/asm/kvm_host.h
> @@ -605,6 +605,8 @@ struct kvm_arch {
>  	/* fields used by HYPER-V emulation */
>  	u64 hv_guest_os_id;
>  	u64 hv_hypercall;
> +	u64 hv_ref_count;
> +	u64 hv_tsc_page;
>  
>  	#ifdef CONFIG_KVM_MMU_AUDIT
>  	int audit_point;
> diff --git a/arch/x86/include/uapi/asm/hyperv.h b/arch/x86/include/uapi/asm/hyperv.h
> index b8f1c01..462efe7 100644
> --- a/arch/x86/include/uapi/asm/hyperv.h
> +++ b/arch/x86/include/uapi/asm/hyperv.h
> @@ -28,6 +28,9 @@
>  /* Partition Reference Counter (HV_X64_MSR_TIME_REF_COUNT) available*/
>  #define HV_X64_MSR_TIME_REF_COUNT_AVAILABLE	(1 << 1)
>  
> +/* A partition's reference time stamp counter (TSC) page */
> +#define HV_X64_MSR_REFERENCE_TSC		0x40000021
> +
>  /*
>   * There is a single feature flag that signifies the presence of the MSR
>   * that can be used to retrieve both the local APIC Timer frequency as
> @@ -198,6 +201,9 @@
>  #define HV_X64_MSR_APIC_ASSIST_PAGE_ADDRESS_MASK	\
>  		(~((1ull << HV_X64_MSR_APIC_ASSIST_PAGE_ADDRESS_SHIFT) - 1))
>  
> +#define HV_X64_MSR_TSC_REFERENCE_ENABLE		0x00000001
> +#define HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT	12
> +
>  #define HV_PROCESSOR_POWER_STATE_C0		0
>  #define HV_PROCESSOR_POWER_STATE_C1		1
>  #define HV_PROCESSOR_POWER_STATE_C2		2
> @@ -210,4 +216,11 @@
>  #define HV_STATUS_INVALID_ALIGNMENT		4
>  #define HV_STATUS_INSUFFICIENT_BUFFERS		19
>  
> +typedef struct _HV_REFERENCE_TSC_PAGE {
> +	__u32 tsc_sequence;
> +	__u32 res1;
> +	__u64 tsc_scale;
> +	__s64 tsc_offset;
> +} HV_REFERENCE_TSC_PAGE, *PHV_REFERENCE_TSC_PAGE;
> +
>  #endif
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index 21ef1ba..5e4e495a 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -840,7 +840,7 @@ EXPORT_SYMBOL_GPL(kvm_rdpmc);
>  static u32 msrs_to_save[] = {
>  	MSR_KVM_SYSTEM_TIME, MSR_KVM_WALL_CLOCK,
>  	MSR_KVM_SYSTEM_TIME_NEW, MSR_KVM_WALL_CLOCK_NEW,
> -	HV_X64_MSR_GUEST_OS_ID, HV_X64_MSR_HYPERCALL,
> +	HV_X64_MSR_GUEST_OS_ID, HV_X64_MSR_HYPERCALL, HV_X64_MSR_TIME_REF_COUNT,

You need to bump KVM_SAVE_MSRS_BEGIN.

>  	HV_X64_MSR_APIC_ASSIST_PAGE, MSR_KVM_ASYNC_PF_EN, MSR_KVM_STEAL_TIME,
>  	MSR_KVM_PV_EOI_EN,
>  	MSR_IA32_SYSENTER_CS, MSR_IA32_SYSENTER_ESP, MSR_IA32_SYSENTER_EIP,
> @@ -1826,6 +1826,8 @@ static bool kvm_hv_msr_partition_wide(u32 msr)
>  	switch (msr) {
>  	case HV_X64_MSR_GUEST_OS_ID:
>  	case HV_X64_MSR_HYPERCALL:
> +	case HV_X64_MSR_REFERENCE_TSC:
> +	case HV_X64_MSR_TIME_REF_COUNT:
>  		r = true;
>  		break;
>  	}
> @@ -1865,6 +1867,29 @@ static int set_msr_hyperv_pw(struct kvm_vcpu *vcpu, u32 msr, u64 data)
>  		if (__copy_to_user((void __user *)addr, instructions, 4))
>  			return 1;
>  		kvm->arch.hv_hypercall = data;
> +		local_irq_disable();
> +		kvm->arch.hv_ref_count = get_kernel_ns() + kvm->arch.kvmclock_offset;
> +		local_irq_enable();

Please add a patch that moves these four lines from KVM_GET_CLOCK and
KVM_SET_CLOCK

                local_irq_disable();
                now_ns = get_kernel_ns();
                delta = user_ns.clock - now_ns;
                local_irq_enable();
                kvm->arch.kvmclock_offset = delta;
                kvm_gen_update_masterclock(kvm);

                local_irq_disable();
                now_ns = get_kernel_ns();
                user_ns.clock = kvm->arch.kvmclock_offset + now_ns;
                local_irq_enable();

For example u64 kvm_get_clock_ns(struct kvm *) and void
kvm_set_clock_ns(struct kvm *, u64).  You can then use the
kvm_get_clock_ns function in this patch.

> +		break;
> +	}
> +	case HV_X64_MSR_REFERENCE_TSC: {
> +		u64 gfn;
> +		unsigned long addr;
> +		HV_REFERENCE_TSC_PAGE tsc_ref;
> +		tsc_ref.tsc_sequence = 0;

Please zero it with memset.  You're leaking values from the stack to the
guest.

> +		if (!(data & HV_X64_MSR_TSC_REFERENCE_ENABLE)) {
> +			kvm->arch.hv_tsc_page = data;
> +			break;
> +		}
> +		gfn = data >> HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT;
> +		addr = gfn_to_hva(kvm, data >>
> +			HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT);
> +		if (kvm_is_error_hva(addr))
> +			return 1;
> +		if (__copy_to_user((void __user *)addr, &tsc_ref, sizeof(tsc_ref)))
> +			return 1;
> +		mark_page_dirty(kvm, gfn);
> +		kvm->arch.hv_tsc_page = data;
>  		break;
>  	}
>  	default:
> @@ -2291,6 +2316,17 @@ static int get_msr_hyperv_pw(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
>  	case HV_X64_MSR_HYPERCALL:
>  		data = kvm->arch.hv_hypercall;
>  		break;
> +	case HV_X64_MSR_TIME_REF_COUNT: {
> +		u64 now_ns;
> +		local_irq_disable();
> +		now_ns = get_kernel_ns() + kvm->arch.kvmclock_offset;
> +		data = div_u64(now_ns - kvm->arch.hv_ref_count, 100);
> +		local_irq_enable();

Another possible user of kvm_get_clock_ns.

The patch should be good with these changes.

Paolo

> +		break;
> +	}
> +	case HV_X64_MSR_REFERENCE_TSC:
> +		data = kvm->arch.hv_tsc_page;
> +		break;
>  	default:
>  		vcpu_unimpl(vcpu, "Hyper-V unhandled rdmsr: 0x%x\n", msr);
>  		return 1;
> @@ -2605,6 +2641,7 @@ int kvm_dev_ioctl_check_extension(long ext)
>  	case KVM_CAP_ASSIGN_DEV_IRQ:
>  	case KVM_CAP_PCI_2_3:
>  #endif
> +	case KVM_CAP_HYPERV_TIME:
>  		r = 1;
>  		break;
>  	case KVM_CAP_COALESCED_MMIO:
> diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
> index 902f124..686c1ca 100644
> --- a/include/uapi/linux/kvm.h
> +++ b/include/uapi/linux/kvm.h
> @@ -674,6 +674,7 @@ struct kvm_ppc_smmu_info {
>  #define KVM_CAP_ARM_EL1_32BIT 93
>  #define KVM_CAP_SPAPR_MULTITCE 94
>  #define KVM_CAP_EXT_EMUL_CPUID 95
> +#define KVM_CAP_HYPERV_TIME 96
>  
>  #ifdef KVM_CAP_IRQ_ROUTING
>  
> 

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Vadim Rozenfeld Dec. 10, 2013, 10:46 a.m. UTC | #2
On Mon, 2013-12-09 at 15:23 +0100, Paolo Bonzini wrote:
> Il 08/12/2013 12:33, Vadim Rozenfeld ha scritto:
> > Signed-off: Peter Lieven <pl@dlh.net>
> > Signed-off: Gleb Natapov <gleb@redhat.com>
> > Signed-off: Vadim Rozenfeld <vrozenfe@redhat.com>
> > 
> > v1 -> v2
> > 1. mark TSC page dirty as suggested by 
> >     Eric Northup <digitaleric@google.com> and Gleb
> > 2. disable local irq when calling get_kernel_ns, 
> >     as it was done by Peter Lieven <pl@dlhnet.de>
> > 3. move check for TSC page enable from second patch
> >     to this one.
> > 
> > ---
> >  arch/x86/include/asm/kvm_host.h    |  2 ++
> >  arch/x86/include/uapi/asm/hyperv.h | 13 +++++++++++++
> >  arch/x86/kvm/x86.c                 | 39 +++++++++++++++++++++++++++++++++++++-
> >  include/uapi/linux/kvm.h           |  1 +
> >  4 files changed, 54 insertions(+), 1 deletion(-)
> > 
> > diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> > index ae5d783..2fd0753 100644
> > --- a/arch/x86/include/asm/kvm_host.h
> > +++ b/arch/x86/include/asm/kvm_host.h
> > @@ -605,6 +605,8 @@ struct kvm_arch {
> >  	/* fields used by HYPER-V emulation */
> >  	u64 hv_guest_os_id;
> >  	u64 hv_hypercall;
> > +	u64 hv_ref_count;
> > +	u64 hv_tsc_page;
> >  
> >  	#ifdef CONFIG_KVM_MMU_AUDIT
> >  	int audit_point;
> > diff --git a/arch/x86/include/uapi/asm/hyperv.h b/arch/x86/include/uapi/asm/hyperv.h
> > index b8f1c01..462efe7 100644
> > --- a/arch/x86/include/uapi/asm/hyperv.h
> > +++ b/arch/x86/include/uapi/asm/hyperv.h
> > @@ -28,6 +28,9 @@
> >  /* Partition Reference Counter (HV_X64_MSR_TIME_REF_COUNT) available*/
> >  #define HV_X64_MSR_TIME_REF_COUNT_AVAILABLE	(1 << 1)
> >  
> > +/* A partition's reference time stamp counter (TSC) page */
> > +#define HV_X64_MSR_REFERENCE_TSC		0x40000021
> > +
> >  /*
> >   * There is a single feature flag that signifies the presence of the MSR
> >   * that can be used to retrieve both the local APIC Timer frequency as
> > @@ -198,6 +201,9 @@
> >  #define HV_X64_MSR_APIC_ASSIST_PAGE_ADDRESS_MASK	\
> >  		(~((1ull << HV_X64_MSR_APIC_ASSIST_PAGE_ADDRESS_SHIFT) - 1))
> >  
> > +#define HV_X64_MSR_TSC_REFERENCE_ENABLE		0x00000001
> > +#define HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT	12
> > +
> >  #define HV_PROCESSOR_POWER_STATE_C0		0
> >  #define HV_PROCESSOR_POWER_STATE_C1		1
> >  #define HV_PROCESSOR_POWER_STATE_C2		2
> > @@ -210,4 +216,11 @@
> >  #define HV_STATUS_INVALID_ALIGNMENT		4
> >  #define HV_STATUS_INSUFFICIENT_BUFFERS		19
> >  
> > +typedef struct _HV_REFERENCE_TSC_PAGE {
> > +	__u32 tsc_sequence;
> > +	__u32 res1;
> > +	__u64 tsc_scale;
> > +	__s64 tsc_offset;
> > +} HV_REFERENCE_TSC_PAGE, *PHV_REFERENCE_TSC_PAGE;
> > +
> >  #endif
> > diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> > index 21ef1ba..5e4e495a 100644
> > --- a/arch/x86/kvm/x86.c
> > +++ b/arch/x86/kvm/x86.c
> > @@ -840,7 +840,7 @@ EXPORT_SYMBOL_GPL(kvm_rdpmc);
> >  static u32 msrs_to_save[] = {
> >  	MSR_KVM_SYSTEM_TIME, MSR_KVM_WALL_CLOCK,
> >  	MSR_KVM_SYSTEM_TIME_NEW, MSR_KVM_WALL_CLOCK_NEW,
> > -	HV_X64_MSR_GUEST_OS_ID, HV_X64_MSR_HYPERCALL,
> > +	HV_X64_MSR_GUEST_OS_ID, HV_X64_MSR_HYPERCALL, HV_X64_MSR_TIME_REF_COUNT,
> 
> You need to bump KVM_SAVE_MSRS_BEGIN.
> 
> >  	HV_X64_MSR_APIC_ASSIST_PAGE, MSR_KVM_ASYNC_PF_EN, MSR_KVM_STEAL_TIME,
> >  	MSR_KVM_PV_EOI_EN,
> >  	MSR_IA32_SYSENTER_CS, MSR_IA32_SYSENTER_ESP, MSR_IA32_SYSENTER_EIP,
> > @@ -1826,6 +1826,8 @@ static bool kvm_hv_msr_partition_wide(u32 msr)
> >  	switch (msr) {
> >  	case HV_X64_MSR_GUEST_OS_ID:
> >  	case HV_X64_MSR_HYPERCALL:
> > +	case HV_X64_MSR_REFERENCE_TSC:
> > +	case HV_X64_MSR_TIME_REF_COUNT:
> >  		r = true;
> >  		break;
> >  	}
> > @@ -1865,6 +1867,29 @@ static int set_msr_hyperv_pw(struct kvm_vcpu *vcpu, u32 msr, u64 data)
> >  		if (__copy_to_user((void __user *)addr, instructions, 4))
> >  			return 1;
> >  		kvm->arch.hv_hypercall = data;
> > +		local_irq_disable();
> > +		kvm->arch.hv_ref_count = get_kernel_ns() + kvm->arch.kvmclock_offset;
> > +		local_irq_enable();
> 
> Please add a patch that moves these four lines from KVM_GET_CLOCK and
> KVM_SET_CLOCK
> 
>                 local_irq_disable();
>                 now_ns = get_kernel_ns();
>                 delta = user_ns.clock - now_ns;
>                 local_irq_enable();
>                 kvm->arch.kvmclock_offset = delta;
>                 kvm_gen_update_masterclock(kvm);
> 
>                 local_irq_disable();
>                 now_ns = get_kernel_ns();
>                 user_ns.clock = kvm->arch.kvmclock_offset + now_ns;
>                 local_irq_enable();
> 
> For example u64 kvm_get_clock_ns(struct kvm *) and void
> kvm_set_clock_ns(struct kvm *, u64).  You can then use the
> kvm_get_clock_ns function in this patch.
> 

OK.

> > +		break;
> > +	}
> > +	case HV_X64_MSR_REFERENCE_TSC: {
> > +		u64 gfn;
> > +		unsigned long addr;
> > +		HV_REFERENCE_TSC_PAGE tsc_ref;
> > +		tsc_ref.tsc_sequence = 0;
> 
> Please zero it with memset.  You're leaking values from the stack to the
> guest.

I can do it, but it is probably not necessary, mostly because guest
allocates one page from nonpaged pool and uses it exclusively for
vTSC purpose only, and host only interested in three values, located at
the beginning of this page.    

> 
> > +		if (!(data & HV_X64_MSR_TSC_REFERENCE_ENABLE)) {
> > +			kvm->arch.hv_tsc_page = data;
> > +			break;
> > +		}
> > +		gfn = data >> HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT;
> > +		addr = gfn_to_hva(kvm, data >>
> > +			HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT);
> > +		if (kvm_is_error_hva(addr))
> > +			return 1;
> > +		if (__copy_to_user((void __user *)addr, &tsc_ref, sizeof(tsc_ref)))
> > +			return 1;
> > +		mark_page_dirty(kvm, gfn);
> > +		kvm->arch.hv_tsc_page = data;
> >  		break;
> >  	}
> >  	default:
> > @@ -2291,6 +2316,17 @@ static int get_msr_hyperv_pw(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
> >  	case HV_X64_MSR_HYPERCALL:
> >  		data = kvm->arch.hv_hypercall;
> >  		break;
> > +	case HV_X64_MSR_TIME_REF_COUNT: {
> > +		u64 now_ns;
> > +		local_irq_disable();
> > +		now_ns = get_kernel_ns() + kvm->arch.kvmclock_offset;
> > +		data = div_u64(now_ns - kvm->arch.hv_ref_count, 100);
> > +		local_irq_enable();
> 
> Another possible user of kvm_get_clock_ns.
> 
> The patch should be good with these changes.

Thanks,
Vadim.
> 
> Paolo
> 
> > +		break;
> > +	}
> > +	case HV_X64_MSR_REFERENCE_TSC:
> > +		data = kvm->arch.hv_tsc_page;
> > +		break;
> >  	default:
> >  		vcpu_unimpl(vcpu, "Hyper-V unhandled rdmsr: 0x%x\n", msr);
> >  		return 1;
> > @@ -2605,6 +2641,7 @@ int kvm_dev_ioctl_check_extension(long ext)
> >  	case KVM_CAP_ASSIGN_DEV_IRQ:
> >  	case KVM_CAP_PCI_2_3:
> >  #endif
> > +	case KVM_CAP_HYPERV_TIME:
> >  		r = 1;
> >  		break;
> >  	case KVM_CAP_COALESCED_MMIO:
> > diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
> > index 902f124..686c1ca 100644
> > --- a/include/uapi/linux/kvm.h
> > +++ b/include/uapi/linux/kvm.h
> > @@ -674,6 +674,7 @@ struct kvm_ppc_smmu_info {
> >  #define KVM_CAP_ARM_EL1_32BIT 93
> >  #define KVM_CAP_SPAPR_MULTITCE 94
> >  #define KVM_CAP_EXT_EMUL_CPUID 95
> > +#define KVM_CAP_HYPERV_TIME 96
> >  
> >  #ifdef KVM_CAP_IRQ_ROUTING
> >  
> > 
> 


--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Marcelo Tosatti Dec. 11, 2013, 6:53 p.m. UTC | #3
On Sun, Dec 08, 2013 at 10:33:38PM +1100, Vadim Rozenfeld wrote:
> Signed-off: Peter Lieven <pl@dlh.net>
> Signed-off: Gleb Natapov <gleb@redhat.com>
> Signed-off: Vadim Rozenfeld <vrozenfe@redhat.com>
> 
> v1 -> v2
> 1. mark TSC page dirty as suggested by 
>     Eric Northup <digitaleric@google.com> and Gleb
> 2. disable local irq when calling get_kernel_ns, 
>     as it was done by Peter Lieven <pl@dlhnet.de>
> 3. move check for TSC page enable from second patch
>     to this one.
> 
> ---
>  arch/x86/include/asm/kvm_host.h    |  2 ++
>  arch/x86/include/uapi/asm/hyperv.h | 13 +++++++++++++
>  arch/x86/kvm/x86.c                 | 39 +++++++++++++++++++++++++++++++++++++-
>  include/uapi/linux/kvm.h           |  1 +
>  4 files changed, 54 insertions(+), 1 deletion(-)
> 
> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> index ae5d783..2fd0753 100644
> --- a/arch/x86/include/asm/kvm_host.h
> +++ b/arch/x86/include/asm/kvm_host.h
> @@ -605,6 +605,8 @@ struct kvm_arch {
>  	/* fields used by HYPER-V emulation */
>  	u64 hv_guest_os_id;
>  	u64 hv_hypercall;
> +	u64 hv_ref_count;
> +	u64 hv_tsc_page;
>  
>  	#ifdef CONFIG_KVM_MMU_AUDIT
>  	int audit_point;
> diff --git a/arch/x86/include/uapi/asm/hyperv.h b/arch/x86/include/uapi/asm/hyperv.h
> index b8f1c01..462efe7 100644
> --- a/arch/x86/include/uapi/asm/hyperv.h
> +++ b/arch/x86/include/uapi/asm/hyperv.h
> @@ -28,6 +28,9 @@
>  /* Partition Reference Counter (HV_X64_MSR_TIME_REF_COUNT) available*/
>  #define HV_X64_MSR_TIME_REF_COUNT_AVAILABLE	(1 << 1)
>  
> +/* A partition's reference time stamp counter (TSC) page */
> +#define HV_X64_MSR_REFERENCE_TSC		0x40000021
> +
>  /*
>   * There is a single feature flag that signifies the presence of the MSR
>   * that can be used to retrieve both the local APIC Timer frequency as
> @@ -198,6 +201,9 @@
>  #define HV_X64_MSR_APIC_ASSIST_PAGE_ADDRESS_MASK	\
>  		(~((1ull << HV_X64_MSR_APIC_ASSIST_PAGE_ADDRESS_SHIFT) - 1))
>  
> +#define HV_X64_MSR_TSC_REFERENCE_ENABLE		0x00000001
> +#define HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT	12
> +
>  #define HV_PROCESSOR_POWER_STATE_C0		0
>  #define HV_PROCESSOR_POWER_STATE_C1		1
>  #define HV_PROCESSOR_POWER_STATE_C2		2
> @@ -210,4 +216,11 @@
>  #define HV_STATUS_INVALID_ALIGNMENT		4
>  #define HV_STATUS_INSUFFICIENT_BUFFERS		19
>  
> +typedef struct _HV_REFERENCE_TSC_PAGE {
> +	__u32 tsc_sequence;
> +	__u32 res1;
> +	__u64 tsc_scale;
> +	__s64 tsc_offset;
> +} HV_REFERENCE_TSC_PAGE, *PHV_REFERENCE_TSC_PAGE;
> +
>  #endif
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index 21ef1ba..5e4e495a 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -840,7 +840,7 @@ EXPORT_SYMBOL_GPL(kvm_rdpmc);
>  static u32 msrs_to_save[] = {
>  	MSR_KVM_SYSTEM_TIME, MSR_KVM_WALL_CLOCK,
>  	MSR_KVM_SYSTEM_TIME_NEW, MSR_KVM_WALL_CLOCK_NEW,
> -	HV_X64_MSR_GUEST_OS_ID, HV_X64_MSR_HYPERCALL,
> +	HV_X64_MSR_GUEST_OS_ID, HV_X64_MSR_HYPERCALL, HV_X64_MSR_TIME_REF_COUNT,
>  	HV_X64_MSR_APIC_ASSIST_PAGE, MSR_KVM_ASYNC_PF_EN, MSR_KVM_STEAL_TIME,
>  	MSR_KVM_PV_EOI_EN,
>  	MSR_IA32_SYSENTER_CS, MSR_IA32_SYSENTER_ESP, MSR_IA32_SYSENTER_EIP,
> @@ -1826,6 +1826,8 @@ static bool kvm_hv_msr_partition_wide(u32 msr)
>  	switch (msr) {
>  	case HV_X64_MSR_GUEST_OS_ID:
>  	case HV_X64_MSR_HYPERCALL:
> +	case HV_X64_MSR_REFERENCE_TSC:
> +	case HV_X64_MSR_TIME_REF_COUNT:
>  		r = true;
>  		break;
>  	}
> @@ -1865,6 +1867,29 @@ static int set_msr_hyperv_pw(struct kvm_vcpu *vcpu, u32 msr, u64 data)
>  		if (__copy_to_user((void __user *)addr, instructions, 4))
>  			return 1;
>  		kvm->arch.hv_hypercall = data;
> +		local_irq_disable();
> +		kvm->arch.hv_ref_count = get_kernel_ns() + kvm->arch.kvmclock_offset;
> +		local_irq_enable();

Where does the docs say that HV_X64_MSR_HYPERCALL is the where the clock
starts counting?

No need to store kvmclock_offset in hv_ref_count? (moreover
the name is weird, better name would be "hv_ref_start_time".

> +		break;
> +	}
> +	case HV_X64_MSR_REFERENCE_TSC: {
> +		u64 gfn;
> +		unsigned long addr;
> +		HV_REFERENCE_TSC_PAGE tsc_ref;
> +		tsc_ref.tsc_sequence = 0;
> +		if (!(data & HV_X64_MSR_TSC_REFERENCE_ENABLE)) {
> +			kvm->arch.hv_tsc_page = data;
> +			break;
> +		}
> +		gfn = data >> HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT;
> +		addr = gfn_to_hva(kvm, data >>
> +			HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT);
> +		if (kvm_is_error_hva(addr))
> +			return 1;
> +		if (__copy_to_user((void __user *)addr, &tsc_ref, sizeof(tsc_ref)))
> +			return 1;
> +		mark_page_dirty(kvm, gfn);
> +		kvm->arch.hv_tsc_page = data;
>  		break;
>  	}
>  	default:
> @@ -2291,6 +2316,17 @@ static int get_msr_hyperv_pw(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
>  	case HV_X64_MSR_HYPERCALL:
>  		data = kvm->arch.hv_hypercall;
>  		break;
> +	case HV_X64_MSR_TIME_REF_COUNT: {
> +		u64 now_ns;
> +		local_irq_disable();
> +		now_ns = get_kernel_ns() + kvm->arch.kvmclock_offset;
> +		data = div_u64(now_ns - kvm->arch.hv_ref_count, 100);
> +		local_irq_enable();

No need for irq disable/enable pairs.

> +		break;
> +	}
> +	case HV_X64_MSR_REFERENCE_TSC:
> +		data = kvm->arch.hv_tsc_page;
> +		break;

Does it return non-zero data even if not enabled?

>  	default:
>  		vcpu_unimpl(vcpu, "Hyper-V unhandled rdmsr: 0x%x\n", msr);
>  		return 1;
> @@ -2605,6 +2641,7 @@ int kvm_dev_ioctl_check_extension(long ext)
>  	case KVM_CAP_ASSIGN_DEV_IRQ:
>  	case KVM_CAP_PCI_2_3:
>  #endif
> +	case KVM_CAP_HYPERV_TIME:
>  		r = 1;
>  		break;
>  	case KVM_CAP_COALESCED_MMIO:
> diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
> index 902f124..686c1ca 100644
> --- a/include/uapi/linux/kvm.h
> +++ b/include/uapi/linux/kvm.h
> @@ -674,6 +674,7 @@ struct kvm_ppc_smmu_info {
>  #define KVM_CAP_ARM_EL1_32BIT 93
>  #define KVM_CAP_SPAPR_MULTITCE 94
>  #define KVM_CAP_EXT_EMUL_CPUID 95
> +#define KVM_CAP_HYPERV_TIME 96
>  
>  #ifdef KVM_CAP_IRQ_ROUTING
>  
> -- 
> 1.8.1.4
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Marcelo Tosatti Dec. 11, 2013, 6:59 p.m. UTC | #4
On Wed, Dec 11, 2013 at 04:53:05PM -0200, Marcelo Tosatti wrote:
> On Sun, Dec 08, 2013 at 10:33:38PM +1100, Vadim Rozenfeld wrote:
> > Signed-off: Peter Lieven <pl@dlh.net>
> > Signed-off: Gleb Natapov <gleb@redhat.com>
> > Signed-off: Vadim Rozenfeld <vrozenfe@redhat.com>
> > 
> > v1 -> v2
> > 1. mark TSC page dirty as suggested by 
> >     Eric Northup <digitaleric@google.com> and Gleb
> > 2. disable local irq when calling get_kernel_ns, 
> >     as it was done by Peter Lieven <pl@dlhnet.de>
> > 3. move check for TSC page enable from second patch
> >     to this one.
> > 
> > ---
> >  arch/x86/include/asm/kvm_host.h    |  2 ++
> >  arch/x86/include/uapi/asm/hyperv.h | 13 +++++++++++++
> >  arch/x86/kvm/x86.c                 | 39 +++++++++++++++++++++++++++++++++++++-
> >  include/uapi/linux/kvm.h           |  1 +
> >  4 files changed, 54 insertions(+), 1 deletion(-)
> > 
> > diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> > index ae5d783..2fd0753 100644
> > --- a/arch/x86/include/asm/kvm_host.h
> > +++ b/arch/x86/include/asm/kvm_host.h
> > @@ -605,6 +605,8 @@ struct kvm_arch {
> >  	/* fields used by HYPER-V emulation */
> >  	u64 hv_guest_os_id;
> >  	u64 hv_hypercall;
> > +	u64 hv_ref_count;
> > +	u64 hv_tsc_page;
> >  
> >  	#ifdef CONFIG_KVM_MMU_AUDIT
> >  	int audit_point;
> > diff --git a/arch/x86/include/uapi/asm/hyperv.h b/arch/x86/include/uapi/asm/hyperv.h
> > index b8f1c01..462efe7 100644
> > --- a/arch/x86/include/uapi/asm/hyperv.h
> > +++ b/arch/x86/include/uapi/asm/hyperv.h
> > @@ -28,6 +28,9 @@
> >  /* Partition Reference Counter (HV_X64_MSR_TIME_REF_COUNT) available*/
> >  #define HV_X64_MSR_TIME_REF_COUNT_AVAILABLE	(1 << 1)
> >  
> > +/* A partition's reference time stamp counter (TSC) page */
> > +#define HV_X64_MSR_REFERENCE_TSC		0x40000021
> > +
> >  /*
> >   * There is a single feature flag that signifies the presence of the MSR
> >   * that can be used to retrieve both the local APIC Timer frequency as
> > @@ -198,6 +201,9 @@
> >  #define HV_X64_MSR_APIC_ASSIST_PAGE_ADDRESS_MASK	\
> >  		(~((1ull << HV_X64_MSR_APIC_ASSIST_PAGE_ADDRESS_SHIFT) - 1))
> >  
> > +#define HV_X64_MSR_TSC_REFERENCE_ENABLE		0x00000001
> > +#define HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT	12
> > +
> >  #define HV_PROCESSOR_POWER_STATE_C0		0
> >  #define HV_PROCESSOR_POWER_STATE_C1		1
> >  #define HV_PROCESSOR_POWER_STATE_C2		2
> > @@ -210,4 +216,11 @@
> >  #define HV_STATUS_INVALID_ALIGNMENT		4
> >  #define HV_STATUS_INSUFFICIENT_BUFFERS		19
> >  
> > +typedef struct _HV_REFERENCE_TSC_PAGE {
> > +	__u32 tsc_sequence;
> > +	__u32 res1;
> > +	__u64 tsc_scale;
> > +	__s64 tsc_offset;
> > +} HV_REFERENCE_TSC_PAGE, *PHV_REFERENCE_TSC_PAGE;
> > +
> >  #endif
> > diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> > index 21ef1ba..5e4e495a 100644
> > --- a/arch/x86/kvm/x86.c
> > +++ b/arch/x86/kvm/x86.c
> > @@ -840,7 +840,7 @@ EXPORT_SYMBOL_GPL(kvm_rdpmc);
> >  static u32 msrs_to_save[] = {
> >  	MSR_KVM_SYSTEM_TIME, MSR_KVM_WALL_CLOCK,
> >  	MSR_KVM_SYSTEM_TIME_NEW, MSR_KVM_WALL_CLOCK_NEW,
> > -	HV_X64_MSR_GUEST_OS_ID, HV_X64_MSR_HYPERCALL,
> > +	HV_X64_MSR_GUEST_OS_ID, HV_X64_MSR_HYPERCALL, HV_X64_MSR_TIME_REF_COUNT,
> >  	HV_X64_MSR_APIC_ASSIST_PAGE, MSR_KVM_ASYNC_PF_EN, MSR_KVM_STEAL_TIME,
> >  	MSR_KVM_PV_EOI_EN,
> >  	MSR_IA32_SYSENTER_CS, MSR_IA32_SYSENTER_ESP, MSR_IA32_SYSENTER_EIP,
> > @@ -1826,6 +1826,8 @@ static bool kvm_hv_msr_partition_wide(u32 msr)
> >  	switch (msr) {
> >  	case HV_X64_MSR_GUEST_OS_ID:
> >  	case HV_X64_MSR_HYPERCALL:
> > +	case HV_X64_MSR_REFERENCE_TSC:
> > +	case HV_X64_MSR_TIME_REF_COUNT:
> >  		r = true;
> >  		break;
> >  	}
> > @@ -1865,6 +1867,29 @@ static int set_msr_hyperv_pw(struct kvm_vcpu *vcpu, u32 msr, u64 data)
> >  		if (__copy_to_user((void __user *)addr, instructions, 4))
> >  			return 1;
> >  		kvm->arch.hv_hypercall = data;
> > +		local_irq_disable();
> > +		kvm->arch.hv_ref_count = get_kernel_ns() + kvm->arch.kvmclock_offset;
> > +		local_irq_enable();
> 
> Where does the docs say that HV_X64_MSR_HYPERCALL is the where the clock
> starts counting?
> 
> No need to store kvmclock_offset in hv_ref_count? (moreover
> the name is weird, better name would be "hv_ref_start_time".

Just add kvmclock_offset when reading the values (otherwise you have a
"stale copy" of kvmclock_offset in hv_ref_count).

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Paolo Bonzini Dec. 12, 2013, 9:33 a.m. UTC | #5
Il 11/12/2013 19:59, Marcelo Tosatti ha scritto:
> > Where does the docs say that HV_X64_MSR_HYPERCALL is the where the clock
> > starts counting?
> > 
> > No need to store kvmclock_offset in hv_ref_count? (moreover
> > the name is weird, better name would be "hv_ref_start_time".
> 
> Just add kvmclock_offset when reading the values (otherwise you have a
> "stale copy" of kvmclock_offset in hv_ref_count).

As mentioned in my review, v4 will probably read kvmclock to provide the
reference time counter.  So all the complexity of kvmclock_offset will
be hidden behind a function.

Paolo
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Peter Lieven Jan. 2, 2014, 1:15 p.m. UTC | #6
Am 11.12.2013 19:53, schrieb Marcelo Tosatti:
> On Sun, Dec 08, 2013 at 10:33:38PM +1100, Vadim Rozenfeld wrote:
>> Signed-off: Peter Lieven <pl@dlh.net>
>> Signed-off: Gleb Natapov <gleb@redhat.com>
>> Signed-off: Vadim Rozenfeld <vrozenfe@redhat.com>
>>
>> v1 -> v2
>> 1. mark TSC page dirty as suggested by 
>>     Eric Northup <digitaleric@google.com> and Gleb
>> 2. disable local irq when calling get_kernel_ns, 
>>     as it was done by Peter Lieven <pl@dlhnet.de>
>> 3. move check for TSC page enable from second patch
>>     to this one.
>>
>> ---
>>  arch/x86/include/asm/kvm_host.h    |  2 ++
>>  arch/x86/include/uapi/asm/hyperv.h | 13 +++++++++++++
>>  arch/x86/kvm/x86.c                 | 39 +++++++++++++++++++++++++++++++++++++-
>>  include/uapi/linux/kvm.h           |  1 +
>>  4 files changed, 54 insertions(+), 1 deletion(-)
>>
>> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
>> index ae5d783..2fd0753 100644
>> --- a/arch/x86/include/asm/kvm_host.h
>> +++ b/arch/x86/include/asm/kvm_host.h
>> @@ -605,6 +605,8 @@ struct kvm_arch {
>>  	/* fields used by HYPER-V emulation */
>>  	u64 hv_guest_os_id;
>>  	u64 hv_hypercall;
>> +	u64 hv_ref_count;
>> +	u64 hv_tsc_page;
>>  
>>  	#ifdef CONFIG_KVM_MMU_AUDIT
>>  	int audit_point;
>> diff --git a/arch/x86/include/uapi/asm/hyperv.h b/arch/x86/include/uapi/asm/hyperv.h
>> index b8f1c01..462efe7 100644
>> --- a/arch/x86/include/uapi/asm/hyperv.h
>> +++ b/arch/x86/include/uapi/asm/hyperv.h
>> @@ -28,6 +28,9 @@
>>  /* Partition Reference Counter (HV_X64_MSR_TIME_REF_COUNT) available*/
>>  #define HV_X64_MSR_TIME_REF_COUNT_AVAILABLE	(1 << 1)
>>  
>> +/* A partition's reference time stamp counter (TSC) page */
>> +#define HV_X64_MSR_REFERENCE_TSC		0x40000021
>> +
>>  /*
>>   * There is a single feature flag that signifies the presence of the MSR
>>   * that can be used to retrieve both the local APIC Timer frequency as
>> @@ -198,6 +201,9 @@
>>  #define HV_X64_MSR_APIC_ASSIST_PAGE_ADDRESS_MASK	\
>>  		(~((1ull << HV_X64_MSR_APIC_ASSIST_PAGE_ADDRESS_SHIFT) - 1))
>>  
>> +#define HV_X64_MSR_TSC_REFERENCE_ENABLE		0x00000001
>> +#define HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT	12
>> +
>>  #define HV_PROCESSOR_POWER_STATE_C0		0
>>  #define HV_PROCESSOR_POWER_STATE_C1		1
>>  #define HV_PROCESSOR_POWER_STATE_C2		2
>> @@ -210,4 +216,11 @@
>>  #define HV_STATUS_INVALID_ALIGNMENT		4
>>  #define HV_STATUS_INSUFFICIENT_BUFFERS		19
>>  
>> +typedef struct _HV_REFERENCE_TSC_PAGE {
>> +	__u32 tsc_sequence;
>> +	__u32 res1;
>> +	__u64 tsc_scale;
>> +	__s64 tsc_offset;
>> +} HV_REFERENCE_TSC_PAGE, *PHV_REFERENCE_TSC_PAGE;
>> +
>>  #endif
>> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
>> index 21ef1ba..5e4e495a 100644
>> --- a/arch/x86/kvm/x86.c
>> +++ b/arch/x86/kvm/x86.c
>> @@ -840,7 +840,7 @@ EXPORT_SYMBOL_GPL(kvm_rdpmc);
>>  static u32 msrs_to_save[] = {
>>  	MSR_KVM_SYSTEM_TIME, MSR_KVM_WALL_CLOCK,
>>  	MSR_KVM_SYSTEM_TIME_NEW, MSR_KVM_WALL_CLOCK_NEW,
>> -	HV_X64_MSR_GUEST_OS_ID, HV_X64_MSR_HYPERCALL,
>> +	HV_X64_MSR_GUEST_OS_ID, HV_X64_MSR_HYPERCALL, HV_X64_MSR_TIME_REF_COUNT,
>>  	HV_X64_MSR_APIC_ASSIST_PAGE, MSR_KVM_ASYNC_PF_EN, MSR_KVM_STEAL_TIME,
>>  	MSR_KVM_PV_EOI_EN,
>>  	MSR_IA32_SYSENTER_CS, MSR_IA32_SYSENTER_ESP, MSR_IA32_SYSENTER_EIP,
>> @@ -1826,6 +1826,8 @@ static bool kvm_hv_msr_partition_wide(u32 msr)
>>  	switch (msr) {
>>  	case HV_X64_MSR_GUEST_OS_ID:
>>  	case HV_X64_MSR_HYPERCALL:
>> +	case HV_X64_MSR_REFERENCE_TSC:
>> +	case HV_X64_MSR_TIME_REF_COUNT:
>>  		r = true;
>>  		break;
>>  	}
>> @@ -1865,6 +1867,29 @@ static int set_msr_hyperv_pw(struct kvm_vcpu *vcpu, u32 msr, u64 data)
>>  		if (__copy_to_user((void __user *)addr, instructions, 4))
>>  			return 1;
>>  		kvm->arch.hv_hypercall = data;
>> +		local_irq_disable();
>> +		kvm->arch.hv_ref_count = get_kernel_ns() + kvm->arch.kvmclock_offset;
>> +		local_irq_enable();
> 
> Where does the docs say that HV_X64_MSR_HYPERCALL is the where the clock
> starts counting?
> 
> No need to store kvmclock_offset in hv_ref_count? (moreover
> the name is weird, better name would be "hv_ref_start_time".
> 
>> +		break;
>> +	}
>> +	case HV_X64_MSR_REFERENCE_TSC: {
>> +		u64 gfn;
>> +		unsigned long addr;
>> +		HV_REFERENCE_TSC_PAGE tsc_ref;
>> +		tsc_ref.tsc_sequence = 0;
>> +		if (!(data & HV_X64_MSR_TSC_REFERENCE_ENABLE)) {
>> +			kvm->arch.hv_tsc_page = data;
>> +			break;
>> +		}
>> +		gfn = data >> HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT;
>> +		addr = gfn_to_hva(kvm, data >>
>> +			HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT);
>> +		if (kvm_is_error_hva(addr))
>> +			return 1;
>> +		if (__copy_to_user((void __user *)addr, &tsc_ref, sizeof(tsc_ref)))
>> +			return 1;
>> +		mark_page_dirty(kvm, gfn);
>> +		kvm->arch.hv_tsc_page = data;
>>  		break;
>>  	}
>>  	default:
>> @@ -2291,6 +2316,17 @@ static int get_msr_hyperv_pw(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
>>  	case HV_X64_MSR_HYPERCALL:
>>  		data = kvm->arch.hv_hypercall;
>>  		break;
>> +	case HV_X64_MSR_TIME_REF_COUNT: {
>> +		u64 now_ns;
>> +		local_irq_disable();
>> +		now_ns = get_kernel_ns() + kvm->arch.kvmclock_offset;
>> +		data = div_u64(now_ns - kvm->arch.hv_ref_count, 100);
>> +		local_irq_enable();
> 
> No need for irq disable/enable pairs.

KVM_GET_CLOCK / KVM_SET_CLOCK do the irq disable/enable pairs. What is right?

Peter
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Marcelo Tosatti Jan. 2, 2014, 1:57 p.m. UTC | #7
On Thu, Jan 02, 2014 at 02:15:48PM +0100, Peter Lieven wrote:
> Am 11.12.2013 19:53, schrieb Marcelo Tosatti:
> > On Sun, Dec 08, 2013 at 10:33:38PM +1100, Vadim Rozenfeld wrote:
> >> Signed-off: Peter Lieven <pl@dlh.net>
> >> Signed-off: Gleb Natapov <gleb@redhat.com>
> >> Signed-off: Vadim Rozenfeld <vrozenfe@redhat.com>
> >>
> >> v1 -> v2
> >> 1. mark TSC page dirty as suggested by 
> >>     Eric Northup <digitaleric@google.com> and Gleb
> >> 2. disable local irq when calling get_kernel_ns, 
> >>     as it was done by Peter Lieven <pl@dlhnet.de>
> >> 3. move check for TSC page enable from second patch
> >>     to this one.
> >>
> >> ---
> >>  arch/x86/include/asm/kvm_host.h    |  2 ++
> >>  arch/x86/include/uapi/asm/hyperv.h | 13 +++++++++++++
> >>  arch/x86/kvm/x86.c                 | 39 +++++++++++++++++++++++++++++++++++++-
> >>  include/uapi/linux/kvm.h           |  1 +
> >>  4 files changed, 54 insertions(+), 1 deletion(-)
> >>
> >> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> >> index ae5d783..2fd0753 100644
> >> --- a/arch/x86/include/asm/kvm_host.h
> >> +++ b/arch/x86/include/asm/kvm_host.h
> >> @@ -605,6 +605,8 @@ struct kvm_arch {
> >>  	/* fields used by HYPER-V emulation */
> >>  	u64 hv_guest_os_id;
> >>  	u64 hv_hypercall;
> >> +	u64 hv_ref_count;
> >> +	u64 hv_tsc_page;
> >>  
> >>  	#ifdef CONFIG_KVM_MMU_AUDIT
> >>  	int audit_point;
> >> diff --git a/arch/x86/include/uapi/asm/hyperv.h b/arch/x86/include/uapi/asm/hyperv.h
> >> index b8f1c01..462efe7 100644
> >> --- a/arch/x86/include/uapi/asm/hyperv.h
> >> +++ b/arch/x86/include/uapi/asm/hyperv.h
> >> @@ -28,6 +28,9 @@
> >>  /* Partition Reference Counter (HV_X64_MSR_TIME_REF_COUNT) available*/
> >>  #define HV_X64_MSR_TIME_REF_COUNT_AVAILABLE	(1 << 1)
> >>  
> >> +/* A partition's reference time stamp counter (TSC) page */
> >> +#define HV_X64_MSR_REFERENCE_TSC		0x40000021
> >> +
> >>  /*
> >>   * There is a single feature flag that signifies the presence of the MSR
> >>   * that can be used to retrieve both the local APIC Timer frequency as
> >> @@ -198,6 +201,9 @@
> >>  #define HV_X64_MSR_APIC_ASSIST_PAGE_ADDRESS_MASK	\
> >>  		(~((1ull << HV_X64_MSR_APIC_ASSIST_PAGE_ADDRESS_SHIFT) - 1))
> >>  
> >> +#define HV_X64_MSR_TSC_REFERENCE_ENABLE		0x00000001
> >> +#define HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT	12
> >> +
> >>  #define HV_PROCESSOR_POWER_STATE_C0		0
> >>  #define HV_PROCESSOR_POWER_STATE_C1		1
> >>  #define HV_PROCESSOR_POWER_STATE_C2		2
> >> @@ -210,4 +216,11 @@
> >>  #define HV_STATUS_INVALID_ALIGNMENT		4
> >>  #define HV_STATUS_INSUFFICIENT_BUFFERS		19
> >>  
> >> +typedef struct _HV_REFERENCE_TSC_PAGE {
> >> +	__u32 tsc_sequence;
> >> +	__u32 res1;
> >> +	__u64 tsc_scale;
> >> +	__s64 tsc_offset;
> >> +} HV_REFERENCE_TSC_PAGE, *PHV_REFERENCE_TSC_PAGE;
> >> +
> >>  #endif
> >> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> >> index 21ef1ba..5e4e495a 100644
> >> --- a/arch/x86/kvm/x86.c
> >> +++ b/arch/x86/kvm/x86.c
> >> @@ -840,7 +840,7 @@ EXPORT_SYMBOL_GPL(kvm_rdpmc);
> >>  static u32 msrs_to_save[] = {
> >>  	MSR_KVM_SYSTEM_TIME, MSR_KVM_WALL_CLOCK,
> >>  	MSR_KVM_SYSTEM_TIME_NEW, MSR_KVM_WALL_CLOCK_NEW,
> >> -	HV_X64_MSR_GUEST_OS_ID, HV_X64_MSR_HYPERCALL,
> >> +	HV_X64_MSR_GUEST_OS_ID, HV_X64_MSR_HYPERCALL, HV_X64_MSR_TIME_REF_COUNT,
> >>  	HV_X64_MSR_APIC_ASSIST_PAGE, MSR_KVM_ASYNC_PF_EN, MSR_KVM_STEAL_TIME,
> >>  	MSR_KVM_PV_EOI_EN,
> >>  	MSR_IA32_SYSENTER_CS, MSR_IA32_SYSENTER_ESP, MSR_IA32_SYSENTER_EIP,
> >> @@ -1826,6 +1826,8 @@ static bool kvm_hv_msr_partition_wide(u32 msr)
> >>  	switch (msr) {
> >>  	case HV_X64_MSR_GUEST_OS_ID:
> >>  	case HV_X64_MSR_HYPERCALL:
> >> +	case HV_X64_MSR_REFERENCE_TSC:
> >> +	case HV_X64_MSR_TIME_REF_COUNT:
> >>  		r = true;
> >>  		break;
> >>  	}
> >> @@ -1865,6 +1867,29 @@ static int set_msr_hyperv_pw(struct kvm_vcpu *vcpu, u32 msr, u64 data)
> >>  		if (__copy_to_user((void __user *)addr, instructions, 4))
> >>  			return 1;
> >>  		kvm->arch.hv_hypercall = data;
> >> +		local_irq_disable();
> >> +		kvm->arch.hv_ref_count = get_kernel_ns() + kvm->arch.kvmclock_offset;
> >> +		local_irq_enable();
> > 
> > Where does the docs say that HV_X64_MSR_HYPERCALL is the where the clock
> > starts counting?
> > 
> > No need to store kvmclock_offset in hv_ref_count? (moreover
> > the name is weird, better name would be "hv_ref_start_time".
> > 
> >> +		break;
> >> +	}
> >> +	case HV_X64_MSR_REFERENCE_TSC: {
> >> +		u64 gfn;
> >> +		unsigned long addr;
> >> +		HV_REFERENCE_TSC_PAGE tsc_ref;
> >> +		tsc_ref.tsc_sequence = 0;
> >> +		if (!(data & HV_X64_MSR_TSC_REFERENCE_ENABLE)) {
> >> +			kvm->arch.hv_tsc_page = data;
> >> +			break;
> >> +		}
> >> +		gfn = data >> HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT;
> >> +		addr = gfn_to_hva(kvm, data >>
> >> +			HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT);
> >> +		if (kvm_is_error_hva(addr))
> >> +			return 1;
> >> +		if (__copy_to_user((void __user *)addr, &tsc_ref, sizeof(tsc_ref)))
> >> +			return 1;
> >> +		mark_page_dirty(kvm, gfn);
> >> +		kvm->arch.hv_tsc_page = data;
> >>  		break;
> >>  	}
> >>  	default:
> >> @@ -2291,6 +2316,17 @@ static int get_msr_hyperv_pw(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
> >>  	case HV_X64_MSR_HYPERCALL:
> >>  		data = kvm->arch.hv_hypercall;
> >>  		break;
> >> +	case HV_X64_MSR_TIME_REF_COUNT: {
> >> +		u64 now_ns;
> >> +		local_irq_disable();
> >> +		now_ns = get_kernel_ns() + kvm->arch.kvmclock_offset;
> >> +		data = div_u64(now_ns - kvm->arch.hv_ref_count, 100);
> >> +		local_irq_enable();
> > 
> > No need for irq disable/enable pairs.
> 
> KVM_GET_CLOCK / KVM_SET_CLOCK do the irq disable/enable pairs. What is right?
> 
> Peter


                local_irq_disable();
                now_ns = get_kernel_ns();
                delta = user_ns.clock - now_ns;
                local_irq_enable();

Not using irq disable/enable pairs. The subtraction is not dependant on
any particular time.

                local_irq_disable();
                now_ns = get_kernel_ns();
                local_irq_enable();
                delta = user_ns.clock - now_ns;

Any interrupt that would affect the value of get_kernel_ns(), would
have a similar effect before the interrupts are disabled. So the 
disable/enable pair achieves nothing in practice. It was copied from
kvm_guest_time_update.




--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Peter Lieven Jan. 2, 2014, 4:08 p.m. UTC | #8
Am 02.01.2014 14:57, schrieb Marcelo Tosatti:
> On Thu, Jan 02, 2014 at 02:15:48PM +0100, Peter Lieven wrote:
>> Am 11.12.2013 19:53, schrieb Marcelo Tosatti:
>>> On Sun, Dec 08, 2013 at 10:33:38PM +1100, Vadim Rozenfeld wrote:
>>>> Signed-off: Peter Lieven <pl@dlh.net>
>>>> Signed-off: Gleb Natapov <gleb@redhat.com>
>>>> Signed-off: Vadim Rozenfeld <vrozenfe@redhat.com>
>>>>
>>>> v1 -> v2
>>>> 1. mark TSC page dirty as suggested by 
>>>>     Eric Northup <digitaleric@google.com> and Gleb
>>>> 2. disable local irq when calling get_kernel_ns, 
>>>>     as it was done by Peter Lieven <pl@dlhnet.de>
>>>> 3. move check for TSC page enable from second patch
>>>>     to this one.
>>>>
>>>> ---
>>>>  arch/x86/include/asm/kvm_host.h    |  2 ++
>>>>  arch/x86/include/uapi/asm/hyperv.h | 13 +++++++++++++
>>>>  arch/x86/kvm/x86.c                 | 39 +++++++++++++++++++++++++++++++++++++-
>>>>  include/uapi/linux/kvm.h           |  1 +
>>>>  4 files changed, 54 insertions(+), 1 deletion(-)
>>>>
>>>> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
>>>> index ae5d783..2fd0753 100644
>>>> --- a/arch/x86/include/asm/kvm_host.h
>>>> +++ b/arch/x86/include/asm/kvm_host.h
>>>> @@ -605,6 +605,8 @@ struct kvm_arch {
>>>>  	/* fields used by HYPER-V emulation */
>>>>  	u64 hv_guest_os_id;
>>>>  	u64 hv_hypercall;
>>>> +	u64 hv_ref_count;
>>>> +	u64 hv_tsc_page;
>>>>  
>>>>  	#ifdef CONFIG_KVM_MMU_AUDIT
>>>>  	int audit_point;
>>>> diff --git a/arch/x86/include/uapi/asm/hyperv.h b/arch/x86/include/uapi/asm/hyperv.h
>>>> index b8f1c01..462efe7 100644
>>>> --- a/arch/x86/include/uapi/asm/hyperv.h
>>>> +++ b/arch/x86/include/uapi/asm/hyperv.h
>>>> @@ -28,6 +28,9 @@
>>>>  /* Partition Reference Counter (HV_X64_MSR_TIME_REF_COUNT) available*/
>>>>  #define HV_X64_MSR_TIME_REF_COUNT_AVAILABLE	(1 << 1)
>>>>  
>>>> +/* A partition's reference time stamp counter (TSC) page */
>>>> +#define HV_X64_MSR_REFERENCE_TSC		0x40000021
>>>> +
>>>>  /*
>>>>   * There is a single feature flag that signifies the presence of the MSR
>>>>   * that can be used to retrieve both the local APIC Timer frequency as
>>>> @@ -198,6 +201,9 @@
>>>>  #define HV_X64_MSR_APIC_ASSIST_PAGE_ADDRESS_MASK	\
>>>>  		(~((1ull << HV_X64_MSR_APIC_ASSIST_PAGE_ADDRESS_SHIFT) - 1))
>>>>  
>>>> +#define HV_X64_MSR_TSC_REFERENCE_ENABLE		0x00000001
>>>> +#define HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT	12
>>>> +
>>>>  #define HV_PROCESSOR_POWER_STATE_C0		0
>>>>  #define HV_PROCESSOR_POWER_STATE_C1		1
>>>>  #define HV_PROCESSOR_POWER_STATE_C2		2
>>>> @@ -210,4 +216,11 @@
>>>>  #define HV_STATUS_INVALID_ALIGNMENT		4
>>>>  #define HV_STATUS_INSUFFICIENT_BUFFERS		19
>>>>  
>>>> +typedef struct _HV_REFERENCE_TSC_PAGE {
>>>> +	__u32 tsc_sequence;
>>>> +	__u32 res1;
>>>> +	__u64 tsc_scale;
>>>> +	__s64 tsc_offset;
>>>> +} HV_REFERENCE_TSC_PAGE, *PHV_REFERENCE_TSC_PAGE;
>>>> +
>>>>  #endif
>>>> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
>>>> index 21ef1ba..5e4e495a 100644
>>>> --- a/arch/x86/kvm/x86.c
>>>> +++ b/arch/x86/kvm/x86.c
>>>> @@ -840,7 +840,7 @@ EXPORT_SYMBOL_GPL(kvm_rdpmc);
>>>>  static u32 msrs_to_save[] = {
>>>>  	MSR_KVM_SYSTEM_TIME, MSR_KVM_WALL_CLOCK,
>>>>  	MSR_KVM_SYSTEM_TIME_NEW, MSR_KVM_WALL_CLOCK_NEW,
>>>> -	HV_X64_MSR_GUEST_OS_ID, HV_X64_MSR_HYPERCALL,
>>>> +	HV_X64_MSR_GUEST_OS_ID, HV_X64_MSR_HYPERCALL, HV_X64_MSR_TIME_REF_COUNT,
>>>>  	HV_X64_MSR_APIC_ASSIST_PAGE, MSR_KVM_ASYNC_PF_EN, MSR_KVM_STEAL_TIME,
>>>>  	MSR_KVM_PV_EOI_EN,
>>>>  	MSR_IA32_SYSENTER_CS, MSR_IA32_SYSENTER_ESP, MSR_IA32_SYSENTER_EIP,
>>>> @@ -1826,6 +1826,8 @@ static bool kvm_hv_msr_partition_wide(u32 msr)
>>>>  	switch (msr) {
>>>>  	case HV_X64_MSR_GUEST_OS_ID:
>>>>  	case HV_X64_MSR_HYPERCALL:
>>>> +	case HV_X64_MSR_REFERENCE_TSC:
>>>> +	case HV_X64_MSR_TIME_REF_COUNT:
>>>>  		r = true;
>>>>  		break;
>>>>  	}
>>>> @@ -1865,6 +1867,29 @@ static int set_msr_hyperv_pw(struct kvm_vcpu *vcpu, u32 msr, u64 data)
>>>>  		if (__copy_to_user((void __user *)addr, instructions, 4))
>>>>  			return 1;
>>>>  		kvm->arch.hv_hypercall = data;
>>>> +		local_irq_disable();
>>>> +		kvm->arch.hv_ref_count = get_kernel_ns() + kvm->arch.kvmclock_offset;
>>>> +		local_irq_enable();
>>>
>>> Where does the docs say that HV_X64_MSR_HYPERCALL is the where the clock
>>> starts counting?
>>>
>>> No need to store kvmclock_offset in hv_ref_count? (moreover
>>> the name is weird, better name would be "hv_ref_start_time".
>>>
>>>> +		break;
>>>> +	}
>>>> +	case HV_X64_MSR_REFERENCE_TSC: {
>>>> +		u64 gfn;
>>>> +		unsigned long addr;
>>>> +		HV_REFERENCE_TSC_PAGE tsc_ref;
>>>> +		tsc_ref.tsc_sequence = 0;
>>>> +		if (!(data & HV_X64_MSR_TSC_REFERENCE_ENABLE)) {
>>>> +			kvm->arch.hv_tsc_page = data;
>>>> +			break;
>>>> +		}
>>>> +		gfn = data >> HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT;
>>>> +		addr = gfn_to_hva(kvm, data >>
>>>> +			HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT);
>>>> +		if (kvm_is_error_hva(addr))
>>>> +			return 1;
>>>> +		if (__copy_to_user((void __user *)addr, &tsc_ref, sizeof(tsc_ref)))
>>>> +			return 1;
>>>> +		mark_page_dirty(kvm, gfn);
>>>> +		kvm->arch.hv_tsc_page = data;
>>>>  		break;
>>>>  	}
>>>>  	default:
>>>> @@ -2291,6 +2316,17 @@ static int get_msr_hyperv_pw(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
>>>>  	case HV_X64_MSR_HYPERCALL:
>>>>  		data = kvm->arch.hv_hypercall;
>>>>  		break;
>>>> +	case HV_X64_MSR_TIME_REF_COUNT: {
>>>> +		u64 now_ns;
>>>> +		local_irq_disable();
>>>> +		now_ns = get_kernel_ns() + kvm->arch.kvmclock_offset;
>>>> +		data = div_u64(now_ns - kvm->arch.hv_ref_count, 100);
>>>> +		local_irq_enable();
>>>
>>> No need for irq disable/enable pairs.
>>
>> KVM_GET_CLOCK / KVM_SET_CLOCK do the irq disable/enable pairs. What is right?
>>
>> Peter
> 
> 
>                 local_irq_disable();
>                 now_ns = get_kernel_ns();
>                 delta = user_ns.clock - now_ns;
>                 local_irq_enable();
> 
> Not using irq disable/enable pairs. The subtraction is not dependant on
> any particular time.
> 
>                 local_irq_disable();
>                 now_ns = get_kernel_ns();
>                 local_irq_enable();
>                 delta = user_ns.clock - now_ns;
> 
> Any interrupt that would affect the value of get_kernel_ns(), would
> have a similar effect before the interrupts are disabled. So the 
> disable/enable pair achieves nothing in practice. It was copied from
> kvm_guest_time_update.
> 
> 

Thanks for clarifying. What about get_kernel_ns() can't this be interrupted?

Peter


--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Peter Lieven Jan. 2, 2014, 4:52 p.m. UTC | #9
Am 11.12.2013 19:59, schrieb Marcelo Tosatti:
> On Wed, Dec 11, 2013 at 04:53:05PM -0200, Marcelo Tosatti wrote:
>> On Sun, Dec 08, 2013 at 10:33:38PM +1100, Vadim Rozenfeld wrote:
>>> Signed-off: Peter Lieven <pl@dlh.net>
>>> Signed-off: Gleb Natapov <gleb@redhat.com>
>>> Signed-off: Vadim Rozenfeld <vrozenfe@redhat.com>
>>>
>>> v1 -> v2
>>> 1. mark TSC page dirty as suggested by 
>>>     Eric Northup <digitaleric@google.com> and Gleb
>>> 2. disable local irq when calling get_kernel_ns, 
>>>     as it was done by Peter Lieven <pl@dlhnet.de>
>>> 3. move check for TSC page enable from second patch
>>>     to this one.
>>>
>>> ---
>>>  arch/x86/include/asm/kvm_host.h    |  2 ++
>>>  arch/x86/include/uapi/asm/hyperv.h | 13 +++++++++++++
>>>  arch/x86/kvm/x86.c                 | 39 +++++++++++++++++++++++++++++++++++++-
>>>  include/uapi/linux/kvm.h           |  1 +
>>>  4 files changed, 54 insertions(+), 1 deletion(-)
>>>
>>> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
>>> index ae5d783..2fd0753 100644
>>> --- a/arch/x86/include/asm/kvm_host.h
>>> +++ b/arch/x86/include/asm/kvm_host.h
>>> @@ -605,6 +605,8 @@ struct kvm_arch {
>>>  	/* fields used by HYPER-V emulation */
>>>  	u64 hv_guest_os_id;
>>>  	u64 hv_hypercall;
>>> +	u64 hv_ref_count;
>>> +	u64 hv_tsc_page;
>>>  
>>>  	#ifdef CONFIG_KVM_MMU_AUDIT
>>>  	int audit_point;
>>> diff --git a/arch/x86/include/uapi/asm/hyperv.h b/arch/x86/include/uapi/asm/hyperv.h
>>> index b8f1c01..462efe7 100644
>>> --- a/arch/x86/include/uapi/asm/hyperv.h
>>> +++ b/arch/x86/include/uapi/asm/hyperv.h
>>> @@ -28,6 +28,9 @@
>>>  /* Partition Reference Counter (HV_X64_MSR_TIME_REF_COUNT) available*/
>>>  #define HV_X64_MSR_TIME_REF_COUNT_AVAILABLE	(1 << 1)
>>>  
>>> +/* A partition's reference time stamp counter (TSC) page */
>>> +#define HV_X64_MSR_REFERENCE_TSC		0x40000021
>>> +
>>>  /*
>>>   * There is a single feature flag that signifies the presence of the MSR
>>>   * that can be used to retrieve both the local APIC Timer frequency as
>>> @@ -198,6 +201,9 @@
>>>  #define HV_X64_MSR_APIC_ASSIST_PAGE_ADDRESS_MASK	\
>>>  		(~((1ull << HV_X64_MSR_APIC_ASSIST_PAGE_ADDRESS_SHIFT) - 1))
>>>  
>>> +#define HV_X64_MSR_TSC_REFERENCE_ENABLE		0x00000001
>>> +#define HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT	12
>>> +
>>>  #define HV_PROCESSOR_POWER_STATE_C0		0
>>>  #define HV_PROCESSOR_POWER_STATE_C1		1
>>>  #define HV_PROCESSOR_POWER_STATE_C2		2
>>> @@ -210,4 +216,11 @@
>>>  #define HV_STATUS_INVALID_ALIGNMENT		4
>>>  #define HV_STATUS_INSUFFICIENT_BUFFERS		19
>>>  
>>> +typedef struct _HV_REFERENCE_TSC_PAGE {
>>> +	__u32 tsc_sequence;
>>> +	__u32 res1;
>>> +	__u64 tsc_scale;
>>> +	__s64 tsc_offset;
>>> +} HV_REFERENCE_TSC_PAGE, *PHV_REFERENCE_TSC_PAGE;
>>> +
>>>  #endif
>>> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
>>> index 21ef1ba..5e4e495a 100644
>>> --- a/arch/x86/kvm/x86.c
>>> +++ b/arch/x86/kvm/x86.c
>>> @@ -840,7 +840,7 @@ EXPORT_SYMBOL_GPL(kvm_rdpmc);
>>>  static u32 msrs_to_save[] = {
>>>  	MSR_KVM_SYSTEM_TIME, MSR_KVM_WALL_CLOCK,
>>>  	MSR_KVM_SYSTEM_TIME_NEW, MSR_KVM_WALL_CLOCK_NEW,
>>> -	HV_X64_MSR_GUEST_OS_ID, HV_X64_MSR_HYPERCALL,
>>> +	HV_X64_MSR_GUEST_OS_ID, HV_X64_MSR_HYPERCALL, HV_X64_MSR_TIME_REF_COUNT,
>>>  	HV_X64_MSR_APIC_ASSIST_PAGE, MSR_KVM_ASYNC_PF_EN, MSR_KVM_STEAL_TIME,
>>>  	MSR_KVM_PV_EOI_EN,
>>>  	MSR_IA32_SYSENTER_CS, MSR_IA32_SYSENTER_ESP, MSR_IA32_SYSENTER_EIP,
>>> @@ -1826,6 +1826,8 @@ static bool kvm_hv_msr_partition_wide(u32 msr)
>>>  	switch (msr) {
>>>  	case HV_X64_MSR_GUEST_OS_ID:
>>>  	case HV_X64_MSR_HYPERCALL:
>>> +	case HV_X64_MSR_REFERENCE_TSC:
>>> +	case HV_X64_MSR_TIME_REF_COUNT:
>>>  		r = true;
>>>  		break;
>>>  	}
>>> @@ -1865,6 +1867,29 @@ static int set_msr_hyperv_pw(struct kvm_vcpu *vcpu, u32 msr, u64 data)
>>>  		if (__copy_to_user((void __user *)addr, instructions, 4))
>>>  			return 1;
>>>  		kvm->arch.hv_hypercall = data;
>>> +		local_irq_disable();
>>> +		kvm->arch.hv_ref_count = get_kernel_ns() + kvm->arch.kvmclock_offset;
>>> +		local_irq_enable();
>>
>> Where does the docs say that HV_X64_MSR_HYPERCALL is the where the clock
>> starts counting?
>>
>> No need to store kvmclock_offset in hv_ref_count? (moreover
>> the name is weird, better name would be "hv_ref_start_time".
> 
> Just add kvmclock_offset when reading the values (otherwise you have a
> "stale copy" of kvmclock_offset in hv_ref_count).
> 

After some experiments I think we do no need kvm->arch.hv_ref_count at all.

I was debugging some weird clockjump issues and I think the problem is that after live migration
kvm->arch.hv_ref_count is initialized to 0. Depending on the uptime of the vServer when the
hypercall was set up this can lead to series jumps.

So I would suggest to completely drop kvm->arch.hv_ref_count.

And use simply this in get_msr_hyperv_pw().

        case HV_X64_MSR_TIME_REF_COUNT: {
                data = div_u64(get_kernel_ns() + kvm->arch.kvmclock_offset, 100);
                break;
        }

It seems that get_kernel_ns() + kvm->arch.kvmclock_offset is exactly the vServer uptime
in nanoseconds which starts counting at 0.

Peter


--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Marcelo Tosatti Jan. 2, 2014, 8:05 p.m. UTC | #10
On Thu, Jan 02, 2014 at 05:08:07PM +0100, Peter Lieven wrote:
> > Not using irq disable/enable pairs. The subtraction is not dependant on
> > any particular time.
> > 
> >                 local_irq_disable();
> >                 now_ns = get_kernel_ns();
> >                 local_irq_enable();
> >                 delta = user_ns.clock - now_ns;
> > 
> > Any interrupt that would affect the value of get_kernel_ns(), would
> > have a similar effect before the interrupts are disabled. So the 
> > disable/enable pair achieves nothing in practice. It was copied from
> > kvm_guest_time_update.
> > 
> > 
> 
> Thanks for clarifying. What about get_kernel_ns() can't this be interrupted?
> 
> Peter

It can handle interrupts. 

Disabling interrupts there is used when a
{rdtsc, system_time=get_kernel_ns()} tuple is wanted, to reduce
the physical time difference between execution of rdtsc and get_kernel_ns().


--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Vadim Rozenfeld Jan. 7, 2014, 9:36 a.m. UTC | #11
On Thu, 2014-01-02 at 17:52 +0100, Peter Lieven wrote:
> Am 11.12.2013 19:59, schrieb Marcelo Tosatti:
> > On Wed, Dec 11, 2013 at 04:53:05PM -0200, Marcelo Tosatti wrote:
> >> On Sun, Dec 08, 2013 at 10:33:38PM +1100, Vadim Rozenfeld wrote:
> >>> Signed-off: Peter Lieven <pl@dlh.net>
> >>> Signed-off: Gleb Natapov <gleb@redhat.com>
> >>> Signed-off: Vadim Rozenfeld <vrozenfe@redhat.com>
> >>>
> >>> v1 -> v2
> >>> 1. mark TSC page dirty as suggested by 
> >>>     Eric Northup <digitaleric@google.com> and Gleb
> >>> 2. disable local irq when calling get_kernel_ns, 
> >>>     as it was done by Peter Lieven <pl@dlhnet.de>
> >>> 3. move check for TSC page enable from second patch
> >>>     to this one.
> >>>
> >>> ---
> >>>  arch/x86/include/asm/kvm_host.h    |  2 ++
> >>>  arch/x86/include/uapi/asm/hyperv.h | 13 +++++++++++++
> >>>  arch/x86/kvm/x86.c                 | 39 +++++++++++++++++++++++++++++++++++++-
> >>>  include/uapi/linux/kvm.h           |  1 +
> >>>  4 files changed, 54 insertions(+), 1 deletion(-)
> >>>
> >>> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> >>> index ae5d783..2fd0753 100644
> >>> --- a/arch/x86/include/asm/kvm_host.h
> >>> +++ b/arch/x86/include/asm/kvm_host.h
> >>> @@ -605,6 +605,8 @@ struct kvm_arch {
> >>>  	/* fields used by HYPER-V emulation */
> >>>  	u64 hv_guest_os_id;
> >>>  	u64 hv_hypercall;
> >>> +	u64 hv_ref_count;
> >>> +	u64 hv_tsc_page;
> >>>  
> >>>  	#ifdef CONFIG_KVM_MMU_AUDIT
> >>>  	int audit_point;
> >>> diff --git a/arch/x86/include/uapi/asm/hyperv.h b/arch/x86/include/uapi/asm/hyperv.h
> >>> index b8f1c01..462efe7 100644
> >>> --- a/arch/x86/include/uapi/asm/hyperv.h
> >>> +++ b/arch/x86/include/uapi/asm/hyperv.h
> >>> @@ -28,6 +28,9 @@
> >>>  /* Partition Reference Counter (HV_X64_MSR_TIME_REF_COUNT) available*/
> >>>  #define HV_X64_MSR_TIME_REF_COUNT_AVAILABLE	(1 << 1)
> >>>  
> >>> +/* A partition's reference time stamp counter (TSC) page */
> >>> +#define HV_X64_MSR_REFERENCE_TSC		0x40000021
> >>> +
> >>>  /*
> >>>   * There is a single feature flag that signifies the presence of the MSR
> >>>   * that can be used to retrieve both the local APIC Timer frequency as
> >>> @@ -198,6 +201,9 @@
> >>>  #define HV_X64_MSR_APIC_ASSIST_PAGE_ADDRESS_MASK	\
> >>>  		(~((1ull << HV_X64_MSR_APIC_ASSIST_PAGE_ADDRESS_SHIFT) - 1))
> >>>  
> >>> +#define HV_X64_MSR_TSC_REFERENCE_ENABLE		0x00000001
> >>> +#define HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT	12
> >>> +
> >>>  #define HV_PROCESSOR_POWER_STATE_C0		0
> >>>  #define HV_PROCESSOR_POWER_STATE_C1		1
> >>>  #define HV_PROCESSOR_POWER_STATE_C2		2
> >>> @@ -210,4 +216,11 @@
> >>>  #define HV_STATUS_INVALID_ALIGNMENT		4
> >>>  #define HV_STATUS_INSUFFICIENT_BUFFERS		19
> >>>  
> >>> +typedef struct _HV_REFERENCE_TSC_PAGE {
> >>> +	__u32 tsc_sequence;
> >>> +	__u32 res1;
> >>> +	__u64 tsc_scale;
> >>> +	__s64 tsc_offset;
> >>> +} HV_REFERENCE_TSC_PAGE, *PHV_REFERENCE_TSC_PAGE;
> >>> +
> >>>  #endif
> >>> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> >>> index 21ef1ba..5e4e495a 100644
> >>> --- a/arch/x86/kvm/x86.c
> >>> +++ b/arch/x86/kvm/x86.c
> >>> @@ -840,7 +840,7 @@ EXPORT_SYMBOL_GPL(kvm_rdpmc);
> >>>  static u32 msrs_to_save[] = {
> >>>  	MSR_KVM_SYSTEM_TIME, MSR_KVM_WALL_CLOCK,
> >>>  	MSR_KVM_SYSTEM_TIME_NEW, MSR_KVM_WALL_CLOCK_NEW,
> >>> -	HV_X64_MSR_GUEST_OS_ID, HV_X64_MSR_HYPERCALL,
> >>> +	HV_X64_MSR_GUEST_OS_ID, HV_X64_MSR_HYPERCALL, HV_X64_MSR_TIME_REF_COUNT,
> >>>  	HV_X64_MSR_APIC_ASSIST_PAGE, MSR_KVM_ASYNC_PF_EN, MSR_KVM_STEAL_TIME,
> >>>  	MSR_KVM_PV_EOI_EN,
> >>>  	MSR_IA32_SYSENTER_CS, MSR_IA32_SYSENTER_ESP, MSR_IA32_SYSENTER_EIP,
> >>> @@ -1826,6 +1826,8 @@ static bool kvm_hv_msr_partition_wide(u32 msr)
> >>>  	switch (msr) {
> >>>  	case HV_X64_MSR_GUEST_OS_ID:
> >>>  	case HV_X64_MSR_HYPERCALL:
> >>> +	case HV_X64_MSR_REFERENCE_TSC:
> >>> +	case HV_X64_MSR_TIME_REF_COUNT:
> >>>  		r = true;
> >>>  		break;
> >>>  	}
> >>> @@ -1865,6 +1867,29 @@ static int set_msr_hyperv_pw(struct kvm_vcpu *vcpu, u32 msr, u64 data)
> >>>  		if (__copy_to_user((void __user *)addr, instructions, 4))
> >>>  			return 1;
> >>>  		kvm->arch.hv_hypercall = data;
> >>> +		local_irq_disable();
> >>> +		kvm->arch.hv_ref_count = get_kernel_ns() + kvm->arch.kvmclock_offset;
> >>> +		local_irq_enable();
> >>
> >> Where does the docs say that HV_X64_MSR_HYPERCALL is the where the clock
> >> starts counting?
> >>
> >> No need to store kvmclock_offset in hv_ref_count? (moreover
> >> the name is weird, better name would be "hv_ref_start_time".
> > 
> > Just add kvmclock_offset when reading the values (otherwise you have a
> > "stale copy" of kvmclock_offset in hv_ref_count).
> > 
> 
> After some experiments I think we do no need kvm->arch.hv_ref_count at all.
> 
> I was debugging some weird clockjump issues and I think the problem is that after live migration
> kvm->arch.hv_ref_count is initialized to 0. Depending on the uptime of the vServer when the
> hypercall was set up this can lead to series jumps.
> 
> So I would suggest to completely drop kvm->arch.hv_ref_count.
> 
> And use simply this in get_msr_hyperv_pw().
> 
>         case HV_X64_MSR_TIME_REF_COUNT: {
>                 data = div_u64(get_kernel_ns() + kvm->arch.kvmclock_offset, 100);
>                 break;
>         }
> 

Agreed. It should work as long as we rely on kvmclock_offset.
Vadim.

> It seems that get_kernel_ns() + kvm->arch.kvmclock_offset is exactly the vServer uptime
> in nanoseconds which starts counting at 0.
> 
> Peter
> 
> 


--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Peter Lieven Jan. 7, 2014, 5:52 p.m. UTC | #12
Am 07.01.2014 10:36, schrieb Vadim Rozenfeld:
> On Thu, 2014-01-02 at 17:52 +0100, Peter Lieven wrote:
>> Am 11.12.2013 19:59, schrieb Marcelo Tosatti:
>>> On Wed, Dec 11, 2013 at 04:53:05PM -0200, Marcelo Tosatti wrote:
>>>> On Sun, Dec 08, 2013 at 10:33:38PM +1100, Vadim Rozenfeld wrote:
>>>>> Signed-off: Peter Lieven <pl@dlh.net>
>>>>> Signed-off: Gleb Natapov <gleb@redhat.com>
>>>>> Signed-off: Vadim Rozenfeld <vrozenfe@redhat.com>
>>>>>
>>>>> v1 -> v2
>>>>> 1. mark TSC page dirty as suggested by 
>>>>>     Eric Northup <digitaleric@google.com> and Gleb
>>>>> 2. disable local irq when calling get_kernel_ns, 
>>>>>     as it was done by Peter Lieven <pl@dlhnet.de>
>>>>> 3. move check for TSC page enable from second patch
>>>>>     to this one.
>>>>>
>>>>> ---
>>>>>  arch/x86/include/asm/kvm_host.h    |  2 ++
>>>>>  arch/x86/include/uapi/asm/hyperv.h | 13 +++++++++++++
>>>>>  arch/x86/kvm/x86.c                 | 39 +++++++++++++++++++++++++++++++++++++-
>>>>>  include/uapi/linux/kvm.h           |  1 +
>>>>>  4 files changed, 54 insertions(+), 1 deletion(-)
>>>>>
>>>>> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
>>>>> index ae5d783..2fd0753 100644
>>>>> --- a/arch/x86/include/asm/kvm_host.h
>>>>> +++ b/arch/x86/include/asm/kvm_host.h
>>>>> @@ -605,6 +605,8 @@ struct kvm_arch {
>>>>>  	/* fields used by HYPER-V emulation */
>>>>>  	u64 hv_guest_os_id;
>>>>>  	u64 hv_hypercall;
>>>>> +	u64 hv_ref_count;
>>>>> +	u64 hv_tsc_page;
>>>>>  
>>>>>  	#ifdef CONFIG_KVM_MMU_AUDIT
>>>>>  	int audit_point;
>>>>> diff --git a/arch/x86/include/uapi/asm/hyperv.h b/arch/x86/include/uapi/asm/hyperv.h
>>>>> index b8f1c01..462efe7 100644
>>>>> --- a/arch/x86/include/uapi/asm/hyperv.h
>>>>> +++ b/arch/x86/include/uapi/asm/hyperv.h
>>>>> @@ -28,6 +28,9 @@
>>>>>  /* Partition Reference Counter (HV_X64_MSR_TIME_REF_COUNT) available*/
>>>>>  #define HV_X64_MSR_TIME_REF_COUNT_AVAILABLE	(1 << 1)
>>>>>  
>>>>> +/* A partition's reference time stamp counter (TSC) page */
>>>>> +#define HV_X64_MSR_REFERENCE_TSC		0x40000021
>>>>> +
>>>>>  /*
>>>>>   * There is a single feature flag that signifies the presence of the MSR
>>>>>   * that can be used to retrieve both the local APIC Timer frequency as
>>>>> @@ -198,6 +201,9 @@
>>>>>  #define HV_X64_MSR_APIC_ASSIST_PAGE_ADDRESS_MASK	\
>>>>>  		(~((1ull << HV_X64_MSR_APIC_ASSIST_PAGE_ADDRESS_SHIFT) - 1))
>>>>>  
>>>>> +#define HV_X64_MSR_TSC_REFERENCE_ENABLE		0x00000001
>>>>> +#define HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT	12
>>>>> +
>>>>>  #define HV_PROCESSOR_POWER_STATE_C0		0
>>>>>  #define HV_PROCESSOR_POWER_STATE_C1		1
>>>>>  #define HV_PROCESSOR_POWER_STATE_C2		2
>>>>> @@ -210,4 +216,11 @@
>>>>>  #define HV_STATUS_INVALID_ALIGNMENT		4
>>>>>  #define HV_STATUS_INSUFFICIENT_BUFFERS		19
>>>>>  
>>>>> +typedef struct _HV_REFERENCE_TSC_PAGE {
>>>>> +	__u32 tsc_sequence;
>>>>> +	__u32 res1;
>>>>> +	__u64 tsc_scale;
>>>>> +	__s64 tsc_offset;
>>>>> +} HV_REFERENCE_TSC_PAGE, *PHV_REFERENCE_TSC_PAGE;
>>>>> +
>>>>>  #endif
>>>>> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
>>>>> index 21ef1ba..5e4e495a 100644
>>>>> --- a/arch/x86/kvm/x86.c
>>>>> +++ b/arch/x86/kvm/x86.c
>>>>> @@ -840,7 +840,7 @@ EXPORT_SYMBOL_GPL(kvm_rdpmc);
>>>>>  static u32 msrs_to_save[] = {
>>>>>  	MSR_KVM_SYSTEM_TIME, MSR_KVM_WALL_CLOCK,
>>>>>  	MSR_KVM_SYSTEM_TIME_NEW, MSR_KVM_WALL_CLOCK_NEW,
>>>>> -	HV_X64_MSR_GUEST_OS_ID, HV_X64_MSR_HYPERCALL,
>>>>> +	HV_X64_MSR_GUEST_OS_ID, HV_X64_MSR_HYPERCALL, HV_X64_MSR_TIME_REF_COUNT,
>>>>>  	HV_X64_MSR_APIC_ASSIST_PAGE, MSR_KVM_ASYNC_PF_EN, MSR_KVM_STEAL_TIME,
>>>>>  	MSR_KVM_PV_EOI_EN,
>>>>>  	MSR_IA32_SYSENTER_CS, MSR_IA32_SYSENTER_ESP, MSR_IA32_SYSENTER_EIP,
>>>>> @@ -1826,6 +1826,8 @@ static bool kvm_hv_msr_partition_wide(u32 msr)
>>>>>  	switch (msr) {
>>>>>  	case HV_X64_MSR_GUEST_OS_ID:
>>>>>  	case HV_X64_MSR_HYPERCALL:
>>>>> +	case HV_X64_MSR_REFERENCE_TSC:
>>>>> +	case HV_X64_MSR_TIME_REF_COUNT:
>>>>>  		r = true;
>>>>>  		break;
>>>>>  	}
>>>>> @@ -1865,6 +1867,29 @@ static int set_msr_hyperv_pw(struct kvm_vcpu *vcpu, u32 msr, u64 data)
>>>>>  		if (__copy_to_user((void __user *)addr, instructions, 4))
>>>>>  			return 1;
>>>>>  		kvm->arch.hv_hypercall = data;
>>>>> +		local_irq_disable();
>>>>> +		kvm->arch.hv_ref_count = get_kernel_ns() + kvm->arch.kvmclock_offset;
>>>>> +		local_irq_enable();
>>>>
>>>> Where does the docs say that HV_X64_MSR_HYPERCALL is the where the clock
>>>> starts counting?
>>>>
>>>> No need to store kvmclock_offset in hv_ref_count? (moreover
>>>> the name is weird, better name would be "hv_ref_start_time".
>>>
>>> Just add kvmclock_offset when reading the values (otherwise you have a
>>> "stale copy" of kvmclock_offset in hv_ref_count).
>>>
>>
>> After some experiments I think we do no need kvm->arch.hv_ref_count at all.
>>
>> I was debugging some weird clockjump issues and I think the problem is that after live migration
>> kvm->arch.hv_ref_count is initialized to 0. Depending on the uptime of the vServer when the
>> hypercall was set up this can lead to series jumps.
>>
>> So I would suggest to completely drop kvm->arch.hv_ref_count.
>>
>> And use simply this in get_msr_hyperv_pw().
>>
>>         case HV_X64_MSR_TIME_REF_COUNT: {
>>                 data = div_u64(get_kernel_ns() + kvm->arch.kvmclock_offset, 100);
>>                 break;
>>         }
>>
> 
> Agreed. It should work as long as we rely on kvmclock_offset.
> Vadim.

I think we can rely on kvmclock_offset. Had you had a chance to do further testing
during the weekend?

Peter
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Vadim Rozenfeld Jan. 8, 2014, 9:40 a.m. UTC | #13
On Tue, 2014-01-07 at 18:52 +0100, Peter Lieven wrote:
> Am 07.01.2014 10:36, schrieb Vadim Rozenfeld:
> > On Thu, 2014-01-02 at 17:52 +0100, Peter Lieven wrote:
> >> Am 11.12.2013 19:59, schrieb Marcelo Tosatti:
> >>> On Wed, Dec 11, 2013 at 04:53:05PM -0200, Marcelo Tosatti wrote:
> >>>> On Sun, Dec 08, 2013 at 10:33:38PM +1100, Vadim Rozenfeld wrote:
> >>>>> Signed-off: Peter Lieven <pl@dlh.net>
> >>>>> Signed-off: Gleb Natapov <gleb@redhat.com>
> >>>>> Signed-off: Vadim Rozenfeld <vrozenfe@redhat.com>
> >>>>>
> >>>>> v1 -> v2
> >>>>> 1. mark TSC page dirty as suggested by 
> >>>>>     Eric Northup <digitaleric@google.com> and Gleb
> >>>>> 2. disable local irq when calling get_kernel_ns, 
> >>>>>     as it was done by Peter Lieven <pl@dlhnet.de>
> >>>>> 3. move check for TSC page enable from second patch
> >>>>>     to this one.
> >>>>>
> >>>>> ---
> >>>>>  arch/x86/include/asm/kvm_host.h    |  2 ++
> >>>>>  arch/x86/include/uapi/asm/hyperv.h | 13 +++++++++++++
> >>>>>  arch/x86/kvm/x86.c                 | 39 +++++++++++++++++++++++++++++++++++++-
> >>>>>  include/uapi/linux/kvm.h           |  1 +
> >>>>>  4 files changed, 54 insertions(+), 1 deletion(-)
> >>>>>
> >>>>> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> >>>>> index ae5d783..2fd0753 100644
> >>>>> --- a/arch/x86/include/asm/kvm_host.h
> >>>>> +++ b/arch/x86/include/asm/kvm_host.h
> >>>>> @@ -605,6 +605,8 @@ struct kvm_arch {
> >>>>>  	/* fields used by HYPER-V emulation */
> >>>>>  	u64 hv_guest_os_id;
> >>>>>  	u64 hv_hypercall;
> >>>>> +	u64 hv_ref_count;
> >>>>> +	u64 hv_tsc_page;
> >>>>>  
> >>>>>  	#ifdef CONFIG_KVM_MMU_AUDIT
> >>>>>  	int audit_point;
> >>>>> diff --git a/arch/x86/include/uapi/asm/hyperv.h b/arch/x86/include/uapi/asm/hyperv.h
> >>>>> index b8f1c01..462efe7 100644
> >>>>> --- a/arch/x86/include/uapi/asm/hyperv.h
> >>>>> +++ b/arch/x86/include/uapi/asm/hyperv.h
> >>>>> @@ -28,6 +28,9 @@
> >>>>>  /* Partition Reference Counter (HV_X64_MSR_TIME_REF_COUNT) available*/
> >>>>>  #define HV_X64_MSR_TIME_REF_COUNT_AVAILABLE	(1 << 1)
> >>>>>  
> >>>>> +/* A partition's reference time stamp counter (TSC) page */
> >>>>> +#define HV_X64_MSR_REFERENCE_TSC		0x40000021
> >>>>> +
> >>>>>  /*
> >>>>>   * There is a single feature flag that signifies the presence of the MSR
> >>>>>   * that can be used to retrieve both the local APIC Timer frequency as
> >>>>> @@ -198,6 +201,9 @@
> >>>>>  #define HV_X64_MSR_APIC_ASSIST_PAGE_ADDRESS_MASK	\
> >>>>>  		(~((1ull << HV_X64_MSR_APIC_ASSIST_PAGE_ADDRESS_SHIFT) - 1))
> >>>>>  
> >>>>> +#define HV_X64_MSR_TSC_REFERENCE_ENABLE		0x00000001
> >>>>> +#define HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT	12
> >>>>> +
> >>>>>  #define HV_PROCESSOR_POWER_STATE_C0		0
> >>>>>  #define HV_PROCESSOR_POWER_STATE_C1		1
> >>>>>  #define HV_PROCESSOR_POWER_STATE_C2		2
> >>>>> @@ -210,4 +216,11 @@
> >>>>>  #define HV_STATUS_INVALID_ALIGNMENT		4
> >>>>>  #define HV_STATUS_INSUFFICIENT_BUFFERS		19
> >>>>>  
> >>>>> +typedef struct _HV_REFERENCE_TSC_PAGE {
> >>>>> +	__u32 tsc_sequence;
> >>>>> +	__u32 res1;
> >>>>> +	__u64 tsc_scale;
> >>>>> +	__s64 tsc_offset;
> >>>>> +} HV_REFERENCE_TSC_PAGE, *PHV_REFERENCE_TSC_PAGE;
> >>>>> +
> >>>>>  #endif
> >>>>> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> >>>>> index 21ef1ba..5e4e495a 100644
> >>>>> --- a/arch/x86/kvm/x86.c
> >>>>> +++ b/arch/x86/kvm/x86.c
> >>>>> @@ -840,7 +840,7 @@ EXPORT_SYMBOL_GPL(kvm_rdpmc);
> >>>>>  static u32 msrs_to_save[] = {
> >>>>>  	MSR_KVM_SYSTEM_TIME, MSR_KVM_WALL_CLOCK,
> >>>>>  	MSR_KVM_SYSTEM_TIME_NEW, MSR_KVM_WALL_CLOCK_NEW,
> >>>>> -	HV_X64_MSR_GUEST_OS_ID, HV_X64_MSR_HYPERCALL,
> >>>>> +	HV_X64_MSR_GUEST_OS_ID, HV_X64_MSR_HYPERCALL, HV_X64_MSR_TIME_REF_COUNT,
> >>>>>  	HV_X64_MSR_APIC_ASSIST_PAGE, MSR_KVM_ASYNC_PF_EN, MSR_KVM_STEAL_TIME,
> >>>>>  	MSR_KVM_PV_EOI_EN,
> >>>>>  	MSR_IA32_SYSENTER_CS, MSR_IA32_SYSENTER_ESP, MSR_IA32_SYSENTER_EIP,
> >>>>> @@ -1826,6 +1826,8 @@ static bool kvm_hv_msr_partition_wide(u32 msr)
> >>>>>  	switch (msr) {
> >>>>>  	case HV_X64_MSR_GUEST_OS_ID:
> >>>>>  	case HV_X64_MSR_HYPERCALL:
> >>>>> +	case HV_X64_MSR_REFERENCE_TSC:
> >>>>> +	case HV_X64_MSR_TIME_REF_COUNT:
> >>>>>  		r = true;
> >>>>>  		break;
> >>>>>  	}
> >>>>> @@ -1865,6 +1867,29 @@ static int set_msr_hyperv_pw(struct kvm_vcpu *vcpu, u32 msr, u64 data)
> >>>>>  		if (__copy_to_user((void __user *)addr, instructions, 4))
> >>>>>  			return 1;
> >>>>>  		kvm->arch.hv_hypercall = data;
> >>>>> +		local_irq_disable();
> >>>>> +		kvm->arch.hv_ref_count = get_kernel_ns() + kvm->arch.kvmclock_offset;
> >>>>> +		local_irq_enable();
> >>>>
> >>>> Where does the docs say that HV_X64_MSR_HYPERCALL is the where the clock
> >>>> starts counting?
> >>>>
> >>>> No need to store kvmclock_offset in hv_ref_count? (moreover
> >>>> the name is weird, better name would be "hv_ref_start_time".
> >>>
> >>> Just add kvmclock_offset when reading the values (otherwise you have a
> >>> "stale copy" of kvmclock_offset in hv_ref_count).
> >>>
> >>
> >> After some experiments I think we do no need kvm->arch.hv_ref_count at all.
> >>
> >> I was debugging some weird clockjump issues and I think the problem is that after live migration
> >> kvm->arch.hv_ref_count is initialized to 0. Depending on the uptime of the vServer when the
> >> hypercall was set up this can lead to series jumps.
> >>
> >> So I would suggest to completely drop kvm->arch.hv_ref_count.
> >>
> >> And use simply this in get_msr_hyperv_pw().
> >>
> >>         case HV_X64_MSR_TIME_REF_COUNT: {
> >>                 data = div_u64(get_kernel_ns() + kvm->arch.kvmclock_offset, 100);
> >>                 break;
> >>         }
> >>
> > 
> > Agreed. It should work as long as we rely on kvmclock_offset.
> > Vadim.
> 
> I think we can rely on kvmclock_offset. Had you had a chance to do further testing
> during the weekend?

Yes, and still testing. 

There is still some inconsistency in the value returned by
QueryPerformanceCounter during migration.

Vadim. 
> 
> Peter
> --
> To unsubscribe from this list: send the line "unsubscribe kvm" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html


--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Peter Lieven Jan. 8, 2014, 10:15 a.m. UTC | #14
On 08.01.2014 10:40, Vadim Rozenfeld wrote:
> On Tue, 2014-01-07 at 18:52 +0100, Peter Lieven wrote:
>> Am 07.01.2014 10:36, schrieb Vadim Rozenfeld:
>>> On Thu, 2014-01-02 at 17:52 +0100, Peter Lieven wrote:
>>>> Am 11.12.2013 19:59, schrieb Marcelo Tosatti:
>>>>> On Wed, Dec 11, 2013 at 04:53:05PM -0200, Marcelo Tosatti wrote:
>>>>>> On Sun, Dec 08, 2013 at 10:33:38PM +1100, Vadim Rozenfeld wrote:
>>>>>>> Signed-off: Peter Lieven <pl@dlh.net>
>>>>>>> Signed-off: Gleb Natapov <gleb@redhat.com>
>>>>>>> Signed-off: Vadim Rozenfeld <vrozenfe@redhat.com>
>>>>>>>
>>>>>>> v1 -> v2
>>>>>>> 1. mark TSC page dirty as suggested by
>>>>>>>      Eric Northup <digitaleric@google.com> and Gleb
>>>>>>> 2. disable local irq when calling get_kernel_ns,
>>>>>>>      as it was done by Peter Lieven <pl@dlhnet.de>
>>>>>>> 3. move check for TSC page enable from second patch
>>>>>>>      to this one.
>>>>>>>
>>>>>>> ---
>>>>>>>   arch/x86/include/asm/kvm_host.h    |  2 ++
>>>>>>>   arch/x86/include/uapi/asm/hyperv.h | 13 +++++++++++++
>>>>>>>   arch/x86/kvm/x86.c                 | 39 +++++++++++++++++++++++++++++++++++++-
>>>>>>>   include/uapi/linux/kvm.h           |  1 +
>>>>>>>   4 files changed, 54 insertions(+), 1 deletion(-)
>>>>>>>
>>>>>>> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
>>>>>>> index ae5d783..2fd0753 100644
>>>>>>> --- a/arch/x86/include/asm/kvm_host.h
>>>>>>> +++ b/arch/x86/include/asm/kvm_host.h
>>>>>>> @@ -605,6 +605,8 @@ struct kvm_arch {
>>>>>>>   	/* fields used by HYPER-V emulation */
>>>>>>>   	u64 hv_guest_os_id;
>>>>>>>   	u64 hv_hypercall;
>>>>>>> +	u64 hv_ref_count;
>>>>>>> +	u64 hv_tsc_page;
>>>>>>>
>>>>>>>   	#ifdef CONFIG_KVM_MMU_AUDIT
>>>>>>>   	int audit_point;
>>>>>>> diff --git a/arch/x86/include/uapi/asm/hyperv.h b/arch/x86/include/uapi/asm/hyperv.h
>>>>>>> index b8f1c01..462efe7 100644
>>>>>>> --- a/arch/x86/include/uapi/asm/hyperv.h
>>>>>>> +++ b/arch/x86/include/uapi/asm/hyperv.h
>>>>>>> @@ -28,6 +28,9 @@
>>>>>>>   /* Partition Reference Counter (HV_X64_MSR_TIME_REF_COUNT) available*/
>>>>>>>   #define HV_X64_MSR_TIME_REF_COUNT_AVAILABLE	(1 << 1)
>>>>>>>
>>>>>>> +/* A partition's reference time stamp counter (TSC) page */
>>>>>>> +#define HV_X64_MSR_REFERENCE_TSC		0x40000021
>>>>>>> +
>>>>>>>   /*
>>>>>>>    * There is a single feature flag that signifies the presence of the MSR
>>>>>>>    * that can be used to retrieve both the local APIC Timer frequency as
>>>>>>> @@ -198,6 +201,9 @@
>>>>>>>   #define HV_X64_MSR_APIC_ASSIST_PAGE_ADDRESS_MASK	\
>>>>>>>   		(~((1ull << HV_X64_MSR_APIC_ASSIST_PAGE_ADDRESS_SHIFT) - 1))
>>>>>>>
>>>>>>> +#define HV_X64_MSR_TSC_REFERENCE_ENABLE		0x00000001
>>>>>>> +#define HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT	12
>>>>>>> +
>>>>>>>   #define HV_PROCESSOR_POWER_STATE_C0		0
>>>>>>>   #define HV_PROCESSOR_POWER_STATE_C1		1
>>>>>>>   #define HV_PROCESSOR_POWER_STATE_C2		2
>>>>>>> @@ -210,4 +216,11 @@
>>>>>>>   #define HV_STATUS_INVALID_ALIGNMENT		4
>>>>>>>   #define HV_STATUS_INSUFFICIENT_BUFFERS		19
>>>>>>>
>>>>>>> +typedef struct _HV_REFERENCE_TSC_PAGE {
>>>>>>> +	__u32 tsc_sequence;
>>>>>>> +	__u32 res1;
>>>>>>> +	__u64 tsc_scale;
>>>>>>> +	__s64 tsc_offset;
>>>>>>> +} HV_REFERENCE_TSC_PAGE, *PHV_REFERENCE_TSC_PAGE;
>>>>>>> +
>>>>>>>   #endif
>>>>>>> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
>>>>>>> index 21ef1ba..5e4e495a 100644
>>>>>>> --- a/arch/x86/kvm/x86.c
>>>>>>> +++ b/arch/x86/kvm/x86.c
>>>>>>> @@ -840,7 +840,7 @@ EXPORT_SYMBOL_GPL(kvm_rdpmc);
>>>>>>>   static u32 msrs_to_save[] = {
>>>>>>>   	MSR_KVM_SYSTEM_TIME, MSR_KVM_WALL_CLOCK,
>>>>>>>   	MSR_KVM_SYSTEM_TIME_NEW, MSR_KVM_WALL_CLOCK_NEW,
>>>>>>> -	HV_X64_MSR_GUEST_OS_ID, HV_X64_MSR_HYPERCALL,
>>>>>>> +	HV_X64_MSR_GUEST_OS_ID, HV_X64_MSR_HYPERCALL, HV_X64_MSR_TIME_REF_COUNT,
>>>>>>>   	HV_X64_MSR_APIC_ASSIST_PAGE, MSR_KVM_ASYNC_PF_EN, MSR_KVM_STEAL_TIME,
>>>>>>>   	MSR_KVM_PV_EOI_EN,
>>>>>>>   	MSR_IA32_SYSENTER_CS, MSR_IA32_SYSENTER_ESP, MSR_IA32_SYSENTER_EIP,
>>>>>>> @@ -1826,6 +1826,8 @@ static bool kvm_hv_msr_partition_wide(u32 msr)
>>>>>>>   	switch (msr) {
>>>>>>>   	case HV_X64_MSR_GUEST_OS_ID:
>>>>>>>   	case HV_X64_MSR_HYPERCALL:
>>>>>>> +	case HV_X64_MSR_REFERENCE_TSC:
>>>>>>> +	case HV_X64_MSR_TIME_REF_COUNT:
>>>>>>>   		r = true;
>>>>>>>   		break;
>>>>>>>   	}
>>>>>>> @@ -1865,6 +1867,29 @@ static int set_msr_hyperv_pw(struct kvm_vcpu *vcpu, u32 msr, u64 data)
>>>>>>>   		if (__copy_to_user((void __user *)addr, instructions, 4))
>>>>>>>   			return 1;
>>>>>>>   		kvm->arch.hv_hypercall = data;
>>>>>>> +		local_irq_disable();
>>>>>>> +		kvm->arch.hv_ref_count = get_kernel_ns() + kvm->arch.kvmclock_offset;
>>>>>>> +		local_irq_enable();
>>>>>>
>>>>>> Where does the docs say that HV_X64_MSR_HYPERCALL is the where the clock
>>>>>> starts counting?
>>>>>>
>>>>>> No need to store kvmclock_offset in hv_ref_count? (moreover
>>>>>> the name is weird, better name would be "hv_ref_start_time".
>>>>>
>>>>> Just add kvmclock_offset when reading the values (otherwise you have a
>>>>> "stale copy" of kvmclock_offset in hv_ref_count).
>>>>>
>>>>
>>>> After some experiments I think we do no need kvm->arch.hv_ref_count at all.
>>>>
>>>> I was debugging some weird clockjump issues and I think the problem is that after live migration
>>>> kvm->arch.hv_ref_count is initialized to 0. Depending on the uptime of the vServer when the
>>>> hypercall was set up this can lead to series jumps.
>>>>
>>>> So I would suggest to completely drop kvm->arch.hv_ref_count.
>>>>
>>>> And use simply this in get_msr_hyperv_pw().
>>>>
>>>>          case HV_X64_MSR_TIME_REF_COUNT: {
>>>>                  data = div_u64(get_kernel_ns() + kvm->arch.kvmclock_offset, 100);
>>>>                  break;
>>>>          }
>>>>
>>>
>>> Agreed. It should work as long as we rely on kvmclock_offset.
>>> Vadim.
>>
>> I think we can rely on kvmclock_offset. Had you had a chance to do further testing
>> during the weekend?
>
> Yes, and still testing.
>
> There is still some inconsistency in the value returned by
> QueryPerformanceCounter during migration.

With my proposal?

Peter

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Vadim Rozenfeld Jan. 8, 2014, 10:44 a.m. UTC | #15
On Wed, 2014-01-08 at 11:15 +0100, Peter Lieven wrote:
> On 08.01.2014 10:40, Vadim Rozenfeld wrote:
> > On Tue, 2014-01-07 at 18:52 +0100, Peter Lieven wrote:
> >> Am 07.01.2014 10:36, schrieb Vadim Rozenfeld:
> >>> On Thu, 2014-01-02 at 17:52 +0100, Peter Lieven wrote:
> >>>> Am 11.12.2013 19:59, schrieb Marcelo Tosatti:
> >>>>> On Wed, Dec 11, 2013 at 04:53:05PM -0200, Marcelo Tosatti wrote:
> >>>>>> On Sun, Dec 08, 2013 at 10:33:38PM +1100, Vadim Rozenfeld wrote:
> >>>>>>> Signed-off: Peter Lieven <pl@dlh.net>
> >>>>>>> Signed-off: Gleb Natapov <gleb@redhat.com>
> >>>>>>> Signed-off: Vadim Rozenfeld <vrozenfe@redhat.com>
> >>>>>>>
> >>>>>>> v1 -> v2
> >>>>>>> 1. mark TSC page dirty as suggested by
> >>>>>>>      Eric Northup <digitaleric@google.com> and Gleb
> >>>>>>> 2. disable local irq when calling get_kernel_ns,
> >>>>>>>      as it was done by Peter Lieven <pl@dlhnet.de>
> >>>>>>> 3. move check for TSC page enable from second patch
> >>>>>>>      to this one.
> >>>>>>>
> >>>>>>> ---
> >>>>>>>   arch/x86/include/asm/kvm_host.h    |  2 ++
> >>>>>>>   arch/x86/include/uapi/asm/hyperv.h | 13 +++++++++++++
> >>>>>>>   arch/x86/kvm/x86.c                 | 39 +++++++++++++++++++++++++++++++++++++-
> >>>>>>>   include/uapi/linux/kvm.h           |  1 +
> >>>>>>>   4 files changed, 54 insertions(+), 1 deletion(-)
> >>>>>>>
> >>>>>>> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> >>>>>>> index ae5d783..2fd0753 100644
> >>>>>>> --- a/arch/x86/include/asm/kvm_host.h
> >>>>>>> +++ b/arch/x86/include/asm/kvm_host.h
> >>>>>>> @@ -605,6 +605,8 @@ struct kvm_arch {
> >>>>>>>   	/* fields used by HYPER-V emulation */
> >>>>>>>   	u64 hv_guest_os_id;
> >>>>>>>   	u64 hv_hypercall;
> >>>>>>> +	u64 hv_ref_count;
> >>>>>>> +	u64 hv_tsc_page;
> >>>>>>>
> >>>>>>>   	#ifdef CONFIG_KVM_MMU_AUDIT
> >>>>>>>   	int audit_point;
> >>>>>>> diff --git a/arch/x86/include/uapi/asm/hyperv.h b/arch/x86/include/uapi/asm/hyperv.h
> >>>>>>> index b8f1c01..462efe7 100644
> >>>>>>> --- a/arch/x86/include/uapi/asm/hyperv.h
> >>>>>>> +++ b/arch/x86/include/uapi/asm/hyperv.h
> >>>>>>> @@ -28,6 +28,9 @@
> >>>>>>>   /* Partition Reference Counter (HV_X64_MSR_TIME_REF_COUNT) available*/
> >>>>>>>   #define HV_X64_MSR_TIME_REF_COUNT_AVAILABLE	(1 << 1)
> >>>>>>>
> >>>>>>> +/* A partition's reference time stamp counter (TSC) page */
> >>>>>>> +#define HV_X64_MSR_REFERENCE_TSC		0x40000021
> >>>>>>> +
> >>>>>>>   /*
> >>>>>>>    * There is a single feature flag that signifies the presence of the MSR
> >>>>>>>    * that can be used to retrieve both the local APIC Timer frequency as
> >>>>>>> @@ -198,6 +201,9 @@
> >>>>>>>   #define HV_X64_MSR_APIC_ASSIST_PAGE_ADDRESS_MASK	\
> >>>>>>>   		(~((1ull << HV_X64_MSR_APIC_ASSIST_PAGE_ADDRESS_SHIFT) - 1))
> >>>>>>>
> >>>>>>> +#define HV_X64_MSR_TSC_REFERENCE_ENABLE		0x00000001
> >>>>>>> +#define HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT	12
> >>>>>>> +
> >>>>>>>   #define HV_PROCESSOR_POWER_STATE_C0		0
> >>>>>>>   #define HV_PROCESSOR_POWER_STATE_C1		1
> >>>>>>>   #define HV_PROCESSOR_POWER_STATE_C2		2
> >>>>>>> @@ -210,4 +216,11 @@
> >>>>>>>   #define HV_STATUS_INVALID_ALIGNMENT		4
> >>>>>>>   #define HV_STATUS_INSUFFICIENT_BUFFERS		19
> >>>>>>>
> >>>>>>> +typedef struct _HV_REFERENCE_TSC_PAGE {
> >>>>>>> +	__u32 tsc_sequence;
> >>>>>>> +	__u32 res1;
> >>>>>>> +	__u64 tsc_scale;
> >>>>>>> +	__s64 tsc_offset;
> >>>>>>> +} HV_REFERENCE_TSC_PAGE, *PHV_REFERENCE_TSC_PAGE;
> >>>>>>> +
> >>>>>>>   #endif
> >>>>>>> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> >>>>>>> index 21ef1ba..5e4e495a 100644
> >>>>>>> --- a/arch/x86/kvm/x86.c
> >>>>>>> +++ b/arch/x86/kvm/x86.c
> >>>>>>> @@ -840,7 +840,7 @@ EXPORT_SYMBOL_GPL(kvm_rdpmc);
> >>>>>>>   static u32 msrs_to_save[] = {
> >>>>>>>   	MSR_KVM_SYSTEM_TIME, MSR_KVM_WALL_CLOCK,
> >>>>>>>   	MSR_KVM_SYSTEM_TIME_NEW, MSR_KVM_WALL_CLOCK_NEW,
> >>>>>>> -	HV_X64_MSR_GUEST_OS_ID, HV_X64_MSR_HYPERCALL,
> >>>>>>> +	HV_X64_MSR_GUEST_OS_ID, HV_X64_MSR_HYPERCALL, HV_X64_MSR_TIME_REF_COUNT,
> >>>>>>>   	HV_X64_MSR_APIC_ASSIST_PAGE, MSR_KVM_ASYNC_PF_EN, MSR_KVM_STEAL_TIME,
> >>>>>>>   	MSR_KVM_PV_EOI_EN,
> >>>>>>>   	MSR_IA32_SYSENTER_CS, MSR_IA32_SYSENTER_ESP, MSR_IA32_SYSENTER_EIP,
> >>>>>>> @@ -1826,6 +1826,8 @@ static bool kvm_hv_msr_partition_wide(u32 msr)
> >>>>>>>   	switch (msr) {
> >>>>>>>   	case HV_X64_MSR_GUEST_OS_ID:
> >>>>>>>   	case HV_X64_MSR_HYPERCALL:
> >>>>>>> +	case HV_X64_MSR_REFERENCE_TSC:
> >>>>>>> +	case HV_X64_MSR_TIME_REF_COUNT:
> >>>>>>>   		r = true;
> >>>>>>>   		break;
> >>>>>>>   	}
> >>>>>>> @@ -1865,6 +1867,29 @@ static int set_msr_hyperv_pw(struct kvm_vcpu *vcpu, u32 msr, u64 data)
> >>>>>>>   		if (__copy_to_user((void __user *)addr, instructions, 4))
> >>>>>>>   			return 1;
> >>>>>>>   		kvm->arch.hv_hypercall = data;
> >>>>>>> +		local_irq_disable();
> >>>>>>> +		kvm->arch.hv_ref_count = get_kernel_ns() + kvm->arch.kvmclock_offset;
> >>>>>>> +		local_irq_enable();
> >>>>>>
> >>>>>> Where does the docs say that HV_X64_MSR_HYPERCALL is the where the clock
> >>>>>> starts counting?
> >>>>>>
> >>>>>> No need to store kvmclock_offset in hv_ref_count? (moreover
> >>>>>> the name is weird, better name would be "hv_ref_start_time".
> >>>>>
> >>>>> Just add kvmclock_offset when reading the values (otherwise you have a
> >>>>> "stale copy" of kvmclock_offset in hv_ref_count).
> >>>>>
> >>>>
> >>>> After some experiments I think we do no need kvm->arch.hv_ref_count at all.
> >>>>
> >>>> I was debugging some weird clockjump issues and I think the problem is that after live migration
> >>>> kvm->arch.hv_ref_count is initialized to 0. Depending on the uptime of the vServer when the
> >>>> hypercall was set up this can lead to series jumps.
> >>>>
> >>>> So I would suggest to completely drop kvm->arch.hv_ref_count.
> >>>>
> >>>> And use simply this in get_msr_hyperv_pw().
> >>>>
> >>>>          case HV_X64_MSR_TIME_REF_COUNT: {
> >>>>                  data = div_u64(get_kernel_ns() + kvm->arch.kvmclock_offset, 100);
> >>>>                  break;
> >>>>          }
> >>>>
> >>>
> >>> Agreed. It should work as long as we rely on kvmclock_offset.
> >>> Vadim.
> >>
> >> I think we can rely on kvmclock_offset. Had you had a chance to do further testing
> >> during the weekend?
> >
> > Yes, and still testing.
> >
> > There is still some inconsistency in the value returned by
> > QueryPerformanceCounter during migration.
> 
> With my proposal?
> 

Right. 
Please see below QTC vs. NTP time stamp
before and after migration:

C:\timedrift\objchk_win7_x86\i386>timedrift.exe
quantum = 15
QPC     NTP     Delta
1311    1307    4
2621    2618    3
3932    3931    1
.......
.......
38018   38014   4
Unable to wait for NTP reply from the SNTP server, GetLastError returns
7279088
142210  40113   102097
143458  41357   102101
.......
.......
156500  54615   101885

NTP time changed from  38014 to 40113 ((40113 - 38014) / 100 = 21 mSec)
while QPC jumped from  38018 to 142210 ((142210 - 38014) / 100 = 1042
mSec)

Vadim.

> Peter
> 


--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Peter Lieven Jan. 8, 2014, 11:48 a.m. UTC | #16
On 08.01.2014 11:44, Vadim Rozenfeld wrote:
> On Wed, 2014-01-08 at 11:15 +0100, Peter Lieven wrote:
>> On 08.01.2014 10:40, Vadim Rozenfeld wrote:
>>> On Tue, 2014-01-07 at 18:52 +0100, Peter Lieven wrote:
>>>> Am 07.01.2014 10:36, schrieb Vadim Rozenfeld:
>>>>> On Thu, 2014-01-02 at 17:52 +0100, Peter Lieven wrote:
>>>>>> Am 11.12.2013 19:59, schrieb Marcelo Tosatti:
>>>>>>> On Wed, Dec 11, 2013 at 04:53:05PM -0200, Marcelo Tosatti wrote:
>>>>>>>> On Sun, Dec 08, 2013 at 10:33:38PM +1100, Vadim Rozenfeld wrote:
>>>>>>>>> Signed-off: Peter Lieven <pl@dlh.net>
>>>>>>>>> Signed-off: Gleb Natapov <gleb@redhat.com>
>>>>>>>>> Signed-off: Vadim Rozenfeld <vrozenfe@redhat.com>
>>>>>>>>>
>>>>>>>>> v1 -> v2
>>>>>>>>> 1. mark TSC page dirty as suggested by
>>>>>>>>>       Eric Northup <digitaleric@google.com> and Gleb
>>>>>>>>> 2. disable local irq when calling get_kernel_ns,
>>>>>>>>>       as it was done by Peter Lieven <pl@dlhnet.de>
>>>>>>>>> 3. move check for TSC page enable from second patch
>>>>>>>>>       to this one.
>>>>>>>>>
>>>>>>>>> ---
>>>>>>>>>    arch/x86/include/asm/kvm_host.h    |  2 ++
>>>>>>>>>    arch/x86/include/uapi/asm/hyperv.h | 13 +++++++++++++
>>>>>>>>>    arch/x86/kvm/x86.c                 | 39 +++++++++++++++++++++++++++++++++++++-
>>>>>>>>>    include/uapi/linux/kvm.h           |  1 +
>>>>>>>>>    4 files changed, 54 insertions(+), 1 deletion(-)
>>>>>>>>>
>>>>>>>>> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
>>>>>>>>> index ae5d783..2fd0753 100644
>>>>>>>>> --- a/arch/x86/include/asm/kvm_host.h
>>>>>>>>> +++ b/arch/x86/include/asm/kvm_host.h
>>>>>>>>> @@ -605,6 +605,8 @@ struct kvm_arch {
>>>>>>>>>    	/* fields used by HYPER-V emulation */
>>>>>>>>>    	u64 hv_guest_os_id;
>>>>>>>>>    	u64 hv_hypercall;
>>>>>>>>> +	u64 hv_ref_count;
>>>>>>>>> +	u64 hv_tsc_page;
>>>>>>>>>
>>>>>>>>>    	#ifdef CONFIG_KVM_MMU_AUDIT
>>>>>>>>>    	int audit_point;
>>>>>>>>> diff --git a/arch/x86/include/uapi/asm/hyperv.h b/arch/x86/include/uapi/asm/hyperv.h
>>>>>>>>> index b8f1c01..462efe7 100644
>>>>>>>>> --- a/arch/x86/include/uapi/asm/hyperv.h
>>>>>>>>> +++ b/arch/x86/include/uapi/asm/hyperv.h
>>>>>>>>> @@ -28,6 +28,9 @@
>>>>>>>>>    /* Partition Reference Counter (HV_X64_MSR_TIME_REF_COUNT) available*/
>>>>>>>>>    #define HV_X64_MSR_TIME_REF_COUNT_AVAILABLE	(1 << 1)
>>>>>>>>>
>>>>>>>>> +/* A partition's reference time stamp counter (TSC) page */
>>>>>>>>> +#define HV_X64_MSR_REFERENCE_TSC		0x40000021
>>>>>>>>> +
>>>>>>>>>    /*
>>>>>>>>>     * There is a single feature flag that signifies the presence of the MSR
>>>>>>>>>     * that can be used to retrieve both the local APIC Timer frequency as
>>>>>>>>> @@ -198,6 +201,9 @@
>>>>>>>>>    #define HV_X64_MSR_APIC_ASSIST_PAGE_ADDRESS_MASK	\
>>>>>>>>>    		(~((1ull << HV_X64_MSR_APIC_ASSIST_PAGE_ADDRESS_SHIFT) - 1))
>>>>>>>>>
>>>>>>>>> +#define HV_X64_MSR_TSC_REFERENCE_ENABLE		0x00000001
>>>>>>>>> +#define HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT	12
>>>>>>>>> +
>>>>>>>>>    #define HV_PROCESSOR_POWER_STATE_C0		0
>>>>>>>>>    #define HV_PROCESSOR_POWER_STATE_C1		1
>>>>>>>>>    #define HV_PROCESSOR_POWER_STATE_C2		2
>>>>>>>>> @@ -210,4 +216,11 @@
>>>>>>>>>    #define HV_STATUS_INVALID_ALIGNMENT		4
>>>>>>>>>    #define HV_STATUS_INSUFFICIENT_BUFFERS		19
>>>>>>>>>
>>>>>>>>> +typedef struct _HV_REFERENCE_TSC_PAGE {
>>>>>>>>> +	__u32 tsc_sequence;
>>>>>>>>> +	__u32 res1;
>>>>>>>>> +	__u64 tsc_scale;
>>>>>>>>> +	__s64 tsc_offset;
>>>>>>>>> +} HV_REFERENCE_TSC_PAGE, *PHV_REFERENCE_TSC_PAGE;
>>>>>>>>> +
>>>>>>>>>    #endif
>>>>>>>>> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
>>>>>>>>> index 21ef1ba..5e4e495a 100644
>>>>>>>>> --- a/arch/x86/kvm/x86.c
>>>>>>>>> +++ b/arch/x86/kvm/x86.c
>>>>>>>>> @@ -840,7 +840,7 @@ EXPORT_SYMBOL_GPL(kvm_rdpmc);
>>>>>>>>>    static u32 msrs_to_save[] = {
>>>>>>>>>    	MSR_KVM_SYSTEM_TIME, MSR_KVM_WALL_CLOCK,
>>>>>>>>>    	MSR_KVM_SYSTEM_TIME_NEW, MSR_KVM_WALL_CLOCK_NEW,
>>>>>>>>> -	HV_X64_MSR_GUEST_OS_ID, HV_X64_MSR_HYPERCALL,
>>>>>>>>> +	HV_X64_MSR_GUEST_OS_ID, HV_X64_MSR_HYPERCALL, HV_X64_MSR_TIME_REF_COUNT,
>>>>>>>>>    	HV_X64_MSR_APIC_ASSIST_PAGE, MSR_KVM_ASYNC_PF_EN, MSR_KVM_STEAL_TIME,
>>>>>>>>>    	MSR_KVM_PV_EOI_EN,
>>>>>>>>>    	MSR_IA32_SYSENTER_CS, MSR_IA32_SYSENTER_ESP, MSR_IA32_SYSENTER_EIP,
>>>>>>>>> @@ -1826,6 +1826,8 @@ static bool kvm_hv_msr_partition_wide(u32 msr)
>>>>>>>>>    	switch (msr) {
>>>>>>>>>    	case HV_X64_MSR_GUEST_OS_ID:
>>>>>>>>>    	case HV_X64_MSR_HYPERCALL:
>>>>>>>>> +	case HV_X64_MSR_REFERENCE_TSC:
>>>>>>>>> +	case HV_X64_MSR_TIME_REF_COUNT:
>>>>>>>>>    		r = true;
>>>>>>>>>    		break;
>>>>>>>>>    	}
>>>>>>>>> @@ -1865,6 +1867,29 @@ static int set_msr_hyperv_pw(struct kvm_vcpu *vcpu, u32 msr, u64 data)
>>>>>>>>>    		if (__copy_to_user((void __user *)addr, instructions, 4))
>>>>>>>>>    			return 1;
>>>>>>>>>    		kvm->arch.hv_hypercall = data;
>>>>>>>>> +		local_irq_disable();
>>>>>>>>> +		kvm->arch.hv_ref_count = get_kernel_ns() + kvm->arch.kvmclock_offset;
>>>>>>>>> +		local_irq_enable();
>>>>>>>>
>>>>>>>> Where does the docs say that HV_X64_MSR_HYPERCALL is the where the clock
>>>>>>>> starts counting?
>>>>>>>>
>>>>>>>> No need to store kvmclock_offset in hv_ref_count? (moreover
>>>>>>>> the name is weird, better name would be "hv_ref_start_time".
>>>>>>>
>>>>>>> Just add kvmclock_offset when reading the values (otherwise you have a
>>>>>>> "stale copy" of kvmclock_offset in hv_ref_count).
>>>>>>>
>>>>>>
>>>>>> After some experiments I think we do no need kvm->arch.hv_ref_count at all.
>>>>>>
>>>>>> I was debugging some weird clockjump issues and I think the problem is that after live migration
>>>>>> kvm->arch.hv_ref_count is initialized to 0. Depending on the uptime of the vServer when the
>>>>>> hypercall was set up this can lead to series jumps.
>>>>>>
>>>>>> So I would suggest to completely drop kvm->arch.hv_ref_count.
>>>>>>
>>>>>> And use simply this in get_msr_hyperv_pw().
>>>>>>
>>>>>>           case HV_X64_MSR_TIME_REF_COUNT: {
>>>>>>                   data = div_u64(get_kernel_ns() + kvm->arch.kvmclock_offset, 100);
>>>>>>                   break;
>>>>>>           }
>>>>>>
>>>>>
>>>>> Agreed. It should work as long as we rely on kvmclock_offset.
>>>>> Vadim.
>>>>
>>>> I think we can rely on kvmclock_offset. Had you had a chance to do further testing
>>>> during the weekend?
>>>
>>> Yes, and still testing.
>>>
>>> There is still some inconsistency in the value returned by
>>> QueryPerformanceCounter during migration.
>>
>> With my proposal?
>>
>
> Right.
> Please see below QTC vs. NTP time stamp
> before and after migration:
>
> C:\timedrift\objchk_win7_x86\i386>timedrift.exe
> quantum = 15
> QPC     NTP     Delta
> 1311    1307    4
> 2621    2618    3
> 3932    3931    1
> .......
> .......
> 38018   38014   4
> Unable to wait for NTP reply from the SNTP server, GetLastError returns
> 7279088
> 142210  40113   102097
> 143458  41357   102101
> .......
> .......
> 156500  54615   101885
>
> NTP time changed from  38014 to 40113 ((40113 - 38014) / 100 = 21 mSec)
> while QPC jumped from  38018 to 142210 ((142210 - 38014) / 100 = 1042
> mSec)

How long was the downtime when you migrated the vServer?

I have done similar tests and have not seen this...
Can you show the relevant parts of your patch. Remember I use the REFERENCE_COUTNER not iTSC.

Peter

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Vadim Rozenfeld Jan. 8, 2014, 12:12 p.m. UTC | #17
On Wed, 2014-01-08 at 12:48 +0100, Peter Lieven wrote:
> On 08.01.2014 11:44, Vadim Rozenfeld wrote:
> > On Wed, 2014-01-08 at 11:15 +0100, Peter Lieven wrote:
> >> On 08.01.2014 10:40, Vadim Rozenfeld wrote:
> >>> On Tue, 2014-01-07 at 18:52 +0100, Peter Lieven wrote:
> >>>> Am 07.01.2014 10:36, schrieb Vadim Rozenfeld:
> >>>>> On Thu, 2014-01-02 at 17:52 +0100, Peter Lieven wrote:
> >>>>>> Am 11.12.2013 19:59, schrieb Marcelo Tosatti:
> >>>>>>> On Wed, Dec 11, 2013 at 04:53:05PM -0200, Marcelo Tosatti wrote:
> >>>>>>>> On Sun, Dec 08, 2013 at 10:33:38PM +1100, Vadim Rozenfeld wrote:
> >>>>>>>>> Signed-off: Peter Lieven <pl@dlh.net>
> >>>>>>>>> Signed-off: Gleb Natapov <gleb@redhat.com>
> >>>>>>>>> Signed-off: Vadim Rozenfeld <vrozenfe@redhat.com>
> >>>>>>>>>
> >>>>>>>>> v1 -> v2
> >>>>>>>>> 1. mark TSC page dirty as suggested by
> >>>>>>>>>       Eric Northup <digitaleric@google.com> and Gleb
> >>>>>>>>> 2. disable local irq when calling get_kernel_ns,
> >>>>>>>>>       as it was done by Peter Lieven <pl@dlhnet.de>
> >>>>>>>>> 3. move check for TSC page enable from second patch
> >>>>>>>>>       to this one.
> >>>>>>>>>
> >>>>>>>>> ---
> >>>>>>>>>    arch/x86/include/asm/kvm_host.h    |  2 ++
> >>>>>>>>>    arch/x86/include/uapi/asm/hyperv.h | 13 +++++++++++++
> >>>>>>>>>    arch/x86/kvm/x86.c                 | 39 +++++++++++++++++++++++++++++++++++++-
> >>>>>>>>>    include/uapi/linux/kvm.h           |  1 +
> >>>>>>>>>    4 files changed, 54 insertions(+), 1 deletion(-)
> >>>>>>>>>
> >>>>>>>>> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> >>>>>>>>> index ae5d783..2fd0753 100644
> >>>>>>>>> --- a/arch/x86/include/asm/kvm_host.h
> >>>>>>>>> +++ b/arch/x86/include/asm/kvm_host.h
> >>>>>>>>> @@ -605,6 +605,8 @@ struct kvm_arch {
> >>>>>>>>>    	/* fields used by HYPER-V emulation */
> >>>>>>>>>    	u64 hv_guest_os_id;
> >>>>>>>>>    	u64 hv_hypercall;
> >>>>>>>>> +	u64 hv_ref_count;
> >>>>>>>>> +	u64 hv_tsc_page;
> >>>>>>>>>
> >>>>>>>>>    	#ifdef CONFIG_KVM_MMU_AUDIT
> >>>>>>>>>    	int audit_point;
> >>>>>>>>> diff --git a/arch/x86/include/uapi/asm/hyperv.h b/arch/x86/include/uapi/asm/hyperv.h
> >>>>>>>>> index b8f1c01..462efe7 100644
> >>>>>>>>> --- a/arch/x86/include/uapi/asm/hyperv.h
> >>>>>>>>> +++ b/arch/x86/include/uapi/asm/hyperv.h
> >>>>>>>>> @@ -28,6 +28,9 @@
> >>>>>>>>>    /* Partition Reference Counter (HV_X64_MSR_TIME_REF_COUNT) available*/
> >>>>>>>>>    #define HV_X64_MSR_TIME_REF_COUNT_AVAILABLE	(1 << 1)
> >>>>>>>>>
> >>>>>>>>> +/* A partition's reference time stamp counter (TSC) page */
> >>>>>>>>> +#define HV_X64_MSR_REFERENCE_TSC		0x40000021
> >>>>>>>>> +
> >>>>>>>>>    /*
> >>>>>>>>>     * There is a single feature flag that signifies the presence of the MSR
> >>>>>>>>>     * that can be used to retrieve both the local APIC Timer frequency as
> >>>>>>>>> @@ -198,6 +201,9 @@
> >>>>>>>>>    #define HV_X64_MSR_APIC_ASSIST_PAGE_ADDRESS_MASK	\
> >>>>>>>>>    		(~((1ull << HV_X64_MSR_APIC_ASSIST_PAGE_ADDRESS_SHIFT) - 1))
> >>>>>>>>>
> >>>>>>>>> +#define HV_X64_MSR_TSC_REFERENCE_ENABLE		0x00000001
> >>>>>>>>> +#define HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT	12
> >>>>>>>>> +
> >>>>>>>>>    #define HV_PROCESSOR_POWER_STATE_C0		0
> >>>>>>>>>    #define HV_PROCESSOR_POWER_STATE_C1		1
> >>>>>>>>>    #define HV_PROCESSOR_POWER_STATE_C2		2
> >>>>>>>>> @@ -210,4 +216,11 @@
> >>>>>>>>>    #define HV_STATUS_INVALID_ALIGNMENT		4
> >>>>>>>>>    #define HV_STATUS_INSUFFICIENT_BUFFERS		19
> >>>>>>>>>
> >>>>>>>>> +typedef struct _HV_REFERENCE_TSC_PAGE {
> >>>>>>>>> +	__u32 tsc_sequence;
> >>>>>>>>> +	__u32 res1;
> >>>>>>>>> +	__u64 tsc_scale;
> >>>>>>>>> +	__s64 tsc_offset;
> >>>>>>>>> +} HV_REFERENCE_TSC_PAGE, *PHV_REFERENCE_TSC_PAGE;
> >>>>>>>>> +
> >>>>>>>>>    #endif
> >>>>>>>>> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> >>>>>>>>> index 21ef1ba..5e4e495a 100644
> >>>>>>>>> --- a/arch/x86/kvm/x86.c
> >>>>>>>>> +++ b/arch/x86/kvm/x86.c
> >>>>>>>>> @@ -840,7 +840,7 @@ EXPORT_SYMBOL_GPL(kvm_rdpmc);
> >>>>>>>>>    static u32 msrs_to_save[] = {
> >>>>>>>>>    	MSR_KVM_SYSTEM_TIME, MSR_KVM_WALL_CLOCK,
> >>>>>>>>>    	MSR_KVM_SYSTEM_TIME_NEW, MSR_KVM_WALL_CLOCK_NEW,
> >>>>>>>>> -	HV_X64_MSR_GUEST_OS_ID, HV_X64_MSR_HYPERCALL,
> >>>>>>>>> +	HV_X64_MSR_GUEST_OS_ID, HV_X64_MSR_HYPERCALL, HV_X64_MSR_TIME_REF_COUNT,
> >>>>>>>>>    	HV_X64_MSR_APIC_ASSIST_PAGE, MSR_KVM_ASYNC_PF_EN, MSR_KVM_STEAL_TIME,
> >>>>>>>>>    	MSR_KVM_PV_EOI_EN,
> >>>>>>>>>    	MSR_IA32_SYSENTER_CS, MSR_IA32_SYSENTER_ESP, MSR_IA32_SYSENTER_EIP,
> >>>>>>>>> @@ -1826,6 +1826,8 @@ static bool kvm_hv_msr_partition_wide(u32 msr)
> >>>>>>>>>    	switch (msr) {
> >>>>>>>>>    	case HV_X64_MSR_GUEST_OS_ID:
> >>>>>>>>>    	case HV_X64_MSR_HYPERCALL:
> >>>>>>>>> +	case HV_X64_MSR_REFERENCE_TSC:
> >>>>>>>>> +	case HV_X64_MSR_TIME_REF_COUNT:
> >>>>>>>>>    		r = true;
> >>>>>>>>>    		break;
> >>>>>>>>>    	}
> >>>>>>>>> @@ -1865,6 +1867,29 @@ static int set_msr_hyperv_pw(struct kvm_vcpu *vcpu, u32 msr, u64 data)
> >>>>>>>>>    		if (__copy_to_user((void __user *)addr, instructions, 4))
> >>>>>>>>>    			return 1;
> >>>>>>>>>    		kvm->arch.hv_hypercall = data;
> >>>>>>>>> +		local_irq_disable();
> >>>>>>>>> +		kvm->arch.hv_ref_count = get_kernel_ns() + kvm->arch.kvmclock_offset;
> >>>>>>>>> +		local_irq_enable();
> >>>>>>>>
> >>>>>>>> Where does the docs say that HV_X64_MSR_HYPERCALL is the where the clock
> >>>>>>>> starts counting?
> >>>>>>>>
> >>>>>>>> No need to store kvmclock_offset in hv_ref_count? (moreover
> >>>>>>>> the name is weird, better name would be "hv_ref_start_time".
> >>>>>>>
> >>>>>>> Just add kvmclock_offset when reading the values (otherwise you have a
> >>>>>>> "stale copy" of kvmclock_offset in hv_ref_count).
> >>>>>>>
> >>>>>>
> >>>>>> After some experiments I think we do no need kvm->arch.hv_ref_count at all.
> >>>>>>
> >>>>>> I was debugging some weird clockjump issues and I think the problem is that after live migration
> >>>>>> kvm->arch.hv_ref_count is initialized to 0. Depending on the uptime of the vServer when the
> >>>>>> hypercall was set up this can lead to series jumps.
> >>>>>>
> >>>>>> So I would suggest to completely drop kvm->arch.hv_ref_count.
> >>>>>>
> >>>>>> And use simply this in get_msr_hyperv_pw().
> >>>>>>
> >>>>>>           case HV_X64_MSR_TIME_REF_COUNT: {
> >>>>>>                   data = div_u64(get_kernel_ns() + kvm->arch.kvmclock_offset, 100);
> >>>>>>                   break;
> >>>>>>           }
> >>>>>>
> >>>>>
> >>>>> Agreed. It should work as long as we rely on kvmclock_offset.
> >>>>> Vadim.
> >>>>
> >>>> I think we can rely on kvmclock_offset. Had you had a chance to do further testing
> >>>> during the weekend?
> >>>
> >>> Yes, and still testing.
> >>>
> >>> There is still some inconsistency in the value returned by
> >>> QueryPerformanceCounter during migration.
> >>
> >> With my proposal?
> >>
> >
> > Right.
> > Please see below QTC vs. NTP time stamp
> > before and after migration:
> >
> > C:\timedrift\objchk_win7_x86\i386>timedrift.exe
> > quantum = 15
> > QPC     NTP     Delta
> > 1311    1307    4
> > 2621    2618    3
> > 3932    3931    1
> > .......
> > .......
> > 38018   38014   4
> > Unable to wait for NTP reply from the SNTP server, GetLastError returns
> > 7279088
> > 142210  40113   102097
> > 143458  41357   102101
> > .......
> > .......
> > 156500  54615   101885
> >
> > NTP time changed from  38014 to 40113 ((40113 - 38014) / 100 = 21 mSec)
> > while QPC jumped from  38018 to 142210 ((142210 - 38014) / 100 = 1042
> > mSec)
> 
> How long was the downtime when you migrated the vServer?
I use Win7-32 for testing.
> 
> I have done similar tests and have not seen this...
> Can you show the relevant parts of your patch. Remember I use the REFERENCE_COUTNER not iTSC.
Yes, I also test it for reference counter.

        case HV_X64_MSR_TIME_REF_COUNT: {
//              u64 now_ns;
//              local_irq_disable();
//              now_ns = get_kernel_ns() + kvm->arch.kvmclock_offset;
//              data = div_u64(now_ns - kvm->arch.hv_ref_count, 100);
//              local_irq_enable();
                data = div_u64(get_kernel_ns() +
kvm->arch.kvmclock_offset, 100);
                break;
        }


> 
> Peter
> 


--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Peter Lieven Jan. 8, 2014, 2:54 p.m. UTC | #18
On 08.01.2014 13:12, Vadim Rozenfeld wrote:
> On Wed, 2014-01-08 at 12:48 +0100, Peter Lieven wrote:
>> On 08.01.2014 11:44, Vadim Rozenfeld wrote:
>>> On Wed, 2014-01-08 at 11:15 +0100, Peter Lieven wrote:
>>>> On 08.01.2014 10:40, Vadim Rozenfeld wrote:
>>>>> On Tue, 2014-01-07 at 18:52 +0100, Peter Lieven wrote:
>>>>>> Am 07.01.2014 10:36, schrieb Vadim Rozenfeld:
>>>>>>> On Thu, 2014-01-02 at 17:52 +0100, Peter Lieven wrote:
>>>>>>>> Am 11.12.2013 19:59, schrieb Marcelo Tosatti:
>>>>>>>>> On Wed, Dec 11, 2013 at 04:53:05PM -0200, Marcelo Tosatti wrote:
>>>>>>>>>> On Sun, Dec 08, 2013 at 10:33:38PM +1100, Vadim Rozenfeld wrote:
>>>>>>>>>>> Signed-off: Peter Lieven <pl@dlh.net>
>>>>>>>>>>> Signed-off: Gleb Natapov <gleb@redhat.com>
>>>>>>>>>>> Signed-off: Vadim Rozenfeld <vrozenfe@redhat.com>
>>>>>>>>>>>
>>>>>>>>>>> v1 -> v2
>>>>>>>>>>> 1. mark TSC page dirty as suggested by
>>>>>>>>>>>        Eric Northup <digitaleric@google.com> and Gleb
>>>>>>>>>>> 2. disable local irq when calling get_kernel_ns,
>>>>>>>>>>>        as it was done by Peter Lieven <pl@dlhnet.de>
>>>>>>>>>>> 3. move check for TSC page enable from second patch
>>>>>>>>>>>        to this one.
>>>>>>>>>>>
>>>>>>>>>>> ---
>>>>>>>>>>>     arch/x86/include/asm/kvm_host.h    |  2 ++
>>>>>>>>>>>     arch/x86/include/uapi/asm/hyperv.h | 13 +++++++++++++
>>>>>>>>>>>     arch/x86/kvm/x86.c                 | 39 +++++++++++++++++++++++++++++++++++++-
>>>>>>>>>>>     include/uapi/linux/kvm.h           |  1 +
>>>>>>>>>>>     4 files changed, 54 insertions(+), 1 deletion(-)
>>>>>>>>>>>
>>>>>>>>>>> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
>>>>>>>>>>> index ae5d783..2fd0753 100644
>>>>>>>>>>> --- a/arch/x86/include/asm/kvm_host.h
>>>>>>>>>>> +++ b/arch/x86/include/asm/kvm_host.h
>>>>>>>>>>> @@ -605,6 +605,8 @@ struct kvm_arch {
>>>>>>>>>>>     	/* fields used by HYPER-V emulation */
>>>>>>>>>>>     	u64 hv_guest_os_id;
>>>>>>>>>>>     	u64 hv_hypercall;
>>>>>>>>>>> +	u64 hv_ref_count;
>>>>>>>>>>> +	u64 hv_tsc_page;
>>>>>>>>>>>
>>>>>>>>>>>     	#ifdef CONFIG_KVM_MMU_AUDIT
>>>>>>>>>>>     	int audit_point;
>>>>>>>>>>> diff --git a/arch/x86/include/uapi/asm/hyperv.h b/arch/x86/include/uapi/asm/hyperv.h
>>>>>>>>>>> index b8f1c01..462efe7 100644
>>>>>>>>>>> --- a/arch/x86/include/uapi/asm/hyperv.h
>>>>>>>>>>> +++ b/arch/x86/include/uapi/asm/hyperv.h
>>>>>>>>>>> @@ -28,6 +28,9 @@
>>>>>>>>>>>     /* Partition Reference Counter (HV_X64_MSR_TIME_REF_COUNT) available*/
>>>>>>>>>>>     #define HV_X64_MSR_TIME_REF_COUNT_AVAILABLE	(1 << 1)
>>>>>>>>>>>
>>>>>>>>>>> +/* A partition's reference time stamp counter (TSC) page */
>>>>>>>>>>> +#define HV_X64_MSR_REFERENCE_TSC		0x40000021
>>>>>>>>>>> +
>>>>>>>>>>>     /*
>>>>>>>>>>>      * There is a single feature flag that signifies the presence of the MSR
>>>>>>>>>>>      * that can be used to retrieve both the local APIC Timer frequency as
>>>>>>>>>>> @@ -198,6 +201,9 @@
>>>>>>>>>>>     #define HV_X64_MSR_APIC_ASSIST_PAGE_ADDRESS_MASK	\
>>>>>>>>>>>     		(~((1ull << HV_X64_MSR_APIC_ASSIST_PAGE_ADDRESS_SHIFT) - 1))
>>>>>>>>>>>
>>>>>>>>>>> +#define HV_X64_MSR_TSC_REFERENCE_ENABLE		0x00000001
>>>>>>>>>>> +#define HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT	12
>>>>>>>>>>> +
>>>>>>>>>>>     #define HV_PROCESSOR_POWER_STATE_C0		0
>>>>>>>>>>>     #define HV_PROCESSOR_POWER_STATE_C1		1
>>>>>>>>>>>     #define HV_PROCESSOR_POWER_STATE_C2		2
>>>>>>>>>>> @@ -210,4 +216,11 @@
>>>>>>>>>>>     #define HV_STATUS_INVALID_ALIGNMENT		4
>>>>>>>>>>>     #define HV_STATUS_INSUFFICIENT_BUFFERS		19
>>>>>>>>>>>
>>>>>>>>>>> +typedef struct _HV_REFERENCE_TSC_PAGE {
>>>>>>>>>>> +	__u32 tsc_sequence;
>>>>>>>>>>> +	__u32 res1;
>>>>>>>>>>> +	__u64 tsc_scale;
>>>>>>>>>>> +	__s64 tsc_offset;
>>>>>>>>>>> +} HV_REFERENCE_TSC_PAGE, *PHV_REFERENCE_TSC_PAGE;
>>>>>>>>>>> +
>>>>>>>>>>>     #endif
>>>>>>>>>>> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
>>>>>>>>>>> index 21ef1ba..5e4e495a 100644
>>>>>>>>>>> --- a/arch/x86/kvm/x86.c
>>>>>>>>>>> +++ b/arch/x86/kvm/x86.c
>>>>>>>>>>> @@ -840,7 +840,7 @@ EXPORT_SYMBOL_GPL(kvm_rdpmc);
>>>>>>>>>>>     static u32 msrs_to_save[] = {
>>>>>>>>>>>     	MSR_KVM_SYSTEM_TIME, MSR_KVM_WALL_CLOCK,
>>>>>>>>>>>     	MSR_KVM_SYSTEM_TIME_NEW, MSR_KVM_WALL_CLOCK_NEW,
>>>>>>>>>>> -	HV_X64_MSR_GUEST_OS_ID, HV_X64_MSR_HYPERCALL,
>>>>>>>>>>> +	HV_X64_MSR_GUEST_OS_ID, HV_X64_MSR_HYPERCALL, HV_X64_MSR_TIME_REF_COUNT,
>>>>>>>>>>>     	HV_X64_MSR_APIC_ASSIST_PAGE, MSR_KVM_ASYNC_PF_EN, MSR_KVM_STEAL_TIME,
>>>>>>>>>>>     	MSR_KVM_PV_EOI_EN,
>>>>>>>>>>>     	MSR_IA32_SYSENTER_CS, MSR_IA32_SYSENTER_ESP, MSR_IA32_SYSENTER_EIP,
>>>>>>>>>>> @@ -1826,6 +1826,8 @@ static bool kvm_hv_msr_partition_wide(u32 msr)
>>>>>>>>>>>     	switch (msr) {
>>>>>>>>>>>     	case HV_X64_MSR_GUEST_OS_ID:
>>>>>>>>>>>     	case HV_X64_MSR_HYPERCALL:
>>>>>>>>>>> +	case HV_X64_MSR_REFERENCE_TSC:
>>>>>>>>>>> +	case HV_X64_MSR_TIME_REF_COUNT:
>>>>>>>>>>>     		r = true;
>>>>>>>>>>>     		break;
>>>>>>>>>>>     	}
>>>>>>>>>>> @@ -1865,6 +1867,29 @@ static int set_msr_hyperv_pw(struct kvm_vcpu *vcpu, u32 msr, u64 data)
>>>>>>>>>>>     		if (__copy_to_user((void __user *)addr, instructions, 4))
>>>>>>>>>>>     			return 1;
>>>>>>>>>>>     		kvm->arch.hv_hypercall = data;
>>>>>>>>>>> +		local_irq_disable();
>>>>>>>>>>> +		kvm->arch.hv_ref_count = get_kernel_ns() + kvm->arch.kvmclock_offset;
>>>>>>>>>>> +		local_irq_enable();
>>>>>>>>>>
>>>>>>>>>> Where does the docs say that HV_X64_MSR_HYPERCALL is the where the clock
>>>>>>>>>> starts counting?
>>>>>>>>>>
>>>>>>>>>> No need to store kvmclock_offset in hv_ref_count? (moreover
>>>>>>>>>> the name is weird, better name would be "hv_ref_start_time".
>>>>>>>>>
>>>>>>>>> Just add kvmclock_offset when reading the values (otherwise you have a
>>>>>>>>> "stale copy" of kvmclock_offset in hv_ref_count).
>>>>>>>>>
>>>>>>>>
>>>>>>>> After some experiments I think we do no need kvm->arch.hv_ref_count at all.
>>>>>>>>
>>>>>>>> I was debugging some weird clockjump issues and I think the problem is that after live migration
>>>>>>>> kvm->arch.hv_ref_count is initialized to 0. Depending on the uptime of the vServer when the
>>>>>>>> hypercall was set up this can lead to series jumps.
>>>>>>>>
>>>>>>>> So I would suggest to completely drop kvm->arch.hv_ref_count.
>>>>>>>>
>>>>>>>> And use simply this in get_msr_hyperv_pw().
>>>>>>>>
>>>>>>>>            case HV_X64_MSR_TIME_REF_COUNT: {
>>>>>>>>                    data = div_u64(get_kernel_ns() + kvm->arch.kvmclock_offset, 100);
>>>>>>>>                    break;
>>>>>>>>            }
>>>>>>>>
>>>>>>>
>>>>>>> Agreed. It should work as long as we rely on kvmclock_offset.
>>>>>>> Vadim.
>>>>>>
>>>>>> I think we can rely on kvmclock_offset. Had you had a chance to do further testing
>>>>>> during the weekend?
>>>>>
>>>>> Yes, and still testing.
>>>>>
>>>>> There is still some inconsistency in the value returned by
>>>>> QueryPerformanceCounter during migration.
>>>>
>>>> With my proposal?
>>>>
>>>
>>> Right.
>>> Please see below QTC vs. NTP time stamp
>>> before and after migration:
>>>
>>> C:\timedrift\objchk_win7_x86\i386>timedrift.exe
>>> quantum = 15
>>> QPC     NTP     Delta
>>> 1311    1307    4
>>> 2621    2618    3
>>> 3932    3931    1
>>> .......
>>> .......
>>> 38018   38014   4
>>> Unable to wait for NTP reply from the SNTP server, GetLastError returns
>>> 7279088
>>> 142210  40113   102097
>>> 143458  41357   102101
>>> .......
>>> .......
>>> 156500  54615   101885
>>>
>>> NTP time changed from  38014 to 40113 ((40113 - 38014) / 100 = 21 mSec)
>>> while QPC jumped from  38018 to 142210 ((142210 - 38014) / 100 = 1042
>>> mSec)
>>
>> How long was the downtime when you migrated the vServer?
> I use Win7-32 for testing.
>>
>> I have done similar tests and have not seen this...
>> Can you show the relevant parts of your patch. Remember I use the REFERENCE_COUTNER not iTSC.
> Yes, I also test it for reference counter.

Can you use these parameters for testing:

-cpu qemu64,hv_relaxed,hv_vapic,hv_spinlocks=4096,hv_refcnt
-rtc base=localtime -vga std  -usb -usbdevice tablet -no-hpet

If you look in the HMP at "info migrate" after the migration what does the downtime info say?

Peter

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Vadim Rozenfeld Jan. 8, 2014, 8:08 p.m. UTC | #19
On Wed, 2014-01-08 at 15:54 +0100, Peter Lieven wrote:
> On 08.01.2014 13:12, Vadim Rozenfeld wrote:
> > On Wed, 2014-01-08 at 12:48 +0100, Peter Lieven wrote:
> >> On 08.01.2014 11:44, Vadim Rozenfeld wrote:
> >>> On Wed, 2014-01-08 at 11:15 +0100, Peter Lieven wrote:
> >>>> On 08.01.2014 10:40, Vadim Rozenfeld wrote:
> >>>>> On Tue, 2014-01-07 at 18:52 +0100, Peter Lieven wrote:
> >>>>>> Am 07.01.2014 10:36, schrieb Vadim Rozenfeld:
> >>>>>>> On Thu, 2014-01-02 at 17:52 +0100, Peter Lieven wrote:
> >>>>>>>> Am 11.12.2013 19:59, schrieb Marcelo Tosatti:
> >>>>>>>>> On Wed, Dec 11, 2013 at 04:53:05PM -0200, Marcelo Tosatti wrote:
> >>>>>>>>>> On Sun, Dec 08, 2013 at 10:33:38PM +1100, Vadim Rozenfeld wrote:
> >>>>>>>>>>> Signed-off: Peter Lieven <pl@dlh.net>
> >>>>>>>>>>> Signed-off: Gleb Natapov <gleb@redhat.com>
> >>>>>>>>>>> Signed-off: Vadim Rozenfeld <vrozenfe@redhat.com>
> >>>>>>>>>>>
> >>>>>>>>>>> v1 -> v2
> >>>>>>>>>>> 1. mark TSC page dirty as suggested by
> >>>>>>>>>>>        Eric Northup <digitaleric@google.com> and Gleb
> >>>>>>>>>>> 2. disable local irq when calling get_kernel_ns,
> >>>>>>>>>>>        as it was done by Peter Lieven <pl@dlhnet.de>
> >>>>>>>>>>> 3. move check for TSC page enable from second patch
> >>>>>>>>>>>        to this one.
> >>>>>>>>>>>
> >>>>>>>>>>> ---
> >>>>>>>>>>>     arch/x86/include/asm/kvm_host.h    |  2 ++
> >>>>>>>>>>>     arch/x86/include/uapi/asm/hyperv.h | 13 +++++++++++++
> >>>>>>>>>>>     arch/x86/kvm/x86.c                 | 39 +++++++++++++++++++++++++++++++++++++-
> >>>>>>>>>>>     include/uapi/linux/kvm.h           |  1 +
> >>>>>>>>>>>     4 files changed, 54 insertions(+), 1 deletion(-)
> >>>>>>>>>>>
> >>>>>>>>>>> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> >>>>>>>>>>> index ae5d783..2fd0753 100644
> >>>>>>>>>>> --- a/arch/x86/include/asm/kvm_host.h
> >>>>>>>>>>> +++ b/arch/x86/include/asm/kvm_host.h
> >>>>>>>>>>> @@ -605,6 +605,8 @@ struct kvm_arch {
> >>>>>>>>>>>     	/* fields used by HYPER-V emulation */
> >>>>>>>>>>>     	u64 hv_guest_os_id;
> >>>>>>>>>>>     	u64 hv_hypercall;
> >>>>>>>>>>> +	u64 hv_ref_count;
> >>>>>>>>>>> +	u64 hv_tsc_page;
> >>>>>>>>>>>
> >>>>>>>>>>>     	#ifdef CONFIG_KVM_MMU_AUDIT
> >>>>>>>>>>>     	int audit_point;
> >>>>>>>>>>> diff --git a/arch/x86/include/uapi/asm/hyperv.h b/arch/x86/include/uapi/asm/hyperv.h
> >>>>>>>>>>> index b8f1c01..462efe7 100644
> >>>>>>>>>>> --- a/arch/x86/include/uapi/asm/hyperv.h
> >>>>>>>>>>> +++ b/arch/x86/include/uapi/asm/hyperv.h
> >>>>>>>>>>> @@ -28,6 +28,9 @@
> >>>>>>>>>>>     /* Partition Reference Counter (HV_X64_MSR_TIME_REF_COUNT) available*/
> >>>>>>>>>>>     #define HV_X64_MSR_TIME_REF_COUNT_AVAILABLE	(1 << 1)
> >>>>>>>>>>>
> >>>>>>>>>>> +/* A partition's reference time stamp counter (TSC) page */
> >>>>>>>>>>> +#define HV_X64_MSR_REFERENCE_TSC		0x40000021
> >>>>>>>>>>> +
> >>>>>>>>>>>     /*
> >>>>>>>>>>>      * There is a single feature flag that signifies the presence of the MSR
> >>>>>>>>>>>      * that can be used to retrieve both the local APIC Timer frequency as
> >>>>>>>>>>> @@ -198,6 +201,9 @@
> >>>>>>>>>>>     #define HV_X64_MSR_APIC_ASSIST_PAGE_ADDRESS_MASK	\
> >>>>>>>>>>>     		(~((1ull << HV_X64_MSR_APIC_ASSIST_PAGE_ADDRESS_SHIFT) - 1))
> >>>>>>>>>>>
> >>>>>>>>>>> +#define HV_X64_MSR_TSC_REFERENCE_ENABLE		0x00000001
> >>>>>>>>>>> +#define HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT	12
> >>>>>>>>>>> +
> >>>>>>>>>>>     #define HV_PROCESSOR_POWER_STATE_C0		0
> >>>>>>>>>>>     #define HV_PROCESSOR_POWER_STATE_C1		1
> >>>>>>>>>>>     #define HV_PROCESSOR_POWER_STATE_C2		2
> >>>>>>>>>>> @@ -210,4 +216,11 @@
> >>>>>>>>>>>     #define HV_STATUS_INVALID_ALIGNMENT		4
> >>>>>>>>>>>     #define HV_STATUS_INSUFFICIENT_BUFFERS		19
> >>>>>>>>>>>
> >>>>>>>>>>> +typedef struct _HV_REFERENCE_TSC_PAGE {
> >>>>>>>>>>> +	__u32 tsc_sequence;
> >>>>>>>>>>> +	__u32 res1;
> >>>>>>>>>>> +	__u64 tsc_scale;
> >>>>>>>>>>> +	__s64 tsc_offset;
> >>>>>>>>>>> +} HV_REFERENCE_TSC_PAGE, *PHV_REFERENCE_TSC_PAGE;
> >>>>>>>>>>> +
> >>>>>>>>>>>     #endif
> >>>>>>>>>>> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> >>>>>>>>>>> index 21ef1ba..5e4e495a 100644
> >>>>>>>>>>> --- a/arch/x86/kvm/x86.c
> >>>>>>>>>>> +++ b/arch/x86/kvm/x86.c
> >>>>>>>>>>> @@ -840,7 +840,7 @@ EXPORT_SYMBOL_GPL(kvm_rdpmc);
> >>>>>>>>>>>     static u32 msrs_to_save[] = {
> >>>>>>>>>>>     	MSR_KVM_SYSTEM_TIME, MSR_KVM_WALL_CLOCK,
> >>>>>>>>>>>     	MSR_KVM_SYSTEM_TIME_NEW, MSR_KVM_WALL_CLOCK_NEW,
> >>>>>>>>>>> -	HV_X64_MSR_GUEST_OS_ID, HV_X64_MSR_HYPERCALL,
> >>>>>>>>>>> +	HV_X64_MSR_GUEST_OS_ID, HV_X64_MSR_HYPERCALL, HV_X64_MSR_TIME_REF_COUNT,
> >>>>>>>>>>>     	HV_X64_MSR_APIC_ASSIST_PAGE, MSR_KVM_ASYNC_PF_EN, MSR_KVM_STEAL_TIME,
> >>>>>>>>>>>     	MSR_KVM_PV_EOI_EN,
> >>>>>>>>>>>     	MSR_IA32_SYSENTER_CS, MSR_IA32_SYSENTER_ESP, MSR_IA32_SYSENTER_EIP,
> >>>>>>>>>>> @@ -1826,6 +1826,8 @@ static bool kvm_hv_msr_partition_wide(u32 msr)
> >>>>>>>>>>>     	switch (msr) {
> >>>>>>>>>>>     	case HV_X64_MSR_GUEST_OS_ID:
> >>>>>>>>>>>     	case HV_X64_MSR_HYPERCALL:
> >>>>>>>>>>> +	case HV_X64_MSR_REFERENCE_TSC:
> >>>>>>>>>>> +	case HV_X64_MSR_TIME_REF_COUNT:
> >>>>>>>>>>>     		r = true;
> >>>>>>>>>>>     		break;
> >>>>>>>>>>>     	}
> >>>>>>>>>>> @@ -1865,6 +1867,29 @@ static int set_msr_hyperv_pw(struct kvm_vcpu *vcpu, u32 msr, u64 data)
> >>>>>>>>>>>     		if (__copy_to_user((void __user *)addr, instructions, 4))
> >>>>>>>>>>>     			return 1;
> >>>>>>>>>>>     		kvm->arch.hv_hypercall = data;
> >>>>>>>>>>> +		local_irq_disable();
> >>>>>>>>>>> +		kvm->arch.hv_ref_count = get_kernel_ns() + kvm->arch.kvmclock_offset;
> >>>>>>>>>>> +		local_irq_enable();
> >>>>>>>>>>
> >>>>>>>>>> Where does the docs say that HV_X64_MSR_HYPERCALL is the where the clock
> >>>>>>>>>> starts counting?
> >>>>>>>>>>
> >>>>>>>>>> No need to store kvmclock_offset in hv_ref_count? (moreover
> >>>>>>>>>> the name is weird, better name would be "hv_ref_start_time".
> >>>>>>>>>
> >>>>>>>>> Just add kvmclock_offset when reading the values (otherwise you have a
> >>>>>>>>> "stale copy" of kvmclock_offset in hv_ref_count).
> >>>>>>>>>
> >>>>>>>>
> >>>>>>>> After some experiments I think we do no need kvm->arch.hv_ref_count at all.
> >>>>>>>>
> >>>>>>>> I was debugging some weird clockjump issues and I think the problem is that after live migration
> >>>>>>>> kvm->arch.hv_ref_count is initialized to 0. Depending on the uptime of the vServer when the
> >>>>>>>> hypercall was set up this can lead to series jumps.
> >>>>>>>>
> >>>>>>>> So I would suggest to completely drop kvm->arch.hv_ref_count.
> >>>>>>>>
> >>>>>>>> And use simply this in get_msr_hyperv_pw().
> >>>>>>>>
> >>>>>>>>            case HV_X64_MSR_TIME_REF_COUNT: {
> >>>>>>>>                    data = div_u64(get_kernel_ns() + kvm->arch.kvmclock_offset, 100);
> >>>>>>>>                    break;
> >>>>>>>>            }
> >>>>>>>>
> >>>>>>>
> >>>>>>> Agreed. It should work as long as we rely on kvmclock_offset.
> >>>>>>> Vadim.
> >>>>>>
> >>>>>> I think we can rely on kvmclock_offset. Had you had a chance to do further testing
> >>>>>> during the weekend?
> >>>>>
> >>>>> Yes, and still testing.
> >>>>>
> >>>>> There is still some inconsistency in the value returned by
> >>>>> QueryPerformanceCounter during migration.
> >>>>
> >>>> With my proposal?
> >>>>
> >>>
> >>> Right.
> >>> Please see below QTC vs. NTP time stamp
> >>> before and after migration:
> >>>
> >>> C:\timedrift\objchk_win7_x86\i386>timedrift.exe
> >>> quantum = 15
> >>> QPC     NTP     Delta
> >>> 1311    1307    4
> >>> 2621    2618    3
> >>> 3932    3931    1
> >>> .......
> >>> .......
> >>> 38018   38014   4
> >>> Unable to wait for NTP reply from the SNTP server, GetLastError returns
> >>> 7279088
> >>> 142210  40113   102097
> >>> 143458  41357   102101
> >>> .......
> >>> .......
> >>> 156500  54615   101885
> >>>
> >>> NTP time changed from  38014 to 40113 ((40113 - 38014) / 100 = 21 mSec)
> >>> while QPC jumped from  38018 to 142210 ((142210 - 38014) / 100 = 1042
> >>> mSec)
> >>
> >> How long was the downtime when you migrated the vServer?
> > I use Win7-32 for testing.
> >>
> >> I have done similar tests and have not seen this...
> >> Can you show the relevant parts of your patch. Remember I use the REFERENCE_COUTNER not iTSC.
> > Yes, I also test it for reference counter.
> 
> Can you use these parameters for testing:
> 
> -cpu qemu64,hv_relaxed,hv_vapic,hv_spinlocks=4096,hv_refcnt
> -rtc base=localtime -vga std  -usb -usbdevice tablet -no-hpet

My settings are:
-cpu qemu64,
+x2apic,family=0xf,hv_vapic,hv_spinlocks=0xfff,hv_relaxed,hv_tsc -m 1G
-smp 4

> 
> If you look in the HMP at "info migrate" after the migration what does the downtime info say?

downtime: 4 milliseconds

> 
> Peter
> 
> --
> To unsubscribe from this list: send the line "unsubscribe kvm" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html


--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Peter Lieven Jan. 8, 2014, 10:20 p.m. UTC | #20
Am 08.01.2014 21:08, schrieb Vadim Rozenfeld:
> On Wed, 2014-01-08 at 15:54 +0100, Peter Lieven wrote:
>> On 08.01.2014 13:12, Vadim Rozenfeld wrote:
>>> On Wed, 2014-01-08 at 12:48 +0100, Peter Lieven wrote:
>>>> On 08.01.2014 11:44, Vadim Rozenfeld wrote:
>>>>> On Wed, 2014-01-08 at 11:15 +0100, Peter Lieven wrote:
>>>>>> On 08.01.2014 10:40, Vadim Rozenfeld wrote:
>>>>>>> On Tue, 2014-01-07 at 18:52 +0100, Peter Lieven wrote:
>>>>>>>> Am 07.01.2014 10:36, schrieb Vadim Rozenfeld:
>>>>>>>>> On Thu, 2014-01-02 at 17:52 +0100, Peter Lieven wrote:
>>>>>>>>>> Am 11.12.2013 19:59, schrieb Marcelo Tosatti:
>>>>>>>>>>> On Wed, Dec 11, 2013 at 04:53:05PM -0200, Marcelo Tosatti wrote:
>>>>>>>>>>>> On Sun, Dec 08, 2013 at 10:33:38PM +1100, Vadim Rozenfeld wrote:
>>>>>>>>>>>>> Signed-off: Peter Lieven <pl@dlh.net>
>>>>>>>>>>>>> Signed-off: Gleb Natapov <gleb@redhat.com>
>>>>>>>>>>>>> Signed-off: Vadim Rozenfeld <vrozenfe@redhat.com>
>>>>>>>>>>>>>
>>>>>>>>>>>>> v1 -> v2
>>>>>>>>>>>>> 1. mark TSC page dirty as suggested by
>>>>>>>>>>>>>        Eric Northup <digitaleric@google.com> and Gleb
>>>>>>>>>>>>> 2. disable local irq when calling get_kernel_ns,
>>>>>>>>>>>>>        as it was done by Peter Lieven <pl@dlhnet.de>
>>>>>>>>>>>>> 3. move check for TSC page enable from second patch
>>>>>>>>>>>>>        to this one.
>>>>>>>>>>>>>
>>>>>>>>>>>>> ---
>>>>>>>>>>>>>     arch/x86/include/asm/kvm_host.h    |  2 ++
>>>>>>>>>>>>>     arch/x86/include/uapi/asm/hyperv.h | 13 +++++++++++++
>>>>>>>>>>>>>     arch/x86/kvm/x86.c                 | 39 +++++++++++++++++++++++++++++++++++++-
>>>>>>>>>>>>>     include/uapi/linux/kvm.h           |  1 +
>>>>>>>>>>>>>     4 files changed, 54 insertions(+), 1 deletion(-)
>>>>>>>>>>>>>
>>>>>>>>>>>>> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
>>>>>>>>>>>>> index ae5d783..2fd0753 100644
>>>>>>>>>>>>> --- a/arch/x86/include/asm/kvm_host.h
>>>>>>>>>>>>> +++ b/arch/x86/include/asm/kvm_host.h
>>>>>>>>>>>>> @@ -605,6 +605,8 @@ struct kvm_arch {
>>>>>>>>>>>>>     	/* fields used by HYPER-V emulation */
>>>>>>>>>>>>>     	u64 hv_guest_os_id;
>>>>>>>>>>>>>     	u64 hv_hypercall;
>>>>>>>>>>>>> +	u64 hv_ref_count;
>>>>>>>>>>>>> +	u64 hv_tsc_page;
>>>>>>>>>>>>>
>>>>>>>>>>>>>     	#ifdef CONFIG_KVM_MMU_AUDIT
>>>>>>>>>>>>>     	int audit_point;
>>>>>>>>>>>>> diff --git a/arch/x86/include/uapi/asm/hyperv.h b/arch/x86/include/uapi/asm/hyperv.h
>>>>>>>>>>>>> index b8f1c01..462efe7 100644
>>>>>>>>>>>>> --- a/arch/x86/include/uapi/asm/hyperv.h
>>>>>>>>>>>>> +++ b/arch/x86/include/uapi/asm/hyperv.h
>>>>>>>>>>>>> @@ -28,6 +28,9 @@
>>>>>>>>>>>>>     /* Partition Reference Counter (HV_X64_MSR_TIME_REF_COUNT) available*/
>>>>>>>>>>>>>     #define HV_X64_MSR_TIME_REF_COUNT_AVAILABLE	(1 << 1)
>>>>>>>>>>>>>
>>>>>>>>>>>>> +/* A partition's reference time stamp counter (TSC) page */
>>>>>>>>>>>>> +#define HV_X64_MSR_REFERENCE_TSC		0x40000021
>>>>>>>>>>>>> +
>>>>>>>>>>>>>     /*
>>>>>>>>>>>>>      * There is a single feature flag that signifies the presence of the MSR
>>>>>>>>>>>>>      * that can be used to retrieve both the local APIC Timer frequency as
>>>>>>>>>>>>> @@ -198,6 +201,9 @@
>>>>>>>>>>>>>     #define HV_X64_MSR_APIC_ASSIST_PAGE_ADDRESS_MASK	\
>>>>>>>>>>>>>     		(~((1ull << HV_X64_MSR_APIC_ASSIST_PAGE_ADDRESS_SHIFT) - 1))
>>>>>>>>>>>>>
>>>>>>>>>>>>> +#define HV_X64_MSR_TSC_REFERENCE_ENABLE		0x00000001
>>>>>>>>>>>>> +#define HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT	12
>>>>>>>>>>>>> +
>>>>>>>>>>>>>     #define HV_PROCESSOR_POWER_STATE_C0		0
>>>>>>>>>>>>>     #define HV_PROCESSOR_POWER_STATE_C1		1
>>>>>>>>>>>>>     #define HV_PROCESSOR_POWER_STATE_C2		2
>>>>>>>>>>>>> @@ -210,4 +216,11 @@
>>>>>>>>>>>>>     #define HV_STATUS_INVALID_ALIGNMENT		4
>>>>>>>>>>>>>     #define HV_STATUS_INSUFFICIENT_BUFFERS		19
>>>>>>>>>>>>>
>>>>>>>>>>>>> +typedef struct _HV_REFERENCE_TSC_PAGE {
>>>>>>>>>>>>> +	__u32 tsc_sequence;
>>>>>>>>>>>>> +	__u32 res1;
>>>>>>>>>>>>> +	__u64 tsc_scale;
>>>>>>>>>>>>> +	__s64 tsc_offset;
>>>>>>>>>>>>> +} HV_REFERENCE_TSC_PAGE, *PHV_REFERENCE_TSC_PAGE;
>>>>>>>>>>>>> +
>>>>>>>>>>>>>     #endif
>>>>>>>>>>>>> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
>>>>>>>>>>>>> index 21ef1ba..5e4e495a 100644
>>>>>>>>>>>>> --- a/arch/x86/kvm/x86.c
>>>>>>>>>>>>> +++ b/arch/x86/kvm/x86.c
>>>>>>>>>>>>> @@ -840,7 +840,7 @@ EXPORT_SYMBOL_GPL(kvm_rdpmc);
>>>>>>>>>>>>>     static u32 msrs_to_save[] = {
>>>>>>>>>>>>>     	MSR_KVM_SYSTEM_TIME, MSR_KVM_WALL_CLOCK,
>>>>>>>>>>>>>     	MSR_KVM_SYSTEM_TIME_NEW, MSR_KVM_WALL_CLOCK_NEW,
>>>>>>>>>>>>> -	HV_X64_MSR_GUEST_OS_ID, HV_X64_MSR_HYPERCALL,
>>>>>>>>>>>>> +	HV_X64_MSR_GUEST_OS_ID, HV_X64_MSR_HYPERCALL, HV_X64_MSR_TIME_REF_COUNT,
>>>>>>>>>>>>>     	HV_X64_MSR_APIC_ASSIST_PAGE, MSR_KVM_ASYNC_PF_EN, MSR_KVM_STEAL_TIME,
>>>>>>>>>>>>>     	MSR_KVM_PV_EOI_EN,
>>>>>>>>>>>>>     	MSR_IA32_SYSENTER_CS, MSR_IA32_SYSENTER_ESP, MSR_IA32_SYSENTER_EIP,
>>>>>>>>>>>>> @@ -1826,6 +1826,8 @@ static bool kvm_hv_msr_partition_wide(u32 msr)
>>>>>>>>>>>>>     	switch (msr) {
>>>>>>>>>>>>>     	case HV_X64_MSR_GUEST_OS_ID:
>>>>>>>>>>>>>     	case HV_X64_MSR_HYPERCALL:
>>>>>>>>>>>>> +	case HV_X64_MSR_REFERENCE_TSC:
>>>>>>>>>>>>> +	case HV_X64_MSR_TIME_REF_COUNT:
>>>>>>>>>>>>>     		r = true;
>>>>>>>>>>>>>     		break;
>>>>>>>>>>>>>     	}
>>>>>>>>>>>>> @@ -1865,6 +1867,29 @@ static int set_msr_hyperv_pw(struct kvm_vcpu *vcpu, u32 msr, u64 data)
>>>>>>>>>>>>>     		if (__copy_to_user((void __user *)addr, instructions, 4))
>>>>>>>>>>>>>     			return 1;
>>>>>>>>>>>>>     		kvm->arch.hv_hypercall = data;
>>>>>>>>>>>>> +		local_irq_disable();
>>>>>>>>>>>>> +		kvm->arch.hv_ref_count = get_kernel_ns() + kvm->arch.kvmclock_offset;
>>>>>>>>>>>>> +		local_irq_enable();
>>>>>>>>>>>>
>>>>>>>>>>>> Where does the docs say that HV_X64_MSR_HYPERCALL is the where the clock
>>>>>>>>>>>> starts counting?
>>>>>>>>>>>>
>>>>>>>>>>>> No need to store kvmclock_offset in hv_ref_count? (moreover
>>>>>>>>>>>> the name is weird, better name would be "hv_ref_start_time".
>>>>>>>>>>>
>>>>>>>>>>> Just add kvmclock_offset when reading the values (otherwise you have a
>>>>>>>>>>> "stale copy" of kvmclock_offset in hv_ref_count).
>>>>>>>>>>>
>>>>>>>>>>
>>>>>>>>>> After some experiments I think we do no need kvm->arch.hv_ref_count at all.
>>>>>>>>>>
>>>>>>>>>> I was debugging some weird clockjump issues and I think the problem is that after live migration
>>>>>>>>>> kvm->arch.hv_ref_count is initialized to 0. Depending on the uptime of the vServer when the
>>>>>>>>>> hypercall was set up this can lead to series jumps.
>>>>>>>>>>
>>>>>>>>>> So I would suggest to completely drop kvm->arch.hv_ref_count.
>>>>>>>>>>
>>>>>>>>>> And use simply this in get_msr_hyperv_pw().
>>>>>>>>>>
>>>>>>>>>>            case HV_X64_MSR_TIME_REF_COUNT: {
>>>>>>>>>>                    data = div_u64(get_kernel_ns() + kvm->arch.kvmclock_offset, 100);
>>>>>>>>>>                    break;
>>>>>>>>>>            }
>>>>>>>>>>
>>>>>>>>>
>>>>>>>>> Agreed. It should work as long as we rely on kvmclock_offset.
>>>>>>>>> Vadim.
>>>>>>>>
>>>>>>>> I think we can rely on kvmclock_offset. Had you had a chance to do further testing
>>>>>>>> during the weekend?
>>>>>>>
>>>>>>> Yes, and still testing.
>>>>>>>
>>>>>>> There is still some inconsistency in the value returned by
>>>>>>> QueryPerformanceCounter during migration.
>>>>>>
>>>>>> With my proposal?
>>>>>>
>>>>>
>>>>> Right.
>>>>> Please see below QTC vs. NTP time stamp
>>>>> before and after migration:
>>>>>
>>>>> C:\timedrift\objchk_win7_x86\i386>timedrift.exe
>>>>> quantum = 15
>>>>> QPC     NTP     Delta
>>>>> 1311    1307    4
>>>>> 2621    2618    3
>>>>> 3932    3931    1
>>>>> .......
>>>>> .......
>>>>> 38018   38014   4
>>>>> Unable to wait for NTP reply from the SNTP server, GetLastError returns
>>>>> 7279088
>>>>> 142210  40113   102097
>>>>> 143458  41357   102101
>>>>> .......
>>>>> .......
>>>>> 156500  54615   101885
>>>>>
>>>>> NTP time changed from  38014 to 40113 ((40113 - 38014) / 100 = 21 mSec)
>>>>> while QPC jumped from  38018 to 142210 ((142210 - 38014) / 100 = 1042
>>>>> mSec)
>>>>
>>>> How long was the downtime when you migrated the vServer?
>>> I use Win7-32 for testing.
>>>>
>>>> I have done similar tests and have not seen this...
>>>> Can you show the relevant parts of your patch. Remember I use the REFERENCE_COUTNER not iTSC.
>>> Yes, I also test it for reference counter.
>>
>> Can you use these parameters for testing:
>>
>> -cpu qemu64,hv_relaxed,hv_vapic,hv_spinlocks=4096,hv_refcnt
>> -rtc base=localtime -vga std  -usb -usbdevice tablet -no-hpet
> 
> My settings are:
> -cpu qemu64,
> +x2apic,family=0xf,hv_vapic,hv_spinlocks=0xfff,hv_relaxed,hv_tsc -m 1G
> -smp 4

do you also see the jump if you leave the hv_tsc out and just use hv_refcnt?

Peter
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Vadim Rozenfeld Jan. 9, 2014, 11:10 a.m. UTC | #21
On Wed, 2014-01-08 at 23:20 +0100, Peter Lieven wrote:
> Am 08.01.2014 21:08, schrieb Vadim Rozenfeld:
> > On Wed, 2014-01-08 at 15:54 +0100, Peter Lieven wrote:
> >> On 08.01.2014 13:12, Vadim Rozenfeld wrote:
> >>> On Wed, 2014-01-08 at 12:48 +0100, Peter Lieven wrote:
> >>>> On 08.01.2014 11:44, Vadim Rozenfeld wrote:
> >>>>> On Wed, 2014-01-08 at 11:15 +0100, Peter Lieven wrote:
> >>>>>> On 08.01.2014 10:40, Vadim Rozenfeld wrote:
> >>>>>>> On Tue, 2014-01-07 at 18:52 +0100, Peter Lieven wrote:
> >>>>>>>> Am 07.01.2014 10:36, schrieb Vadim Rozenfeld:
> >>>>>>>>> On Thu, 2014-01-02 at 17:52 +0100, Peter Lieven wrote:
> >>>>>>>>>> Am 11.12.2013 19:59, schrieb Marcelo Tosatti:
> >>>>>>>>>>> On Wed, Dec 11, 2013 at 04:53:05PM -0200, Marcelo Tosatti wrote:
> >>>>>>>>>>>> On Sun, Dec 08, 2013 at 10:33:38PM +1100, Vadim Rozenfeld wrote:
> >>>>>>>>>>>>> Signed-off: Peter Lieven <pl@dlh.net>
> >>>>>>>>>>>>> Signed-off: Gleb Natapov <gleb@redhat.com>
> >>>>>>>>>>>>> Signed-off: Vadim Rozenfeld <vrozenfe@redhat.com>
> >>>>>>>>>>>>>
> >>>>>>>>>>>>> v1 -> v2
> >>>>>>>>>>>>> 1. mark TSC page dirty as suggested by
> >>>>>>>>>>>>>        Eric Northup <digitaleric@google.com> and Gleb
> >>>>>>>>>>>>> 2. disable local irq when calling get_kernel_ns,
> >>>>>>>>>>>>>        as it was done by Peter Lieven <pl@dlhnet.de>
> >>>>>>>>>>>>> 3. move check for TSC page enable from second patch
> >>>>>>>>>>>>>        to this one.
> >>>>>>>>>>>>>
> >>>>>>>>>>>>> ---
> >>>>>>>>>>>>>     arch/x86/include/asm/kvm_host.h    |  2 ++
> >>>>>>>>>>>>>     arch/x86/include/uapi/asm/hyperv.h | 13 +++++++++++++
> >>>>>>>>>>>>>     arch/x86/kvm/x86.c                 | 39 +++++++++++++++++++++++++++++++++++++-
> >>>>>>>>>>>>>     include/uapi/linux/kvm.h           |  1 +
> >>>>>>>>>>>>>     4 files changed, 54 insertions(+), 1 deletion(-)
> >>>>>>>>>>>>>
> >>>>>>>>>>>>> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> >>>>>>>>>>>>> index ae5d783..2fd0753 100644
> >>>>>>>>>>>>> --- a/arch/x86/include/asm/kvm_host.h
> >>>>>>>>>>>>> +++ b/arch/x86/include/asm/kvm_host.h
> >>>>>>>>>>>>> @@ -605,6 +605,8 @@ struct kvm_arch {
> >>>>>>>>>>>>>     	/* fields used by HYPER-V emulation */
> >>>>>>>>>>>>>     	u64 hv_guest_os_id;
> >>>>>>>>>>>>>     	u64 hv_hypercall;
> >>>>>>>>>>>>> +	u64 hv_ref_count;
> >>>>>>>>>>>>> +	u64 hv_tsc_page;
> >>>>>>>>>>>>>
> >>>>>>>>>>>>>     	#ifdef CONFIG_KVM_MMU_AUDIT
> >>>>>>>>>>>>>     	int audit_point;
> >>>>>>>>>>>>> diff --git a/arch/x86/include/uapi/asm/hyperv.h b/arch/x86/include/uapi/asm/hyperv.h
> >>>>>>>>>>>>> index b8f1c01..462efe7 100644
> >>>>>>>>>>>>> --- a/arch/x86/include/uapi/asm/hyperv.h
> >>>>>>>>>>>>> +++ b/arch/x86/include/uapi/asm/hyperv.h
> >>>>>>>>>>>>> @@ -28,6 +28,9 @@
> >>>>>>>>>>>>>     /* Partition Reference Counter (HV_X64_MSR_TIME_REF_COUNT) available*/
> >>>>>>>>>>>>>     #define HV_X64_MSR_TIME_REF_COUNT_AVAILABLE	(1 << 1)
> >>>>>>>>>>>>>
> >>>>>>>>>>>>> +/* A partition's reference time stamp counter (TSC) page */
> >>>>>>>>>>>>> +#define HV_X64_MSR_REFERENCE_TSC		0x40000021
> >>>>>>>>>>>>> +
> >>>>>>>>>>>>>     /*
> >>>>>>>>>>>>>      * There is a single feature flag that signifies the presence of the MSR
> >>>>>>>>>>>>>      * that can be used to retrieve both the local APIC Timer frequency as
> >>>>>>>>>>>>> @@ -198,6 +201,9 @@
> >>>>>>>>>>>>>     #define HV_X64_MSR_APIC_ASSIST_PAGE_ADDRESS_MASK	\
> >>>>>>>>>>>>>     		(~((1ull << HV_X64_MSR_APIC_ASSIST_PAGE_ADDRESS_SHIFT) - 1))
> >>>>>>>>>>>>>
> >>>>>>>>>>>>> +#define HV_X64_MSR_TSC_REFERENCE_ENABLE		0x00000001
> >>>>>>>>>>>>> +#define HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT	12
> >>>>>>>>>>>>> +
> >>>>>>>>>>>>>     #define HV_PROCESSOR_POWER_STATE_C0		0
> >>>>>>>>>>>>>     #define HV_PROCESSOR_POWER_STATE_C1		1
> >>>>>>>>>>>>>     #define HV_PROCESSOR_POWER_STATE_C2		2
> >>>>>>>>>>>>> @@ -210,4 +216,11 @@
> >>>>>>>>>>>>>     #define HV_STATUS_INVALID_ALIGNMENT		4
> >>>>>>>>>>>>>     #define HV_STATUS_INSUFFICIENT_BUFFERS		19
> >>>>>>>>>>>>>
> >>>>>>>>>>>>> +typedef struct _HV_REFERENCE_TSC_PAGE {
> >>>>>>>>>>>>> +	__u32 tsc_sequence;
> >>>>>>>>>>>>> +	__u32 res1;
> >>>>>>>>>>>>> +	__u64 tsc_scale;
> >>>>>>>>>>>>> +	__s64 tsc_offset;
> >>>>>>>>>>>>> +} HV_REFERENCE_TSC_PAGE, *PHV_REFERENCE_TSC_PAGE;
> >>>>>>>>>>>>> +
> >>>>>>>>>>>>>     #endif
> >>>>>>>>>>>>> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> >>>>>>>>>>>>> index 21ef1ba..5e4e495a 100644
> >>>>>>>>>>>>> --- a/arch/x86/kvm/x86.c
> >>>>>>>>>>>>> +++ b/arch/x86/kvm/x86.c
> >>>>>>>>>>>>> @@ -840,7 +840,7 @@ EXPORT_SYMBOL_GPL(kvm_rdpmc);
> >>>>>>>>>>>>>     static u32 msrs_to_save[] = {
> >>>>>>>>>>>>>     	MSR_KVM_SYSTEM_TIME, MSR_KVM_WALL_CLOCK,
> >>>>>>>>>>>>>     	MSR_KVM_SYSTEM_TIME_NEW, MSR_KVM_WALL_CLOCK_NEW,
> >>>>>>>>>>>>> -	HV_X64_MSR_GUEST_OS_ID, HV_X64_MSR_HYPERCALL,
> >>>>>>>>>>>>> +	HV_X64_MSR_GUEST_OS_ID, HV_X64_MSR_HYPERCALL, HV_X64_MSR_TIME_REF_COUNT,
> >>>>>>>>>>>>>     	HV_X64_MSR_APIC_ASSIST_PAGE, MSR_KVM_ASYNC_PF_EN, MSR_KVM_STEAL_TIME,
> >>>>>>>>>>>>>     	MSR_KVM_PV_EOI_EN,
> >>>>>>>>>>>>>     	MSR_IA32_SYSENTER_CS, MSR_IA32_SYSENTER_ESP, MSR_IA32_SYSENTER_EIP,
> >>>>>>>>>>>>> @@ -1826,6 +1826,8 @@ static bool kvm_hv_msr_partition_wide(u32 msr)
> >>>>>>>>>>>>>     	switch (msr) {
> >>>>>>>>>>>>>     	case HV_X64_MSR_GUEST_OS_ID:
> >>>>>>>>>>>>>     	case HV_X64_MSR_HYPERCALL:
> >>>>>>>>>>>>> +	case HV_X64_MSR_REFERENCE_TSC:
> >>>>>>>>>>>>> +	case HV_X64_MSR_TIME_REF_COUNT:
> >>>>>>>>>>>>>     		r = true;
> >>>>>>>>>>>>>     		break;
> >>>>>>>>>>>>>     	}
> >>>>>>>>>>>>> @@ -1865,6 +1867,29 @@ static int set_msr_hyperv_pw(struct kvm_vcpu *vcpu, u32 msr, u64 data)
> >>>>>>>>>>>>>     		if (__copy_to_user((void __user *)addr, instructions, 4))
> >>>>>>>>>>>>>     			return 1;
> >>>>>>>>>>>>>     		kvm->arch.hv_hypercall = data;
> >>>>>>>>>>>>> +		local_irq_disable();
> >>>>>>>>>>>>> +		kvm->arch.hv_ref_count = get_kernel_ns() + kvm->arch.kvmclock_offset;
> >>>>>>>>>>>>> +		local_irq_enable();
> >>>>>>>>>>>>
> >>>>>>>>>>>> Where does the docs say that HV_X64_MSR_HYPERCALL is the where the clock
> >>>>>>>>>>>> starts counting?
> >>>>>>>>>>>>
> >>>>>>>>>>>> No need to store kvmclock_offset in hv_ref_count? (moreover
> >>>>>>>>>>>> the name is weird, better name would be "hv_ref_start_time".
> >>>>>>>>>>>
> >>>>>>>>>>> Just add kvmclock_offset when reading the values (otherwise you have a
> >>>>>>>>>>> "stale copy" of kvmclock_offset in hv_ref_count).
> >>>>>>>>>>>
> >>>>>>>>>>
> >>>>>>>>>> After some experiments I think we do no need kvm->arch.hv_ref_count at all.
> >>>>>>>>>>
> >>>>>>>>>> I was debugging some weird clockjump issues and I think the problem is that after live migration
> >>>>>>>>>> kvm->arch.hv_ref_count is initialized to 0. Depending on the uptime of the vServer when the
> >>>>>>>>>> hypercall was set up this can lead to series jumps.
> >>>>>>>>>>
> >>>>>>>>>> So I would suggest to completely drop kvm->arch.hv_ref_count.
> >>>>>>>>>>
> >>>>>>>>>> And use simply this in get_msr_hyperv_pw().
> >>>>>>>>>>
> >>>>>>>>>>            case HV_X64_MSR_TIME_REF_COUNT: {
> >>>>>>>>>>                    data = div_u64(get_kernel_ns() + kvm->arch.kvmclock_offset, 100);
> >>>>>>>>>>                    break;
> >>>>>>>>>>            }
> >>>>>>>>>>
> >>>>>>>>>
> >>>>>>>>> Agreed. It should work as long as we rely on kvmclock_offset.
> >>>>>>>>> Vadim.
> >>>>>>>>
> >>>>>>>> I think we can rely on kvmclock_offset. Had you had a chance to do further testing
> >>>>>>>> during the weekend?
> >>>>>>>
> >>>>>>> Yes, and still testing.
> >>>>>>>
> >>>>>>> There is still some inconsistency in the value returned by
> >>>>>>> QueryPerformanceCounter during migration.
> >>>>>>
> >>>>>> With my proposal?
> >>>>>>
> >>>>>
> >>>>> Right.
> >>>>> Please see below QTC vs. NTP time stamp
> >>>>> before and after migration:
> >>>>>
> >>>>> C:\timedrift\objchk_win7_x86\i386>timedrift.exe
> >>>>> quantum = 15
> >>>>> QPC     NTP     Delta
> >>>>> 1311    1307    4
> >>>>> 2621    2618    3
> >>>>> 3932    3931    1
> >>>>> .......
> >>>>> .......
> >>>>> 38018   38014   4
> >>>>> Unable to wait for NTP reply from the SNTP server, GetLastError returns
> >>>>> 7279088
> >>>>> 142210  40113   102097
> >>>>> 143458  41357   102101
> >>>>> .......
> >>>>> .......
> >>>>> 156500  54615   101885
> >>>>>
> >>>>> NTP time changed from  38014 to 40113 ((40113 - 38014) / 100 = 21 mSec)
> >>>>> while QPC jumped from  38018 to 142210 ((142210 - 38014) / 100 = 1042
> >>>>> mSec)
> >>>>
> >>>> How long was the downtime when you migrated the vServer?
> >>> I use Win7-32 for testing.
> >>>>
> >>>> I have done similar tests and have not seen this...
> >>>> Can you show the relevant parts of your patch. Remember I use the REFERENCE_COUTNER not iTSC.
> >>> Yes, I also test it for reference counter.
> >>
> >> Can you use these parameters for testing:
> >>
> >> -cpu qemu64,hv_relaxed,hv_vapic,hv_spinlocks=4096,hv_refcnt
> >> -rtc base=localtime -vga std  -usb -usbdevice tablet -no-hpet
> > 
> > My settings are:
> > -cpu qemu64,
> > +x2apic,family=0xf,hv_vapic,hv_spinlocks=0xfff,hv_relaxed,hv_tsc -m 1G
> > -smp 4
> 
> do you also see the jump if you leave the hv_tsc out and just use hv_refcnt?

Yes, almost the same results. I will try to re-check it on a different
machine tomorrow.
Best regards,
Vadim.

> 
> Peter
> --
> To unsubscribe from this list: send the line "unsubscribe kvm" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html


--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Vadim Rozenfeld Jan. 12, 2014, 12:08 p.m. UTC | #22
On Wed, 2014-01-08 at 23:20 +0100, Peter Lieven wrote:
> Am 08.01.2014 21:08, schrieb Vadim Rozenfeld:
> > On Wed, 2014-01-08 at 15:54 +0100, Peter Lieven wrote:
> >> On 08.01.2014 13:12, Vadim Rozenfeld wrote:
> >>> On Wed, 2014-01-08 at 12:48 +0100, Peter Lieven wrote:
> >>>> On 08.01.2014 11:44, Vadim Rozenfeld wrote:
> >>>>> On Wed, 2014-01-08 at 11:15 +0100, Peter Lieven wrote:
> >>>>>> On 08.01.2014 10:40, Vadim Rozenfeld wrote:
> >>>>>>> On Tue, 2014-01-07 at 18:52 +0100, Peter Lieven wrote:
> >>>>>>>> Am 07.01.2014 10:36, schrieb Vadim Rozenfeld:
> >>>>>>>>> On Thu, 2014-01-02 at 17:52 +0100, Peter Lieven wrote:
> >>>>>>>>>> Am 11.12.2013 19:59, schrieb Marcelo Tosatti:
> >>>>>>>>>>> On Wed, Dec 11, 2013 at 04:53:05PM -0200, Marcelo Tosatti wrote:
> >>>>>>>>>>>> On Sun, Dec 08, 2013 at 10:33:38PM +1100, Vadim Rozenfeld wrote:
> >>>>>>>>>>>>> Signed-off: Peter Lieven <pl@dlh.net>
> >>>>>>>>>>>>> Signed-off: Gleb Natapov <gleb@redhat.com>
> >>>>>>>>>>>>> Signed-off: Vadim Rozenfeld <vrozenfe@redhat.com>
> >>>>>>>>>>>>>
> >>>>>>>>>>>>> v1 -> v2
> >>>>>>>>>>>>> 1. mark TSC page dirty as suggested by
> >>>>>>>>>>>>>        Eric Northup <digitaleric@google.com> and Gleb
> >>>>>>>>>>>>> 2. disable local irq when calling get_kernel_ns,
> >>>>>>>>>>>>>        as it was done by Peter Lieven <pl@dlhnet.de>
> >>>>>>>>>>>>> 3. move check for TSC page enable from second patch
> >>>>>>>>>>>>>        to this one.
> >>>>>>>>>>>>>
> >>>>>>>>>>>>> ---
> >>>>>>>>>>>>>     arch/x86/include/asm/kvm_host.h    |  2 ++
> >>>>>>>>>>>>>     arch/x86/include/uapi/asm/hyperv.h | 13 +++++++++++++
> >>>>>>>>>>>>>     arch/x86/kvm/x86.c                 | 39 +++++++++++++++++++++++++++++++++++++-
> >>>>>>>>>>>>>     include/uapi/linux/kvm.h           |  1 +
> >>>>>>>>>>>>>     4 files changed, 54 insertions(+), 1 deletion(-)
> >>>>>>>>>>>>>
> >>>>>>>>>>>>> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> >>>>>>>>>>>>> index ae5d783..2fd0753 100644
> >>>>>>>>>>>>> --- a/arch/x86/include/asm/kvm_host.h
> >>>>>>>>>>>>> +++ b/arch/x86/include/asm/kvm_host.h
> >>>>>>>>>>>>> @@ -605,6 +605,8 @@ struct kvm_arch {
> >>>>>>>>>>>>>     	/* fields used by HYPER-V emulation */
> >>>>>>>>>>>>>     	u64 hv_guest_os_id;
> >>>>>>>>>>>>>     	u64 hv_hypercall;
> >>>>>>>>>>>>> +	u64 hv_ref_count;
> >>>>>>>>>>>>> +	u64 hv_tsc_page;
> >>>>>>>>>>>>>
> >>>>>>>>>>>>>     	#ifdef CONFIG_KVM_MMU_AUDIT
> >>>>>>>>>>>>>     	int audit_point;
> >>>>>>>>>>>>> diff --git a/arch/x86/include/uapi/asm/hyperv.h b/arch/x86/include/uapi/asm/hyperv.h
> >>>>>>>>>>>>> index b8f1c01..462efe7 100644
> >>>>>>>>>>>>> --- a/arch/x86/include/uapi/asm/hyperv.h
> >>>>>>>>>>>>> +++ b/arch/x86/include/uapi/asm/hyperv.h
> >>>>>>>>>>>>> @@ -28,6 +28,9 @@
> >>>>>>>>>>>>>     /* Partition Reference Counter (HV_X64_MSR_TIME_REF_COUNT) available*/
> >>>>>>>>>>>>>     #define HV_X64_MSR_TIME_REF_COUNT_AVAILABLE	(1 << 1)
> >>>>>>>>>>>>>
> >>>>>>>>>>>>> +/* A partition's reference time stamp counter (TSC) page */
> >>>>>>>>>>>>> +#define HV_X64_MSR_REFERENCE_TSC		0x40000021
> >>>>>>>>>>>>> +
> >>>>>>>>>>>>>     /*
> >>>>>>>>>>>>>      * There is a single feature flag that signifies the presence of the MSR
> >>>>>>>>>>>>>      * that can be used to retrieve both the local APIC Timer frequency as
> >>>>>>>>>>>>> @@ -198,6 +201,9 @@
> >>>>>>>>>>>>>     #define HV_X64_MSR_APIC_ASSIST_PAGE_ADDRESS_MASK	\
> >>>>>>>>>>>>>     		(~((1ull << HV_X64_MSR_APIC_ASSIST_PAGE_ADDRESS_SHIFT) - 1))
> >>>>>>>>>>>>>
> >>>>>>>>>>>>> +#define HV_X64_MSR_TSC_REFERENCE_ENABLE		0x00000001
> >>>>>>>>>>>>> +#define HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT	12
> >>>>>>>>>>>>> +
> >>>>>>>>>>>>>     #define HV_PROCESSOR_POWER_STATE_C0		0
> >>>>>>>>>>>>>     #define HV_PROCESSOR_POWER_STATE_C1		1
> >>>>>>>>>>>>>     #define HV_PROCESSOR_POWER_STATE_C2		2
> >>>>>>>>>>>>> @@ -210,4 +216,11 @@
> >>>>>>>>>>>>>     #define HV_STATUS_INVALID_ALIGNMENT		4
> >>>>>>>>>>>>>     #define HV_STATUS_INSUFFICIENT_BUFFERS		19
> >>>>>>>>>>>>>
> >>>>>>>>>>>>> +typedef struct _HV_REFERENCE_TSC_PAGE {
> >>>>>>>>>>>>> +	__u32 tsc_sequence;
> >>>>>>>>>>>>> +	__u32 res1;
> >>>>>>>>>>>>> +	__u64 tsc_scale;
> >>>>>>>>>>>>> +	__s64 tsc_offset;
> >>>>>>>>>>>>> +} HV_REFERENCE_TSC_PAGE, *PHV_REFERENCE_TSC_PAGE;
> >>>>>>>>>>>>> +
> >>>>>>>>>>>>>     #endif
> >>>>>>>>>>>>> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> >>>>>>>>>>>>> index 21ef1ba..5e4e495a 100644
> >>>>>>>>>>>>> --- a/arch/x86/kvm/x86.c
> >>>>>>>>>>>>> +++ b/arch/x86/kvm/x86.c
> >>>>>>>>>>>>> @@ -840,7 +840,7 @@ EXPORT_SYMBOL_GPL(kvm_rdpmc);
> >>>>>>>>>>>>>     static u32 msrs_to_save[] = {
> >>>>>>>>>>>>>     	MSR_KVM_SYSTEM_TIME, MSR_KVM_WALL_CLOCK,
> >>>>>>>>>>>>>     	MSR_KVM_SYSTEM_TIME_NEW, MSR_KVM_WALL_CLOCK_NEW,
> >>>>>>>>>>>>> -	HV_X64_MSR_GUEST_OS_ID, HV_X64_MSR_HYPERCALL,
> >>>>>>>>>>>>> +	HV_X64_MSR_GUEST_OS_ID, HV_X64_MSR_HYPERCALL, HV_X64_MSR_TIME_REF_COUNT,
> >>>>>>>>>>>>>     	HV_X64_MSR_APIC_ASSIST_PAGE, MSR_KVM_ASYNC_PF_EN, MSR_KVM_STEAL_TIME,
> >>>>>>>>>>>>>     	MSR_KVM_PV_EOI_EN,
> >>>>>>>>>>>>>     	MSR_IA32_SYSENTER_CS, MSR_IA32_SYSENTER_ESP, MSR_IA32_SYSENTER_EIP,
> >>>>>>>>>>>>> @@ -1826,6 +1826,8 @@ static bool kvm_hv_msr_partition_wide(u32 msr)
> >>>>>>>>>>>>>     	switch (msr) {
> >>>>>>>>>>>>>     	case HV_X64_MSR_GUEST_OS_ID:
> >>>>>>>>>>>>>     	case HV_X64_MSR_HYPERCALL:
> >>>>>>>>>>>>> +	case HV_X64_MSR_REFERENCE_TSC:
> >>>>>>>>>>>>> +	case HV_X64_MSR_TIME_REF_COUNT:
> >>>>>>>>>>>>>     		r = true;
> >>>>>>>>>>>>>     		break;
> >>>>>>>>>>>>>     	}
> >>>>>>>>>>>>> @@ -1865,6 +1867,29 @@ static int set_msr_hyperv_pw(struct kvm_vcpu *vcpu, u32 msr, u64 data)
> >>>>>>>>>>>>>     		if (__copy_to_user((void __user *)addr, instructions, 4))
> >>>>>>>>>>>>>     			return 1;
> >>>>>>>>>>>>>     		kvm->arch.hv_hypercall = data;
> >>>>>>>>>>>>> +		local_irq_disable();
> >>>>>>>>>>>>> +		kvm->arch.hv_ref_count = get_kernel_ns() + kvm->arch.kvmclock_offset;
> >>>>>>>>>>>>> +		local_irq_enable();
> >>>>>>>>>>>>
> >>>>>>>>>>>> Where does the docs say that HV_X64_MSR_HYPERCALL is the where the clock
> >>>>>>>>>>>> starts counting?
> >>>>>>>>>>>>
> >>>>>>>>>>>> No need to store kvmclock_offset in hv_ref_count? (moreover
> >>>>>>>>>>>> the name is weird, better name would be "hv_ref_start_time".
> >>>>>>>>>>>
> >>>>>>>>>>> Just add kvmclock_offset when reading the values (otherwise you have a
> >>>>>>>>>>> "stale copy" of kvmclock_offset in hv_ref_count).
> >>>>>>>>>>>
> >>>>>>>>>>
> >>>>>>>>>> After some experiments I think we do no need kvm->arch.hv_ref_count at all.
> >>>>>>>>>>
> >>>>>>>>>> I was debugging some weird clockjump issues and I think the problem is that after live migration
> >>>>>>>>>> kvm->arch.hv_ref_count is initialized to 0. Depending on the uptime of the vServer when the
> >>>>>>>>>> hypercall was set up this can lead to series jumps.
> >>>>>>>>>>
> >>>>>>>>>> So I would suggest to completely drop kvm->arch.hv_ref_count.
> >>>>>>>>>>
> >>>>>>>>>> And use simply this in get_msr_hyperv_pw().
> >>>>>>>>>>
> >>>>>>>>>>            case HV_X64_MSR_TIME_REF_COUNT: {
> >>>>>>>>>>                    data = div_u64(get_kernel_ns() + kvm->arch.kvmclock_offset, 100);
> >>>>>>>>>>                    break;
> >>>>>>>>>>            }
> >>>>>>>>>>
> >>>>>>>>>
> >>>>>>>>> Agreed. It should work as long as we rely on kvmclock_offset.
> >>>>>>>>> Vadim.
> >>>>>>>>
> >>>>>>>> I think we can rely on kvmclock_offset. Had you had a chance to do further testing
> >>>>>>>> during the weekend?
> >>>>>>>
> >>>>>>> Yes, and still testing.

It seems to be working fine. However, I would to keep hv_ref_count as is
for the following reason: 
" A partition's reference time counter is initialized to zero when the
partition is created."
http://msdn.microsoft.com/en-us/library/windows/hardware/ff542637%
28v=vs.85%29.aspx

Technical, hypercall page initialization event (HV_X64_MSR_HYPERCALL MSR
write handler) can be treated as such event. In this case  hv_ref_count
is no more than just an offset, which should be subtracted from the
current time (  get_kernel_ns() + kvm->arch.kvmclock_offset ) when
handling HV_X64_MSR_TIME_REF_COUNT MSR read request.

Best regards,
Vadim.

> >>>>>>>
> >>>>>>> There is still some inconsistency in the value returned by
> >>>>>>> QueryPerformanceCounter during migration.
> >>>>>>
> >>>>>> With my proposal?
> >>>>>>
> >>>>>
> >>>>> Right.
> >>>>> Please see below QTC vs. NTP time stamp
> >>>>> before and after migration:
> >>>>>
> >>>>> C:\timedrift\objchk_win7_x86\i386>timedrift.exe
> >>>>> quantum = 15
> >>>>> QPC     NTP     Delta
> >>>>> 1311    1307    4
> >>>>> 2621    2618    3
> >>>>> 3932    3931    1
> >>>>> .......
> >>>>> .......
> >>>>> 38018   38014   4
> >>>>> Unable to wait for NTP reply from the SNTP server, GetLastError returns
> >>>>> 7279088
> >>>>> 142210  40113   102097
> >>>>> 143458  41357   102101
> >>>>> .......
> >>>>> .......
> >>>>> 156500  54615   101885
> >>>>>
> >>>>> NTP time changed from  38014 to 40113 ((40113 - 38014) / 100 = 21 mSec)
> >>>>> while QPC jumped from  38018 to 142210 ((142210 - 38014) / 100 = 1042
> >>>>> mSec)
> >>>>
> >>>> How long was the downtime when you migrated the vServer?
> >>> I use Win7-32 for testing.
> >>>>
> >>>> I have done similar tests and have not seen this...
> >>>> Can you show the relevant parts of your patch. Remember I use the REFERENCE_COUTNER not iTSC.
> >>> Yes, I also test it for reference counter.
> >>
> >> Can you use these parameters for testing:
> >>
> >> -cpu qemu64,hv_relaxed,hv_vapic,hv_spinlocks=4096,hv_refcnt
> >> -rtc base=localtime -vga std  -usb -usbdevice tablet -no-hpet
> > 
> > My settings are:
> > -cpu qemu64,
> > +x2apic,family=0xf,hv_vapic,hv_spinlocks=0xfff,hv_relaxed,hv_tsc -m 1G
> > -smp 4
> 
> do you also see the jump if you leave the hv_tsc out and just use hv_refcnt?
> 
> Peter


--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Peter Lieven Jan. 12, 2014, 8:35 p.m. UTC | #23
Am 12.01.2014 um 13:08 schrieb Vadim Rozenfeld <vrozenfe@redhat.com>:

> On Wed, 2014-01-08 at 23:20 +0100, Peter Lieven wrote:
>> Am 08.01.2014 21:08, schrieb Vadim Rozenfeld:
>>> On Wed, 2014-01-08 at 15:54 +0100, Peter Lieven wrote:
>>>> On 08.01.2014 13:12, Vadim Rozenfeld wrote:
>>>>> On Wed, 2014-01-08 at 12:48 +0100, Peter Lieven wrote:
>>>>>> On 08.01.2014 11:44, Vadim Rozenfeld wrote:
>>>>>>> On Wed, 2014-01-08 at 11:15 +0100, Peter Lieven wrote:
>>>>>>>> On 08.01.2014 10:40, Vadim Rozenfeld wrote:
>>>>>>>>> On Tue, 2014-01-07 at 18:52 +0100, Peter Lieven wrote:
>>>>>>>>>> Am 07.01.2014 10:36, schrieb Vadim Rozenfeld:
>>>>>>>>>>> On Thu, 2014-01-02 at 17:52 +0100, Peter Lieven wrote:
>>>>>>>>>>>> Am 11.12.2013 19:59, schrieb Marcelo Tosatti:
>>>>>>>>>>>>> On Wed, Dec 11, 2013 at 04:53:05PM -0200, Marcelo Tosatti wrote:
>>>>>>>>>>>>>> On Sun, Dec 08, 2013 at 10:33:38PM +1100, Vadim Rozenfeld wrote:
>>>>>>>>>>>>>>> Signed-off: Peter Lieven <pl@dlh.net>
>>>>>>>>>>>>>>> Signed-off: Gleb Natapov <gleb@redhat.com>
>>>>>>>>>>>>>>> Signed-off: Vadim Rozenfeld <vrozenfe@redhat.com>
>>>>>>>>>>>>>>> 
>>>>>>>>>>>>>>> v1 -> v2
>>>>>>>>>>>>>>> 1. mark TSC page dirty as suggested by
>>>>>>>>>>>>>>>       Eric Northup <digitaleric@google.com> and Gleb
>>>>>>>>>>>>>>> 2. disable local irq when calling get_kernel_ns,
>>>>>>>>>>>>>>>       as it was done by Peter Lieven <pl@dlhnet.de>
>>>>>>>>>>>>>>> 3. move check for TSC page enable from second patch
>>>>>>>>>>>>>>>       to this one.
>>>>>>>>>>>>>>> 
>>>>>>>>>>>>>>> ---
>>>>>>>>>>>>>>>    arch/x86/include/asm/kvm_host.h    |  2 ++
>>>>>>>>>>>>>>>    arch/x86/include/uapi/asm/hyperv.h | 13 +++++++++++++
>>>>>>>>>>>>>>>    arch/x86/kvm/x86.c                 | 39 +++++++++++++++++++++++++++++++++++++-
>>>>>>>>>>>>>>>    include/uapi/linux/kvm.h           |  1 +
>>>>>>>>>>>>>>>    4 files changed, 54 insertions(+), 1 deletion(-)
>>>>>>>>>>>>>>> 
>>>>>>>>>>>>>>> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
>>>>>>>>>>>>>>> index ae5d783..2fd0753 100644
>>>>>>>>>>>>>>> --- a/arch/x86/include/asm/kvm_host.h
>>>>>>>>>>>>>>> +++ b/arch/x86/include/asm/kvm_host.h
>>>>>>>>>>>>>>> @@ -605,6 +605,8 @@ struct kvm_arch {
>>>>>>>>>>>>>>>    	/* fields used by HYPER-V emulation */
>>>>>>>>>>>>>>>    	u64 hv_guest_os_id;
>>>>>>>>>>>>>>>    	u64 hv_hypercall;
>>>>>>>>>>>>>>> +	u64 hv_ref_count;
>>>>>>>>>>>>>>> +	u64 hv_tsc_page;
>>>>>>>>>>>>>>> 
>>>>>>>>>>>>>>>    	#ifdef CONFIG_KVM_MMU_AUDIT
>>>>>>>>>>>>>>>    	int audit_point;
>>>>>>>>>>>>>>> diff --git a/arch/x86/include/uapi/asm/hyperv.h b/arch/x86/include/uapi/asm/hyperv.h
>>>>>>>>>>>>>>> index b8f1c01..462efe7 100644
>>>>>>>>>>>>>>> --- a/arch/x86/include/uapi/asm/hyperv.h
>>>>>>>>>>>>>>> +++ b/arch/x86/include/uapi/asm/hyperv.h
>>>>>>>>>>>>>>> @@ -28,6 +28,9 @@
>>>>>>>>>>>>>>>    /* Partition Reference Counter (HV_X64_MSR_TIME_REF_COUNT) available*/
>>>>>>>>>>>>>>>    #define HV_X64_MSR_TIME_REF_COUNT_AVAILABLE	(1 << 1)
>>>>>>>>>>>>>>> 
>>>>>>>>>>>>>>> +/* A partition's reference time stamp counter (TSC) page */
>>>>>>>>>>>>>>> +#define HV_X64_MSR_REFERENCE_TSC		0x40000021
>>>>>>>>>>>>>>> +
>>>>>>>>>>>>>>>    /*
>>>>>>>>>>>>>>>     * There is a single feature flag that signifies the presence of the MSR
>>>>>>>>>>>>>>>     * that can be used to retrieve both the local APIC Timer frequency as
>>>>>>>>>>>>>>> @@ -198,6 +201,9 @@
>>>>>>>>>>>>>>>    #define HV_X64_MSR_APIC_ASSIST_PAGE_ADDRESS_MASK	\
>>>>>>>>>>>>>>>    		(~((1ull << HV_X64_MSR_APIC_ASSIST_PAGE_ADDRESS_SHIFT) - 1))
>>>>>>>>>>>>>>> 
>>>>>>>>>>>>>>> +#define HV_X64_MSR_TSC_REFERENCE_ENABLE		0x00000001
>>>>>>>>>>>>>>> +#define HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT	12
>>>>>>>>>>>>>>> +
>>>>>>>>>>>>>>>    #define HV_PROCESSOR_POWER_STATE_C0		0
>>>>>>>>>>>>>>>    #define HV_PROCESSOR_POWER_STATE_C1		1
>>>>>>>>>>>>>>>    #define HV_PROCESSOR_POWER_STATE_C2		2
>>>>>>>>>>>>>>> @@ -210,4 +216,11 @@
>>>>>>>>>>>>>>>    #define HV_STATUS_INVALID_ALIGNMENT		4
>>>>>>>>>>>>>>>    #define HV_STATUS_INSUFFICIENT_BUFFERS		19
>>>>>>>>>>>>>>> 
>>>>>>>>>>>>>>> +typedef struct _HV_REFERENCE_TSC_PAGE {
>>>>>>>>>>>>>>> +	__u32 tsc_sequence;
>>>>>>>>>>>>>>> +	__u32 res1;
>>>>>>>>>>>>>>> +	__u64 tsc_scale;
>>>>>>>>>>>>>>> +	__s64 tsc_offset;
>>>>>>>>>>>>>>> +} HV_REFERENCE_TSC_PAGE, *PHV_REFERENCE_TSC_PAGE;
>>>>>>>>>>>>>>> +
>>>>>>>>>>>>>>>    #endif
>>>>>>>>>>>>>>> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
>>>>>>>>>>>>>>> index 21ef1ba..5e4e495a 100644
>>>>>>>>>>>>>>> --- a/arch/x86/kvm/x86.c
>>>>>>>>>>>>>>> +++ b/arch/x86/kvm/x86.c
>>>>>>>>>>>>>>> @@ -840,7 +840,7 @@ EXPORT_SYMBOL_GPL(kvm_rdpmc);
>>>>>>>>>>>>>>>    static u32 msrs_to_save[] = {
>>>>>>>>>>>>>>>    	MSR_KVM_SYSTEM_TIME, MSR_KVM_WALL_CLOCK,
>>>>>>>>>>>>>>>    	MSR_KVM_SYSTEM_TIME_NEW, MSR_KVM_WALL_CLOCK_NEW,
>>>>>>>>>>>>>>> -	HV_X64_MSR_GUEST_OS_ID, HV_X64_MSR_HYPERCALL,
>>>>>>>>>>>>>>> +	HV_X64_MSR_GUEST_OS_ID, HV_X64_MSR_HYPERCALL, HV_X64_MSR_TIME_REF_COUNT,
>>>>>>>>>>>>>>>    	HV_X64_MSR_APIC_ASSIST_PAGE, MSR_KVM_ASYNC_PF_EN, MSR_KVM_STEAL_TIME,
>>>>>>>>>>>>>>>    	MSR_KVM_PV_EOI_EN,
>>>>>>>>>>>>>>>    	MSR_IA32_SYSENTER_CS, MSR_IA32_SYSENTER_ESP, MSR_IA32_SYSENTER_EIP,
>>>>>>>>>>>>>>> @@ -1826,6 +1826,8 @@ static bool kvm_hv_msr_partition_wide(u32 msr)
>>>>>>>>>>>>>>>    	switch (msr) {
>>>>>>>>>>>>>>>    	case HV_X64_MSR_GUEST_OS_ID:
>>>>>>>>>>>>>>>    	case HV_X64_MSR_HYPERCALL:
>>>>>>>>>>>>>>> +	case HV_X64_MSR_REFERENCE_TSC:
>>>>>>>>>>>>>>> +	case HV_X64_MSR_TIME_REF_COUNT:
>>>>>>>>>>>>>>>    		r = true;
>>>>>>>>>>>>>>>    		break;
>>>>>>>>>>>>>>>    	}
>>>>>>>>>>>>>>> @@ -1865,6 +1867,29 @@ static int set_msr_hyperv_pw(struct kvm_vcpu *vcpu, u32 msr, u64 data)
>>>>>>>>>>>>>>>    		if (__copy_to_user((void __user *)addr, instructions, 4))
>>>>>>>>>>>>>>>    			return 1;
>>>>>>>>>>>>>>>    		kvm->arch.hv_hypercall = data;
>>>>>>>>>>>>>>> +		local_irq_disable();
>>>>>>>>>>>>>>> +		kvm->arch.hv_ref_count = get_kernel_ns() + kvm->arch.kvmclock_offset;
>>>>>>>>>>>>>>> +		local_irq_enable();
>>>>>>>>>>>>>> 
>>>>>>>>>>>>>> Where does the docs say that HV_X64_MSR_HYPERCALL is the where the clock
>>>>>>>>>>>>>> starts counting?
>>>>>>>>>>>>>> 
>>>>>>>>>>>>>> No need to store kvmclock_offset in hv_ref_count? (moreover
>>>>>>>>>>>>>> the name is weird, better name would be "hv_ref_start_time".
>>>>>>>>>>>>> 
>>>>>>>>>>>>> Just add kvmclock_offset when reading the values (otherwise you have a
>>>>>>>>>>>>> "stale copy" of kvmclock_offset in hv_ref_count).
>>>>>>>>>>>>> 
>>>>>>>>>>>> 
>>>>>>>>>>>> After some experiments I think we do no need kvm->arch.hv_ref_count at all.
>>>>>>>>>>>> 
>>>>>>>>>>>> I was debugging some weird clockjump issues and I think the problem is that after live migration
>>>>>>>>>>>> kvm->arch.hv_ref_count is initialized to 0. Depending on the uptime of the vServer when the
>>>>>>>>>>>> hypercall was set up this can lead to series jumps.
>>>>>>>>>>>> 
>>>>>>>>>>>> So I would suggest to completely drop kvm->arch.hv_ref_count.
>>>>>>>>>>>> 
>>>>>>>>>>>> And use simply this in get_msr_hyperv_pw().
>>>>>>>>>>>> 
>>>>>>>>>>>>           case HV_X64_MSR_TIME_REF_COUNT: {
>>>>>>>>>>>>                   data = div_u64(get_kernel_ns() + kvm->arch.kvmclock_offset, 100);
>>>>>>>>>>>>                   break;
>>>>>>>>>>>>           }
>>>>>>>>>>>> 
>>>>>>>>>>> 
>>>>>>>>>>> Agreed. It should work as long as we rely on kvmclock_offset.
>>>>>>>>>>> Vadim.
>>>>>>>>>> 
>>>>>>>>>> I think we can rely on kvmclock_offset. Had you had a chance to do further testing
>>>>>>>>>> during the weekend?
>>>>>>>>> 
>>>>>>>>> Yes, and still testing.
> 
> It seems to be working fine. However, I would to keep hv_ref_count as is
> for the following reason: 
> " A partition's reference time counter is initialized to zero when the
> partition is created."
> http://msdn.microsoft.com/en-us/library/windows/hardware/ff542637%
> 28v=vs.85%29.aspx
> 
> Technical, hypercall page initialization event (HV_X64_MSR_HYPERCALL MSR
> write handler) can be treated as such event. In this case  hv_ref_count
> is no more than just an offset, which should be subtracted from the
> current time (  get_kernel_ns() + kvm->arch.kvmclock_offset ) when
> handling HV_X64_MSR_TIME_REF_COUNT MSR read request.

my 2 cents:

a) if you treat the start of the vm as partition create event get_kernel_ns() + kvm->arch.kvmclock_offset
is exactly the vm life time in nanoseconds.

b) afaik currently the value of hv_ref_count is not restored after a live migration on the destination. this
needs to be implemented for proper function. if we go for dropping this offset completely we can drop
the requirement of restoring hv_ref_count as well.

Peter

> 
> Best regards,
> Vadim.
> 
>>>>>>>>> 
>>>>>>>>> There is still some inconsistency in the value returned by
>>>>>>>>> QueryPerformanceCounter during migration.
>>>>>>>> 
>>>>>>>> With my proposal?
>>>>>>>> 
>>>>>>> 
>>>>>>> Right.
>>>>>>> Please see below QTC vs. NTP time stamp
>>>>>>> before and after migration:
>>>>>>> 
>>>>>>> C:\timedrift\objchk_win7_x86\i386>timedrift.exe
>>>>>>> quantum = 15
>>>>>>> QPC     NTP     Delta
>>>>>>> 1311    1307    4
>>>>>>> 2621    2618    3
>>>>>>> 3932    3931    1
>>>>>>> .......
>>>>>>> .......
>>>>>>> 38018   38014   4
>>>>>>> Unable to wait for NTP reply from the SNTP server, GetLastError returns
>>>>>>> 7279088
>>>>>>> 142210  40113   102097
>>>>>>> 143458  41357   102101
>>>>>>> .......
>>>>>>> .......
>>>>>>> 156500  54615   101885
>>>>>>> 
>>>>>>> NTP time changed from  38014 to 40113 ((40113 - 38014) / 100 = 21 mSec)
>>>>>>> while QPC jumped from  38018 to 142210 ((142210 - 38014) / 100 = 1042
>>>>>>> mSec)
>>>>>> 
>>>>>> How long was the downtime when you migrated the vServer?
>>>>> I use Win7-32 for testing.
>>>>>> 
>>>>>> I have done similar tests and have not seen this...
>>>>>> Can you show the relevant parts of your patch. Remember I use the REFERENCE_COUTNER not iTSC.
>>>>> Yes, I also test it for reference counter.
>>>> 
>>>> Can you use these parameters for testing:
>>>> 
>>>> -cpu qemu64,hv_relaxed,hv_vapic,hv_spinlocks=4096,hv_refcnt
>>>> -rtc base=localtime -vga std  -usb -usbdevice tablet -no-hpet
>>> 
>>> My settings are:
>>> -cpu qemu64,
>>> +x2apic,family=0xf,hv_vapic,hv_spinlocks=0xfff,hv_relaxed,hv_tsc -m 1G
>>> -smp 4
>> 
>> do you also see the jump if you leave the hv_tsc out and just use hv_refcnt?
>> 
>> Peter
> 
> 

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Vadim Rozenfeld Jan. 13, 2014, 12:10 p.m. UTC | #24
On Wed, 2013-12-11 at 16:53 -0200, Marcelo Tosatti wrote:
> On Sun, Dec 08, 2013 at 10:33:38PM +1100, Vadim Rozenfeld wrote:
> > Signed-off: Peter Lieven <pl@dlh.net>
> > Signed-off: Gleb Natapov <gleb@redhat.com>
> > Signed-off: Vadim Rozenfeld <vrozenfe@redhat.com>
> > 
> > v1 -> v2
> > 1. mark TSC page dirty as suggested by 
> >     Eric Northup <digitaleric@google.com> and Gleb
> > 2. disable local irq when calling get_kernel_ns, 
> >     as it was done by Peter Lieven <pl@dlhnet.de>
> > 3. move check for TSC page enable from second patch
> >     to this one.
> > 
> > ---
> >  arch/x86/include/asm/kvm_host.h    |  2 ++
> >  arch/x86/include/uapi/asm/hyperv.h | 13 +++++++++++++
> >  arch/x86/kvm/x86.c                 | 39 +++++++++++++++++++++++++++++++++++++-
> >  include/uapi/linux/kvm.h           |  1 +
> >  4 files changed, 54 insertions(+), 1 deletion(-)
> > 
> > diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> > index ae5d783..2fd0753 100644
> > --- a/arch/x86/include/asm/kvm_host.h
> > +++ b/arch/x86/include/asm/kvm_host.h
> > @@ -605,6 +605,8 @@ struct kvm_arch {
> >  	/* fields used by HYPER-V emulation */
> >  	u64 hv_guest_os_id;
> >  	u64 hv_hypercall;
> > +	u64 hv_ref_count;
> > +	u64 hv_tsc_page;
> >  
> >  	#ifdef CONFIG_KVM_MMU_AUDIT
> >  	int audit_point;
> > diff --git a/arch/x86/include/uapi/asm/hyperv.h b/arch/x86/include/uapi/asm/hyperv.h
> > index b8f1c01..462efe7 100644
> > --- a/arch/x86/include/uapi/asm/hyperv.h
> > +++ b/arch/x86/include/uapi/asm/hyperv.h
> > @@ -28,6 +28,9 @@
> >  /* Partition Reference Counter (HV_X64_MSR_TIME_REF_COUNT) available*/
> >  #define HV_X64_MSR_TIME_REF_COUNT_AVAILABLE	(1 << 1)
> >  
> > +/* A partition's reference time stamp counter (TSC) page */
> > +#define HV_X64_MSR_REFERENCE_TSC		0x40000021
> > +
> >  /*
> >   * There is a single feature flag that signifies the presence of the MSR
> >   * that can be used to retrieve both the local APIC Timer frequency as
> > @@ -198,6 +201,9 @@
> >  #define HV_X64_MSR_APIC_ASSIST_PAGE_ADDRESS_MASK	\
> >  		(~((1ull << HV_X64_MSR_APIC_ASSIST_PAGE_ADDRESS_SHIFT) - 1))
> >  
> > +#define HV_X64_MSR_TSC_REFERENCE_ENABLE		0x00000001
> > +#define HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT	12
> > +
> >  #define HV_PROCESSOR_POWER_STATE_C0		0
> >  #define HV_PROCESSOR_POWER_STATE_C1		1
> >  #define HV_PROCESSOR_POWER_STATE_C2		2
> > @@ -210,4 +216,11 @@
> >  #define HV_STATUS_INVALID_ALIGNMENT		4
> >  #define HV_STATUS_INSUFFICIENT_BUFFERS		19
> >  
> > +typedef struct _HV_REFERENCE_TSC_PAGE {
> > +	__u32 tsc_sequence;
> > +	__u32 res1;
> > +	__u64 tsc_scale;
> > +	__s64 tsc_offset;
> > +} HV_REFERENCE_TSC_PAGE, *PHV_REFERENCE_TSC_PAGE;
> > +
> >  #endif
> > diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> > index 21ef1ba..5e4e495a 100644
> > --- a/arch/x86/kvm/x86.c
> > +++ b/arch/x86/kvm/x86.c
> > @@ -840,7 +840,7 @@ EXPORT_SYMBOL_GPL(kvm_rdpmc);
> >  static u32 msrs_to_save[] = {
> >  	MSR_KVM_SYSTEM_TIME, MSR_KVM_WALL_CLOCK,
> >  	MSR_KVM_SYSTEM_TIME_NEW, MSR_KVM_WALL_CLOCK_NEW,
> > -	HV_X64_MSR_GUEST_OS_ID, HV_X64_MSR_HYPERCALL,
> > +	HV_X64_MSR_GUEST_OS_ID, HV_X64_MSR_HYPERCALL, HV_X64_MSR_TIME_REF_COUNT,
> >  	HV_X64_MSR_APIC_ASSIST_PAGE, MSR_KVM_ASYNC_PF_EN, MSR_KVM_STEAL_TIME,
> >  	MSR_KVM_PV_EOI_EN,
> >  	MSR_IA32_SYSENTER_CS, MSR_IA32_SYSENTER_ESP, MSR_IA32_SYSENTER_EIP,
> > @@ -1826,6 +1826,8 @@ static bool kvm_hv_msr_partition_wide(u32 msr)
> >  	switch (msr) {
> >  	case HV_X64_MSR_GUEST_OS_ID:
> >  	case HV_X64_MSR_HYPERCALL:
> > +	case HV_X64_MSR_REFERENCE_TSC:
> > +	case HV_X64_MSR_TIME_REF_COUNT:
> >  		r = true;
> >  		break;
> >  	}
> > @@ -1865,6 +1867,29 @@ static int set_msr_hyperv_pw(struct kvm_vcpu *vcpu, u32 msr, u64 data)
> >  		if (__copy_to_user((void __user *)addr, instructions, 4))
> >  			return 1;
> >  		kvm->arch.hv_hypercall = data;
> > +		local_irq_disable();
> > +		kvm->arch.hv_ref_count = get_kernel_ns() + kvm->arch.kvmclock_offset;
> > +		local_irq_enable();
> 
> Where does the docs say that HV_X64_MSR_HYPERCALL is the where the clock
> starts counting?

Sorry for delay in reply

Access to HV_X64_MSR_HYPERCALL happens during execution KiSystemStartup
function code, which I think can be treated as a partition create time. 
 
> 
> No need to store kvmclock_offset in hv_ref_count? (moreover
> the name is weird, better name would be "hv_ref_start_time".
> 
> > +		break;
> > +	}
> > +	case HV_X64_MSR_REFERENCE_TSC: {
> > +		u64 gfn;
> > +		unsigned long addr;
> > +		HV_REFERENCE_TSC_PAGE tsc_ref;
> > +		tsc_ref.tsc_sequence = 0;
> > +		if (!(data & HV_X64_MSR_TSC_REFERENCE_ENABLE)) {
> > +			kvm->arch.hv_tsc_page = data;
> > +			break;
> > +		}
> > +		gfn = data >> HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT;
> > +		addr = gfn_to_hva(kvm, data >>
> > +			HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT);
> > +		if (kvm_is_error_hva(addr))
> > +			return 1;
> > +		if (__copy_to_user((void __user *)addr, &tsc_ref, sizeof(tsc_ref)))
> > +			return 1;
> > +		mark_page_dirty(kvm, gfn);
> > +		kvm->arch.hv_tsc_page = data;
> >  		break;
> >  	}
> >  	default:
> > @@ -2291,6 +2316,17 @@ static int get_msr_hyperv_pw(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
> >  	case HV_X64_MSR_HYPERCALL:
> >  		data = kvm->arch.hv_hypercall;
> >  		break;
> > +	case HV_X64_MSR_TIME_REF_COUNT: {
> > +		u64 now_ns;
> > +		local_irq_disable();
> > +		now_ns = get_kernel_ns() + kvm->arch.kvmclock_offset;
> > +		data = div_u64(now_ns - kvm->arch.hv_ref_count, 100);
> > +		local_irq_enable();
> 
> No need for irq disable/enable pairs.
> 
> > +		break;
> > +	}
> > +	case HV_X64_MSR_REFERENCE_TSC:
> > +		data = kvm->arch.hv_tsc_page;
> > +		break;
> 
> Does it return non-zero data even if not enabled?

kernel reads HV_X64_MSR_REFERENCE_TSC MSR only once during hyperv
initialization and checks checks enable bit
 
> 
> >  	default:
> >  		vcpu_unimpl(vcpu, "Hyper-V unhandled rdmsr: 0x%x\n", msr);
> >  		return 1;
> > @@ -2605,6 +2641,7 @@ int kvm_dev_ioctl_check_extension(long ext)
> >  	case KVM_CAP_ASSIGN_DEV_IRQ:
> >  	case KVM_CAP_PCI_2_3:
> >  #endif
> > +	case KVM_CAP_HYPERV_TIME:
> >  		r = 1;
> >  		break;
> >  	case KVM_CAP_COALESCED_MMIO:
> > diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
> > index 902f124..686c1ca 100644
> > --- a/include/uapi/linux/kvm.h
> > +++ b/include/uapi/linux/kvm.h
> > @@ -674,6 +674,7 @@ struct kvm_ppc_smmu_info {
> >  #define KVM_CAP_ARM_EL1_32BIT 93
> >  #define KVM_CAP_SPAPR_MULTITCE 94
> >  #define KVM_CAP_EXT_EMUL_CPUID 95
> > +#define KVM_CAP_HYPERV_TIME 96
> >  
> >  #ifdef KVM_CAP_IRQ_ROUTING
> >  
> > -- 
> > 1.8.1.4
> --
> To unsubscribe from this list: send the line "unsubscribe kvm" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html


--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index ae5d783..2fd0753 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -605,6 +605,8 @@  struct kvm_arch {
 	/* fields used by HYPER-V emulation */
 	u64 hv_guest_os_id;
 	u64 hv_hypercall;
+	u64 hv_ref_count;
+	u64 hv_tsc_page;
 
 	#ifdef CONFIG_KVM_MMU_AUDIT
 	int audit_point;
diff --git a/arch/x86/include/uapi/asm/hyperv.h b/arch/x86/include/uapi/asm/hyperv.h
index b8f1c01..462efe7 100644
--- a/arch/x86/include/uapi/asm/hyperv.h
+++ b/arch/x86/include/uapi/asm/hyperv.h
@@ -28,6 +28,9 @@ 
 /* Partition Reference Counter (HV_X64_MSR_TIME_REF_COUNT) available*/
 #define HV_X64_MSR_TIME_REF_COUNT_AVAILABLE	(1 << 1)
 
+/* A partition's reference time stamp counter (TSC) page */
+#define HV_X64_MSR_REFERENCE_TSC		0x40000021
+
 /*
  * There is a single feature flag that signifies the presence of the MSR
  * that can be used to retrieve both the local APIC Timer frequency as
@@ -198,6 +201,9 @@ 
 #define HV_X64_MSR_APIC_ASSIST_PAGE_ADDRESS_MASK	\
 		(~((1ull << HV_X64_MSR_APIC_ASSIST_PAGE_ADDRESS_SHIFT) - 1))
 
+#define HV_X64_MSR_TSC_REFERENCE_ENABLE		0x00000001
+#define HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT	12
+
 #define HV_PROCESSOR_POWER_STATE_C0		0
 #define HV_PROCESSOR_POWER_STATE_C1		1
 #define HV_PROCESSOR_POWER_STATE_C2		2
@@ -210,4 +216,11 @@ 
 #define HV_STATUS_INVALID_ALIGNMENT		4
 #define HV_STATUS_INSUFFICIENT_BUFFERS		19
 
+typedef struct _HV_REFERENCE_TSC_PAGE {
+	__u32 tsc_sequence;
+	__u32 res1;
+	__u64 tsc_scale;
+	__s64 tsc_offset;
+} HV_REFERENCE_TSC_PAGE, *PHV_REFERENCE_TSC_PAGE;
+
 #endif
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 21ef1ba..5e4e495a 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -840,7 +840,7 @@  EXPORT_SYMBOL_GPL(kvm_rdpmc);
 static u32 msrs_to_save[] = {
 	MSR_KVM_SYSTEM_TIME, MSR_KVM_WALL_CLOCK,
 	MSR_KVM_SYSTEM_TIME_NEW, MSR_KVM_WALL_CLOCK_NEW,
-	HV_X64_MSR_GUEST_OS_ID, HV_X64_MSR_HYPERCALL,
+	HV_X64_MSR_GUEST_OS_ID, HV_X64_MSR_HYPERCALL, HV_X64_MSR_TIME_REF_COUNT,
 	HV_X64_MSR_APIC_ASSIST_PAGE, MSR_KVM_ASYNC_PF_EN, MSR_KVM_STEAL_TIME,
 	MSR_KVM_PV_EOI_EN,
 	MSR_IA32_SYSENTER_CS, MSR_IA32_SYSENTER_ESP, MSR_IA32_SYSENTER_EIP,
@@ -1826,6 +1826,8 @@  static bool kvm_hv_msr_partition_wide(u32 msr)
 	switch (msr) {
 	case HV_X64_MSR_GUEST_OS_ID:
 	case HV_X64_MSR_HYPERCALL:
+	case HV_X64_MSR_REFERENCE_TSC:
+	case HV_X64_MSR_TIME_REF_COUNT:
 		r = true;
 		break;
 	}
@@ -1865,6 +1867,29 @@  static int set_msr_hyperv_pw(struct kvm_vcpu *vcpu, u32 msr, u64 data)
 		if (__copy_to_user((void __user *)addr, instructions, 4))
 			return 1;
 		kvm->arch.hv_hypercall = data;
+		local_irq_disable();
+		kvm->arch.hv_ref_count = get_kernel_ns() + kvm->arch.kvmclock_offset;
+		local_irq_enable();
+		break;
+	}
+	case HV_X64_MSR_REFERENCE_TSC: {
+		u64 gfn;
+		unsigned long addr;
+		HV_REFERENCE_TSC_PAGE tsc_ref;
+		tsc_ref.tsc_sequence = 0;
+		if (!(data & HV_X64_MSR_TSC_REFERENCE_ENABLE)) {
+			kvm->arch.hv_tsc_page = data;
+			break;
+		}
+		gfn = data >> HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT;
+		addr = gfn_to_hva(kvm, data >>
+			HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT);
+		if (kvm_is_error_hva(addr))
+			return 1;
+		if (__copy_to_user((void __user *)addr, &tsc_ref, sizeof(tsc_ref)))
+			return 1;
+		mark_page_dirty(kvm, gfn);
+		kvm->arch.hv_tsc_page = data;
 		break;
 	}
 	default:
@@ -2291,6 +2316,17 @@  static int get_msr_hyperv_pw(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
 	case HV_X64_MSR_HYPERCALL:
 		data = kvm->arch.hv_hypercall;
 		break;
+	case HV_X64_MSR_TIME_REF_COUNT: {
+		u64 now_ns;
+		local_irq_disable();
+		now_ns = get_kernel_ns() + kvm->arch.kvmclock_offset;
+		data = div_u64(now_ns - kvm->arch.hv_ref_count, 100);
+		local_irq_enable();
+		break;
+	}
+	case HV_X64_MSR_REFERENCE_TSC:
+		data = kvm->arch.hv_tsc_page;
+		break;
 	default:
 		vcpu_unimpl(vcpu, "Hyper-V unhandled rdmsr: 0x%x\n", msr);
 		return 1;
@@ -2605,6 +2641,7 @@  int kvm_dev_ioctl_check_extension(long ext)
 	case KVM_CAP_ASSIGN_DEV_IRQ:
 	case KVM_CAP_PCI_2_3:
 #endif
+	case KVM_CAP_HYPERV_TIME:
 		r = 1;
 		break;
 	case KVM_CAP_COALESCED_MMIO:
diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index 902f124..686c1ca 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -674,6 +674,7 @@  struct kvm_ppc_smmu_info {
 #define KVM_CAP_ARM_EL1_32BIT 93
 #define KVM_CAP_SPAPR_MULTITCE 94
 #define KVM_CAP_EXT_EMUL_CPUID 95
+#define KVM_CAP_HYPERV_TIME 96
 
 #ifdef KVM_CAP_IRQ_ROUTING