diff mbox

[RFC,v5] add support for Hyper-V reference time counter

Message ID 1389863917-18558-1-git-send-email-vrozenfe@redhat.com (mailing list archive)
State New, archived
Headers show

Commit Message

Vadim Rozenfeld Jan. 16, 2014, 9:18 a.m. UTC
Signed-off: Peter Lieven <pl@kamp.de>
Signed-off: Gleb Natapov
Signed-off: Vadim Rozenfeld <vrozenfe@redhat.com>
 
After some consideration I decided to submit only Hyper-V reference
counters support this time. I will submit iTSC support as a separate
patch as soon as it is ready. 

v1 -> v2
1. mark TSC page dirty as suggested by 
    Eric Northup <digitaleric@google.com> and Gleb
2. disable local irq when calling get_kernel_ns, 
    as it was done by Peter Lieven <pl@kamp.de>
3. move check for TSC page enable from second patch
    to this one.

v3 -> v4
    Get rid of ref counter offset.

v4 -> v5
    replace __copy_to_user with kvm_write_guest
    when updating iTSC page.

---
 arch/x86/include/asm/kvm_host.h    |  1 +
 arch/x86/include/uapi/asm/hyperv.h | 13 +++++++++++++
 arch/x86/kvm/x86.c                 | 28 +++++++++++++++++++++++++++-
 include/uapi/linux/kvm.h           |  1 +
 4 files changed, 42 insertions(+), 1 deletion(-)

Comments

Marcelo Tosatti Jan. 16, 2014, 10:23 p.m. UTC | #1
On Thu, Jan 16, 2014 at 08:18:37PM +1100, Vadim Rozenfeld wrote:
> Signed-off: Peter Lieven <pl@kamp.de>
> Signed-off: Gleb Natapov
> Signed-off: Vadim Rozenfeld <vrozenfe@redhat.com>
>  
> After some consideration I decided to submit only Hyper-V reference
> counters support this time. I will submit iTSC support as a separate
> patch as soon as it is ready. 
> 
> v1 -> v2
> 1. mark TSC page dirty as suggested by 
>     Eric Northup <digitaleric@google.com> and Gleb
> 2. disable local irq when calling get_kernel_ns, 
>     as it was done by Peter Lieven <pl@amp.de>
> 3. move check for TSC page enable from second patch
>     to this one.
> 
> v3 -> v4
>     Get rid of ref counter offset.
> 
> v4 -> v5
>     replace __copy_to_user with kvm_write_guest
>     when updateing iTSC page.
> 
> ---
>  arch/x86/include/asm/kvm_host.h    |  1 +
>  arch/x86/include/uapi/asm/hyperv.h | 13 +++++++++++++
>  arch/x86/kvm/x86.c                 | 28 +++++++++++++++++++++++++++-
>  include/uapi/linux/kvm.h           |  1 +
>  4 files changed, 42 insertions(+), 1 deletion(-)
> 
> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> index ae5d783..33fef07 100644
> --- a/arch/x86/include/asm/kvm_host.h
> +++ b/arch/x86/include/asm/kvm_host.h
> @@ -605,6 +605,7 @@ struct kvm_arch {
>  	/* fields used by HYPER-V emulation */
>  	u64 hv_guest_os_id;
>  	u64 hv_hypercall;
> +	u64 hv_tsc_page;
>  
>  	#ifdef CONFIG_KVM_MMU_AUDIT
>  	int audit_point;
> diff --git a/arch/x86/include/uapi/asm/hyperv.h b/arch/x86/include/uapi/asm/hyperv.h
> index b8f1c01..462efe7 100644
> --- a/arch/x86/include/uapi/asm/hyperv.h
> +++ b/arch/x86/include/uapi/asm/hyperv.h
> @@ -28,6 +28,9 @@
>  /* Partition Reference Counter (HV_X64_MSR_TIME_REF_COUNT) available*/
>  #define HV_X64_MSR_TIME_REF_COUNT_AVAILABLE	(1 << 1)
>  
> +/* A partition's reference time stamp counter (TSC) page */
> +#define HV_X64_MSR_REFERENCE_TSC		0x40000021
> +
>  /*
>   * There is a single feature flag that signifies the presence of the MSR
>   * that can be used to retrieve both the local APIC Timer frequency as
> @@ -198,6 +201,9 @@
>  #define HV_X64_MSR_APIC_ASSIST_PAGE_ADDRESS_MASK	\
>  		(~((1ull << HV_X64_MSR_APIC_ASSIST_PAGE_ADDRESS_SHIFT) - 1))
>  
> +#define HV_X64_MSR_TSC_REFERENCE_ENABLE		0x00000001
> +#define HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT	12
> +
>  #define HV_PROCESSOR_POWER_STATE_C0		0
>  #define HV_PROCESSOR_POWER_STATE_C1		1
>  #define HV_PROCESSOR_POWER_STATE_C2		2
> @@ -210,4 +216,11 @@
>  #define HV_STATUS_INVALID_ALIGNMENT		4
>  #define HV_STATUS_INSUFFICIENT_BUFFERS		19
>  
> +typedef struct _HV_REFERENCE_TSC_PAGE {
> +	__u32 tsc_sequence;
> +	__u32 res1;
> +	__u64 tsc_scale;
> +	__s64 tsc_offset;
> +} HV_REFERENCE_TSC_PAGE, *PHV_REFERENCE_TSC_PAGE;
> +
>  #endif
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index 5d004da..8e685b8 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -836,11 +836,12 @@ EXPORT_SYMBOL_GPL(kvm_rdpmc);
>   * kvm-specific. Those are put in the beginning of the list.
>   */
>  
> -#define KVM_SAVE_MSRS_BEGIN	10
> +#define KVM_SAVE_MSRS_BEGIN	12
>  static u32 msrs_to_save[] = {
>  	MSR_KVM_SYSTEM_TIME, MSR_KVM_WALL_CLOCK,
>  	MSR_KVM_SYSTEM_TIME_NEW, MSR_KVM_WALL_CLOCK_NEW,
>  	HV_X64_MSR_GUEST_OS_ID, HV_X64_MSR_HYPERCALL,
> +	HV_X64_MSR_TIME_REF_COUNT, HV_X64_MSR_REFERENCE_TSC,
>  	HV_X64_MSR_APIC_ASSIST_PAGE, MSR_KVM_ASYNC_PF_EN, MSR_KVM_STEAL_TIME,
>  	MSR_KVM_PV_EOI_EN,
>  	MSR_IA32_SYSENTER_CS, MSR_IA32_SYSENTER_ESP, MSR_IA32_SYSENTER_EIP,
> @@ -1826,6 +1827,8 @@ static bool kvm_hv_msr_partition_wide(u32 msr)
>  	switch (msr) {
>  	case HV_X64_MSR_GUEST_OS_ID:
>  	case HV_X64_MSR_HYPERCALL:
> +	case HV_X64_MSR_REFERENCE_TSC:
> +	case HV_X64_MSR_TIME_REF_COUNT:
>  		r = true;
>  		break;
>  	}
> @@ -1867,6 +1870,20 @@ static int set_msr_hyperv_pw(struct kvm_vcpu *vcpu, u32 msr, u64 data)
>  		kvm->arch.hv_hypercall = data;
>  		break;
>  	}
> +	case HV_X64_MSR_REFERENCE_TSC: {
> +		u64 gfn;
> +		HV_REFERENCE_TSC_PAGE tsc_ref;
> +		memset(&tsc_ref, 0, sizeof(tsc_ref));
> +		kvm->arch.hv_tsc_page = data;

Comment 1)

Is there a reason (i.e. compliance with the spec) to retain the
value written by a HV_X64_MSR_REFERENCE_TSC wrmsr operation when
HV_X64_MSR_TSC_REFERENCE_ENABLE is not set?

If not, kvm->arch.hv_tsc_page should only be assigned after the proper checks.

> +		if (!(data & HV_X64_MSR_TSC_REFERENCE_ENABLE))
> +			break;
> +		gfn = data >> HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT;
> +		if (kvm_write_guest(kvm, data,
> +			&tsc_ref, sizeof(tsc_ref)))
> +			return 1;
> +		mark_page_dirty(kvm, gfn);
> +		break;
> +	}
>  	default:
>  		vcpu_unimpl(vcpu, "HYPER-V unimplemented wrmsr: 0x%x "
>  			    "data 0x%llx\n", msr, data);
> @@ -2291,6 +2308,14 @@ static int get_msr_hyperv_pw(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
>  	case HV_X64_MSR_HYPERCALL:
>  		data = kvm->arch.hv_hypercall;
>  		break;
> +	case HV_X64_MSR_TIME_REF_COUNT: {
> +		data =
> +		     div_u64(get_kernel_ns() + kvm->arch.kvmclock_offset, 100);

Comment 2)

Is there any specification related to the initial value of the clock
after it is enabled ?

> +		break;
> +	}
> +	case HV_X64_MSR_REFERENCE_TSC:
> +		data = kvm->arch.hv_tsc_page;
> +		break;
>  	default:
>  		vcpu_unimpl(vcpu, "Hyper-V unhandled rdmsr: 0x%x\n", msr);
>  		return 1;
> @@ -2604,6 +2629,7 @@ int kvm_dev_ioctl_check_extension(long ext)
>  #ifdef CONFIG_KVM_DEVICE_ASSIGNMENT
>  	case KVM_CAP_ASSIGN_DEV_IRQ:
>  	case KVM_CAP_PCI_2_3:
> +	case KVM_CAP_HYPERV_TIME:
>  #endif
>  		r = 1;
>  		break;
> diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
> index 902f124..686c1ca 100644
> --- a/include/uapi/linux/kvm.h
> +++ b/include/uapi/linux/kvm.h
> @@ -674,6 +674,7 @@ struct kvm_ppc_smmu_info {
>  #define KVM_CAP_ARM_EL1_32BIT 93
>  #define KVM_CAP_SPAPR_MULTITCE 94
>  #define KVM_CAP_EXT_EMUL_CPUID 95
> +#define KVM_CAP_HYPERV_TIME 96
>  
>  #ifdef KVM_CAP_IRQ_ROUTING
>  
> -- 
> 1.8.1.4
> 

Comment 3) 

Missing qemu HV_X64_MSR_REFERENCE_TSC save/restore.

No further comments.

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Paolo Bonzini Jan. 17, 2014, 9:20 a.m. UTC | #2
Il 16/01/2014 23:23, Marcelo Tosatti ha scritto:
> Comment 2)
> 
> Is there any specification related to the initial value of the clock
> after it is enabled ?

The clock counts since the VM was started.

Paolo
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Vadim Rozenfeld Jan. 17, 2014, 11:06 a.m. UTC | #3
On Thu, 2014-01-16 at 20:23 -0200, Marcelo Tosatti wrote:
> On Thu, Jan 16, 2014 at 08:18:37PM +1100, Vadim Rozenfeld wrote:
> > Signed-off: Peter Lieven <pl@kamp.de>
> > Signed-off: Gleb Natapov
> > Signed-off: Vadim Rozenfeld <vrozenfe@redhat.com>
> >  
> > After some consideration I decided to submit only Hyper-V reference
> > counters support this time. I will submit iTSC support as a separate
> > patch as soon as it is ready. 
> > 
> > v1 -> v2
> > 1. mark TSC page dirty as suggested by 
> >     Eric Northup <digitaleric@google.com> and Gleb
> > 2. disable local irq when calling get_kernel_ns, 
> >     as it was done by Peter Lieven <pl@amp.de>
> > 3. move check for TSC page enable from second patch
> >     to this one.
> > 
> > v3 -> v4
> >     Get rid of ref counter offset.
> > 
> > v4 -> v5
> >     replace __copy_to_user with kvm_write_guest
> >     when updateing iTSC page.
> > 
> > ---
> >  arch/x86/include/asm/kvm_host.h    |  1 +
> >  arch/x86/include/uapi/asm/hyperv.h | 13 +++++++++++++
> >  arch/x86/kvm/x86.c                 | 28 +++++++++++++++++++++++++++-
> >  include/uapi/linux/kvm.h           |  1 +
> >  4 files changed, 42 insertions(+), 1 deletion(-)
> > 
> > diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> > index ae5d783..33fef07 100644
> > --- a/arch/x86/include/asm/kvm_host.h
> > +++ b/arch/x86/include/asm/kvm_host.h
> > @@ -605,6 +605,7 @@ struct kvm_arch {
> >  	/* fields used by HYPER-V emulation */
> >  	u64 hv_guest_os_id;
> >  	u64 hv_hypercall;
> > +	u64 hv_tsc_page;
> >  
> >  	#ifdef CONFIG_KVM_MMU_AUDIT
> >  	int audit_point;
> > diff --git a/arch/x86/include/uapi/asm/hyperv.h b/arch/x86/include/uapi/asm/hyperv.h
> > index b8f1c01..462efe7 100644
> > --- a/arch/x86/include/uapi/asm/hyperv.h
> > +++ b/arch/x86/include/uapi/asm/hyperv.h
> > @@ -28,6 +28,9 @@
> >  /* Partition Reference Counter (HV_X64_MSR_TIME_REF_COUNT) available*/
> >  #define HV_X64_MSR_TIME_REF_COUNT_AVAILABLE	(1 << 1)
> >  
> > +/* A partition's reference time stamp counter (TSC) page */
> > +#define HV_X64_MSR_REFERENCE_TSC		0x40000021
> > +
> >  /*
> >   * There is a single feature flag that signifies the presence of the MSR
> >   * that can be used to retrieve both the local APIC Timer frequency as
> > @@ -198,6 +201,9 @@
> >  #define HV_X64_MSR_APIC_ASSIST_PAGE_ADDRESS_MASK	\
> >  		(~((1ull << HV_X64_MSR_APIC_ASSIST_PAGE_ADDRESS_SHIFT) - 1))
> >  
> > +#define HV_X64_MSR_TSC_REFERENCE_ENABLE		0x00000001
> > +#define HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT	12
> > +
> >  #define HV_PROCESSOR_POWER_STATE_C0		0
> >  #define HV_PROCESSOR_POWER_STATE_C1		1
> >  #define HV_PROCESSOR_POWER_STATE_C2		2
> > @@ -210,4 +216,11 @@
> >  #define HV_STATUS_INVALID_ALIGNMENT		4
> >  #define HV_STATUS_INSUFFICIENT_BUFFERS		19
> >  
> > +typedef struct _HV_REFERENCE_TSC_PAGE {
> > +	__u32 tsc_sequence;
> > +	__u32 res1;
> > +	__u64 tsc_scale;
> > +	__s64 tsc_offset;
> > +} HV_REFERENCE_TSC_PAGE, *PHV_REFERENCE_TSC_PAGE;
> > +
> >  #endif
> > diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> > index 5d004da..8e685b8 100644
> > --- a/arch/x86/kvm/x86.c
> > +++ b/arch/x86/kvm/x86.c
> > @@ -836,11 +836,12 @@ EXPORT_SYMBOL_GPL(kvm_rdpmc);
> >   * kvm-specific. Those are put in the beginning of the list.
> >   */
> >  
> > -#define KVM_SAVE_MSRS_BEGIN	10
> > +#define KVM_SAVE_MSRS_BEGIN	12
> >  static u32 msrs_to_save[] = {
> >  	MSR_KVM_SYSTEM_TIME, MSR_KVM_WALL_CLOCK,
> >  	MSR_KVM_SYSTEM_TIME_NEW, MSR_KVM_WALL_CLOCK_NEW,
> >  	HV_X64_MSR_GUEST_OS_ID, HV_X64_MSR_HYPERCALL,
> > +	HV_X64_MSR_TIME_REF_COUNT, HV_X64_MSR_REFERENCE_TSC,
> >  	HV_X64_MSR_APIC_ASSIST_PAGE, MSR_KVM_ASYNC_PF_EN, MSR_KVM_STEAL_TIME,
> >  	MSR_KVM_PV_EOI_EN,
> >  	MSR_IA32_SYSENTER_CS, MSR_IA32_SYSENTER_ESP, MSR_IA32_SYSENTER_EIP,
> > @@ -1826,6 +1827,8 @@ static bool kvm_hv_msr_partition_wide(u32 msr)
> >  	switch (msr) {
> >  	case HV_X64_MSR_GUEST_OS_ID:
> >  	case HV_X64_MSR_HYPERCALL:
> > +	case HV_X64_MSR_REFERENCE_TSC:
> > +	case HV_X64_MSR_TIME_REF_COUNT:
> >  		r = true;
> >  		break;
> >  	}
> > @@ -1867,6 +1870,20 @@ static int set_msr_hyperv_pw(struct kvm_vcpu *vcpu, u32 msr, u64 data)
> >  		kvm->arch.hv_hypercall = data;
> >  		break;
> >  	}
> > +	case HV_X64_MSR_REFERENCE_TSC: {
> > +		u64 gfn;
> > +		HV_REFERENCE_TSC_PAGE tsc_ref;
> > +		memset(&tsc_ref, 0, sizeof(tsc_ref));
> > +		kvm->arch.hv_tsc_page = data;
> 
> Comment 1)
> 
> Is there a reason (that is compliance with spec) to maintain
> value, for HV_X64_MSR_REFERENCE_TSC wrmsr operation, in case
> HV_X64_MSR_TSC_REFERENCE_ENABLE is not set?
 
Windows seems to retrieve the HV_X64_MSR_REFERENCE_TSC value only once,
at boot-up: it checks the HV_X64_MSR_TSC_REFERENCE_ENABLE bit, allocates
one page and maps it into the system space, and writes the page address to
the HV_X64_MSR_REFERENCE_TSC MSR if this bit was not set. Windows keeps the
TSC page address value in the HvlReferenceTscPage variable and uses it
every time it needs to read the TSC page content.

> 
> If not, should only assign to kvm->arch.hv_tsc_page after proper checks.
> 
> > +		if (!(data & HV_X64_MSR_TSC_REFERENCE_ENABLE))
> > +			break;
> > +		gfn = data >> HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT;
> > +		if (kvm_write_guest(kvm, data,
> > +			&tsc_ref, sizeof(tsc_ref)))
> > +			return 1;
> > +		mark_page_dirty(kvm, gfn);
> > +		break;
> > +	}
> >  	default:
> >  		vcpu_unimpl(vcpu, "HYPER-V unimplemented wrmsr: 0x%x "
> >  			    "data 0x%llx\n", msr, data);
> > @@ -2291,6 +2308,14 @@ static int get_msr_hyperv_pw(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
> >  	case HV_X64_MSR_HYPERCALL:
> >  		data = kvm->arch.hv_hypercall;
> >  		break;
> > +	case HV_X64_MSR_TIME_REF_COUNT: {
> > +		data =
> > +		     div_u64(get_kernel_ns() + kvm->arch.kvmclock_offset, 100);
> 
> Comment 2)
> 
> Is there any specification related to the initial value of the clock
> after it is enabled ?

The MS documentation says the following:
"A partition's reference time counter is initialized to zero when the
partition is created. The reference time counter for all partitions
count at the same rate. However, at any time, the absolute value of each
reference time counter will typically differ because partitions will
have different creation times."

http://msdn.microsoft.com/en-us/library/windows/hardware/ff542637%28v=vs.85%29.aspx

However, Windows doesn't seem to be worried about the initial value.
HvlGetReferenceTime only reads the HV_X64_MSR_TIME_REF_COUNT MSR and
returns the obtained value.

> 
> > +		break;
> > +	}
> > +	case HV_X64_MSR_REFERENCE_TSC:
> > +		data = kvm->arch.hv_tsc_page;
> > +		break;
> >  	default:
> >  		vcpu_unimpl(vcpu, "Hyper-V unhandled rdmsr: 0x%x\n", msr);
> >  		return 1;
> > @@ -2604,6 +2629,7 @@ int kvm_dev_ioctl_check_extension(long ext)
> >  #ifdef CONFIG_KVM_DEVICE_ASSIGNMENT
> >  	case KVM_CAP_ASSIGN_DEV_IRQ:
> >  	case KVM_CAP_PCI_2_3:
> > +	case KVM_CAP_HYPERV_TIME:
> >  #endif
> >  		r = 1;
> >  		break;
> > diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
> > index 902f124..686c1ca 100644
> > --- a/include/uapi/linux/kvm.h
> > +++ b/include/uapi/linux/kvm.h
> > @@ -674,6 +674,7 @@ struct kvm_ppc_smmu_info {
> >  #define KVM_CAP_ARM_EL1_32BIT 93
> >  #define KVM_CAP_SPAPR_MULTITCE 94
> >  #define KVM_CAP_EXT_EMUL_CPUID 95
> > +#define KVM_CAP_HYPERV_TIME 96
> >  
> >  #ifdef KVM_CAP_IRQ_ROUTING
> >  
> > -- 
> > 1.8.1.4
> > 
> 
> Comment 3) 
> 
> Missing qemu HV_X64_MSR_REFERENCE_TSC save/restore.
I have this code ready. I'm going to send it to the qemu
list as soon as the KVM patch is accepted.

Vadim. 
> 
> No further comments.
> 


--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Marcelo Tosatti Jan. 17, 2014, 1:18 p.m. UTC | #4
On Fri, Jan 17, 2014 at 10:06:00PM +1100, Vadim Rozenfeld wrote:
> On Thu, 2014-01-16 at 20:23 -0200, Marcelo Tosatti wrote:
> > On Thu, Jan 16, 2014 at 08:18:37PM +1100, Vadim Rozenfeld wrote:
> > > Signed-off: Peter Lieven <pl@kamp.de>
> > > Signed-off: Gleb Natapov
> > > Signed-off: Vadim Rozenfeld <vrozenfe@redhat.com>
> > >  
> > > After some consideration I decided to submit only Hyper-V reference
> > > counters support this time. I will submit iTSC support as a separate
> > > patch as soon as it is ready. 
> > > 
> > > v1 -> v2
> > > 1. mark TSC page dirty as suggested by 
> > >     Eric Northup <digitaleric@google.com> and Gleb
> > > 2. disable local irq when calling get_kernel_ns, 
> > >     as it was done by Peter Lieven <pl@amp.de>
> > > 3. move check for TSC page enable from second patch
> > >     to this one.
> > > 
> > > v3 -> v4
> > >     Get rid of ref counter offset.
> > > 
> > > v4 -> v5
> > >     replace __copy_to_user with kvm_write_guest
> > >     when updateing iTSC page.
> > > 
> > > ---
> > >  arch/x86/include/asm/kvm_host.h    |  1 +
> > >  arch/x86/include/uapi/asm/hyperv.h | 13 +++++++++++++
> > >  arch/x86/kvm/x86.c                 | 28 +++++++++++++++++++++++++++-
> > >  include/uapi/linux/kvm.h           |  1 +
> > >  4 files changed, 42 insertions(+), 1 deletion(-)
> > > 
> > > diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> > > index ae5d783..33fef07 100644
> > > --- a/arch/x86/include/asm/kvm_host.h
> > > +++ b/arch/x86/include/asm/kvm_host.h
> > > @@ -605,6 +605,7 @@ struct kvm_arch {
> > >  	/* fields used by HYPER-V emulation */
> > >  	u64 hv_guest_os_id;
> > >  	u64 hv_hypercall;
> > > +	u64 hv_tsc_page;
> > >  
> > >  	#ifdef CONFIG_KVM_MMU_AUDIT
> > >  	int audit_point;
> > > diff --git a/arch/x86/include/uapi/asm/hyperv.h b/arch/x86/include/uapi/asm/hyperv.h
> > > index b8f1c01..462efe7 100644
> > > --- a/arch/x86/include/uapi/asm/hyperv.h
> > > +++ b/arch/x86/include/uapi/asm/hyperv.h
> > > @@ -28,6 +28,9 @@
> > >  /* Partition Reference Counter (HV_X64_MSR_TIME_REF_COUNT) available*/
> > >  #define HV_X64_MSR_TIME_REF_COUNT_AVAILABLE	(1 << 1)
> > >  
> > > +/* A partition's reference time stamp counter (TSC) page */
> > > +#define HV_X64_MSR_REFERENCE_TSC		0x40000021
> > > +
> > >  /*
> > >   * There is a single feature flag that signifies the presence of the MSR
> > >   * that can be used to retrieve both the local APIC Timer frequency as
> > > @@ -198,6 +201,9 @@
> > >  #define HV_X64_MSR_APIC_ASSIST_PAGE_ADDRESS_MASK	\
> > >  		(~((1ull << HV_X64_MSR_APIC_ASSIST_PAGE_ADDRESS_SHIFT) - 1))
> > >  
> > > +#define HV_X64_MSR_TSC_REFERENCE_ENABLE		0x00000001
> > > +#define HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT	12
> > > +
> > >  #define HV_PROCESSOR_POWER_STATE_C0		0
> > >  #define HV_PROCESSOR_POWER_STATE_C1		1
> > >  #define HV_PROCESSOR_POWER_STATE_C2		2
> > > @@ -210,4 +216,11 @@
> > >  #define HV_STATUS_INVALID_ALIGNMENT		4
> > >  #define HV_STATUS_INSUFFICIENT_BUFFERS		19
> > >  
> > > +typedef struct _HV_REFERENCE_TSC_PAGE {
> > > +	__u32 tsc_sequence;
> > > +	__u32 res1;
> > > +	__u64 tsc_scale;
> > > +	__s64 tsc_offset;
> > > +} HV_REFERENCE_TSC_PAGE, *PHV_REFERENCE_TSC_PAGE;
> > > +
> > >  #endif
> > > diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> > > index 5d004da..8e685b8 100644
> > > --- a/arch/x86/kvm/x86.c
> > > +++ b/arch/x86/kvm/x86.c
> > > @@ -836,11 +836,12 @@ EXPORT_SYMBOL_GPL(kvm_rdpmc);
> > >   * kvm-specific. Those are put in the beginning of the list.
> > >   */
> > >  
> > > -#define KVM_SAVE_MSRS_BEGIN	10
> > > +#define KVM_SAVE_MSRS_BEGIN	12
> > >  static u32 msrs_to_save[] = {
> > >  	MSR_KVM_SYSTEM_TIME, MSR_KVM_WALL_CLOCK,
> > >  	MSR_KVM_SYSTEM_TIME_NEW, MSR_KVM_WALL_CLOCK_NEW,
> > >  	HV_X64_MSR_GUEST_OS_ID, HV_X64_MSR_HYPERCALL,
> > > +	HV_X64_MSR_TIME_REF_COUNT, HV_X64_MSR_REFERENCE_TSC,
> > >  	HV_X64_MSR_APIC_ASSIST_PAGE, MSR_KVM_ASYNC_PF_EN, MSR_KVM_STEAL_TIME,
> > >  	MSR_KVM_PV_EOI_EN,
> > >  	MSR_IA32_SYSENTER_CS, MSR_IA32_SYSENTER_ESP, MSR_IA32_SYSENTER_EIP,
> > > @@ -1826,6 +1827,8 @@ static bool kvm_hv_msr_partition_wide(u32 msr)
> > >  	switch (msr) {
> > >  	case HV_X64_MSR_GUEST_OS_ID:
> > >  	case HV_X64_MSR_HYPERCALL:
> > > +	case HV_X64_MSR_REFERENCE_TSC:
> > > +	case HV_X64_MSR_TIME_REF_COUNT:
> > >  		r = true;
> > >  		break;
> > >  	}
> > > @@ -1867,6 +1870,20 @@ static int set_msr_hyperv_pw(struct kvm_vcpu *vcpu, u32 msr, u64 data)
> > >  		kvm->arch.hv_hypercall = data;
> > >  		break;
> > >  	}
> > > +	case HV_X64_MSR_REFERENCE_TSC: {
> > > +		u64 gfn;
> > > +		HV_REFERENCE_TSC_PAGE tsc_ref;
> > > +		memset(&tsc_ref, 0, sizeof(tsc_ref));
> > > +		kvm->arch.hv_tsc_page = data;
> > 
> > Comment 1)
> > 
> > Is there a reason (that is compliance with spec) to maintain
> > value, for HV_X64_MSR_REFERENCE_TSC wrmsr operation, in case
> > HV_X64_MSR_TSC_REFERENCE_ENABLE is not set?
>  
> Windows seems to be retrieving HV_X64_MSR_REFERENCE_TSC value only once
> on boot-up, checks HV_X64_MSR_TSC_REFERENCE_ENABLE bit allocate one page
> and maps it into the system space, and writes the page address to
> HV_X64_MSR_REFERENCE_TSC MSR if this bit was not set. Windows keeps the
> TSC page address value in HvlReferenceTscPage variable and uses it
> every time when needs to read the TSC page content.

Ok then it has to be saved/restored irrespective of the value of
HV_X64_MSR_TSC_REFERENCE_ENABLE.

> > If not, should only assign to kvm->arch.hv_tsc_page after proper checks.
> > 
> > > +		if (!(data & HV_X64_MSR_TSC_REFERENCE_ENABLE))
> > > +			break;
> > > +		gfn = data >> HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT;
> > > +		if (kvm_write_guest(kvm, data,
> > > +			&tsc_ref, sizeof(tsc_ref)))
> > > +			return 1;
> > > +		mark_page_dirty(kvm, gfn);
> > > +		break;
> > > +	}
> > >  	default:
> > >  		vcpu_unimpl(vcpu, "HYPER-V unimplemented wrmsr: 0x%x "
> > >  			    "data 0x%llx\n", msr, data);
> > > @@ -2291,6 +2308,14 @@ static int get_msr_hyperv_pw(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
> > >  	case HV_X64_MSR_HYPERCALL:
> > >  		data = kvm->arch.hv_hypercall;
> > >  		break;
> > > +	case HV_X64_MSR_TIME_REF_COUNT: {
> > > +		data =
> > > +		     div_u64(get_kernel_ns() + kvm->arch.kvmclock_offset, 100);
> > 
> > Comment 2)
> > 
> > Is there any specification related to the initial value of the clock
> > after it is enabled ?
> 
> The MS documentation says the following:
> "A partition's reference time counter is initialized to zero when the
> partition is created. The reference time counter for all partitions
> count at the same rate. However, at any time, the absolute value of each
> reference time counter will typically differ because partitions will
> have different creation times."
> 
> http://msdn.microsoft.com/en-us/library/windows/hardware/ff542637%
> 28v=vs.85%29.aspx
> 
> However, Windows doesn't seem to be worried about the initial value.
> HvlGetReferenceTime only reads  HV_X64_MSR_TIME_REF_COUNT MSR and
> returns the obtained value.

Right. It might be better to initialize kvmclock_offset to zero
on VM creation (QEMU's PUT_FULL state) when Hyper-V timers are
enabled, with a reference to the MS docs on top.

Otherwise we might end up hitting timer bugs in Windows code.

But it's a potential improvement; the patch looks good.

> > > +	}
> > > +	case HV_X64_MSR_REFERENCE_TSC:
> > > +		data = kvm->arch.hv_tsc_page;
> > > +		break;
> > >  	default:
> > >  		vcpu_unimpl(vcpu, "Hyper-V unhandled rdmsr: 0x%x\n", msr);
> > >  		return 1;
> > > @@ -2604,6 +2629,7 @@ int kvm_dev_ioctl_check_extension(long ext)
> > >  #ifdef CONFIG_KVM_DEVICE_ASSIGNMENT
> > >  	case KVM_CAP_ASSIGN_DEV_IRQ:
> > >  	case KVM_CAP_PCI_2_3:
> > > +	case KVM_CAP_HYPERV_TIME:
> > >  #endif
> > >  		r = 1;
> > >  		break;
> > > diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
> > > index 902f124..686c1ca 100644
> > > --- a/include/uapi/linux/kvm.h
> > > +++ b/include/uapi/linux/kvm.h
> > > @@ -674,6 +674,7 @@ struct kvm_ppc_smmu_info {
> > >  #define KVM_CAP_ARM_EL1_32BIT 93
> > >  #define KVM_CAP_SPAPR_MULTITCE 94
> > >  #define KVM_CAP_EXT_EMUL_CPUID 95
> > > +#define KVM_CAP_HYPERV_TIME 96
> > >  
> > >  #ifdef KVM_CAP_IRQ_ROUTING
> > >  
> > > -- 
> > > 1.8.1.4
> > > 
> > 
> > Comment 3) 
> > 
> > Missing qemu HV_X64_MSR_REFERENCE_TSC save/restore.
> I have this code ready. I'm going to send it to qemu 
> list as soon as KVM patch will be accepted.

Looks good to me.

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Paolo Bonzini Jan. 17, 2014, 1:25 p.m. UTC | #5
Il 17/01/2014 14:18, Marcelo Tosatti ha scritto:
> On Fri, Jan 17, 2014 at 10:06:00PM +1100, Vadim Rozenfeld wrote:
>> On Thu, 2014-01-16 at 20:23 -0200, Marcelo Tosatti wrote:
>>> On Thu, Jan 16, 2014 at 08:18:37PM +1100, Vadim Rozenfeld wrote:
>>>> Signed-off: Peter Lieven <pl@kamp.de>
>>>> Signed-off: Gleb Natapov
>>>> Signed-off: Vadim Rozenfeld <vrozenfe@redhat.com>
>>>>  
>>>> After some consideration I decided to submit only Hyper-V reference
>>>> counters support this time. I will submit iTSC support as a separate
>>>> patch as soon as it is ready. 
>>>>
>>>> v1 -> v2
>>>> 1. mark TSC page dirty as suggested by 
>>>>     Eric Northup <digitaleric@google.com> and Gleb
>>>> 2. disable local irq when calling get_kernel_ns, 
>>>>     as it was done by Peter Lieven <pl@amp.de>
>>>> 3. move check for TSC page enable from second patch
>>>>     to this one.
>>>>
>>>> v3 -> v4
>>>>     Get rid of ref counter offset.
>>>>
>>>> v4 -> v5
>>>>     replace __copy_to_user with kvm_write_guest
>>>>     when updateing iTSC page.
>>>>
>>>> ---
>>>>  arch/x86/include/asm/kvm_host.h    |  1 +
>>>>  arch/x86/include/uapi/asm/hyperv.h | 13 +++++++++++++
>>>>  arch/x86/kvm/x86.c                 | 28 +++++++++++++++++++++++++++-
>>>>  include/uapi/linux/kvm.h           |  1 +
>>>>  4 files changed, 42 insertions(+), 1 deletion(-)
>>>>
>>>> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
>>>> index ae5d783..33fef07 100644
>>>> --- a/arch/x86/include/asm/kvm_host.h
>>>> +++ b/arch/x86/include/asm/kvm_host.h
>>>> @@ -605,6 +605,7 @@ struct kvm_arch {
>>>>  	/* fields used by HYPER-V emulation */
>>>>  	u64 hv_guest_os_id;
>>>>  	u64 hv_hypercall;
>>>> +	u64 hv_tsc_page;
>>>>  
>>>>  	#ifdef CONFIG_KVM_MMU_AUDIT
>>>>  	int audit_point;
>>>> diff --git a/arch/x86/include/uapi/asm/hyperv.h b/arch/x86/include/uapi/asm/hyperv.h
>>>> index b8f1c01..462efe7 100644
>>>> --- a/arch/x86/include/uapi/asm/hyperv.h
>>>> +++ b/arch/x86/include/uapi/asm/hyperv.h
>>>> @@ -28,6 +28,9 @@
>>>>  /* Partition Reference Counter (HV_X64_MSR_TIME_REF_COUNT) available*/
>>>>  #define HV_X64_MSR_TIME_REF_COUNT_AVAILABLE	(1 << 1)
>>>>  
>>>> +/* A partition's reference time stamp counter (TSC) page */
>>>> +#define HV_X64_MSR_REFERENCE_TSC		0x40000021
>>>> +
>>>>  /*
>>>>   * There is a single feature flag that signifies the presence of the MSR
>>>>   * that can be used to retrieve both the local APIC Timer frequency as
>>>> @@ -198,6 +201,9 @@
>>>>  #define HV_X64_MSR_APIC_ASSIST_PAGE_ADDRESS_MASK	\
>>>>  		(~((1ull << HV_X64_MSR_APIC_ASSIST_PAGE_ADDRESS_SHIFT) - 1))
>>>>  
>>>> +#define HV_X64_MSR_TSC_REFERENCE_ENABLE		0x00000001
>>>> +#define HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT	12
>>>> +
>>>>  #define HV_PROCESSOR_POWER_STATE_C0		0
>>>>  #define HV_PROCESSOR_POWER_STATE_C1		1
>>>>  #define HV_PROCESSOR_POWER_STATE_C2		2
>>>> @@ -210,4 +216,11 @@
>>>>  #define HV_STATUS_INVALID_ALIGNMENT		4
>>>>  #define HV_STATUS_INSUFFICIENT_BUFFERS		19
>>>>  
>>>> +typedef struct _HV_REFERENCE_TSC_PAGE {
>>>> +	__u32 tsc_sequence;
>>>> +	__u32 res1;
>>>> +	__u64 tsc_scale;
>>>> +	__s64 tsc_offset;
>>>> +} HV_REFERENCE_TSC_PAGE, *PHV_REFERENCE_TSC_PAGE;
>>>> +
>>>>  #endif
>>>> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
>>>> index 5d004da..8e685b8 100644
>>>> --- a/arch/x86/kvm/x86.c
>>>> +++ b/arch/x86/kvm/x86.c
>>>> @@ -836,11 +836,12 @@ EXPORT_SYMBOL_GPL(kvm_rdpmc);
>>>>   * kvm-specific. Those are put in the beginning of the list.
>>>>   */
>>>>  
>>>> -#define KVM_SAVE_MSRS_BEGIN	10
>>>> +#define KVM_SAVE_MSRS_BEGIN	12
>>>>  static u32 msrs_to_save[] = {
>>>>  	MSR_KVM_SYSTEM_TIME, MSR_KVM_WALL_CLOCK,
>>>>  	MSR_KVM_SYSTEM_TIME_NEW, MSR_KVM_WALL_CLOCK_NEW,
>>>>  	HV_X64_MSR_GUEST_OS_ID, HV_X64_MSR_HYPERCALL,
>>>> +	HV_X64_MSR_TIME_REF_COUNT, HV_X64_MSR_REFERENCE_TSC,
>>>>  	HV_X64_MSR_APIC_ASSIST_PAGE, MSR_KVM_ASYNC_PF_EN, MSR_KVM_STEAL_TIME,
>>>>  	MSR_KVM_PV_EOI_EN,
>>>>  	MSR_IA32_SYSENTER_CS, MSR_IA32_SYSENTER_ESP, MSR_IA32_SYSENTER_EIP,
>>>> @@ -1826,6 +1827,8 @@ static bool kvm_hv_msr_partition_wide(u32 msr)
>>>>  	switch (msr) {
>>>>  	case HV_X64_MSR_GUEST_OS_ID:
>>>>  	case HV_X64_MSR_HYPERCALL:
>>>> +	case HV_X64_MSR_REFERENCE_TSC:
>>>> +	case HV_X64_MSR_TIME_REF_COUNT:
>>>>  		r = true;
>>>>  		break;
>>>>  	}
>>>> @@ -1867,6 +1870,20 @@ static int set_msr_hyperv_pw(struct kvm_vcpu *vcpu, u32 msr, u64 data)
>>>>  		kvm->arch.hv_hypercall = data;
>>>>  		break;
>>>>  	}
>>>> +	case HV_X64_MSR_REFERENCE_TSC: {
>>>> +		u64 gfn;
>>>> +		HV_REFERENCE_TSC_PAGE tsc_ref;
>>>> +		memset(&tsc_ref, 0, sizeof(tsc_ref));
>>>> +		kvm->arch.hv_tsc_page = data;
>>>
>>> Comment 1)
>>>
>>> Is there a reason (that is compliance with spec) to maintain
>>> value, for HV_X64_MSR_REFERENCE_TSC wrmsr operation, in case
>>> HV_X64_MSR_TSC_REFERENCE_ENABLE is not set?
>>  
>> Windows seems to be retrieving HV_X64_MSR_REFERENCE_TSC value only once
>> on boot-up, checks HV_X64_MSR_TSC_REFERENCE_ENABLE bit allocate one page
>> and maps it into the system space, and writes the page address to
>> HV_X64_MSR_REFERENCE_TSC MSR if this bit was not set. Windows keeps the
>> TSC page address value in HvlReferenceTscPage variable and uses it
>> every time when needs to read the TSC page content.
> 
> Ok then it has to be saved/restored irrespective of the value of
> HV_X64_MSR_TSC_REFERENCE_ENABLE.
> 
>>> If not, should only assign to kvm->arch.hv_tsc_page after proper checks.
>>>
>>>> +		if (!(data & HV_X64_MSR_TSC_REFERENCE_ENABLE))
>>>> +			break;
>>>> +		gfn = data >> HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT;
>>>> +		if (kvm_write_guest(kvm, data,
>>>> +			&tsc_ref, sizeof(tsc_ref)))
>>>> +			return 1;
>>>> +		mark_page_dirty(kvm, gfn);
>>>> +		break;
>>>> +	}
>>>>  	default:
>>>>  		vcpu_unimpl(vcpu, "HYPER-V unimplemented wrmsr: 0x%x "
>>>>  			    "data 0x%llx\n", msr, data);
>>>> @@ -2291,6 +2308,14 @@ static int get_msr_hyperv_pw(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
>>>>  	case HV_X64_MSR_HYPERCALL:
>>>>  		data = kvm->arch.hv_hypercall;
>>>>  		break;
>>>> +	case HV_X64_MSR_TIME_REF_COUNT: {
>>>> +		data =
>>>> +		     div_u64(get_kernel_ns() + kvm->arch.kvmclock_offset, 100);
>>>
>>> Comment 2)
>>>
>>> Is there any specification related to the initial value of the clock
>>> after it is enabled ?
>>
>> The MS documentation says the following:
>> "A partition's reference time counter is initialized to zero when the
>> partition is created. The reference time counter for all partitions
>> count at the same rate. However, at any time, the absolute value of each
>> reference time counter will typically differ because partitions will
>> have different creation times."
>>
>> http://msdn.microsoft.com/en-us/library/windows/hardware/ff542637%
>> 28v=vs.85%29.aspx
>>
>> However, Windows doesn't seem to be worried about the initial value.
>> HvlGetReferenceTime only reads  HV_X64_MSR_TIME_REF_COUNT MSR and
>> returns the obtained value.
> 
> Right. It might be better to initialize kvmclock_offset to zero 
> in case of VM creation (QEMU's PUT_FULL state), in case Hyper-V timers
> enabled, with reference to MS docs on top.
> 
> Otherwise might end up hitting Windows code timers bugs.
> 
> But its a potential improvement, patch looks good.
> 
>>>> +	}
>>>> +	case HV_X64_MSR_REFERENCE_TSC:
>>>> +		data = kvm->arch.hv_tsc_page;
>>>> +		break;
>>>>  	default:
>>>>  		vcpu_unimpl(vcpu, "Hyper-V unhandled rdmsr: 0x%x\n", msr);
>>>>  		return 1;
>>>> @@ -2604,6 +2629,7 @@ int kvm_dev_ioctl_check_extension(long ext)
>>>>  #ifdef CONFIG_KVM_DEVICE_ASSIGNMENT
>>>>  	case KVM_CAP_ASSIGN_DEV_IRQ:
>>>>  	case KVM_CAP_PCI_2_3:
>>>> +	case KVM_CAP_HYPERV_TIME:
>>>>  #endif
>>>>  		r = 1;
>>>>  		break;
>>>> diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
>>>> index 902f124..686c1ca 100644
>>>> --- a/include/uapi/linux/kvm.h
>>>> +++ b/include/uapi/linux/kvm.h
>>>> @@ -674,6 +674,7 @@ struct kvm_ppc_smmu_info {
>>>>  #define KVM_CAP_ARM_EL1_32BIT 93
>>>>  #define KVM_CAP_SPAPR_MULTITCE 94
>>>>  #define KVM_CAP_EXT_EMUL_CPUID 95
>>>> +#define KVM_CAP_HYPERV_TIME 96
>>>>  
>>>>  #ifdef KVM_CAP_IRQ_ROUTING
>>>>  
>>>> -- 
>>>> 1.8.1.4
>>>>
>>>
>>> Comment 3) 
>>>
>>> Missing qemu HV_X64_MSR_REFERENCE_TSC save/restore.
>> I have this code ready. I'm going to send it to the qemu
>> list as soon as the KVM patch is accepted.
> 
> Looks good to me.
> 

Applied to kvm/queue, thanks!

Paolo
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Vadim Rozenfeld Jan. 17, 2014, 9:19 p.m. UTC | #6
On Fri, 2014-01-17 at 14:25 +0100, Paolo Bonzini wrote:
> Il 17/01/2014 14:18, Marcelo Tosatti ha scritto:
> > On Fri, Jan 17, 2014 at 10:06:00PM +1100, Vadim Rozenfeld wrote:
> >> On Thu, 2014-01-16 at 20:23 -0200, Marcelo Tosatti wrote:
> >>> On Thu, Jan 16, 2014 at 08:18:37PM +1100, Vadim Rozenfeld wrote:
> >>>> Signed-off: Peter Lieven <pl@kamp.de>
> >>>> Signed-off: Gleb Natapov
> >>>> Signed-off: Vadim Rozenfeld <vrozenfe@redhat.com>
> >>>>  
> >>>> After some consideration I decided to submit only Hyper-V reference
> >>>> counters support this time. I will submit iTSC support as a separate
> >>>> patch as soon as it is ready. 
> >>>>
> >>>> v1 -> v2
> >>>> 1. mark TSC page dirty as suggested by 
> >>>>     Eric Northup <digitaleric@google.com> and Gleb
> >>>> 2. disable local irq when calling get_kernel_ns, 
> >>>>     as it was done by Peter Lieven <pl@amp.de>
> >>>> 3. move check for TSC page enable from second patch
> >>>>     to this one.
> >>>>
> >>>> v3 -> v4
> >>>>     Get rid of ref counter offset.
> >>>>
> >>>> v4 -> v5
> >>>>     replace __copy_to_user with kvm_write_guest
> >>>>     when updating iTSC page.
> >>>>
> >>>> ---
> >>>>  arch/x86/include/asm/kvm_host.h    |  1 +
> >>>>  arch/x86/include/uapi/asm/hyperv.h | 13 +++++++++++++
> >>>>  arch/x86/kvm/x86.c                 | 28 +++++++++++++++++++++++++++-
> >>>>  include/uapi/linux/kvm.h           |  1 +
> >>>>  4 files changed, 42 insertions(+), 1 deletion(-)
> >>>>
> >>>> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> >>>> index ae5d783..33fef07 100644
> >>>> --- a/arch/x86/include/asm/kvm_host.h
> >>>> +++ b/arch/x86/include/asm/kvm_host.h
> >>>> @@ -605,6 +605,7 @@ struct kvm_arch {
> >>>>  	/* fields used by HYPER-V emulation */
> >>>>  	u64 hv_guest_os_id;
> >>>>  	u64 hv_hypercall;
> >>>> +	u64 hv_tsc_page;
> >>>>  
> >>>>  	#ifdef CONFIG_KVM_MMU_AUDIT
> >>>>  	int audit_point;
> >>>> diff --git a/arch/x86/include/uapi/asm/hyperv.h b/arch/x86/include/uapi/asm/hyperv.h
> >>>> index b8f1c01..462efe7 100644
> >>>> --- a/arch/x86/include/uapi/asm/hyperv.h
> >>>> +++ b/arch/x86/include/uapi/asm/hyperv.h
> >>>> @@ -28,6 +28,9 @@
> >>>>  /* Partition Reference Counter (HV_X64_MSR_TIME_REF_COUNT) available*/
> >>>>  #define HV_X64_MSR_TIME_REF_COUNT_AVAILABLE	(1 << 1)
> >>>>  
> >>>> +/* A partition's reference time stamp counter (TSC) page */
> >>>> +#define HV_X64_MSR_REFERENCE_TSC		0x40000021
> >>>> +
> >>>>  /*
> >>>>   * There is a single feature flag that signifies the presence of the MSR
> >>>>   * that can be used to retrieve both the local APIC Timer frequency as
> >>>> @@ -198,6 +201,9 @@
> >>>>  #define HV_X64_MSR_APIC_ASSIST_PAGE_ADDRESS_MASK	\
> >>>>  		(~((1ull << HV_X64_MSR_APIC_ASSIST_PAGE_ADDRESS_SHIFT) - 1))
> >>>>  
> >>>> +#define HV_X64_MSR_TSC_REFERENCE_ENABLE		0x00000001
> >>>> +#define HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT	12
> >>>> +
> >>>>  #define HV_PROCESSOR_POWER_STATE_C0		0
> >>>>  #define HV_PROCESSOR_POWER_STATE_C1		1
> >>>>  #define HV_PROCESSOR_POWER_STATE_C2		2
> >>>> @@ -210,4 +216,11 @@
> >>>>  #define HV_STATUS_INVALID_ALIGNMENT		4
> >>>>  #define HV_STATUS_INSUFFICIENT_BUFFERS		19
> >>>>  
> >>>> +typedef struct _HV_REFERENCE_TSC_PAGE {
> >>>> +	__u32 tsc_sequence;
> >>>> +	__u32 res1;
> >>>> +	__u64 tsc_scale;
> >>>> +	__s64 tsc_offset;
> >>>> +} HV_REFERENCE_TSC_PAGE, *PHV_REFERENCE_TSC_PAGE;
> >>>> +
> >>>>  #endif
> >>>> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> >>>> index 5d004da..8e685b8 100644
> >>>> --- a/arch/x86/kvm/x86.c
> >>>> +++ b/arch/x86/kvm/x86.c
> >>>> @@ -836,11 +836,12 @@ EXPORT_SYMBOL_GPL(kvm_rdpmc);
> >>>>   * kvm-specific. Those are put in the beginning of the list.
> >>>>   */
> >>>>  
> >>>> -#define KVM_SAVE_MSRS_BEGIN	10
> >>>> +#define KVM_SAVE_MSRS_BEGIN	12
> >>>>  static u32 msrs_to_save[] = {
> >>>>  	MSR_KVM_SYSTEM_TIME, MSR_KVM_WALL_CLOCK,
> >>>>  	MSR_KVM_SYSTEM_TIME_NEW, MSR_KVM_WALL_CLOCK_NEW,
> >>>>  	HV_X64_MSR_GUEST_OS_ID, HV_X64_MSR_HYPERCALL,
> >>>> +	HV_X64_MSR_TIME_REF_COUNT, HV_X64_MSR_REFERENCE_TSC,
> >>>>  	HV_X64_MSR_APIC_ASSIST_PAGE, MSR_KVM_ASYNC_PF_EN, MSR_KVM_STEAL_TIME,
> >>>>  	MSR_KVM_PV_EOI_EN,
> >>>>  	MSR_IA32_SYSENTER_CS, MSR_IA32_SYSENTER_ESP, MSR_IA32_SYSENTER_EIP,
> >>>> @@ -1826,6 +1827,8 @@ static bool kvm_hv_msr_partition_wide(u32 msr)
> >>>>  	switch (msr) {
> >>>>  	case HV_X64_MSR_GUEST_OS_ID:
> >>>>  	case HV_X64_MSR_HYPERCALL:
> >>>> +	case HV_X64_MSR_REFERENCE_TSC:
> >>>> +	case HV_X64_MSR_TIME_REF_COUNT:
> >>>>  		r = true;
> >>>>  		break;
> >>>>  	}
> >>>> @@ -1867,6 +1870,20 @@ static int set_msr_hyperv_pw(struct kvm_vcpu *vcpu, u32 msr, u64 data)
> >>>>  		kvm->arch.hv_hypercall = data;
> >>>>  		break;
> >>>>  	}
> >>>> +	case HV_X64_MSR_REFERENCE_TSC: {
> >>>> +		u64 gfn;
> >>>> +		HV_REFERENCE_TSC_PAGE tsc_ref;
> >>>> +		memset(&tsc_ref, 0, sizeof(tsc_ref));
> >>>> +		kvm->arch.hv_tsc_page = data;
> >>>
> >>> Comment 1)
> >>>
> >>> Is there a reason (that is, compliance with the spec) to retain the
> >>> value written by a HV_X64_MSR_REFERENCE_TSC wrmsr operation when
> >>> HV_X64_MSR_TSC_REFERENCE_ENABLE is not set?
> >>  
> >> Windows seems to retrieve the HV_X64_MSR_REFERENCE_TSC value only once,
> >> at boot-up. It checks the HV_X64_MSR_TSC_REFERENCE_ENABLE bit, allocates
> >> one page and maps it into the system space, and writes the page address
> >> to the HV_X64_MSR_REFERENCE_TSC MSR if this bit was not set. Windows keeps
> >> the TSC page address value in the HvlReferenceTscPage variable and uses it
> >> every time it needs to read the TSC page content.
> > 
> > Ok then it has to be saved/restored irrespective of the value of
> > HV_X64_MSR_TSC_REFERENCE_ENABLE.
> > 
> >>> If not, should only assign to kvm->arch.hv_tsc_page after proper checks.
> >>>
> >>>> +		if (!(data & HV_X64_MSR_TSC_REFERENCE_ENABLE))
> >>>> +			break;
> >>>> +		gfn = data >> HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT;
> >>>> +		if (kvm_write_guest(kvm, data,
> >>>> +			&tsc_ref, sizeof(tsc_ref)))
> >>>> +			return 1;
> >>>> +		mark_page_dirty(kvm, gfn);
> >>>> +		break;
> >>>> +	}
> >>>>  	default:
> >>>>  		vcpu_unimpl(vcpu, "HYPER-V unimplemented wrmsr: 0x%x "
> >>>>  			    "data 0x%llx\n", msr, data);
> >>>> @@ -2291,6 +2308,14 @@ static int get_msr_hyperv_pw(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
> >>>>  	case HV_X64_MSR_HYPERCALL:
> >>>>  		data = kvm->arch.hv_hypercall;
> >>>>  		break;
> >>>> +	case HV_X64_MSR_TIME_REF_COUNT: {
> >>>> +		data =
> >>>> +		     div_u64(get_kernel_ns() + kvm->arch.kvmclock_offset, 100);
> >>>
> >>> Comment 2)
> >>>
> >>> Is there any specification related to the initial value of the clock
> >>> after it is enabled ?
> >>
> >> The MS documentation says the following:
> >> "A partition's reference time counter is initialized to zero when the
> >> partition is created. The reference time counter for all partitions
> >> count at the same rate. However, at any time, the absolute value of each
> >> reference time counter will typically differ because partitions will
> >> have different creation times."
> >>
> >> http://msdn.microsoft.com/en-us/library/windows/hardware/ff542637%
> >> 28v=vs.85%29.aspx
> >>
> >> However, Windows doesn't seem to be worried about the initial value.
> >> HvlGetReferenceTime only reads  HV_X64_MSR_TIME_REF_COUNT MSR and
> >> returns the obtained value.
> > 
> > Right. It might be better to initialize kvmclock_offset to zero
> > on VM creation (QEMU's PUT_FULL state) if Hyper-V timers are
> > enabled, with a reference to the MS docs on top.
> > 
> > Otherwise we might end up hitting timer bugs in Windows code.
> > 
> > But it's a potential improvement; the patch looks good.
> > 
> >>>> +	}
> >>>> +	case HV_X64_MSR_REFERENCE_TSC:
> >>>> +		data = kvm->arch.hv_tsc_page;
> >>>> +		break;
> >>>>  	default:
> >>>>  		vcpu_unimpl(vcpu, "Hyper-V unhandled rdmsr: 0x%x\n", msr);
> >>>>  		return 1;
> >>>> @@ -2604,6 +2629,7 @@ int kvm_dev_ioctl_check_extension(long ext)
> >>>>  #ifdef CONFIG_KVM_DEVICE_ASSIGNMENT
> >>>>  	case KVM_CAP_ASSIGN_DEV_IRQ:
> >>>>  	case KVM_CAP_PCI_2_3:
> >>>> +	case KVM_CAP_HYPERV_TIME:
> >>>>  #endif
> >>>>  		r = 1;
> >>>>  		break;
> >>>> diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
> >>>> index 902f124..686c1ca 100644
> >>>> --- a/include/uapi/linux/kvm.h
> >>>> +++ b/include/uapi/linux/kvm.h
> >>>> @@ -674,6 +674,7 @@ struct kvm_ppc_smmu_info {
> >>>>  #define KVM_CAP_ARM_EL1_32BIT 93
> >>>>  #define KVM_CAP_SPAPR_MULTITCE 94
> >>>>  #define KVM_CAP_EXT_EMUL_CPUID 95
> >>>> +#define KVM_CAP_HYPERV_TIME 96
> >>>>  
> >>>>  #ifdef KVM_CAP_IRQ_ROUTING
> >>>>  
> >>>> -- 
> >>>> 1.8.1.4
> >>>>
> >>>
> >>> Comment 3) 
> >>>
> >>> Missing qemu HV_X64_MSR_REFERENCE_TSC save/restore.
> >> I have this code ready. I'm going to send it to the qemu
> >> list as soon as the KVM patch is accepted.
> > 
> > Looks good to me.
> > 
> 
> Applied to kvm/queue, thanks!
Great.
Thank you guys for your help.
Vadim.
> 
> Paolo


--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index ae5d783..33fef07 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -605,6 +605,7 @@  struct kvm_arch {
 	/* fields used by HYPER-V emulation */
 	u64 hv_guest_os_id;
 	u64 hv_hypercall;
+	u64 hv_tsc_page;
 
 	#ifdef CONFIG_KVM_MMU_AUDIT
 	int audit_point;
diff --git a/arch/x86/include/uapi/asm/hyperv.h b/arch/x86/include/uapi/asm/hyperv.h
index b8f1c01..462efe7 100644
--- a/arch/x86/include/uapi/asm/hyperv.h
+++ b/arch/x86/include/uapi/asm/hyperv.h
@@ -28,6 +28,9 @@ 
 /* Partition Reference Counter (HV_X64_MSR_TIME_REF_COUNT) available*/
 #define HV_X64_MSR_TIME_REF_COUNT_AVAILABLE	(1 << 1)
 
+/* A partition's reference time stamp counter (TSC) page */
+#define HV_X64_MSR_REFERENCE_TSC		0x40000021
+
 /*
  * There is a single feature flag that signifies the presence of the MSR
  * that can be used to retrieve both the local APIC Timer frequency as
@@ -198,6 +201,9 @@ 
 #define HV_X64_MSR_APIC_ASSIST_PAGE_ADDRESS_MASK	\
 		(~((1ull << HV_X64_MSR_APIC_ASSIST_PAGE_ADDRESS_SHIFT) - 1))
 
+#define HV_X64_MSR_TSC_REFERENCE_ENABLE		0x00000001
+#define HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT	12
+
 #define HV_PROCESSOR_POWER_STATE_C0		0
 #define HV_PROCESSOR_POWER_STATE_C1		1
 #define HV_PROCESSOR_POWER_STATE_C2		2
@@ -210,4 +216,11 @@ 
 #define HV_STATUS_INVALID_ALIGNMENT		4
 #define HV_STATUS_INSUFFICIENT_BUFFERS		19
 
+typedef struct _HV_REFERENCE_TSC_PAGE {
+	__u32 tsc_sequence;
+	__u32 res1;
+	__u64 tsc_scale;
+	__s64 tsc_offset;
+} HV_REFERENCE_TSC_PAGE, *PHV_REFERENCE_TSC_PAGE;
+
 #endif
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 5d004da..8e685b8 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -836,11 +836,12 @@  EXPORT_SYMBOL_GPL(kvm_rdpmc);
  * kvm-specific. Those are put in the beginning of the list.
  */
 
-#define KVM_SAVE_MSRS_BEGIN	10
+#define KVM_SAVE_MSRS_BEGIN	12
 static u32 msrs_to_save[] = {
 	MSR_KVM_SYSTEM_TIME, MSR_KVM_WALL_CLOCK,
 	MSR_KVM_SYSTEM_TIME_NEW, MSR_KVM_WALL_CLOCK_NEW,
 	HV_X64_MSR_GUEST_OS_ID, HV_X64_MSR_HYPERCALL,
+	HV_X64_MSR_TIME_REF_COUNT, HV_X64_MSR_REFERENCE_TSC,
 	HV_X64_MSR_APIC_ASSIST_PAGE, MSR_KVM_ASYNC_PF_EN, MSR_KVM_STEAL_TIME,
 	MSR_KVM_PV_EOI_EN,
 	MSR_IA32_SYSENTER_CS, MSR_IA32_SYSENTER_ESP, MSR_IA32_SYSENTER_EIP,
@@ -1826,6 +1827,8 @@  static bool kvm_hv_msr_partition_wide(u32 msr)
 	switch (msr) {
 	case HV_X64_MSR_GUEST_OS_ID:
 	case HV_X64_MSR_HYPERCALL:
+	case HV_X64_MSR_REFERENCE_TSC:
+	case HV_X64_MSR_TIME_REF_COUNT:
 		r = true;
 		break;
 	}
@@ -1867,6 +1870,20 @@  static int set_msr_hyperv_pw(struct kvm_vcpu *vcpu, u32 msr, u64 data)
 		kvm->arch.hv_hypercall = data;
 		break;
 	}
+	case HV_X64_MSR_REFERENCE_TSC: {
+		u64 gfn;
+		HV_REFERENCE_TSC_PAGE tsc_ref;
+		memset(&tsc_ref, 0, sizeof(tsc_ref));
+		kvm->arch.hv_tsc_page = data;
+		if (!(data & HV_X64_MSR_TSC_REFERENCE_ENABLE))
+			break;
+		gfn = data >> HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT;
+		if (kvm_write_guest(kvm, data,
+			&tsc_ref, sizeof(tsc_ref)))
+			return 1;
+		mark_page_dirty(kvm, gfn);
+		break;
+	}
 	default:
 		vcpu_unimpl(vcpu, "HYPER-V unimplemented wrmsr: 0x%x "
 			    "data 0x%llx\n", msr, data);
@@ -2291,6 +2308,14 @@  static int get_msr_hyperv_pw(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
 	case HV_X64_MSR_HYPERCALL:
 		data = kvm->arch.hv_hypercall;
 		break;
+	case HV_X64_MSR_TIME_REF_COUNT: {
+		data =
+		     div_u64(get_kernel_ns() + kvm->arch.kvmclock_offset, 100);
+		break;
+	}
+	case HV_X64_MSR_REFERENCE_TSC:
+		data = kvm->arch.hv_tsc_page;
+		break;
 	default:
 		vcpu_unimpl(vcpu, "Hyper-V unhandled rdmsr: 0x%x\n", msr);
 		return 1;
@@ -2604,6 +2629,7 @@  int kvm_dev_ioctl_check_extension(long ext)
 #ifdef CONFIG_KVM_DEVICE_ASSIGNMENT
 	case KVM_CAP_ASSIGN_DEV_IRQ:
 	case KVM_CAP_PCI_2_3:
+	case KVM_CAP_HYPERV_TIME:
 #endif
 		r = 1;
 		break;
diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index 902f124..686c1ca 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -674,6 +674,7 @@  struct kvm_ppc_smmu_info {
 #define KVM_CAP_ARM_EL1_32BIT 93
 #define KVM_CAP_SPAPR_MULTITCE 94
 #define KVM_CAP_EXT_EMUL_CPUID 95
+#define KVM_CAP_HYPERV_TIME 96
 
 #ifdef KVM_CAP_IRQ_ROUTING