Message ID | 1368947197-9033-2-git-send-email-vrozenfe@redhat.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
On Sun, May 19, 2013 at 05:06:36PM +1000, Vadim Rozenfeld wrote: > Signed-off: Peter Lieven <pl@dlh.net> > Signed-off: Gleb Natapov <gleb@redhat.com> > Signed-off: Vadim Rozenfeld <vrozenfe@redhat.com> > > v1 -> v2 > 1. mark TSC page dirty as suggested by > Eric Northup <digitaleric@google.com> and Gleb > 2. disable local irq when calling get_kernel_ns, > as it was done by Peter Lieven <pl@dlhnet.de> > 3. move check for TSC page enable from second patch > to this one. > > --- > arch/x86/include/asm/kvm_host.h | 2 ++ > arch/x86/include/uapi/asm/hyperv.h | 14 ++++++++++++++ > arch/x86/kvm/x86.c | 39 +++++++++++++++++++++++++++++++++++++- > include/uapi/linux/kvm.h | 1 + > 4 files changed, 55 insertions(+), 1 deletion(-) > > diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h > index 3741c65..f0fee35 100644 > --- a/arch/x86/include/asm/kvm_host.h > +++ b/arch/x86/include/asm/kvm_host.h > @@ -575,6 +575,8 @@ struct kvm_arch { > /* fields used by HYPER-V emulation */ > u64 hv_guest_os_id; > u64 hv_hypercall; > + u64 hv_ref_count; > + u64 hv_tsc_page; > > #ifdef CONFIG_KVM_MMU_AUDIT > int audit_point; > diff --git a/arch/x86/include/uapi/asm/hyperv.h b/arch/x86/include/uapi/asm/hyperv.h > index b80420b..890dfc3 100644 > --- a/arch/x86/include/uapi/asm/hyperv.h > +++ b/arch/x86/include/uapi/asm/hyperv.h > @@ -136,6 +136,9 @@ > /* MSR used to read the per-partition time reference counter */ > #define HV_X64_MSR_TIME_REF_COUNT 0x40000020 > > +/* A partition's reference time stamp counter (TSC) page */ > +#define HV_X64_MSR_REFERENCE_TSC 0x40000021 > + > /* Define the virtual APIC registers */ > #define HV_X64_MSR_EOI 0x40000070 > #define HV_X64_MSR_ICR 0x40000071 > @@ -179,6 +182,9 @@ > #define HV_X64_MSR_APIC_ASSIST_PAGE_ADDRESS_MASK \ > (~((1ull << HV_X64_MSR_APIC_ASSIST_PAGE_ADDRESS_SHIFT) - 1)) > > +#define HV_X64_MSR_TSC_REFERENCE_ENABLE 0x00000001 > +#define HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT 12 > + > #define HV_PROCESSOR_POWER_STATE_C0 0 > #define HV_PROCESSOR_POWER_STATE_C1 1 > #define HV_PROCESSOR_POWER_STATE_C2 2 > @@ -191,4 +197,12 @@ > #define HV_STATUS_INVALID_ALIGNMENT 4 > #define HV_STATUS_INSUFFICIENT_BUFFERS 19 > > +typedef struct _HV_REFERENCE_TSC_PAGE { > + __u32 TscSequence; > + __u32 Rserved1; > + __u64 TscScale; > + __s64 TscOffset; > +} HV_REFERENCE_TSC_PAGE, *PHV_REFERENCE_TSC_PAGE; > + I understand that you want to keep structure definition as close to MS docs as possible, but this breaks all the rules of Linux kernel code unfortunately. Lest define it like that: struct hv_reference_tsc_page { __u32 tsc_sequence; __u32 reserved1; __u64 tsc_scale; __s64 tsc_offset; }; And drop typedef. If you'll look at include/linux/hyperv.h this is what Linux hyper-v guest support does. > + > #endif > diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c > index 8d28810..9645dab 100644 > --- a/arch/x86/kvm/x86.c > +++ b/arch/x86/kvm/x86.c > @@ -843,7 +843,7 @@ EXPORT_SYMBOL_GPL(kvm_rdpmc); > static u32 msrs_to_save[] = { > MSR_KVM_SYSTEM_TIME, MSR_KVM_WALL_CLOCK, > MSR_KVM_SYSTEM_TIME_NEW, MSR_KVM_WALL_CLOCK_NEW, > - HV_X64_MSR_GUEST_OS_ID, HV_X64_MSR_HYPERCALL, > + HV_X64_MSR_GUEST_OS_ID, HV_X64_MSR_HYPERCALL, HV_X64_MSR_TIME_REF_COUNT, > HV_X64_MSR_APIC_ASSIST_PAGE, MSR_KVM_ASYNC_PF_EN, MSR_KVM_STEAL_TIME, > MSR_KVM_PV_EOI_EN, > MSR_IA32_SYSENTER_CS, MSR_IA32_SYSENTER_ESP, MSR_IA32_SYSENTER_EIP, > @@ -1788,6 +1788,8 @@ static bool kvm_hv_msr_partition_wide(u32 msr) > switch (msr) { > case HV_X64_MSR_GUEST_OS_ID: > case HV_X64_MSR_HYPERCALL: > + case HV_X64_MSR_REFERENCE_TSC: > + case HV_X64_MSR_TIME_REF_COUNT: > r = true; > break; > } > @@ -1827,6 +1829,29 @@ static int set_msr_hyperv_pw(struct kvm_vcpu *vcpu, u32 msr, u64 data) > if (__copy_to_user((void __user *)addr, instructions, 4)) > return 1; > kvm->arch.hv_hypercall = data; > + local_irq_disable(); > + kvm->arch.hv_ref_count = get_kernel_ns(); > + local_irq_enable(); > + break; > + } > + case HV_X64_MSR_REFERENCE_TSC: { > + u64 gfn; > + unsigned long addr; > + HV_REFERENCE_TSC_PAGE tsc_ref; > + tsc_ref.TscSequence = 0; > + if (!(data & HV_X64_MSR_TSC_REFERENCE_ENABLE)) { > + kvm->arch.hv_tsc_page = data; > + break; > + } > + gfn = data >> HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT; > + addr = gfn_to_hva(kvm, data >> > + HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT); addr = gfn_to_hva(kvm, gfn); You already calculated gfn :) > + if (kvm_is_error_hva(addr)) > + return 1; > + if (__copy_to_user((void __user *)addr, &tsc_ref, sizeof(tsc_ref))) > + return 1; > + mark_page_dirty(kvm, gfn); > + kvm->arch.hv_tsc_page = data; > break; > } > default: > @@ -2253,6 +2278,17 @@ static int get_msr_hyperv_pw(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata) > case HV_X64_MSR_HYPERCALL: > data = kvm->arch.hv_hypercall; > break; > + case HV_X64_MSR_TIME_REF_COUNT: { > + u64 now_ns; > + local_irq_disable(); > + now_ns = get_kernel_ns() + kvm->arch.kvmclock_offset; > + data = div_u64(now_ns - kvm->arch.hv_ref_count, 100); > + local_irq_enable(); > + break; > + } > + case HV_X64_MSR_REFERENCE_TSC: > + data = kvm->arch.hv_tsc_page; > + break; > default: > vcpu_unimpl(vcpu, "Hyper-V unhandled rdmsr: 0x%x\n", msr); > return 1; > @@ -2566,6 +2602,7 @@ int kvm_dev_ioctl_check_extension(long ext) > case KVM_CAP_ASSIGN_DEV_IRQ: > case KVM_CAP_PCI_2_3: > #endif > + case KVM_CAP_HYPERV_TIME: > r = 1; > break; > case KVM_CAP_COALESCED_MMIO: > diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h > index a5c86fc..0810763 100644 > --- a/include/uapi/linux/kvm.h > +++ b/include/uapi/linux/kvm.h > @@ -666,6 +666,7 @@ struct kvm_ppc_smmu_info { > #define KVM_CAP_IRQ_MPIC 90 > #define KVM_CAP_PPC_RTAS 91 > #define KVM_CAP_IRQ_XICS 92 > +#define KVM_CAP_HYPERV_TIME 93 > > #ifdef KVM_CAP_IRQ_ROUTING > > -- > 1.8.1.2 -- Gleb. -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Il 19/05/2013 09:06, Vadim Rozenfeld ha scritto: > Signed-off: Peter Lieven <pl@dlh.net> > Signed-off: Gleb Natapov <gleb@redhat.com> > Signed-off: Vadim Rozenfeld <vrozenfe@redhat.com> > > v1 -> v2 > 1. mark TSC page dirty as suggested by > Eric Northup <digitaleric@google.com> and Gleb > 2. disable local irq when calling get_kernel_ns, > as it was done by Peter Lieven <pl@dlhnet.de> > 3. move check for TSC page enable from second patch > to this one. > > --- > arch/x86/include/asm/kvm_host.h | 2 ++ > arch/x86/include/uapi/asm/hyperv.h | 14 ++++++++++++++ > arch/x86/kvm/x86.c | 39 +++++++++++++++++++++++++++++++++++++- > include/uapi/linux/kvm.h | 1 + > 4 files changed, 55 insertions(+), 1 deletion(-) > > diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h > index 3741c65..f0fee35 100644 > --- a/arch/x86/include/asm/kvm_host.h > +++ b/arch/x86/include/asm/kvm_host.h > @@ -575,6 +575,8 @@ struct kvm_arch { > /* fields used by HYPER-V emulation */ > u64 hv_guest_os_id; > u64 hv_hypercall; > + u64 hv_ref_count; > + u64 hv_tsc_page; > > #ifdef CONFIG_KVM_MMU_AUDIT > int audit_point; > diff --git a/arch/x86/include/uapi/asm/hyperv.h b/arch/x86/include/uapi/asm/hyperv.h > index b80420b..890dfc3 100644 > --- a/arch/x86/include/uapi/asm/hyperv.h > +++ b/arch/x86/include/uapi/asm/hyperv.h > @@ -136,6 +136,9 @@ > /* MSR used to read the per-partition time reference counter */ > #define HV_X64_MSR_TIME_REF_COUNT 0x40000020 > > +/* A partition's reference time stamp counter (TSC) page */ > +#define HV_X64_MSR_REFERENCE_TSC 0x40000021 > + > /* Define the virtual APIC registers */ > #define HV_X64_MSR_EOI 0x40000070 > #define HV_X64_MSR_ICR 0x40000071 > @@ -179,6 +182,9 @@ > #define HV_X64_MSR_APIC_ASSIST_PAGE_ADDRESS_MASK \ > (~((1ull << HV_X64_MSR_APIC_ASSIST_PAGE_ADDRESS_SHIFT) - 1)) > > +#define HV_X64_MSR_TSC_REFERENCE_ENABLE 0x00000001 > +#define HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT 12 > + > #define HV_PROCESSOR_POWER_STATE_C0 0 > #define HV_PROCESSOR_POWER_STATE_C1 1 > #define HV_PROCESSOR_POWER_STATE_C2 2 > @@ -191,4 +197,12 @@ > #define HV_STATUS_INVALID_ALIGNMENT 4 > #define HV_STATUS_INSUFFICIENT_BUFFERS 19 > > +typedef struct _HV_REFERENCE_TSC_PAGE { > + __u32 TscSequence; > + __u32 Rserved1; > + __u64 TscScale; > + __s64 TscOffset; > +} HV_REFERENCE_TSC_PAGE, *PHV_REFERENCE_TSC_PAGE; > + > + > #endif > diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c > index 8d28810..9645dab 100644 > --- a/arch/x86/kvm/x86.c > +++ b/arch/x86/kvm/x86.c > @@ -843,7 +843,7 @@ EXPORT_SYMBOL_GPL(kvm_rdpmc); > static u32 msrs_to_save[] = { > MSR_KVM_SYSTEM_TIME, MSR_KVM_WALL_CLOCK, > MSR_KVM_SYSTEM_TIME_NEW, MSR_KVM_WALL_CLOCK_NEW, > - HV_X64_MSR_GUEST_OS_ID, HV_X64_MSR_HYPERCALL, > + HV_X64_MSR_GUEST_OS_ID, HV_X64_MSR_HYPERCALL, HV_X64_MSR_TIME_REF_COUNT, > HV_X64_MSR_APIC_ASSIST_PAGE, MSR_KVM_ASYNC_PF_EN, MSR_KVM_STEAL_TIME, > MSR_KVM_PV_EOI_EN, > MSR_IA32_SYSENTER_CS, MSR_IA32_SYSENTER_ESP, MSR_IA32_SYSENTER_EIP, > @@ -1788,6 +1788,8 @@ static bool kvm_hv_msr_partition_wide(u32 msr) > switch (msr) { > case HV_X64_MSR_GUEST_OS_ID: > case HV_X64_MSR_HYPERCALL: > + case HV_X64_MSR_REFERENCE_TSC: > + case HV_X64_MSR_TIME_REF_COUNT: > r = true; > break; > } > @@ -1827,6 +1829,29 @@ static int set_msr_hyperv_pw(struct kvm_vcpu *vcpu, u32 msr, u64 data) > if (__copy_to_user((void __user *)addr, instructions, 4)) > return 1; > kvm->arch.hv_hypercall = data; > + local_irq_disable(); > + kvm->arch.hv_ref_count = get_kernel_ns(); > + local_irq_enable(); > + break; > + } > + case HV_X64_MSR_REFERENCE_TSC: { > + u64 gfn; > + unsigned long addr; > + HV_REFERENCE_TSC_PAGE tsc_ref; > + tsc_ref.TscSequence = 0; Again, please explain why this is not 0xFFFFFFFF. Paolo > + if (!(data & HV_X64_MSR_TSC_REFERENCE_ENABLE)) { > + kvm->arch.hv_tsc_page = data; > + break; > + } > + gfn = data >> HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT; > + addr = gfn_to_hva(kvm, data >> > + HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT); > + if (kvm_is_error_hva(addr)) > + return 1; > + if (__copy_to_user((void __user *)addr, &tsc_ref, sizeof(tsc_ref))) > + return 1; > + mark_page_dirty(kvm, gfn); > + kvm->arch.hv_tsc_page = data; > break; > } > default: > @@ -2253,6 +2278,17 @@ static int get_msr_hyperv_pw(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata) > case HV_X64_MSR_HYPERCALL: > data = kvm->arch.hv_hypercall; > break; > + case HV_X64_MSR_TIME_REF_COUNT: { > + u64 now_ns; > + local_irq_disable(); > + now_ns = get_kernel_ns() + kvm->arch.kvmclock_offset; > + data = div_u64(now_ns - kvm->arch.hv_ref_count, 100); > + local_irq_enable(); > + break; > + } > + case HV_X64_MSR_REFERENCE_TSC: > + data = kvm->arch.hv_tsc_page; > + break; > default: > vcpu_unimpl(vcpu, "Hyper-V unhandled rdmsr: 0x%x\n", msr); > return 1; > @@ -2566,6 +2602,7 @@ int kvm_dev_ioctl_check_extension(long ext) > case KVM_CAP_ASSIGN_DEV_IRQ: > case KVM_CAP_PCI_2_3: > #endif > + case KVM_CAP_HYPERV_TIME: > r = 1; > break; > case KVM_CAP_COALESCED_MMIO: > diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h > index a5c86fc..0810763 100644 > --- a/include/uapi/linux/kvm.h > +++ b/include/uapi/linux/kvm.h > @@ -666,6 +666,7 @@ struct kvm_ppc_smmu_info { > #define KVM_CAP_IRQ_MPIC 90 > #define KVM_CAP_PPC_RTAS 91 > #define KVM_CAP_IRQ_XICS 92 > +#define KVM_CAP_HYPERV_TIME 93 > > #ifdef KVM_CAP_IRQ_ROUTING > > -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Sun, May 19, 2013 at 05:06:36PM +1000, Vadim Rozenfeld wrote: > Signed-off: Peter Lieven <pl@dlh.net> > Signed-off: Gleb Natapov <gleb@redhat.com> > Signed-off: Vadim Rozenfeld <vrozenfe@redhat.com> > > v1 -> v2 > 1. mark TSC page dirty as suggested by > Eric Northup <digitaleric@google.com> and Gleb > 2. disable local irq when calling get_kernel_ns, > as it was done by Peter Lieven <pl@dlhnet.de> > 3. move check for TSC page enable from second patch > to this one. > > --- > arch/x86/include/asm/kvm_host.h | 2 ++ > arch/x86/include/uapi/asm/hyperv.h | 14 ++++++++++++++ > arch/x86/kvm/x86.c | 39 +++++++++++++++++++++++++++++++++++++- > include/uapi/linux/kvm.h | 1 + > 4 files changed, 55 insertions(+), 1 deletion(-) > > diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h > index 3741c65..f0fee35 100644 > --- a/arch/x86/include/asm/kvm_host.h > +++ b/arch/x86/include/asm/kvm_host.h > @@ -575,6 +575,8 @@ struct kvm_arch { > /* fields used by HYPER-V emulation */ > u64 hv_guest_os_id; > u64 hv_hypercall; > + u64 hv_ref_count; > + u64 hv_tsc_page; > > #ifdef CONFIG_KVM_MMU_AUDIT > int audit_point; > diff --git a/arch/x86/include/uapi/asm/hyperv.h b/arch/x86/include/uapi/asm/hyperv.h > index b80420b..890dfc3 100644 > --- a/arch/x86/include/uapi/asm/hyperv.h > +++ b/arch/x86/include/uapi/asm/hyperv.h > @@ -136,6 +136,9 @@ > /* MSR used to read the per-partition time reference counter */ > #define HV_X64_MSR_TIME_REF_COUNT 0x40000020 > > +/* A partition's reference time stamp counter (TSC) page */ > +#define HV_X64_MSR_REFERENCE_TSC 0x40000021 > + > /* Define the virtual APIC registers */ > #define HV_X64_MSR_EOI 0x40000070 > #define HV_X64_MSR_ICR 0x40000071 > @@ -179,6 +182,9 @@ > #define HV_X64_MSR_APIC_ASSIST_PAGE_ADDRESS_MASK \ > (~((1ull << HV_X64_MSR_APIC_ASSIST_PAGE_ADDRESS_SHIFT) - 1)) > > +#define HV_X64_MSR_TSC_REFERENCE_ENABLE 0x00000001 > +#define HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT 12 > + > #define HV_PROCESSOR_POWER_STATE_C0 0 > #define HV_PROCESSOR_POWER_STATE_C1 1 > #define HV_PROCESSOR_POWER_STATE_C2 2 > @@ -191,4 +197,12 @@ > #define HV_STATUS_INVALID_ALIGNMENT 4 > #define HV_STATUS_INSUFFICIENT_BUFFERS 19 > > +typedef struct _HV_REFERENCE_TSC_PAGE { > + __u32 TscSequence; > + __u32 Rserved1; > + __u64 TscScale; > + __s64 TscOffset; > +} HV_REFERENCE_TSC_PAGE, *PHV_REFERENCE_TSC_PAGE; > + > + > #endif > diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c > index 8d28810..9645dab 100644 > --- a/arch/x86/kvm/x86.c > +++ b/arch/x86/kvm/x86.c > @@ -843,7 +843,7 @@ EXPORT_SYMBOL_GPL(kvm_rdpmc); > static u32 msrs_to_save[] = { > MSR_KVM_SYSTEM_TIME, MSR_KVM_WALL_CLOCK, > MSR_KVM_SYSTEM_TIME_NEW, MSR_KVM_WALL_CLOCK_NEW, not needed. > - HV_X64_MSR_GUEST_OS_ID, HV_X64_MSR_HYPERCALL, > + HV_X64_MSR_GUEST_OS_ID, HV_X64_MSR_HYPERCALL, HV_X64_MSR_TIME_REF_COUNT, > HV_X64_MSR_APIC_ASSIST_PAGE, MSR_KVM_ASYNC_PF_EN, MSR_KVM_STEAL_TIME, > MSR_KVM_PV_EOI_EN, > MSR_IA32_SYSENTER_CS, MSR_IA32_SYSENTER_ESP, MSR_IA32_SYSENTER_EIP, > @@ -1788,6 +1788,8 @@ static bool kvm_hv_msr_partition_wide(u32 msr) > switch (msr) { > case HV_X64_MSR_GUEST_OS_ID: > case HV_X64_MSR_HYPERCALL: > + case HV_X64_MSR_REFERENCE_TSC: > + case HV_X64_MSR_TIME_REF_COUNT: > r = true; > break; > } > @@ -1827,6 +1829,29 @@ static int set_msr_hyperv_pw(struct kvm_vcpu *vcpu, u32 msr, u64 data) > if (__copy_to_user((void __user *)addr, instructions, 4)) > return 1; > kvm->arch.hv_hypercall = data; > + local_irq_disable(); > + kvm->arch.hv_ref_count = get_kernel_ns(); > + local_irq_enable(); > + break; local_irq_disable/local_irq_enable not needed. What is the reasoning behind reading this time value at msr write time? > + } > + case HV_X64_MSR_REFERENCE_TSC: { > + u64 gfn; > + unsigned long addr; > + HV_REFERENCE_TSC_PAGE tsc_ref; > + tsc_ref.TscSequence = 0; > + if (!(data & HV_X64_MSR_TSC_REFERENCE_ENABLE)) { > + kvm->arch.hv_tsc_page = data; > + break; > + } > + gfn = data >> HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT; > + addr = gfn_to_hva(kvm, data >> > + HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT); > + if (kvm_is_error_hva(addr)) > + return 1; > + if (__copy_to_user((void __user *)addr, &tsc_ref, sizeof(tsc_ref))) > + return 1; > + mark_page_dirty(kvm, gfn); > + kvm->arch.hv_tsc_page = data; > break; > } > default: > @@ -2253,6 +2278,17 @@ static int get_msr_hyperv_pw(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata) > case HV_X64_MSR_HYPERCALL: > data = kvm->arch.hv_hypercall; > break; > + case HV_X64_MSR_TIME_REF_COUNT: { > + u64 now_ns; > + local_irq_disable(); > + now_ns = get_kernel_ns() + kvm->arch.kvmclock_offset; > + data = div_u64(now_ns - kvm->arch.hv_ref_count, 100); > + local_irq_enable(); > + break; > + } local_irq_disable/enable not needed. It would be nice to have a testcase to compare reference tsc versus MSR. -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Tue, May 21, 2013 at 09:46:14PM -0300, Marcelo Tosatti wrote: > On Sun, May 19, 2013 at 05:06:36PM +1000, Vadim Rozenfeld wrote: > > Signed-off: Peter Lieven <pl@dlh.net> > > Signed-off: Gleb Natapov <gleb@redhat.com> > > Signed-off: Vadim Rozenfeld <vrozenfe@redhat.com> > > > > v1 -> v2 > > 1. mark TSC page dirty as suggested by > > Eric Northup <digitaleric@google.com> and Gleb > > 2. disable local irq when calling get_kernel_ns, > > as it was done by Peter Lieven <pl@dlhnet.de> > > 3. move check for TSC page enable from second patch > > to this one. > > > > --- > > arch/x86/include/asm/kvm_host.h | 2 ++ > > arch/x86/include/uapi/asm/hyperv.h | 14 ++++++++++++++ > > arch/x86/kvm/x86.c | 39 +++++++++++++++++++++++++++++++++++++- > > include/uapi/linux/kvm.h | 1 + > > 4 files changed, 55 insertions(+), 1 deletion(-) > > > > diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h > > index 3741c65..f0fee35 100644 > > --- a/arch/x86/include/asm/kvm_host.h > > +++ b/arch/x86/include/asm/kvm_host.h > > @@ -575,6 +575,8 @@ struct kvm_arch { > > /* fields used by HYPER-V emulation */ > > u64 hv_guest_os_id; > > u64 hv_hypercall; > > + u64 hv_ref_count; > > + u64 hv_tsc_page; > > > > #ifdef CONFIG_KVM_MMU_AUDIT > > int audit_point; > > diff --git a/arch/x86/include/uapi/asm/hyperv.h b/arch/x86/include/uapi/asm/hyperv.h > > index b80420b..890dfc3 100644 > > --- a/arch/x86/include/uapi/asm/hyperv.h > > +++ b/arch/x86/include/uapi/asm/hyperv.h > > @@ -136,6 +136,9 @@ > > /* MSR used to read the per-partition time reference counter */ > > #define HV_X64_MSR_TIME_REF_COUNT 0x40000020 > > > > +/* A partition's reference time stamp counter (TSC) page */ > > +#define HV_X64_MSR_REFERENCE_TSC 0x40000021 > > + > > /* Define the virtual APIC registers */ > > #define HV_X64_MSR_EOI 0x40000070 > > #define HV_X64_MSR_ICR 0x40000071 > > @@ -179,6 +182,9 @@ > > #define HV_X64_MSR_APIC_ASSIST_PAGE_ADDRESS_MASK \ > > (~((1ull << HV_X64_MSR_APIC_ASSIST_PAGE_ADDRESS_SHIFT) - 1)) > > > > +#define HV_X64_MSR_TSC_REFERENCE_ENABLE 0x00000001 > > +#define HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT 12 > > + > > #define HV_PROCESSOR_POWER_STATE_C0 0 > > #define HV_PROCESSOR_POWER_STATE_C1 1 > > #define HV_PROCESSOR_POWER_STATE_C2 2 > > @@ -191,4 +197,12 @@ > > #define HV_STATUS_INVALID_ALIGNMENT 4 > > #define HV_STATUS_INSUFFICIENT_BUFFERS 19 > > > > +typedef struct _HV_REFERENCE_TSC_PAGE { > > + __u32 TscSequence; > > + __u32 Rserved1; > > + __u64 TscScale; > > + __s64 TscOffset; > > +} HV_REFERENCE_TSC_PAGE, *PHV_REFERENCE_TSC_PAGE; > > + > > + > > #endif > > diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c > > index 8d28810..9645dab 100644 > > --- a/arch/x86/kvm/x86.c > > +++ b/arch/x86/kvm/x86.c > > @@ -843,7 +843,7 @@ EXPORT_SYMBOL_GPL(kvm_rdpmc); > > static u32 msrs_to_save[] = { > > MSR_KVM_SYSTEM_TIME, MSR_KVM_WALL_CLOCK, > > MSR_KVM_SYSTEM_TIME_NEW, MSR_KVM_WALL_CLOCK_NEW, not needed. > > - HV_X64_MSR_GUEST_OS_ID, HV_X64_MSR_HYPERCALL, > > + HV_X64_MSR_GUEST_OS_ID, HV_X64_MSR_HYPERCALL, HV_X64_MSR_TIME_REF_COUNT, > > HV_X64_MSR_APIC_ASSIST_PAGE, MSR_KVM_ASYNC_PF_EN, MSR_KVM_STEAL_TIME, > > MSR_KVM_PV_EOI_EN, > > MSR_IA32_SYSENTER_CS, MSR_IA32_SYSENTER_ESP, MSR_IA32_SYSENTER_EIP, > > @@ -1788,6 +1788,8 @@ static bool kvm_hv_msr_partition_wide(u32 msr) > > switch (msr) { > > case HV_X64_MSR_GUEST_OS_ID: > > case HV_X64_MSR_HYPERCALL: > > + case HV_X64_MSR_REFERENCE_TSC: > > + case HV_X64_MSR_TIME_REF_COUNT: > > r = true; > > break; > > } > > @@ -1827,6 +1829,29 @@ static int set_msr_hyperv_pw(struct kvm_vcpu *vcpu, u32 msr, u64 data) > > if (__copy_to_user((void __user *)addr, instructions, 4)) > > return 1; > > kvm->arch.hv_hypercall = data; > > + local_irq_disable(); > > + kvm->arch.hv_ref_count = get_kernel_ns(); > > + local_irq_enable(); > > + break; > > local_irq_disable/local_irq_enable not needed. > get_kernel_ns has WARN_ON(preemptible()) so at least preempt_disable() is needed, but all callers of the function disable interrupts. > What is the reasoning behind reading this time value at msr write time? > > > + } > > + case HV_X64_MSR_REFERENCE_TSC: { > > + u64 gfn; > > + unsigned long addr; > > + HV_REFERENCE_TSC_PAGE tsc_ref; > > + tsc_ref.TscSequence = 0; > > + if (!(data & HV_X64_MSR_TSC_REFERENCE_ENABLE)) { > > + kvm->arch.hv_tsc_page = data; > > + break; > > + } > > + gfn = data >> HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT; > > + addr = gfn_to_hva(kvm, data >> > > + HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT); > > + if (kvm_is_error_hva(addr)) > > + return 1; > > + if (__copy_to_user((void __user *)addr, &tsc_ref, sizeof(tsc_ref))) > > + return 1; > > + mark_page_dirty(kvm, gfn); > > + kvm->arch.hv_tsc_page = data; > > break; > > } > > default: > > @@ -2253,6 +2278,17 @@ static int get_msr_hyperv_pw(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata) > > case HV_X64_MSR_HYPERCALL: > > data = kvm->arch.hv_hypercall; > > break; > > + case HV_X64_MSR_TIME_REF_COUNT: { > > + u64 now_ns; > > + local_irq_disable(); > > + now_ns = get_kernel_ns() + kvm->arch.kvmclock_offset; > > + data = div_u64(now_ns - kvm->arch.hv_ref_count, 100); > > + local_irq_enable(); > > + break; > > + } > > local_irq_disable/enable not needed. > > It would be nice to have a testcase to compare reference tsc versus MSR. -- Gleb. -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Wed, May 22, 2013 at 06:28:47AM +0300, Gleb Natapov wrote > > > + case HV_X64_MSR_TIME_REF_COUNT: > > > r = true; > > > break; > > > } > > > @@ -1827,6 +1829,29 @@ static int set_msr_hyperv_pw(struct kvm_vcpu *vcpu, u32 msr, u64 data) > > > if (__copy_to_user((void __user *)addr, instructions, 4)) > > > return 1; > > > kvm->arch.hv_hypercall = data; > > > + local_irq_disable(); > > > + kvm->arch.hv_ref_count = get_kernel_ns(); > > > + local_irq_enable(); > > > + break; > > > > local_irq_disable/local_irq_enable not needed. > > > get_kernel_ns has WARN_ON(preemptible()) so at least preempt_disable() > is needed, but all callers of the function disable interrupts. OK. Its not necessary to disable interrupts (this is a left over of kvm_guest_update_time). -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Wed, May 22, 2013 at 12:32:57AM -0300, Marcelo Tosatti wrote: > On Wed, May 22, 2013 at 06:28:47AM +0300, Gleb Natapov wrote > > > > + case HV_X64_MSR_TIME_REF_COUNT: > > > > r = true; > > > > break; > > > > } > > > > @@ -1827,6 +1829,29 @@ static int set_msr_hyperv_pw(struct kvm_vcpu *vcpu, u32 msr, u64 data) > > > > if (__copy_to_user((void __user *)addr, instructions, 4)) > > > > return 1; > > > > kvm->arch.hv_hypercall = data; > > > > + local_irq_disable(); > > > > + kvm->arch.hv_ref_count = get_kernel_ns(); > > > > + local_irq_enable(); > > > > + break; > > > > > > local_irq_disable/local_irq_enable not needed. > > > > > get_kernel_ns has WARN_ON(preemptible()) so at least preempt_disable() > > is needed, but all callers of the function disable interrupts. > > OK. Its not necessary to disable interrupts (this is a left over of > kvm_guest_update_time). So preempt_disable() then? Should we change other callers? -- Gleb. -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
----- Original Message ----- From: "Marcelo Tosatti" <mtosatti@redhat.com> To: "Vadim Rozenfeld" <vrozenfe@redhat.com> Cc: kvm@vger.kernel.org, gleb@redhat.com, pl@dlh.net Sent: Wednesday, May 22, 2013 10:46:14 AM Subject: Re: [RFC PATCH v2 1/2] add support for Hyper-V reference time counter On Sun, May 19, 2013 at 05:06:36PM +1000, Vadim Rozenfeld wrote: > Signed-off: Peter Lieven <pl@dlh.net> > Signed-off: Gleb Natapov <gleb@redhat.com> > Signed-off: Vadim Rozenfeld <vrozenfe@redhat.com> > > v1 -> v2 > 1. mark TSC page dirty as suggested by > Eric Northup <digitaleric@google.com> and Gleb > 2. disable local irq when calling get_kernel_ns, > as it was done by Peter Lieven <pl@dlhnet.de> > 3. move check for TSC page enable from second patch > to this one. > > --- > arch/x86/include/asm/kvm_host.h | 2 ++ > arch/x86/include/uapi/asm/hyperv.h | 14 ++++++++++++++ > arch/x86/kvm/x86.c | 39 +++++++++++++++++++++++++++++++++++++- > include/uapi/linux/kvm.h | 1 + > 4 files changed, 55 insertions(+), 1 deletion(-) > > diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h > index 3741c65..f0fee35 100644 > --- a/arch/x86/include/asm/kvm_host.h > +++ b/arch/x86/include/asm/kvm_host.h > @@ -575,6 +575,8 @@ struct kvm_arch { > /* fields used by HYPER-V emulation */ > u64 hv_guest_os_id; > u64 hv_hypercall; > + u64 hv_ref_count; > + u64 hv_tsc_page; > > #ifdef CONFIG_KVM_MMU_AUDIT > int audit_point; > diff --git a/arch/x86/include/uapi/asm/hyperv.h b/arch/x86/include/uapi/asm/hyperv.h > index b80420b..890dfc3 100644 > --- a/arch/x86/include/uapi/asm/hyperv.h > +++ b/arch/x86/include/uapi/asm/hyperv.h > @@ -136,6 +136,9 @@ > /* MSR used to read the per-partition time reference counter */ > #define HV_X64_MSR_TIME_REF_COUNT 0x40000020 > > +/* A partition's reference time stamp counter (TSC) page */ > +#define HV_X64_MSR_REFERENCE_TSC 0x40000021 > + > /* Define the virtual APIC registers */ > #define HV_X64_MSR_EOI 0x40000070 > #define HV_X64_MSR_ICR 0x40000071 > @@ -179,6 +182,9 @@ > #define HV_X64_MSR_APIC_ASSIST_PAGE_ADDRESS_MASK \ > (~((1ull << HV_X64_MSR_APIC_ASSIST_PAGE_ADDRESS_SHIFT) - 1)) > > +#define HV_X64_MSR_TSC_REFERENCE_ENABLE 0x00000001 > +#define HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT 12 > + > #define HV_PROCESSOR_POWER_STATE_C0 0 > #define HV_PROCESSOR_POWER_STATE_C1 1 > #define HV_PROCESSOR_POWER_STATE_C2 2 > @@ -191,4 +197,12 @@ > #define HV_STATUS_INVALID_ALIGNMENT 4 > #define HV_STATUS_INSUFFICIENT_BUFFERS 19 > > +typedef struct _HV_REFERENCE_TSC_PAGE { > + __u32 TscSequence; > + __u32 Rserved1; > + __u64 TscScale; > + __s64 TscOffset; > +} HV_REFERENCE_TSC_PAGE, *PHV_REFERENCE_TSC_PAGE; > + > + > #endif > diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c > index 8d28810..9645dab 100644 > --- a/arch/x86/kvm/x86.c > +++ b/arch/x86/kvm/x86.c > @@ -843,7 +843,7 @@ EXPORT_SYMBOL_GPL(kvm_rdpmc); > static u32 msrs_to_save[] = { > MSR_KVM_SYSTEM_TIME, MSR_KVM_WALL_CLOCK, > MSR_KVM_SYSTEM_TIME_NEW, MSR_KVM_WALL_CLOCK_NEW, not needed. > - HV_X64_MSR_GUEST_OS_ID, HV_X64_MSR_HYPERCALL, > + HV_X64_MSR_GUEST_OS_ID, HV_X64_MSR_HYPERCALL, HV_X64_MSR_TIME_REF_COUNT, > HV_X64_MSR_APIC_ASSIST_PAGE, MSR_KVM_ASYNC_PF_EN, MSR_KVM_STEAL_TIME, > MSR_KVM_PV_EOI_EN, > MSR_IA32_SYSENTER_CS, MSR_IA32_SYSENTER_ESP, MSR_IA32_SYSENTER_EIP, > @@ -1788,6 +1788,8 @@ static bool kvm_hv_msr_partition_wide(u32 msr) > switch (msr) { > case HV_X64_MSR_GUEST_OS_ID: > case HV_X64_MSR_HYPERCALL: > + case HV_X64_MSR_REFERENCE_TSC: > + case HV_X64_MSR_TIME_REF_COUNT: > r = true; > break; > } > @@ -1827,6 +1829,29 @@ static int set_msr_hyperv_pw(struct kvm_vcpu *vcpu, u32 msr, u64 data) > if (__copy_to_user((void __user *)addr, instructions, 4)) > return 1; > kvm->arch.hv_hypercall = data; > + local_irq_disable(); > + kvm->arch.hv_ref_count = get_kernel_ns(); > + local_irq_enable(); > + break; local_irq_disable/local_irq_enable not needed. What is the reasoning behind reading this time value at msr write time? [VR] Windows writs this MSR only once, during HAL initialization. So, I decided to treat this call as a partition crate event. > + } > + case HV_X64_MSR_REFERENCE_TSC: { > + u64 gfn; > + unsigned long addr; > + HV_REFERENCE_TSC_PAGE tsc_ref; > + tsc_ref.TscSequence = 0; > + if (!(data & HV_X64_MSR_TSC_REFERENCE_ENABLE)) { > + kvm->arch.hv_tsc_page = data; > + break; > + } > + gfn = data >> HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT; > + addr = gfn_to_hva(kvm, data >> > + HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT); > + if (kvm_is_error_hva(addr)) > + return 1; > + if (__copy_to_user((void __user *)addr, &tsc_ref, sizeof(tsc_ref))) > + return 1; > + mark_page_dirty(kvm, gfn); > + kvm->arch.hv_tsc_page = data; > break; > } > default: > @@ -2253,6 +2278,17 @@ static int get_msr_hyperv_pw(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata) > case HV_X64_MSR_HYPERCALL: > data = kvm->arch.hv_hypercall; > break; > + case HV_X64_MSR_TIME_REF_COUNT: { > + u64 now_ns; > + local_irq_disable(); > + now_ns = get_kernel_ns() + kvm->arch.kvmclock_offset; > + data = div_u64(now_ns - kvm->arch.hv_ref_count, 100); > + local_irq_enable(); > + break; > + } local_irq_disable/enable not needed. It would be nice to have a testcase to compare reference tsc versus MSR. -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Wed, May 22, 2013 at 06:38:47AM +0300, Gleb Natapov wrote: > On Wed, May 22, 2013 at 12:32:57AM -0300, Marcelo Tosatti wrote: > > On Wed, May 22, 2013 at 06:28:47AM +0300, Gleb Natapov wrote > > > > > + case HV_X64_MSR_TIME_REF_COUNT: > > > > > r = true; > > > > > break; > > > > > } > > > > > @@ -1827,6 +1829,29 @@ static int set_msr_hyperv_pw(struct kvm_vcpu *vcpu, u32 msr, u64 data) > > > > > if (__copy_to_user((void __user *)addr, instructions, 4)) > > > > > return 1; > > > > > kvm->arch.hv_hypercall = data; > > > > > + local_irq_disable(); > > > > > + kvm->arch.hv_ref_count = get_kernel_ns(); > > > > > + local_irq_enable(); > > > > > + break; > > > > > > > > local_irq_disable/local_irq_enable not needed. > > > > > > > get_kernel_ns has WARN_ON(preemptible()) so at least preempt_disable() > > > is needed, but all callers of the function disable interrupts. > > > > OK. Its not necessary to disable interrupts (this is a left over of > > kvm_guest_update_time). > So preempt_disable() then? Should we change other callers? Yes. Should only disable interrupts if necessary (the case in kvm_write_guest_time is necessary). -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Il 22/05/2013 09:32, Vadim Rozenfeld ha scritto: >> > @@ -1827,6 +1829,29 @@ static int set_msr_hyperv_pw(struct kvm_vcpu *vcpu, u32 msr, u64 data) >> > if (__copy_to_user((void __user *)addr, instructions, 4)) >> > return 1; >> > kvm->arch.hv_hypercall = data; >> > + local_irq_disable(); >> > + kvm->arch.hv_ref_count = get_kernel_ns(); >> > + local_irq_enable(); >> > + break; > local_irq_disable/local_irq_enable not needed. > > > What is the reasoning behind reading this time value at msr write time? > [VR] Windows writs this MSR only once, during HAL initialization. > So, I decided to treat this call as a partition crate event. > But is it expected by Windows that the reference count starts counting up from 0 at partition creation time? If you could just use (get_kernel_ns() + kvm->arch.kvmclock_offset) / 100, it would also be simpler for migration purposes. Paolo -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On 22.05.2013 23:55, Paolo Bonzini wrote: > Il 22/05/2013 09:32, Vadim Rozenfeld ha scritto: >>>> @@ -1827,6 +1829,29 @@ static int set_msr_hyperv_pw(struct kvm_vcpu *vcpu, u32 msr, u64 data) >>>> if (__copy_to_user((void __user *)addr, instructions, 4)) >>>> return 1; >>>> kvm->arch.hv_hypercall = data; >>>> + local_irq_disable(); >>>> + kvm->arch.hv_ref_count = get_kernel_ns(); >>>> + local_irq_enable(); >>>> + break; >> local_irq_disable/local_irq_enable not needed. >> >> >> What is the reasoning behind reading this time value at msr write time? >> [VR] Windows writs this MSR only once, during HAL initialization. >> So, I decided to treat this call as a partition crate event. >> > > But is it expected by Windows that the reference count starts counting > up from 0 at partition creation time? If you could just use > (get_kernel_ns() + kvm->arch.kvmclock_offset) / 100, it would also be > simpler for migration purposes. I can just report, that I have used the patch that does it that way and it works. Maybe Windows is calculating the uptime by the reference counter? Peter -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Il 23/05/2013 08:17, Peter Lieven ha scritto: > On 22.05.2013 23:55, Paolo Bonzini wrote: >> Il 22/05/2013 09:32, Vadim Rozenfeld ha scritto: >>>>> @@ -1827,6 +1829,29 @@ static int set_msr_hyperv_pw(struct kvm_vcpu >>>>> *vcpu, u32 msr, u64 data) >>>>> if (__copy_to_user((void __user *)addr, instructions, 4)) >>>>> return 1; >>>>> kvm->arch.hv_hypercall = data; >>>>> + local_irq_disable(); >>>>> + kvm->arch.hv_ref_count = get_kernel_ns(); >>>>> + local_irq_enable(); >>>>> + break; >>> local_irq_disable/local_irq_enable not needed. >>> >>> >>> What is the reasoning behind reading this time value at msr write time? >>> [VR] Windows writs this MSR only once, during HAL initialization. >>> So, I decided to treat this call as a partition crate event. >>> >> >> But is it expected by Windows that the reference count starts counting >> up from 0 at partition creation time? If you could just use >> (get_kernel_ns() + kvm->arch.kvmclock_offset) / 100, it would also be >> simpler for migration purposes. > > I can just report, that I have used the patch that does it that way and > it works. What do you mean by "that way"? :) Paolo > Maybe Windows is calculating the uptime by the reference counter? > > Peter > -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On 23.05.2013 11:54, Paolo Bonzini wrote: > Il 23/05/2013 08:17, Peter Lieven ha scritto: >> On 22.05.2013 23:55, Paolo Bonzini wrote: >>> Il 22/05/2013 09:32, Vadim Rozenfeld ha scritto: >>>>>> @@ -1827,6 +1829,29 @@ static int set_msr_hyperv_pw(struct kvm_vcpu >>>>>> *vcpu, u32 msr, u64 data) >>>>>> if (__copy_to_user((void __user *)addr, instructions, 4)) >>>>>> return 1; >>>>>> kvm->arch.hv_hypercall = data; >>>>>> + local_irq_disable(); >>>>>> + kvm->arch.hv_ref_count = get_kernel_ns(); >>>>>> + local_irq_enable(); >>>>>> + break; >>>> local_irq_disable/local_irq_enable not needed. >>>> >>>> >>>> What is the reasoning behind reading this time value at msr write time? >>>> [VR] Windows writs this MSR only once, during HAL initialization. >>>> So, I decided to treat this call as a partition crate event. >>>> >>> >>> But is it expected by Windows that the reference count starts counting >>> up from 0 at partition creation time? If you could just use >>> (get_kernel_ns() + kvm->arch.kvmclock_offset) / 100, it would also be >>> simpler for migration purposes. >> >> I can just report, that I have used the patch that does it that way and >> it works. > > What do you mean by "that way"? :) Ups sorry… I meant the way it was implemented in the old patch (I sent a few days ago). @@ -1426,6 +1428,21 @@ static int set_msr_hyperv_pw(struct kvm_ if (__copy_to_user((void *)addr, instructions, 4)) return 1; kvm->arch.hv_hypercall = data; + kvm->arch.hv_ref_count = get_kernel_ns(); + break; + } + case HV_X64_MSR_REFERENCE_TSC: { + u64 gfn; + unsigned long addr; + u32 hv_tsc_sequence; + gfn = data >> HV_X64_MSR_HYPERCALL_PAGE_ADDRESS_SHIFT; + addr = gfn_to_hva(kvm, gfn); + if (kvm_is_error_hva(addr)) + return 1; + hv_tsc_sequence = 0x0; //invalid + if (__copy_to_user((void *)addr, (void __user *) &hv_tsc_sequence, sizeof(hv_tsc_sequence))) + return 1; + kvm->arch.hv_reference_tsc = data; break; } default: @@ -1826,6 +1843,17 @@ static int get_msr_hyperv_pw(struct kvm_ case HV_X64_MSR_HYPERCALL: data = kvm->arch.hv_hypercall; break; + case HV_X64_MSR_TIME_REF_COUNT: { + u64 now_ns; + local_irq_disable(); + now_ns = get_kernel_ns(); + data = div_u64(now_ns + kvm->arch.kvmclock_offset - kvm->arch.hv_ref_count,100); + local_irq_enable(); + break; + } + case HV_X64_MSR_REFERENCE_TSC: + data = kvm->arch.hv_reference_tsc; + break; default: pr_unimpl(vcpu, "Hyper-V unhandled rdmsr: 0x%x\n", msr); return 1; Peter -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
----- Original Message ----- From: "Peter Lieven" <pl@dlhnet.de> To: "Paolo Bonzini" <pbonzini@redhat.com> Cc: "Vadim Rozenfeld" <vrozenfe@redhat.com>, "Marcelo Tosatti" <mtosatti@redhat.com>, kvm@vger.kernel.org, gleb@redhat.com, pl@dlh.net Sent: Thursday, May 23, 2013 4:17:57 PM Subject: Re: [RFC PATCH v2 1/2] add support for Hyper-V reference time counter On 22.05.2013 23:55, Paolo Bonzini wrote: > Il 22/05/2013 09:32, Vadim Rozenfeld ha scritto: >>>> @@ -1827,6 +1829,29 @@ static int set_msr_hyperv_pw(struct kvm_vcpu *vcpu, u32 msr, u64 data) >>>> if (__copy_to_user((void __user *)addr, instructions, 4)) >>>> return 1; >>>> kvm->arch.hv_hypercall = data; >>>> + local_irq_disable(); >>>> + kvm->arch.hv_ref_count = get_kernel_ns(); >>>> + local_irq_enable(); >>>> + break; >> local_irq_disable/local_irq_enable not needed. >> >> >> What is the reasoning behind reading this time value at msr write time? >> [VR] Windows writs this MSR only once, during HAL initialization. >> So, I decided to treat this call as a partition crate event. >> > > But is it expected by Windows that the reference count starts counting > up from 0 at partition creation time? If you could just use > (get_kernel_ns() + kvm->arch.kvmclock_offset) / 100, it would also be > simpler for migration purposes. I can just report, that I have used the patch that does it that way and it works. Maybe Windows is calculating the uptime by the reference counter? [VR] Windows use it (reference counters/iTSC/PMTimer/HPET) as a time-stamp source for (Ke)QueryPerformanceCounter function. Peter -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Il 23/05/2013 14:25, Vadim Rozenfeld ha scritto: > > > ----- Original Message ----- > From: "Peter Lieven" <pl@dlhnet.de> > To: "Paolo Bonzini" <pbonzini@redhat.com> > Cc: "Vadim Rozenfeld" <vrozenfe@redhat.com>, "Marcelo Tosatti" <mtosatti@redhat.com>, kvm@vger.kernel.org, gleb@redhat.com, pl@dlh.net > Sent: Thursday, May 23, 2013 4:17:57 PM > Subject: Re: [RFC PATCH v2 1/2] add support for Hyper-V reference time counter > > On 22.05.2013 23:55, Paolo Bonzini wrote: >> Il 22/05/2013 09:32, Vadim Rozenfeld ha scritto: >>>>> @@ -1827,6 +1829,29 @@ static int set_msr_hyperv_pw(struct kvm_vcpu *vcpu, u32 msr, u64 data) >>>>> if (__copy_to_user((void __user *)addr, instructions, 4)) >>>>> return 1; >>>>> kvm->arch.hv_hypercall = data; >>>>> + local_irq_disable(); >>>>> + kvm->arch.hv_ref_count = get_kernel_ns(); >>>>> + local_irq_enable(); >>>>> + break; >>> local_irq_disable/local_irq_enable not needed. >>> >>> >>> What is the reasoning behind reading this time value at msr write time? >>> [VR] Windows writs this MSR only once, during HAL initialization. >>> So, I decided to treat this call as a partition crate event. >>> >> >> But is it expected by Windows that the reference count starts counting >> up from 0 at partition creation time? If you could just use >> (get_kernel_ns() + kvm->arch.kvmclock_offset) / 100, it would also be >> simpler for migration purposes. > > I can just report, that I have used the patch that does it that way and it works. > Maybe Windows is calculating the uptime by the reference counter? > > [VR] > Windows use it (reference counters/iTSC/PMTimer/HPET) as a time-stamp source > for (Ke)QueryPerformanceCounter function. So I would prefer to remove kvm->arch.hv_ref_count altogether. Paolo -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On 23.05.2013 15:18, Paolo Bonzini wrote: > Il 23/05/2013 14:25, Vadim Rozenfeld ha scritto: >> >> >> ----- Original Message ----- >> From: "Peter Lieven" <pl@dlhnet.de> >> To: "Paolo Bonzini" <pbonzini@redhat.com> >> Cc: "Vadim Rozenfeld" <vrozenfe@redhat.com>, "Marcelo Tosatti" <mtosatti@redhat.com>, kvm@vger.kernel.org, gleb@redhat.com, pl@dlh.net >> Sent: Thursday, May 23, 2013 4:17:57 PM >> Subject: Re: [RFC PATCH v2 1/2] add support for Hyper-V reference time counter >> >> On 22.05.2013 23:55, Paolo Bonzini wrote: >>> Il 22/05/2013 09:32, Vadim Rozenfeld ha scritto: >>>>>> @@ -1827,6 +1829,29 @@ static int set_msr_hyperv_pw(struct kvm_vcpu *vcpu, u32 msr, u64 data) >>>>>> if (__copy_to_user((void __user *)addr, instructions, 4)) >>>>>> return 1; >>>>>> kvm->arch.hv_hypercall = data; >>>>>> + local_irq_disable(); >>>>>> + kvm->arch.hv_ref_count = get_kernel_ns(); >>>>>> + local_irq_enable(); >>>>>> + break; >>>> local_irq_disable/local_irq_enable not needed. >>>> >>>> >>>> What is the reasoning behind reading this time value at msr write time? >>>> [VR] Windows writs this MSR only once, during HAL initialization. >>>> So, I decided to treat this call as a partition crate event. >>>> >>> >>> But is it expected by Windows that the reference count starts counting >>> up from 0 at partition creation time? If you could just use >>> (get_kernel_ns() + kvm->arch.kvmclock_offset) / 100, it would also be >>> simpler for migration purposes. >> >> I can just report, that I have used the patch that does it that way and it works. >> Maybe Windows is calculating the uptime by the reference counter? >> >> [VR] >> Windows use it (reference counters/iTSC/PMTimer/HPET) as a time-stamp source >> for (Ke)QueryPerformanceCounter function. > > So I would prefer to remove kvm->arch.hv_ref_count altogether. But only if the migration support is guaranteed. And what if we have a host which lacks invariant TSC support? Peter > > Paolo > -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Il 23/05/2013 15:20, Peter Lieven ha scritto: > On 23.05.2013 15:18, Paolo Bonzini wrote: >> Il 23/05/2013 14:25, Vadim Rozenfeld ha scritto: >>> >>> >>> ----- Original Message ----- >>> From: "Peter Lieven" <pl@dlhnet.de> >>> To: "Paolo Bonzini" <pbonzini@redhat.com> >>> Cc: "Vadim Rozenfeld" <vrozenfe@redhat.com>, "Marcelo Tosatti" >>> <mtosatti@redhat.com>, kvm@vger.kernel.org, gleb@redhat.com, pl@dlh.net >>> Sent: Thursday, May 23, 2013 4:17:57 PM >>> Subject: Re: [RFC PATCH v2 1/2] add support for Hyper-V reference >>> time counter >>> >>> On 22.05.2013 23:55, Paolo Bonzini wrote: >>>> Il 22/05/2013 09:32, Vadim Rozenfeld ha scritto: >>>>>>> @@ -1827,6 +1829,29 @@ static int set_msr_hyperv_pw(struct >>>>>>> kvm_vcpu *vcpu, u32 msr, u64 data) >>>>>>> if (__copy_to_user((void __user *)addr, instructions, 4)) >>>>>>> return 1; >>>>>>> kvm->arch.hv_hypercall = data; >>>>>>> + local_irq_disable(); >>>>>>> + kvm->arch.hv_ref_count = get_kernel_ns(); >>>>>>> + local_irq_enable(); >>>>>>> + break; >>>>> local_irq_disable/local_irq_enable not needed. >>>>> >>>>> >>>>> What is the reasoning behind reading this time value at msr write >>>>> time? >>>>> [VR] Windows writs this MSR only once, during HAL initialization. >>>>> So, I decided to treat this call as a partition crate event. >>>>> >>>> >>>> But is it expected by Windows that the reference count starts counting >>>> up from 0 at partition creation time? If you could just use >>>> (get_kernel_ns() + kvm->arch.kvmclock_offset) / 100, it would also be >>>> simpler for migration purposes. >>> >>> I can just report, that I have used the patch that does it that way >>> and it works. >>> Maybe Windows is calculating the uptime by the reference counter? >>> >>> [VR] >>> Windows use it (reference counters/iTSC/PMTimer/HPET) as a time-stamp >>> source >>> for (Ke)QueryPerformanceCounter function. >> >> So I would prefer to remove kvm->arch.hv_ref_count altogether. > > But only if the migration support is guaranteed. Migration support wouldn't work yet anyway, you need to recompute the scale and sequence. But that could be done by KVM_SET_CLOCK. > And what if we have a host which lacks invariant TSC support? Then the sequence must be set to 0 or 0xFFFFFFFF, I still haven't understood. :) Paolo -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On 23.05.2013 15:23, Paolo Bonzini wrote: > Il 23/05/2013 15:20, Peter Lieven ha scritto: >> On 23.05.2013 15:18, Paolo Bonzini wrote: >>> Il 23/05/2013 14:25, Vadim Rozenfeld ha scritto: >>>> >>>> >>>> ----- Original Message ----- >>>> From: "Peter Lieven" <pl@dlhnet.de> >>>> To: "Paolo Bonzini" <pbonzini@redhat.com> >>>> Cc: "Vadim Rozenfeld" <vrozenfe@redhat.com>, "Marcelo Tosatti" >>>> <mtosatti@redhat.com>, kvm@vger.kernel.org, gleb@redhat.com, pl@dlh.net >>>> Sent: Thursday, May 23, 2013 4:17:57 PM >>>> Subject: Re: [RFC PATCH v2 1/2] add support for Hyper-V reference >>>> time counter >>>> >>>> On 22.05.2013 23:55, Paolo Bonzini wrote: >>>>> Il 22/05/2013 09:32, Vadim Rozenfeld ha scritto: >>>>>>>> @@ -1827,6 +1829,29 @@ static int set_msr_hyperv_pw(struct >>>>>>>> kvm_vcpu *vcpu, u32 msr, u64 data) >>>>>>>> if (__copy_to_user((void __user *)addr, instructions, 4)) >>>>>>>> return 1; >>>>>>>> kvm->arch.hv_hypercall = data; >>>>>>>> + local_irq_disable(); >>>>>>>> + kvm->arch.hv_ref_count = get_kernel_ns(); >>>>>>>> + local_irq_enable(); >>>>>>>> + break; >>>>>> local_irq_disable/local_irq_enable not needed. >>>>>> >>>>>> >>>>>> What is the reasoning behind reading this time value at msr write >>>>>> time? >>>>>> [VR] Windows writs this MSR only once, during HAL initialization. >>>>>> So, I decided to treat this call as a partition crate event. >>>>>> >>>>> >>>>> But is it expected by Windows that the reference count starts counting >>>>> up from 0 at partition creation time? If you could just use >>>>> (get_kernel_ns() + kvm->arch.kvmclock_offset) / 100, it would also be >>>>> simpler for migration purposes. >>>> >>>> I can just report, that I have used the patch that does it that way >>>> and it works. >>>> Maybe Windows is calculating the uptime by the reference counter? >>>> >>>> [VR] >>>> Windows use it (reference counters/iTSC/PMTimer/HPET) as a time-stamp >>>> source >>>> for (Ke)QueryPerformanceCounter function. >>> >>> So I would prefer to remove kvm->arch.hv_ref_count altogether. >> >> But only if the migration support is guaranteed. > > Migration support wouldn't work yet anyway, you need to recompute the > scale and sequence. But that could be done by KVM_SET_CLOCK. hv_ref_counter does work out of the box. what I was trying to say is even it is slower than iTSC, it is significantly faster than hpet or pmtimer and I can confirm it works flawlessly with migration. > >> And what if we have a host which lacks invariant TSC support? > > Then the sequence must be set to 0 or 0xFFFFFFFF, I still haven't > understood. :) yes, but windows does then fall back to pmtimer or hpet which is much slower then reference counter. Peter -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Il 23/05/2013 15:30, Peter Lieven ha scritto: >>>> So I would prefer to remove kvm->arch.hv_ref_count altogether. >>> >>> But only if the migration support is guaranteed. >> >> Migration support wouldn't work yet anyway, you need to recompute the >> scale and sequence. But that could be done by KVM_SET_CLOCK. > > hv_ref_counter does work out of the box. what I was trying to say is > even it is slower than iTSC, it is significantly faster than hpet > or pmtimer and I can confirm it works flawlessly with migration. I can't see how without a userspace patch to migrate hv_ref_count. >>> And what if we have a host which lacks invariant TSC support? >> >> Then the sequence must be set to 0 or 0xFFFFFFFF, I still haven't >> understood. :) > > yes, but windows does then fall back to pmtimer or hpet which is much > slower then reference counter. IIUC, with 0 it doesn't. Paolo -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 3741c65..f0fee35 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -575,6 +575,8 @@ struct kvm_arch { /* fields used by HYPER-V emulation */ u64 hv_guest_os_id; u64 hv_hypercall; + u64 hv_ref_count; + u64 hv_tsc_page; #ifdef CONFIG_KVM_MMU_AUDIT int audit_point; diff --git a/arch/x86/include/uapi/asm/hyperv.h b/arch/x86/include/uapi/asm/hyperv.h index b80420b..890dfc3 100644 --- a/arch/x86/include/uapi/asm/hyperv.h +++ b/arch/x86/include/uapi/asm/hyperv.h @@ -136,6 +136,9 @@ /* MSR used to read the per-partition time reference counter */ #define HV_X64_MSR_TIME_REF_COUNT 0x40000020 +/* A partition's reference time stamp counter (TSC) page */ +#define HV_X64_MSR_REFERENCE_TSC 0x40000021 + /* Define the virtual APIC registers */ #define HV_X64_MSR_EOI 0x40000070 #define HV_X64_MSR_ICR 0x40000071 @@ -179,6 +182,9 @@ #define HV_X64_MSR_APIC_ASSIST_PAGE_ADDRESS_MASK \ (~((1ull << HV_X64_MSR_APIC_ASSIST_PAGE_ADDRESS_SHIFT) - 1)) +#define HV_X64_MSR_TSC_REFERENCE_ENABLE 0x00000001 +#define HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT 12 + #define HV_PROCESSOR_POWER_STATE_C0 0 #define HV_PROCESSOR_POWER_STATE_C1 1 #define HV_PROCESSOR_POWER_STATE_C2 2 @@ -191,4 +197,12 @@ #define HV_STATUS_INVALID_ALIGNMENT 4 #define HV_STATUS_INSUFFICIENT_BUFFERS 19 +typedef struct _HV_REFERENCE_TSC_PAGE { + __u32 TscSequence; + __u32 Rserved1; + __u64 TscScale; + __s64 TscOffset; +} HV_REFERENCE_TSC_PAGE, *PHV_REFERENCE_TSC_PAGE; + + #endif diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 8d28810..9645dab 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -843,7 +843,7 @@ EXPORT_SYMBOL_GPL(kvm_rdpmc); static u32 msrs_to_save[] = { MSR_KVM_SYSTEM_TIME, MSR_KVM_WALL_CLOCK, MSR_KVM_SYSTEM_TIME_NEW, MSR_KVM_WALL_CLOCK_NEW, - HV_X64_MSR_GUEST_OS_ID, HV_X64_MSR_HYPERCALL, + HV_X64_MSR_GUEST_OS_ID, HV_X64_MSR_HYPERCALL, HV_X64_MSR_TIME_REF_COUNT, HV_X64_MSR_APIC_ASSIST_PAGE, MSR_KVM_ASYNC_PF_EN, MSR_KVM_STEAL_TIME, MSR_KVM_PV_EOI_EN, MSR_IA32_SYSENTER_CS, MSR_IA32_SYSENTER_ESP, MSR_IA32_SYSENTER_EIP, @@ -1788,6 +1788,8 @@ static bool kvm_hv_msr_partition_wide(u32 msr) switch (msr) { case HV_X64_MSR_GUEST_OS_ID: case HV_X64_MSR_HYPERCALL: + case HV_X64_MSR_REFERENCE_TSC: + case HV_X64_MSR_TIME_REF_COUNT: r = true; break; } @@ -1827,6 +1829,29 @@ static int set_msr_hyperv_pw(struct kvm_vcpu *vcpu, u32 msr, u64 data) if (__copy_to_user((void __user *)addr, instructions, 4)) return 1; kvm->arch.hv_hypercall = data; + local_irq_disable(); + kvm->arch.hv_ref_count = get_kernel_ns(); + local_irq_enable(); + break; + } + case HV_X64_MSR_REFERENCE_TSC: { + u64 gfn; + unsigned long addr; + HV_REFERENCE_TSC_PAGE tsc_ref; + tsc_ref.TscSequence = 0; + if (!(data & HV_X64_MSR_TSC_REFERENCE_ENABLE)) { + kvm->arch.hv_tsc_page = data; + break; + } + gfn = data >> HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT; + addr = gfn_to_hva(kvm, data >> + HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT); + if (kvm_is_error_hva(addr)) + return 1; + if (__copy_to_user((void __user *)addr, &tsc_ref, sizeof(tsc_ref))) + return 1; + mark_page_dirty(kvm, gfn); + kvm->arch.hv_tsc_page = data; break; } default: @@ -2253,6 +2278,17 @@ static int get_msr_hyperv_pw(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata) case HV_X64_MSR_HYPERCALL: data = kvm->arch.hv_hypercall; break; + case HV_X64_MSR_TIME_REF_COUNT: { + u64 now_ns; + local_irq_disable(); + now_ns = get_kernel_ns() + kvm->arch.kvmclock_offset; + data = div_u64(now_ns - kvm->arch.hv_ref_count, 100); + local_irq_enable(); + break; + } + case HV_X64_MSR_REFERENCE_TSC: + data = kvm->arch.hv_tsc_page; + break; default: vcpu_unimpl(vcpu, "Hyper-V unhandled rdmsr: 0x%x\n", msr); return 1; @@ -2566,6 +2602,7 @@ int kvm_dev_ioctl_check_extension(long ext) case KVM_CAP_ASSIGN_DEV_IRQ: case KVM_CAP_PCI_2_3: #endif + case KVM_CAP_HYPERV_TIME: r = 1; break; case KVM_CAP_COALESCED_MMIO: diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index a5c86fc..0810763 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -666,6 +666,7 @@ struct kvm_ppc_smmu_info { #define KVM_CAP_IRQ_MPIC 90 #define KVM_CAP_PPC_RTAS 91 #define KVM_CAP_IRQ_XICS 92 +#define KVM_CAP_HYPERV_TIME 93 #ifdef KVM_CAP_IRQ_ROUTING