Message ID: 1386502419-26614-3-git-send-email-vrozenfe@redhat.com
State: New, archived
Il 08/12/2013 12:33, Vadim Rozenfeld ha scritto:
> + tsc_ref.tsc_sequence =
> + boot_cpu_has(X86_FEATURE_CONSTANT_TSC) ? 1 : 0;
> + tsc_ref.tsc_scale =
> + ((10000LL << 32) / vcpu->arch.virtual_tsc_khz) << 32;
> + tsc_ref.tsc_offset = 0;
> if (__copy_to_user((void __user *)addr, &tsc_ref, sizeof(tsc_ref)))
> return 1;
> mark_page_dirty(kvm, gfn);
> kvm->arch.hv_tsc_page = data;
> + kvm->arch.hv_ref_count = 0;
> break;
> }
> default:
> @@ -3879,6 +3884,19 @@ long kvm_arch_vm_ioctl(struct file *filp,
> local_irq_enable();
> kvm->arch.kvmclock_offset = delta;
> kvm_gen_update_masterclock(kvm);
> +
> + if (kvm->arch.hv_tsc_page & HV_X64_MSR_TSC_REFERENCE_ENABLE) {
> + HV_REFERENCE_TSC_PAGE* tsc_ref;
> + u64 curr_time;
> + tsc_ref = (HV_REFERENCE_TSC_PAGE*)gfn_to_hva(kvm,
> + kvm->arch.hv_tsc_page >> HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT);
> + tsc_ref->tsc_sequence =
> + boot_cpu_has(X86_FEATURE_CONSTANT_TSC) ? tsc_ref->tsc_sequence + 1 : 0;
> + tsc_ref->tsc_scale = ((10000LL << 32) / __get_cpu_var(cpu_tsc_khz)) << 32;

Why shouldn't this be vcpu->arch.virtual_tsc_khz?

> + curr_time = (((tsc_ref->tsc_scale >> 32) * native_read_tsc()) >> 32) +
> + tsc_ref->tsc_offset;
> + tsc_ref->tsc_offset = kvm->arch.hv_ref_time - curr_time;
> + }

The difference in setting tsc_ref->tsc_scale is the only important
change between the two occurrences. If you can avoid that difference
and you move this to a separate function, you can reuse that new
function in set_msr_hyperv_pw as well.

Also, kvm_set_tsc_khz should recompute the reference page's values as
well, so you'd have three uses.

Paolo
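To make the refactoring Paolo asks for concrete, a shared helper could look roughly like the sketch below. The function name, its parameters, and the idea of having the caller pass in the frequency and the reference time are assumptions for illustration; they are not part of the posted patch:

/*
 * Hypothetical shared helper (names and parameters are illustrative).
 * The caller picks the frequency (vcpu->arch.virtual_tsc_khz at MSR-write
 * time, a kvm-wide copy of it elsewhere) and the reference time to
 * re-anchor to (0 when the page is first enabled).
 */
static void hv_update_tsc_ref_page(HV_REFERENCE_TSC_PAGE *tsc_ref,
				   u64 tsc_khz, u64 ref_time)
{
	u64 curr_time;

	/* Invalidate the sequence if the host TSC is not constant. */
	tsc_ref->tsc_sequence =
		boot_cpu_has(X86_FEATURE_CONSTANT_TSC) ?
			tsc_ref->tsc_sequence + 1 : 0;

	/* Convert raw TSC ticks to the 10 MHz (100 ns) reference rate. */
	tsc_ref->tsc_scale = ((10000LL << 32) / tsc_khz) << 32;

	/* Re-anchor the offset so the guest-visible count stays continuous. */
	curr_time = (((tsc_ref->tsc_scale >> 32) * native_read_tsc()) >> 32) +
		    tsc_ref->tsc_offset;
	tsc_ref->tsc_offset = ref_time - curr_time;
}

Such a helper could then be called from set_msr_hyperv_pw, from KVM_SET_CLOCK, and from kvm_set_tsc_khz, as Paolo suggests, provided the two existing sites can agree on how the offset is re-anchored.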
On Mon, 2013-12-09 at 15:32 +0100, Paolo Bonzini wrote:
> Il 08/12/2013 12:33, Vadim Rozenfeld ha scritto:
> > + tsc_ref.tsc_sequence =
> > + boot_cpu_has(X86_FEATURE_CONSTANT_TSC) ? 1 : 0;
> > + tsc_ref.tsc_scale =
> > + ((10000LL << 32) / vcpu->arch.virtual_tsc_khz) << 32;
> > + tsc_ref.tsc_offset = 0;
> > if (__copy_to_user((void __user *)addr, &tsc_ref, sizeof(tsc_ref)))
> > return 1;
> > mark_page_dirty(kvm, gfn);
> > kvm->arch.hv_tsc_page = data;
> > + kvm->arch.hv_ref_count = 0;
> > break;
> > }
> > default:
> > @@ -3879,6 +3884,19 @@ long kvm_arch_vm_ioctl(struct file *filp,
> > local_irq_enable();
> > kvm->arch.kvmclock_offset = delta;
> > kvm_gen_update_masterclock(kvm);
> > +
> > + if (kvm->arch.hv_tsc_page & HV_X64_MSR_TSC_REFERENCE_ENABLE) {
> > + HV_REFERENCE_TSC_PAGE* tsc_ref;
> > + u64 curr_time;
> > + tsc_ref = (HV_REFERENCE_TSC_PAGE*)gfn_to_hva(kvm,
> > + kvm->arch.hv_tsc_page >> HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT);
> > + tsc_ref->tsc_sequence =
> > + boot_cpu_has(X86_FEATURE_CONSTANT_TSC) ? tsc_ref->tsc_sequence + 1 : 0;
> > + tsc_ref->tsc_scale = ((10000LL << 32) / __get_cpu_var(cpu_tsc_khz)) << 32;
>
> Why shouldn't this be vcpu->arch.virtual_tsc_khz?

Yeah, I was thinking about that, but we need a vcpu instance for this.

>
> > + curr_time = (((tsc_ref->tsc_scale >> 32) * native_read_tsc()) >> 32) +
> > + tsc_ref->tsc_offset;
> > + tsc_ref->tsc_offset = kvm->arch.hv_ref_time - curr_time;
> > + }
>
> The difference in setting tsc_ref->tsc_scale is the only important
> change between the two occurrences. If you can avoid that difference
> and you move this to a separate function, you can reuse that new
> function in set_msr_hyperv_pw as well.

Do you mean between HV_X64_MSR_REFERENCE_TSC which happens during
partition creation time and KVM_SET_CLOCK which happens during resume
after partition pause? If so - there are several differences, where
the offset calculation probably is the most important one.

Vadim.

>
> Also, kvm_set_tsc_khz should recompute the reference page's values as
> well, so you'd have three uses.
>
> Paolo
Il 10/12/2013 12:23, Vadim Rozenfeld ha scritto:
> > > + if (kvm->arch.hv_tsc_page & HV_X64_MSR_TSC_REFERENCE_ENABLE) {
> > > + HV_REFERENCE_TSC_PAGE* tsc_ref;
> > > + u64 curr_time;
> > > + tsc_ref = (HV_REFERENCE_TSC_PAGE*)gfn_to_hva(kvm,
> > > + kvm->arch.hv_tsc_page >> HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT);
> > > + tsc_ref->tsc_sequence =
> > > + boot_cpu_has(X86_FEATURE_CONSTANT_TSC) ? tsc_ref->tsc_sequence + 1 : 0;
> > > + tsc_ref->tsc_scale = ((10000LL << 32) / __get_cpu_var(cpu_tsc_khz)) << 32;
> >
> > Why shouldn't this be vcpu->arch.virtual_tsc_khz?
>
> Yeah, I was thinking about that, but we need a vcpu instance for this.

You can perhaps store the value from vcpu->arch.virtual_tsc_khz to
kvm->arch when the MSR is first written?

> Do you mean between HV_X64_MSR_REFERENCE_TSC which happens during
> partition creation time and KVM_SET_CLOCK which happens during resume
> after partition pause? If so - there are several differences, where
> the offset calculation probably is the most important one.

The offset and frequency are the only differences.

+ curr_time = (((tsc_ref->tsc_scale >> 32) * native_read_tsc()) >> 32) +
+ tsc_ref->tsc_offset;
+ tsc_ref->tsc_offset = kvm->arch.hv_ref_time - curr_time;

Why do you need kvm->arch.hv_ref_time at all? Can you just use
"get_kernel_ns() + kvm->arch.kvmclock_offset - kvm->arch.hv_ref_count"?
Then the same code can set tsc_ref->tsc_offset in both cases.

In fact, it's not clear to me what hv_ref_time is for, and how it
is different from

By the way, a small nit:

>
> + tsc_ref.tsc_sequence =
> + boot_cpu_has(X86_FEATURE_CONSTANT_TSC) ? 1 : 0;
> + tsc_ref.tsc_scale =
> + ((10000LL << 32) / vcpu->arch.virtual_tsc_khz) << 32;
> + tsc_ref.tsc_offset = 0;
> if (__copy_to_user((void __user *)addr, &tsc_ref, sizeof(tsc_ref)))
> return 1;
> mark_page_dirty(kvm, gfn);
> kvm->arch.hv_tsc_page = data;
> + kvm->arch.hv_ref_count = 0;
> break;

This setting of kvm->arch.hv_ref_count belongs in the previous patch.

Paolo
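A minimal sketch of Paolo's "store it in kvm->arch" idea follows; the hv_tsc_khz field and both helper names are hypothetical additions, not something in the posted series:

/*
 * Hypothetical: capture the guest TSC frequency when the reference-TSC
 * MSR is written, so that paths which only have a struct kvm (such as
 * KVM_SET_CLOCK) can still recompute tsc_scale later.  The hv_tsc_khz
 * field is an assumed addition to struct kvm_arch.
 */
static void hv_remember_tsc_khz(struct kvm_vcpu *vcpu)
{
	vcpu->kvm->arch.hv_tsc_khz = vcpu->arch.virtual_tsc_khz;
}

static u64 hv_compute_tsc_scale(struct kvm *kvm)
{
	/* Same 10 MHz normalization as the patch, without needing a vcpu. */
	return ((10000LL << 32) / kvm->arch.hv_tsc_khz) << 32;
}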
On Tue, 2013-12-10 at 17:52 +0100, Paolo Bonzini wrote:
> Il 10/12/2013 12:23, Vadim Rozenfeld ha scritto:
> > > > + if (kvm->arch.hv_tsc_page & HV_X64_MSR_TSC_REFERENCE_ENABLE) {
> > > > + HV_REFERENCE_TSC_PAGE* tsc_ref;
> > > > + u64 curr_time;
> > > > + tsc_ref = (HV_REFERENCE_TSC_PAGE*)gfn_to_hva(kvm,
> > > > + kvm->arch.hv_tsc_page >> HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT);
> > > > + tsc_ref->tsc_sequence =
> > > > + boot_cpu_has(X86_FEATURE_CONSTANT_TSC) ? tsc_ref->tsc_sequence + 1 : 0;
> > > > + tsc_ref->tsc_scale = ((10000LL << 32) / __get_cpu_var(cpu_tsc_khz)) << 32;
> > >
> > > Why shouldn't this be vcpu->arch.virtual_tsc_khz?
> > Yeah, I was thinking about that, but we need a vcpu instance for this.
>
> You can perhaps store the value from vcpu->arch.virtual_tsc_khz to
> kvm->arch when the MSR is first written?
>
> > Do you mean between HV_X64_MSR_REFERENCE_TSC which happens during
> > partition creation time and KVM_SET_CLOCK which happens during resume
> > after partition pause? If so - there are several differences, where
> > the offset calculation probably is the most important one.
>
> The offset and frequency are the only differences.
>
> + curr_time = (((tsc_ref->tsc_scale >> 32) * native_read_tsc()) >> 32) +
> + tsc_ref->tsc_offset;
> + tsc_ref->tsc_offset = kvm->arch.hv_ref_time - curr_time;
>
> Why do you need kvm->arch.hv_ref_time at all? Can you just use
> "get_kernel_ns() + kvm->arch.kvmclock_offset - kvm->arch.hv_ref_count"?
> Then the same code can set tsc_ref->tsc_offset in both cases.
>
> In fact, it's not clear to me what hv_ref_time is for, and how it
> is different from

OK, let me explain how it works.
Hyper-V allows the guest to use the invariant TSC provided by the host
as a time stamp source (KeQueryPerformanceCounter). The guest calls
rdtsc and normalizes the result to a 10MHz frequency, which is why we
need "tsc_scale". "tsc_offset" is needed for migration or pause/resume
cycles. When we pause a VM, we need to save the current vTSC value
("hv_ref_time"), which is rdtsc * tsc_scale + tsc_offset. Then, during
resume, we need to recalculate the new tsc_scale as well as the new
tsc_offset value:
tsc_offset = old (saved) vTSC - new vTSC
So maybe hv_ref_time is not a good name, but we use it for keeping the
old vTSC value, saved before stopping the VM.

Vadim.

>
> By the way, a small nit:
>
> >
> > + tsc_ref.tsc_sequence =
> > + boot_cpu_has(X86_FEATURE_CONSTANT_TSC) ? 1 : 0;
> > + tsc_ref.tsc_scale =
> > + ((10000LL << 32) / vcpu->arch.virtual_tsc_khz) << 32;
> > + tsc_ref.tsc_offset = 0;
> > if (__copy_to_user((void __user *)addr, &tsc_ref, sizeof(tsc_ref)))
> > return 1;
> > mark_page_dirty(kvm, gfn);
> > kvm->arch.hv_tsc_page = data;
> > + kvm->arch.hv_ref_count = 0;
> > break;
>
> This setting of kvm->arch.hv_ref_count belongs in the previous patch.
>
> Paolo
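As a sanity check of the arithmetic Vadim describes, here is a small, self-contained user-space illustration of the fixed-point scaling; the frequency and TSC values are arbitrary examples, not measurements:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	/* Example host TSC frequency: 2.5 GHz, i.e. 2,500,000 kHz. */
	uint64_t tsc_khz = 2500000;
	uint64_t tsc_scale = ((10000ULL << 32) / tsc_khz) << 32;

	/* Example raw TSC reading: 5e9 ticks = 2 seconds at 2.5 GHz. */
	uint64_t tsc = 5000000000ULL;
	uint64_t tsc_offset = 0;

	/* Reference time in 100 ns units; expect roughly 20,000,000
	 * (2 seconds), give or take fixed-point truncation. */
	uint64_t ref = (((tsc_scale >> 32) * tsc) >> 32) + tsc_offset;
	printf("reference time = %llu x 100ns\n", (unsigned long long)ref);
	return 0;
}

On a pause/resume cycle the same formula is evaluated twice: the value computed before the pause is saved, and the new tsc_offset is chosen so that the formula, with the new scale and the current raw TSC, yields that saved value again.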
Il 11/12/2013 11:58, Vadim Rozenfeld ha scritto:
>> > + curr_time = (((tsc_ref->tsc_scale >> 32) * native_read_tsc()) >> 32) +
>> > + tsc_ref->tsc_offset;
>> > + tsc_ref->tsc_offset = kvm->arch.hv_ref_time - curr_time;
>> >
>> > Why do you need kvm->arch.hv_ref_time at all? Can you just use
>> > "get_kernel_ns() + kvm->arch.kvmclock_offset - kvm->arch.hv_ref_count"?
>> > Then the same code can set tsc_ref->tsc_offset in both cases.
>> >
>> > In fact, it's not clear to me what hv_ref_time is for, and how it
>> > is different from
> OK, let me explain how it works.
> Hyper-V allows the guest to use the invariant TSC provided by the host
> as a time stamp source (KeQueryPerformanceCounter). The guest calls
> rdtsc and normalizes the result to a 10MHz frequency, which is why we
> need "tsc_scale". "tsc_offset" is needed for migration or pause/resume
> cycles. When we pause a VM, we need to save the current vTSC value
> ("hv_ref_time"), which is rdtsc * tsc_scale + tsc_offset. Then, during
> resume, we need to recalculate the new tsc_scale as well as the new
> tsc_offset value:
> tsc_offset = old (saved) vTSC - new vTSC

In practice "save" means KVM_GET_CLOCK, and "restore" means
KVM_SET_CLOCK, right?

> So maybe hv_ref_time is not a good name, but we use it for keeping the
> old vTSC value, saved before stopping the VM.

Ok, this was roughly my understanding as well.

My understanding is also that

(((tsc_ref->tsc_scale >> 32) * native_read_tsc()) >> 32) + tsc_ref->tsc_offset

returns exactly the same value as HV_X64_MSR_TIME_REF_COUNT. Thus we
do not need kvm->arch.hv_ref_time. We can use the value of
HV_X64_MSR_TIME_REF_COUNT, which is "(get_kernel_ns() +
kvm->arch.kvmclock_offset - kvm->arch.hv_ref_count) / 100", to compute
tsc_offset, like this:

curr_time = (((tsc_ref->tsc_scale >> 32) * native_read_tsc()) >> 32);
tsc_ref->tsc_offset = get_hv_x64_msr_time_ref_count() - curr_time;

This code can be applied always: when the TSC page is initialized and
when KVM_SET_CLOCK is called. You do not need to do anything for
KVM_GET_CLOCK.

Paolo
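In code, Paolo's proposal might reduce to something like the following. get_hv_x64_msr_time_ref_count() is the hypothetical wrapper he names in his pseudocode, and its body simply mirrors the expression he quotes; none of this is taken verbatim from the posted patch:

/* Hypothetical wrapper: the value a guest read of
 * HV_X64_MSR_TIME_REF_COUNT would return, in 100 ns units. */
static u64 get_hv_x64_msr_time_ref_count(struct kvm *kvm)
{
	return (get_kernel_ns() + kvm->arch.kvmclock_offset -
		kvm->arch.hv_ref_count) / 100;
}

/* Re-anchor tsc_offset so the TSC page agrees with the reference
 * counter; usable both when the page is first enabled and on
 * KVM_SET_CLOCK. */
static void hv_reanchor_tsc_offset(struct kvm *kvm,
				   HV_REFERENCE_TSC_PAGE *tsc_ref)
{
	u64 curr_time;

	/* Scaled raw TSC, without any offset applied. */
	curr_time = ((tsc_ref->tsc_scale >> 32) * native_read_tsc()) >> 32;
	tsc_ref->tsc_offset = get_hv_x64_msr_time_ref_count(kvm) - curr_time;
}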
On Sun, Dec 08, 2013 at 10:33:39PM +1100, Vadim Rozenfeld wrote:
> The following patch allows to activate a partition reference
> time enlightenment that is based on the host platform's support
> for an Invariant Time Stamp Counter (iTSC).
>
> v2 -> v3
> Handle TSC sequence, scale, and offest changing during migration.
>
> ---
> arch/x86/include/asm/kvm_host.h | 1 +
> arch/x86/kvm/x86.c | 29 +++++++++++++++++++++++++++--
> 2 files changed, 28 insertions(+), 2 deletions(-)
>
> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> index 2fd0753..81fdff0 100644
> --- a/arch/x86/include/asm/kvm_host.h
> +++ b/arch/x86/include/asm/kvm_host.h
> @@ -607,6 +607,7 @@ struct kvm_arch {
> u64 hv_hypercall;
> u64 hv_ref_count;
> u64 hv_tsc_page;
> + u64 hv_ref_time;
>
> #ifdef CONFIG_KVM_MMU_AUDIT
> int audit_point;
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index 5e4e495a..cb6766a 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -1882,14 +1882,19 @@ static int set_msr_hyperv_pw(struct kvm_vcpu *vcpu, u32 msr, u64 data)
> break;
> }
> gfn = data >> HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT;
> - addr = gfn_to_hva(kvm, data >>
> - HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT);
> + addr = gfn_to_hva(kvm, gfn);
> if (kvm_is_error_hva(addr))
> return 1;
> + tsc_ref.tsc_sequence =
> + boot_cpu_has(X86_FEATURE_CONSTANT_TSC) ? 1 : 0;
> + tsc_ref.tsc_scale =
> + ((10000LL << 32) / vcpu->arch.virtual_tsc_khz) << 32;
> + tsc_ref.tsc_offset = 0;
> if (__copy_to_user((void __user *)addr, &tsc_ref, sizeof(tsc_ref)))
> return 1;
> mark_page_dirty(kvm, gfn);
> kvm->arch.hv_tsc_page = data;
> + kvm->arch.hv_ref_count = 0;
> break;
> }
> default:
> @@ -3879,6 +3884,19 @@ long kvm_arch_vm_ioctl(struct file *filp,
> local_irq_enable();
> kvm->arch.kvmclock_offset = delta;
> kvm_gen_update_masterclock(kvm);
> +
> + if (kvm->arch.hv_tsc_page & HV_X64_MSR_TSC_REFERENCE_ENABLE) {
> + HV_REFERENCE_TSC_PAGE* tsc_ref;
> + u64 curr_time;
> + tsc_ref = (HV_REFERENCE_TSC_PAGE*)gfn_to_hva(kvm,
> + kvm->arch.hv_tsc_page >> HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT);
> + tsc_ref->tsc_sequence =
> + boot_cpu_has(X86_FEATURE_CONSTANT_TSC) ? tsc_ref->tsc_sequence + 1 : 0;
> + tsc_ref->tsc_scale = ((10000LL << 32) / __get_cpu_var(cpu_tsc_khz)) << 32;
> + curr_time = (((tsc_ref->tsc_scale >> 32) * native_read_tsc()) >> 32) +
> + tsc_ref->tsc_offset;
> + tsc_ref->tsc_offset = kvm->arch.hv_ref_time - curr_time;
> + }
> break;
> }
> case KVM_GET_CLOCK: {
> @@ -3896,6 +3914,13 @@ long kvm_arch_vm_ioctl(struct file *filp,
> if (copy_to_user(argp, &user_ns, sizeof(user_ns)))
> goto out;
> r = 0;
> + if (kvm->arch.hv_tsc_page & HV_X64_MSR_TSC_REFERENCE_ENABLE) {
> + HV_REFERENCE_TSC_PAGE* tsc_ref;
> + tsc_ref = (HV_REFERENCE_TSC_PAGE*)gfn_to_hva(kvm,
> + kvm->arch.hv_tsc_page >> HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT);

kvm_read_guest_cached.

> + kvm->arch.hv_ref_time = (((tsc_ref->tsc_scale >> 32) *
> + native_read_tsc()) >> 32) + tsc_ref->tsc_offset;

Why native_read_tsc and not ->read_l1_tsc?

It is easier to trust on the host to check reliability of the TSC: if
it uses TSC clocksource, then the TSCs are stable. So could condition
exposing the TSC ref page when ka->use_master_clock=1, see
kvm_guest_time_update. And hook into pvclock_gtod_notify.

So in addition to X86_FEATURE_CONSTANT_TSC, check
ka->use_master_clock=1
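A rough sketch of the gating Marcelo suggests is shown below; the helper name is invented, and calling it from kvm_guest_time_update (where a vcpu and ka->use_master_clock are both at hand) is his suggestion rather than what the posted patch does:

/*
 * Hypothetical refresh, called from kvm_guest_time_update(): only keep
 * the reference TSC page valid while the host TSC is constant and the
 * masterclock is in use; otherwise publish the 0 sequence, which this
 * patch series uses to make the guest fall back to its slow path.
 */
static void hv_refresh_tsc_page(struct kvm_vcpu *vcpu,
				HV_REFERENCE_TSC_PAGE *tsc_ref)
{
	struct kvm_arch *ka = &vcpu->kvm->arch;

	if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC) || !ka->use_master_clock) {
		tsc_ref->tsc_sequence = 0;
		return;
	}

	tsc_ref->tsc_sequence++;
	tsc_ref->tsc_scale =
		((10000LL << 32) / vcpu->arch.virtual_tsc_khz) << 32;
}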
On Tue, Dec 10, 2013 at 10:23:17PM +1100, Vadim Rozenfeld wrote:
> On Mon, 2013-12-09 at 15:32 +0100, Paolo Bonzini wrote:
> > Il 08/12/2013 12:33, Vadim Rozenfeld ha scritto:
> > > + tsc_ref.tsc_sequence =
> > > + boot_cpu_has(X86_FEATURE_CONSTANT_TSC) ? 1 : 0;
> > > + tsc_ref.tsc_scale =
> > > + ((10000LL << 32) / vcpu->arch.virtual_tsc_khz) << 32;
> > > + tsc_ref.tsc_offset = 0;
> > > if (__copy_to_user((void __user *)addr, &tsc_ref, sizeof(tsc_ref)))
> > > return 1;
> > > mark_page_dirty(kvm, gfn);
> > > kvm->arch.hv_tsc_page = data;
> > > + kvm->arch.hv_ref_count = 0;
> > > break;
> > > }
> > > default:
> > > @@ -3879,6 +3884,19 @@ long kvm_arch_vm_ioctl(struct file *filp,
> > > local_irq_enable();
> > > kvm->arch.kvmclock_offset = delta;
> > > kvm_gen_update_masterclock(kvm);
> > > +
> > > + if (kvm->arch.hv_tsc_page & HV_X64_MSR_TSC_REFERENCE_ENABLE) {
> > > + HV_REFERENCE_TSC_PAGE* tsc_ref;
> > > + u64 curr_time;
> > > + tsc_ref = (HV_REFERENCE_TSC_PAGE*)gfn_to_hva(kvm,
> > > + kvm->arch.hv_tsc_page >> HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT);
> > > + tsc_ref->tsc_sequence =
> > > + boot_cpu_has(X86_FEATURE_CONSTANT_TSC) ? tsc_ref->tsc_sequence + 1 : 0;
> > > + tsc_ref->tsc_scale = ((10000LL << 32) / __get_cpu_var(cpu_tsc_khz)) << 32;
> >
> > Why shouldn't this be vcpu->arch.virtual_tsc_khz?
> Yeah, I was thinking about that, but we need a vcpu instance for this.

Move it to kvm_guest_time_update time (which is necessary anyway for
the pvclock gtod notifier changes etc).

> > > + curr_time = (((tsc_ref->tsc_scale >> 32) * native_read_tsc()) >> 32) +
> > > + tsc_ref->tsc_offset;
> > > + tsc_ref->tsc_offset = kvm->arch.hv_ref_time - curr_time;
> > > + }
> >
> > The difference in setting tsc_ref->tsc_scale is the only important
> > change between the two occurrences. If you can avoid that difference
> > and you move this to a separate function, you can reuse that new
> > function in set_msr_hyperv_pw as well.
>
> Do you mean between HV_X64_MSR_REFERENCE_TSC which happens during
> partition creation time and KVM_SET_CLOCK which happens during resume
> after partition pause? If so - there are several differences, where
> the offset calculation probably is the most important one.
>
> Vadim.
>
> >
> > Also, kvm_set_tsc_khz should recompute the reference page's values as
> > well, so you'd have three uses.
> >
> > Paolo
Il 11/12/2013 20:27, Marcelo Tosatti ha scritto:
>> > + if (kvm->arch.hv_tsc_page & HV_X64_MSR_TSC_REFERENCE_ENABLE) {
>> > + HV_REFERENCE_TSC_PAGE* tsc_ref;
>> > + tsc_ref = (HV_REFERENCE_TSC_PAGE*)gfn_to_hva(kvm,
>> > + kvm->arch.hv_tsc_page >> HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT);
> kvm_read_guest_cached.
>
>> > + kvm->arch.hv_ref_time = (((tsc_ref->tsc_scale >> 32) *
>> > + native_read_tsc()) >> 32) + tsc_ref->tsc_offset;
> Why native_read_tsc and not ->read_l1_tsc?
>
> It is easier to trust on the host to check reliability of the TSC: if
> it uses TSC clocksource, then the TSCs are stable. So could condition
> exposing the TSC ref page when ka->use_master_clock=1, see
> kvm_guest_time_update. And hook into pvclock_gtod_notify.
>
> So in addition to X86_FEATURE_CONSTANT_TSC, check
> ka->use_master_clock=1

FWIW, I agree with all these comments from Marcelo.

Paolo
----- Original Message -----
From: "Marcelo Tosatti" <mtosatti@redhat.com>
To: "Vadim Rozenfeld" <vrozenfe@redhat.com>
Cc: kvm@vger.kernel.org, pl@dlhnet.de, pbonzini@redhat.com
Sent: Thursday, December 12, 2013 6:27:00 AM
Subject: Re: [RFC PATCH v3 2/2] add support for Hyper-V partition reference time enlightenment

On Sun, Dec 08, 2013 at 10:33:39PM +1100, Vadim Rozenfeld wrote:
> The following patch allows to activate a partition reference
> time enlightenment that is based on the host platform's support
> for an Invariant Time Stamp Counter (iTSC).
>
> v2 -> v3
> Handle TSC sequence, scale, and offest changing during migration.
>
> ---
> arch/x86/include/asm/kvm_host.h | 1 +
> arch/x86/kvm/x86.c | 29 +++++++++++++++++++++++++++--
> 2 files changed, 28 insertions(+), 2 deletions(-)
>
> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> index 2fd0753..81fdff0 100644
> --- a/arch/x86/include/asm/kvm_host.h
> +++ b/arch/x86/include/asm/kvm_host.h
> @@ -607,6 +607,7 @@ struct kvm_arch {
> u64 hv_hypercall;
> u64 hv_ref_count;
> u64 hv_tsc_page;
> + u64 hv_ref_time;
>
> #ifdef CONFIG_KVM_MMU_AUDIT
> int audit_point;
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index 5e4e495a..cb6766a 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -1882,14 +1882,19 @@ static int set_msr_hyperv_pw(struct kvm_vcpu *vcpu, u32 msr, u64 data)
> break;
> }
> gfn = data >> HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT;
> - addr = gfn_to_hva(kvm, data >>
> - HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT);
> + addr = gfn_to_hva(kvm, gfn);
> if (kvm_is_error_hva(addr))
> return 1;
> + tsc_ref.tsc_sequence =
> + boot_cpu_has(X86_FEATURE_CONSTANT_TSC) ? 1 : 0;
> + tsc_ref.tsc_scale =
> + ((10000LL << 32) / vcpu->arch.virtual_tsc_khz) << 32;
> + tsc_ref.tsc_offset = 0;
> if (__copy_to_user((void __user *)addr, &tsc_ref, sizeof(tsc_ref)))
> return 1;
> mark_page_dirty(kvm, gfn);
> kvm->arch.hv_tsc_page = data;
> + kvm->arch.hv_ref_count = 0;
> break;
> }
> default:
> @@ -3879,6 +3884,19 @@ long kvm_arch_vm_ioctl(struct file *filp,
> local_irq_enable();
> kvm->arch.kvmclock_offset = delta;
> kvm_gen_update_masterclock(kvm);
> +
> + if (kvm->arch.hv_tsc_page & HV_X64_MSR_TSC_REFERENCE_ENABLE) {
> + HV_REFERENCE_TSC_PAGE* tsc_ref;
> + u64 curr_time;
> + tsc_ref = (HV_REFERENCE_TSC_PAGE*)gfn_to_hva(kvm,
> + kvm->arch.hv_tsc_page >> HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT);
> + tsc_ref->tsc_sequence =
> + boot_cpu_has(X86_FEATURE_CONSTANT_TSC) ? tsc_ref->tsc_sequence + 1 : 0;
> + tsc_ref->tsc_scale = ((10000LL << 32) / __get_cpu_var(cpu_tsc_khz)) << 32;
> + curr_time = (((tsc_ref->tsc_scale >> 32) * native_read_tsc()) >> 32) +
> + tsc_ref->tsc_offset;
> + tsc_ref->tsc_offset = kvm->arch.hv_ref_time - curr_time;
> + }
> break;
> }
> case KVM_GET_CLOCK: {
> @@ -3896,6 +3914,13 @@ long kvm_arch_vm_ioctl(struct file *filp,
> if (copy_to_user(argp, &user_ns, sizeof(user_ns)))
> goto out;
> r = 0;
> + if (kvm->arch.hv_tsc_page & HV_X64_MSR_TSC_REFERENCE_ENABLE) {
> + HV_REFERENCE_TSC_PAGE* tsc_ref;
> + tsc_ref = (HV_REFERENCE_TSC_PAGE*)gfn_to_hva(kvm,
> + kvm->arch.hv_tsc_page >> HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT);

kvm_read_guest_cached.

> + kvm->arch.hv_ref_time = (((tsc_ref->tsc_scale >> 32) *
> + native_read_tsc()) >> 32) + tsc_ref->tsc_offset;

Why native_read_tsc and not ->read_l1_tsc?

[VR]
Is it possible to get pointer to the vcpu instance at this point?

Thanks,
Vadim.

It is easier to trust on the host to check reliability of the TSC: if
it uses TSC clocksource, then the TSCs are stable. So could condition
exposing the TSC ref page when ka->use_master_clock=1, see
kvm_guest_time_update. And hook into pvclock_gtod_notify.

So in addition to X86_FEATURE_CONSTANT_TSC, check
ka->use_master_clock=1
On Mon, Jan 13, 2014 at 11:11:40PM -0500, Vadim Rozenfeld wrote:
>
>
> ----- Original Message -----
> From: "Marcelo Tosatti" <mtosatti@redhat.com>
> To: "Vadim Rozenfeld" <vrozenfe@redhat.com>
> Cc: kvm@vger.kernel.org, pl@dlhnet.de, pbonzini@redhat.com
> Sent: Thursday, December 12, 2013 6:27:00 AM
> Subject: Re: [RFC PATCH v3 2/2] add support for Hyper-V partition reference time enlightenment
>
> On Sun, Dec 08, 2013 at 10:33:39PM +1100, Vadim Rozenfeld wrote:
> > The following patch allows to activate a partition reference
> > time enlightenment that is based on the host platform's support
> > for an Invariant Time Stamp Counter (iTSC).
> >
> > v2 -> v3
> > Handle TSC sequence, scale, and offest changing during migration.
> >
> > ---
> > arch/x86/include/asm/kvm_host.h | 1 +
> > arch/x86/kvm/x86.c | 29 +++++++++++++++++++++++++++--
> > 2 files changed, 28 insertions(+), 2 deletions(-)
> >
> > diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> > index 2fd0753..81fdff0 100644
> > --- a/arch/x86/include/asm/kvm_host.h
> > +++ b/arch/x86/include/asm/kvm_host.h
> > @@ -607,6 +607,7 @@ struct kvm_arch {
> > u64 hv_hypercall;
> > u64 hv_ref_count;
> > u64 hv_tsc_page;
> > + u64 hv_ref_time;
> >
> > #ifdef CONFIG_KVM_MMU_AUDIT
> > int audit_point;
> > diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> > index 5e4e495a..cb6766a 100644
> > --- a/arch/x86/kvm/x86.c
> > +++ b/arch/x86/kvm/x86.c
> > @@ -1882,14 +1882,19 @@ static int set_msr_hyperv_pw(struct kvm_vcpu *vcpu, u32 msr, u64 data)
> > break;
> > }
> > gfn = data >> HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT;
> > - addr = gfn_to_hva(kvm, data >>
> > - HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT);
> > + addr = gfn_to_hva(kvm, gfn);
> > if (kvm_is_error_hva(addr))
> > return 1;
> > + tsc_ref.tsc_sequence =
> > + boot_cpu_has(X86_FEATURE_CONSTANT_TSC) ? 1 : 0;
> > + tsc_ref.tsc_scale =
> > + ((10000LL << 32) / vcpu->arch.virtual_tsc_khz) << 32;
> > + tsc_ref.tsc_offset = 0;
> > if (__copy_to_user((void __user *)addr, &tsc_ref, sizeof(tsc_ref)))
> > return 1;
> > mark_page_dirty(kvm, gfn);
> > kvm->arch.hv_tsc_page = data;
> > + kvm->arch.hv_ref_count = 0;
> > break;
> > }
> > default:
> > @@ -3879,6 +3884,19 @@ long kvm_arch_vm_ioctl(struct file *filp,
> > local_irq_enable();
> > kvm->arch.kvmclock_offset = delta;
> > kvm_gen_update_masterclock(kvm);
> > +
> > + if (kvm->arch.hv_tsc_page & HV_X64_MSR_TSC_REFERENCE_ENABLE) {
> > + HV_REFERENCE_TSC_PAGE* tsc_ref;
> > + u64 curr_time;
> > + tsc_ref = (HV_REFERENCE_TSC_PAGE*)gfn_to_hva(kvm,
> > + kvm->arch.hv_tsc_page >> HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT);
> > + tsc_ref->tsc_sequence =
> > + boot_cpu_has(X86_FEATURE_CONSTANT_TSC) ? tsc_ref->tsc_sequence + 1 : 0;
> > + tsc_ref->tsc_scale = ((10000LL << 32) / __get_cpu_var(cpu_tsc_khz)) << 32;
> > + curr_time = (((tsc_ref->tsc_scale >> 32) * native_read_tsc()) >> 32) +
> > + tsc_ref->tsc_offset;
> > + tsc_ref->tsc_offset = kvm->arch.hv_ref_time - curr_time;
> > + }
> > break;
> > }
> > case KVM_GET_CLOCK: {
> > @@ -3896,6 +3914,13 @@ long kvm_arch_vm_ioctl(struct file *filp,
> > if (copy_to_user(argp, &user_ns, sizeof(user_ns)))
> > goto out;
> > r = 0;
> > + if (kvm->arch.hv_tsc_page & HV_X64_MSR_TSC_REFERENCE_ENABLE) {
> > + HV_REFERENCE_TSC_PAGE* tsc_ref;
> > + tsc_ref = (HV_REFERENCE_TSC_PAGE*)gfn_to_hva(kvm,
> > + kvm->arch.hv_tsc_page >> HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT);
>
> kvm_read_guest_cached.
>
> > + kvm->arch.hv_ref_time = (((tsc_ref->tsc_scale >> 32) *
> > + native_read_tsc()) >> 32) + tsc_ref->tsc_offset;
>
> Why native_read_tsc and not ->read_l1_tsc?
>
> [VR]
> Is it possible to get pointer to the vcpu instance at this point?

See the suggestion to move this code to kvm_guest_time_update.

> Thanks,
> Vadim.
>
> It is easier to trust on the host to check reliability of the TSC: if
> it uses TSC clocksource, then the TSCs are stable. So could condition
> exposing the TSC ref page when ka->use_master_clock=1, see
> kvm_guest_time_update. And hook into pvclock_gtod_notify.
>
> So in addition to X86_FEATURE_CONSTANT_TSC, check
> ka->use_master_clock=1
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 2fd0753..81fdff0 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -607,6 +607,7 @@ struct kvm_arch {
 u64 hv_hypercall;
 u64 hv_ref_count;
 u64 hv_tsc_page;
+ u64 hv_ref_time;

 #ifdef CONFIG_KVM_MMU_AUDIT
 int audit_point;
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 5e4e495a..cb6766a 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -1882,14 +1882,19 @@ static int set_msr_hyperv_pw(struct kvm_vcpu *vcpu, u32 msr, u64 data)
 break;
 }
 gfn = data >> HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT;
- addr = gfn_to_hva(kvm, data >>
- HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT);
+ addr = gfn_to_hva(kvm, gfn);
 if (kvm_is_error_hva(addr))
 return 1;
+ tsc_ref.tsc_sequence =
+ boot_cpu_has(X86_FEATURE_CONSTANT_TSC) ? 1 : 0;
+ tsc_ref.tsc_scale =
+ ((10000LL << 32) / vcpu->arch.virtual_tsc_khz) << 32;
+ tsc_ref.tsc_offset = 0;
 if (__copy_to_user((void __user *)addr, &tsc_ref, sizeof(tsc_ref)))
 return 1;
 mark_page_dirty(kvm, gfn);
 kvm->arch.hv_tsc_page = data;
+ kvm->arch.hv_ref_count = 0;
 break;
 }
 default:
@@ -3879,6 +3884,19 @@ long kvm_arch_vm_ioctl(struct file *filp,
 local_irq_enable();
 kvm->arch.kvmclock_offset = delta;
 kvm_gen_update_masterclock(kvm);
+
+ if (kvm->arch.hv_tsc_page & HV_X64_MSR_TSC_REFERENCE_ENABLE) {
+ HV_REFERENCE_TSC_PAGE* tsc_ref;
+ u64 curr_time;
+ tsc_ref = (HV_REFERENCE_TSC_PAGE*)gfn_to_hva(kvm,
+ kvm->arch.hv_tsc_page >> HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT);
+ tsc_ref->tsc_sequence =
+ boot_cpu_has(X86_FEATURE_CONSTANT_TSC) ? tsc_ref->tsc_sequence + 1 : 0;
+ tsc_ref->tsc_scale = ((10000LL << 32) / __get_cpu_var(cpu_tsc_khz)) << 32;
+ curr_time = (((tsc_ref->tsc_scale >> 32) * native_read_tsc()) >> 32) +
+ tsc_ref->tsc_offset;
+ tsc_ref->tsc_offset = kvm->arch.hv_ref_time - curr_time;
+ }
 break;
 }
 case KVM_GET_CLOCK: {
@@ -3896,6 +3914,13 @@ long kvm_arch_vm_ioctl(struct file *filp,
 if (copy_to_user(argp, &user_ns, sizeof(user_ns)))
 goto out;
 r = 0;
+ if (kvm->arch.hv_tsc_page & HV_X64_MSR_TSC_REFERENCE_ENABLE) {
+ HV_REFERENCE_TSC_PAGE* tsc_ref;
+ tsc_ref = (HV_REFERENCE_TSC_PAGE*)gfn_to_hva(kvm,
+ kvm->arch.hv_tsc_page >> HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT);
+ kvm->arch.hv_ref_time = (((tsc_ref->tsc_scale >> 32) *
+ native_read_tsc()) >> 32) + tsc_ref->tsc_offset;
+ }
 break;
 }