diff mbox

[RFC,v4,2/2] add support for Hyper-V partition reference time enlightenment

Message ID 1389691337-12050-3-git-send-email-vrozenfe@redhat.com (mailing list archive)
State New, archived
Headers show

Commit Message

Vadim Rozenfeld Jan. 14, 2014, 9:22 a.m. UTC
The following patch allows to activate a partition reference
time enlightenment that is based on the host platform's support
for an Invariant Time Stamp Counter (iTSC).

v2 -> v3
    Handle TSC sequence, scale, and offest changing during migration.

v3 -> v4
    1. Wrap access to iTSC page with kvm_read_guest/kvm_write_guest
    suggested by Andrew Honig <ahonig@google.com> and Marcelo
    2. iTSC data calculation/access fixes suggested by Marcelo and Paolo

---
 arch/x86/kvm/x86.c | 29 +++++++++++++++++++++++++++++
 1 file changed, 29 insertions(+)

Comments

Paolo Bonzini Jan. 14, 2014, 11:03 a.m. UTC | #1
The cleanup between v3 and v4 is really nice; thanks.  What happened to
the glitch you reported in the v3 thread.

Il 14/01/2014 10:22, Vadim Rozenfeld ha scritto:
> +			tsc_ref.tsc_scale = ((10000LL << 32) /
> +					     vcpu->arch.virtual_tsc_khz) << 32;

As mentioned in the review of v3, I don't really like the tsc_scale to
come from two different sources in the two cases of MSR write and
KVM_SET_CLOCK.  This is the only qualm I still have with this patch.
Marcelo, do you have any ideas?

Paolo

> +			tsc_ref.tsc_offset = kvm->arch.kvmclock_offset;
> +		}
>  		if (__copy_to_user((void __user *)addr, &tsc_ref, sizeof(tsc_ref)))
>  			return 1;
>  		mark_page_dirty(kvm, gfn);
> @@ -3871,6 +3879,27 @@ long kvm_arch_vm_ioctl(struct file *filp,
>  		local_irq_enable();
>  		kvm->arch.kvmclock_offset = delta;
>  		kvm_gen_update_masterclock(kvm);
> +		if (kvm->arch.hv_tsc_page & HV_X64_MSR_TSC_REFERENCE_ENABLE) {
> +			HV_REFERENCE_TSC_PAGE tsc_ref;
> +			struct kvm_arch *ka = &kvm->arch;
> +			r = kvm_read_guest(kvm, kvm->arch.hv_tsc_page,
> +					   &tsc_ref, sizeof(tsc_ref));
> +			if (r)
> +				goto out;
> +			if (tsc_ref.tsc_sequence
> +			    && boot_cpu_has(X86_FEATURE_CONSTANT_TSC)
> +			    && ka->use_master_clock) {
> +				tsc_ref.tsc_sequence++;
> +				tsc_ref.tsc_scale = ((10000LL << 32) /
> +					__get_cpu_var(cpu_tsc_khz)) << 32;

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Marcelo Tosatti Jan. 14, 2014, 7:10 p.m. UTC | #2
On Tue, Jan 14, 2014 at 12:03:24PM +0100, Paolo Bonzini wrote:
> The cleanup between v3 and v4 is really nice; thanks.  What happened to
> the glitch you reported in the v3 thread.
> 
> Il 14/01/2014 10:22, Vadim Rozenfeld ha scritto:
> > +			tsc_ref.tsc_scale = ((10000LL << 32) /
> > +					     vcpu->arch.virtual_tsc_khz) << 32;
> 
> As mentioned in the review of v3, I don't really like the tsc_scale to
> come from two different sources in the two cases of MSR write and
> KVM_SET_CLOCK.  This is the only qualm I still have with this patch.
> Marcelo, do you have any ideas?

It should be at kvm_guest_time_update.

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Marcelo Tosatti Jan. 14, 2014, 7:20 p.m. UTC | #3
On Tue, Jan 14, 2014 at 08:22:17PM +1100, Vadim Rozenfeld wrote:
> The following patch allows to activate a partition reference
> time enlightenment that is based on the host platform's support
> for an Invariant Time Stamp Counter (iTSC).
> 
> v2 -> v3
>     Handle TSC sequence, scale, and offest changing during migration.
> 
> v3 -> v4
>     1. Wrap access to iTSC page with kvm_read_guest/kvm_write_guest
>     suggested by Andrew Honig <ahonig@google.com> and Marcelo
>     2. iTSC data calculation/access fixes suggested by Marcelo and Paolo
> 
> ---
>  arch/x86/kvm/x86.c | 29 +++++++++++++++++++++++++++++
>  1 file changed, 29 insertions(+)
> 
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index 496bdb1..6e6debf 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -1873,6 +1873,7 @@ static int set_msr_hyperv_pw(struct kvm_vcpu *vcpu, u32 msr, u64 data)
>  	case HV_X64_MSR_REFERENCE_TSC: {
>  		u64 gfn;
>  		unsigned long addr;
> +		struct kvm_arch *ka = &kvm->arch;
>  		HV_REFERENCE_TSC_PAGE tsc_ref;
>  		memset(&tsc_ref, 0, sizeof(tsc_ref));
>  		if (!(data & HV_X64_MSR_TSC_REFERENCE_ENABLE)) {
> @@ -1883,6 +1884,13 @@ static int set_msr_hyperv_pw(struct kvm_vcpu *vcpu, u32 msr, u64 data)
>  		addr = gfn_to_hva(kvm, gfn);
>  		if (kvm_is_error_hva(addr))
>  			return 1;
> +		if (boot_cpu_has(X86_FEATURE_CONSTANT_TSC)
> +		    && ka->use_master_clock) {
> +			tsc_ref.tsc_sequence = 1;
> +			tsc_ref.tsc_scale = ((10000LL << 32) /
> +					     vcpu->arch.virtual_tsc_khz) << 32;
> +			tsc_ref.tsc_offset = kvm->arch.kvmclock_offset;
> +		}
>  		if (__copy_to_user((void __user *)addr, &tsc_ref, sizeof(tsc_ref)))
>  			return 1;
>  		mark_page_dirty(kvm, gfn);
> @@ -3871,6 +3879,27 @@ long kvm_arch_vm_ioctl(struct file *filp,
>  		local_irq_enable();
>  		kvm->arch.kvmclock_offset = delta;
>  		kvm_gen_update_masterclock(kvm);
> +		if (kvm->arch.hv_tsc_page & HV_X64_MSR_TSC_REFERENCE_ENABLE) {
> +			HV_REFERENCE_TSC_PAGE tsc_ref;
> +			struct kvm_arch *ka = &kvm->arch;
> +			r = kvm_read_guest(kvm, kvm->arch.hv_tsc_page,
> +					   &tsc_ref, sizeof(tsc_ref));
> +			if (r)
> +				goto out;
> +			if (tsc_ref.tsc_sequence
> +			    && boot_cpu_has(X86_FEATURE_CONSTANT_TSC)
> +			    && ka->use_master_clock) {
> +				tsc_ref.tsc_sequence++;
> +				tsc_ref.tsc_scale = ((10000LL << 32) /
> +					__get_cpu_var(cpu_tsc_khz)) << 32;
> +                        	tsc_ref.tsc_offset = kvm->arch.kvmclock_offset;
> +			} else
> +				tsc_ref.tsc_sequence = 0;
> +			r = kvm_write_guest(kvm, kvm->arch.hv_tsc_page,
> +					    &tsc_ref, sizeof(tsc_ref));
> +			if (r)
> +				goto out;
> +		}
>  		break;
>  	}

Vadim,

A short comment explaining the lifecycle of the exposed
clock would be nice.

Can you explain why are you setting tsc_offset to kvmclock_offset?

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Paolo Bonzini Jan. 16, 2014, 1:53 p.m. UTC | #4
Il 14/01/2014 10:22, Vadim Rozenfeld ha scritto:
> @@ -1883,6 +1884,13 @@ static int set_msr_hyperv_pw(struct kvm_vcpu *vcpu, u32 msr, u64 data)
>  		addr = gfn_to_hva(kvm, gfn);
>  		if (kvm_is_error_hva(addr))
>  			return 1;
> +		if (boot_cpu_has(X86_FEATURE_CONSTANT_TSC)
> +		    && ka->use_master_clock) {
> +			tsc_ref.tsc_sequence = 1;

I think that after the first migration the sequence number will always
be 2: set_msr_hyperv_pw will set it to 1, and KVM_SET_KVMCLOCK will
increment it.  Can you check this?

The solution would be to propagate the "struct msr_data *" argument from
kvm_set_msr_common to kvm_set_msr_hyper_pw.  Then, here in this "if" you
can test msr_info->host_initiated, and increment the tsc_sequence
instead of resetting.  Like

	if ((!msr_info->host_initiated || tsc_ref.tsc_sequence) &&
	    boot_cpu_has(X86_FEATURE_CONSTANT_TSC) &&
	    ka->use_master_clock) {
		tsc_ref.tsc_sequence = msr_info->host_initiated
			? tsc_ref.tsc_sequence + 1
			: 1;

> +			tsc_ref.tsc_scale = ((10000LL << 32) /
> +					     vcpu->arch.virtual_tsc_khz) << 32;
> +			tsc_ref.tsc_offset = kvm->arch.kvmclock_offset;
> +		}
>  		if (__copy_to_user((void __user *)addr, &tsc_ref, sizeof(tsc_ref)))
>  			return 1;
>  		mark_page_dirty(kvm, gfn);
> @@ -3871,6 +3879,27 @@ long kvm_arch_vm_ioctl(struct file *filp,
>  		local_irq_enable();
>  		kvm->arch.kvmclock_offset = delta;
>  		kvm_gen_update_masterclock(kvm);
> +		if (kvm->arch.hv_tsc_page & HV_X64_MSR_TSC_REFERENCE_ENABLE) {
> +			HV_REFERENCE_TSC_PAGE tsc_ref;
> +			struct kvm_arch *ka = &kvm->arch;
> +			r = kvm_read_guest(kvm, kvm->arch.hv_tsc_page,
> +					   &tsc_ref, sizeof(tsc_ref));
> +			if (r)
> +				goto out;
> +			if (tsc_ref.tsc_sequence
> +			    && boot_cpu_has(X86_FEATURE_CONSTANT_TSC)
> +			    && ka->use_master_clock) {
> +				tsc_ref.tsc_sequence++;
> +				tsc_ref.tsc_scale = ((10000LL << 32) /
> +					__get_cpu_var(cpu_tsc_khz)) << 32;

Since on migration kvm_set_hyperv_pw will always be called, do you need
to write tsc_ref.scale at all here?

Paolo

> +                        	tsc_ref.tsc_offset = kvm->arch.kvmclock_offset;
> +			} else
> +				tsc_ref.tsc_sequence = 0;
> +			r = kvm_write_guest(kvm, kvm->arch.hv_tsc_page,
> +					    &tsc_ref, sizeof(tsc_ref));
> +			if (r)
> +				goto out;
> +		}
>  		break;
>  	}
>  	case KVM_GET_CLOCK: {
> 

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 496bdb1..6e6debf 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -1873,6 +1873,7 @@  static int set_msr_hyperv_pw(struct kvm_vcpu *vcpu, u32 msr, u64 data)
 	case HV_X64_MSR_REFERENCE_TSC: {
 		u64 gfn;
 		unsigned long addr;
+		struct kvm_arch *ka = &kvm->arch;
 		HV_REFERENCE_TSC_PAGE tsc_ref;
 		memset(&tsc_ref, 0, sizeof(tsc_ref));
 		if (!(data & HV_X64_MSR_TSC_REFERENCE_ENABLE)) {
@@ -1883,6 +1884,13 @@  static int set_msr_hyperv_pw(struct kvm_vcpu *vcpu, u32 msr, u64 data)
 		addr = gfn_to_hva(kvm, gfn);
 		if (kvm_is_error_hva(addr))
 			return 1;
+		if (boot_cpu_has(X86_FEATURE_CONSTANT_TSC)
+		    && ka->use_master_clock) {
+			tsc_ref.tsc_sequence = 1;
+			tsc_ref.tsc_scale = ((10000LL << 32) /
+					     vcpu->arch.virtual_tsc_khz) << 32;
+			tsc_ref.tsc_offset = kvm->arch.kvmclock_offset;
+		}
 		if (__copy_to_user((void __user *)addr, &tsc_ref, sizeof(tsc_ref)))
 			return 1;
 		mark_page_dirty(kvm, gfn);
@@ -3871,6 +3879,27 @@  long kvm_arch_vm_ioctl(struct file *filp,
 		local_irq_enable();
 		kvm->arch.kvmclock_offset = delta;
 		kvm_gen_update_masterclock(kvm);
+		if (kvm->arch.hv_tsc_page & HV_X64_MSR_TSC_REFERENCE_ENABLE) {
+			HV_REFERENCE_TSC_PAGE tsc_ref;
+			struct kvm_arch *ka = &kvm->arch;
+			r = kvm_read_guest(kvm, kvm->arch.hv_tsc_page,
+					   &tsc_ref, sizeof(tsc_ref));
+			if (r)
+				goto out;
+			if (tsc_ref.tsc_sequence
+			    && boot_cpu_has(X86_FEATURE_CONSTANT_TSC)
+			    && ka->use_master_clock) {
+				tsc_ref.tsc_sequence++;
+				tsc_ref.tsc_scale = ((10000LL << 32) /
+					__get_cpu_var(cpu_tsc_khz)) << 32;
+                        	tsc_ref.tsc_offset = kvm->arch.kvmclock_offset;
+			} else
+				tsc_ref.tsc_sequence = 0;
+			r = kvm_write_guest(kvm, kvm->arch.hv_tsc_page,
+					    &tsc_ref, sizeof(tsc_ref));
+			if (r)
+				goto out;
+		}
 		break;
 	}
 	case KVM_GET_CLOCK: {