diff mbox series

[v2,03/10] KVM: X86: Add kvm_scale_tsc_l1() and kvm_compute_tsc_offset_l1()

Message ID 20210512150945.4591-4-ilstam@amazon.com (mailing list archive)
State New, archived
Headers show
Series KVM: Implement nested TSC scaling | expand

Commit Message

Ilias Stamatis May 12, 2021, 3:09 p.m. UTC
The existing kvm_scale_tsc() scales the TSC using the current TSC
scaling ratio. That used to be the same as L1's scaling ratio but now
with nested TSC scaling support it is no longer the case.

This patch adds a new kvm_scale_tsc_l1() function that scales the TSC
using L1's scaling ratio. The existing kvm_scale_tsc() can still be used
for scaling L2 TSC values.

Additionally, this patch renames the kvm_compute_tsc_offset() function
to kvm_compute_tsc_offset_l1() and has the function treat its TSC
argument as an L1 TSC value. All existing code uses this function
passing L1 values to it.

Signed-off-by: Ilias Stamatis <ilstam@amazon.com>
---
 arch/x86/include/asm/kvm_host.h |  1 +
 arch/x86/kvm/x86.c              | 41 ++++++++++++++++++++++-----------
 2 files changed, 29 insertions(+), 13 deletions(-)

Comments

Sean Christopherson May 18, 2021, 11:04 p.m. UTC | #1
On Wed, May 12, 2021, Ilias Stamatis wrote:
> The existing kvm_scale_tsc() scales the TSC using the current TSC
> scaling ratio. That used to be the same as L1's scaling ratio but now
> with nested TSC scaling support it is no longer the case.
> 
> This patch adds a new kvm_scale_tsc_l1() function that scales the TSC
> using L1's scaling ratio. The existing kvm_scale_tsc() can still be used
> for scaling L2 TSC values.
> 
> Additionally, this patch renames the kvm_compute_tsc_offset() function
> to kvm_compute_tsc_offset_l1() and has the function treat its TSC
> argument as an L1 TSC value. All existing code uses this function
> passing L1 values to it.
> 
> Signed-off-by: Ilias Stamatis <ilstam@amazon.com>
> ---
>  arch/x86/include/asm/kvm_host.h |  1 +
>  arch/x86/kvm/x86.c              | 41 ++++++++++++++++++++++-----------
>  2 files changed, 29 insertions(+), 13 deletions(-)
> 
> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> index 7dfc609eacd6..be59197e5eb7 100644
> --- a/arch/x86/include/asm/kvm_host.h
> +++ b/arch/x86/include/asm/kvm_host.h
> @@ -1789,6 +1789,7 @@ static inline bool kvm_is_supported_user_return_msr(u32 msr)
>  }
>  
>  u64 kvm_scale_tsc(struct kvm_vcpu *vcpu, u64 tsc);
> +u64 kvm_scale_tsc_l1(struct kvm_vcpu *vcpu, u64 tsc);
>  u64 kvm_read_l1_tsc(struct kvm_vcpu *vcpu, u64 host_tsc);

I don't really care which version is used, but we should be consistent, i.e. choose
kvm_<action>_l1_tsc or kvm_<action>_tsc_l1, not both.  The easy choice is the
former since it's already there.

>  unsigned long kvm_get_linear_rip(struct kvm_vcpu *vcpu);
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index 07cf5d7ece38..84af1af7a2cc 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -2319,18 +2319,30 @@ u64 kvm_scale_tsc(struct kvm_vcpu *vcpu, u64 tsc)
>  }
>  EXPORT_SYMBOL_GPL(kvm_scale_tsc);
>  
> -static u64 kvm_compute_tsc_offset(struct kvm_vcpu *vcpu, u64 target_tsc)
> +u64 kvm_scale_tsc_l1(struct kvm_vcpu *vcpu, u64 tsc)
> +{
> +	u64 _tsc = tsc;
> +	u64 ratio = vcpu->arch.l1_tsc_scaling_ratio;
> +
> +	if (ratio != kvm_default_tsc_scaling_ratio)
> +		_tsc = __scale_tsc(ratio, tsc);
> +
> +	return _tsc;
> +}

Just make the ratio a param.  This is complete copy+paste of kvm_scale_tsc(),
with 3 characters added.  And all of the callers are already in an L1-specific
function or have L1 vs. L2 awareness.  IMO, that makes the code less magical, too,
as I don't have to dive into a helper to see that it reads l1_tsc_scaling_ratio
versus tsc_scaling_ratio.

> +EXPORT_SYMBOL_GPL(kvm_scale_tsc_l1);
> +
> +static u64 kvm_compute_tsc_offset_l1(struct kvm_vcpu *vcpu, u64 target_tsc)
>  {
>  	u64 tsc;
>  
> -	tsc = kvm_scale_tsc(vcpu, rdtsc());
> +	tsc = kvm_scale_tsc_l1(vcpu, rdtsc());
>  
>  	return target_tsc - tsc;
>  }
>  
>  u64 kvm_read_l1_tsc(struct kvm_vcpu *vcpu, u64 host_tsc)
>  {
> -	return vcpu->arch.l1_tsc_offset + kvm_scale_tsc(vcpu, host_tsc);
> +	return vcpu->arch.l1_tsc_offset + kvm_scale_tsc_l1(vcpu, host_tsc);
>  }
>  EXPORT_SYMBOL_GPL(kvm_read_l1_tsc);
>  
> @@ -2363,7 +2375,7 @@ static void kvm_synchronize_tsc(struct kvm_vcpu *vcpu, u64 data)
>  	bool synchronizing = false;
>  
>  	raw_spin_lock_irqsave(&kvm->arch.tsc_write_lock, flags);
> -	offset = kvm_compute_tsc_offset(vcpu, data);
> +	offset = kvm_compute_tsc_offset_l1(vcpu, data);
>  	ns = get_kvmclock_base_ns();
>  	elapsed = ns - kvm->arch.last_tsc_nsec;
>  
> @@ -2402,7 +2414,7 @@ static void kvm_synchronize_tsc(struct kvm_vcpu *vcpu, u64 data)
>  		} else {
>  			u64 delta = nsec_to_cycles(vcpu, elapsed);
>  			data += delta;
> -			offset = kvm_compute_tsc_offset(vcpu, data);
> +			offset = kvm_compute_tsc_offset_l1(vcpu, data);
>  		}
>  		matched = true;
>  		already_matched = (vcpu->arch.this_tsc_generation == kvm->arch.cur_tsc_generation);
> @@ -2463,7 +2475,7 @@ static inline void adjust_tsc_offset_host(struct kvm_vcpu *vcpu, s64 adjustment)
>  {
>  	if (vcpu->arch.l1_tsc_scaling_ratio != kvm_default_tsc_scaling_ratio)
>  		WARN_ON(adjustment < 0);
> -	adjustment = kvm_scale_tsc(vcpu, (u64) adjustment);
> +	adjustment = kvm_scale_tsc_l1(vcpu, (u64) adjustment);
>  	adjust_tsc_offset_guest(vcpu, adjustment);
>  }
>  
> @@ -2846,7 +2858,7 @@ static int kvm_guest_time_update(struct kvm_vcpu *v)
>  	/* With all the info we got, fill in the values */
>  
>  	if (kvm_has_tsc_control)
> -		tgt_tsc_khz = kvm_scale_tsc(v, tgt_tsc_khz);
> +		tgt_tsc_khz = kvm_scale_tsc_l1(v, tgt_tsc_khz);
>  
>  	if (unlikely(vcpu->hw_tsc_khz != tgt_tsc_khz)) {
>  		kvm_get_time_scale(NSEC_PER_SEC, tgt_tsc_khz * 1000LL,
> @@ -3235,7 +3247,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
>  		if (msr_info->host_initiated) {
>  			kvm_synchronize_tsc(vcpu, data);
>  		} else {
> -			u64 adj = kvm_compute_tsc_offset(vcpu, data) - vcpu->arch.l1_tsc_offset;
> +			u64 adj = kvm_compute_tsc_offset_l1(vcpu, data) - vcpu->arch.l1_tsc_offset;
>  			adjust_tsc_offset_guest(vcpu, adj);
>  			vcpu->arch.ia32_tsc_adjust_msr += adj;
>  		}
> @@ -3537,10 +3549,13 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
>  		 * return L1's TSC value to ensure backwards-compatible
>  		 * behavior for migration.
>  		 */
> -		u64 tsc_offset = msr_info->host_initiated ? vcpu->arch.l1_tsc_offset :
> -							    vcpu->arch.tsc_offset;
> -
> -		msr_info->data = kvm_scale_tsc(vcpu, rdtsc()) + tsc_offset;
> +		if (msr_info->host_initiated) {

Unnecessary curly braces.

> +			msr_info->data = kvm_scale_tsc_l1(vcpu, rdtsc()) +
> +					 vcpu->arch.l1_tsc_offset;
> +		} else {
> +			msr_info->data = kvm_scale_tsc(vcpu, rdtsc()) +
> +					 vcpu->arch.tsc_offset;
> +		}
>  		break;
>  	}
>  	case MSR_MTRRcap:
> @@ -4123,7 +4138,7 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
>  			mark_tsc_unstable("KVM discovered backwards TSC");
>  
>  		if (kvm_check_tsc_unstable()) {
> -			u64 offset = kvm_compute_tsc_offset(vcpu,
> +			u64 offset = kvm_compute_tsc_offset_l1(vcpu,
>  						vcpu->arch.last_guest_tsc);
>  			kvm_vcpu_write_tsc_offset(vcpu, offset);
>  			vcpu->arch.tsc_catchup = 1;
> -- 
> 2.17.1
>
Ilias Stamatis May 19, 2021, 9:02 a.m. UTC | #2
On Tue, 2021-05-18 at 23:04 +0000, Sean Christopherson wrote:
> On Wed, May 12, 2021, Ilias Stamatis wrote:
> > The existing kvm_scale_tsc() scales the TSC using the current TSC
> > scaling ratio. That used to be the same as L1's scaling ratio but now
> > with nested TSC scaling support it is no longer the case.
> > 
> > This patch adds a new kvm_scale_tsc_l1() function that scales the TSC
> > using L1's scaling ratio. The existing kvm_scale_tsc() can still be used
> > for scaling L2 TSC values.
> > 
> > Additionally, this patch renames the kvm_compute_tsc_offset() function
> > to kvm_compute_tsc_offset_l1() and has the function treat its TSC
> > argument as an L1 TSC value. All existing code uses this function
> > passing L1 values to it.
> > 
> > Signed-off-by: Ilias Stamatis <ilstam@amazon.com>
> > ---
> >  arch/x86/include/asm/kvm_host.h |  1 +
> >  arch/x86/kvm/x86.c              | 41 ++++++++++++++++++++++-----------
> >  2 files changed, 29 insertions(+), 13 deletions(-)
> > 
> > diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> > index 7dfc609eacd6..be59197e5eb7 100644
> > --- a/arch/x86/include/asm/kvm_host.h
> > +++ b/arch/x86/include/asm/kvm_host.h
> > @@ -1789,6 +1789,7 @@ static inline bool kvm_is_supported_user_return_msr(u32 msr)
> >  }
> > 
> >  u64 kvm_scale_tsc(struct kvm_vcpu *vcpu, u64 tsc);
> > +u64 kvm_scale_tsc_l1(struct kvm_vcpu *vcpu, u64 tsc);
> >  u64 kvm_read_l1_tsc(struct kvm_vcpu *vcpu, u64 host_tsc);
> 
> I don't really care which version is used, but we should be consistent, i.e. choose
> kvm_<action>_l1_tsc or kvm_<action>_tsc_l1, not both.  The easy choice is the
> former since it's already there.

OK

> 
> >  unsigned long kvm_get_linear_rip(struct kvm_vcpu *vcpu);
> > diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> > index 07cf5d7ece38..84af1af7a2cc 100644
> > --- a/arch/x86/kvm/x86.c
> > +++ b/arch/x86/kvm/x86.c
> > @@ -2319,18 +2319,30 @@ u64 kvm_scale_tsc(struct kvm_vcpu *vcpu, u64 tsc)
> >  }
> >  EXPORT_SYMBOL_GPL(kvm_scale_tsc);
> > 
> > -static u64 kvm_compute_tsc_offset(struct kvm_vcpu *vcpu, u64 target_tsc)
> > +u64 kvm_scale_tsc_l1(struct kvm_vcpu *vcpu, u64 tsc)
> > +{
> > +     u64 _tsc = tsc;
> > +     u64 ratio = vcpu->arch.l1_tsc_scaling_ratio;
> > +
> > +     if (ratio != kvm_default_tsc_scaling_ratio)
> > +             _tsc = __scale_tsc(ratio, tsc);
> > +
> > +     return _tsc;
> > +}
> 
> Just make the ratio a param.  This is complete copy+paste of kvm_scale_tsc(),
> with 3 characters added.  And all of the callers are already in an L1-specific
> function or have L1 vs. L2 awareness.  IMO, that makes the code less magical, too,
> as I don't have to dive into a helper to see that it reads l1_tsc_scaling_ratio
> versus tsc_scaling_ratio.
> 

That's how I did it initially but changed it into a separate function after
receiving feedback on v1. I'm neutral, I don't mind changing it back.

More opinions?

> > +EXPORT_SYMBOL_GPL(kvm_scale_tsc_l1);
> > +
> > +static u64 kvm_compute_tsc_offset_l1(struct kvm_vcpu *vcpu, u64 target_tsc)
> >  {
> >       u64 tsc;
> > 
> > -     tsc = kvm_scale_tsc(vcpu, rdtsc());
> > +     tsc = kvm_scale_tsc_l1(vcpu, rdtsc());
> > 
> >       return target_tsc - tsc;
> >  }
> > 
> >  u64 kvm_read_l1_tsc(struct kvm_vcpu *vcpu, u64 host_tsc)
> >  {
> > -     return vcpu->arch.l1_tsc_offset + kvm_scale_tsc(vcpu, host_tsc);
> > +     return vcpu->arch.l1_tsc_offset + kvm_scale_tsc_l1(vcpu, host_tsc);
> >  }
> >  EXPORT_SYMBOL_GPL(kvm_read_l1_tsc);
> > 
> > @@ -2363,7 +2375,7 @@ static void kvm_synchronize_tsc(struct kvm_vcpu *vcpu, u64 data)
> >       bool synchronizing = false;
> > 
> >       raw_spin_lock_irqsave(&kvm->arch.tsc_write_lock, flags);
> > -     offset = kvm_compute_tsc_offset(vcpu, data);
> > +     offset = kvm_compute_tsc_offset_l1(vcpu, data);
> >       ns = get_kvmclock_base_ns();
> >       elapsed = ns - kvm->arch.last_tsc_nsec;
> > 
> > @@ -2402,7 +2414,7 @@ static void kvm_synchronize_tsc(struct kvm_vcpu *vcpu, u64 data)
> >               } else {
> >                       u64 delta = nsec_to_cycles(vcpu, elapsed);
> >                       data += delta;
> > -                     offset = kvm_compute_tsc_offset(vcpu, data);
> > +                     offset = kvm_compute_tsc_offset_l1(vcpu, data);
> >               }
> >               matched = true;
> >               already_matched = (vcpu->arch.this_tsc_generation == kvm->arch.cur_tsc_generation);
> > @@ -2463,7 +2475,7 @@ static inline void adjust_tsc_offset_host(struct kvm_vcpu *vcpu, s64 adjustment)
> >  {
> >       if (vcpu->arch.l1_tsc_scaling_ratio != kvm_default_tsc_scaling_ratio)
> >               WARN_ON(adjustment < 0);
> > -     adjustment = kvm_scale_tsc(vcpu, (u64) adjustment);
> > +     adjustment = kvm_scale_tsc_l1(vcpu, (u64) adjustment);
> >       adjust_tsc_offset_guest(vcpu, adjustment);
> >  }
> > 
> > @@ -2846,7 +2858,7 @@ static int kvm_guest_time_update(struct kvm_vcpu *v)
> >       /* With all the info we got, fill in the values */
> > 
> >       if (kvm_has_tsc_control)
> > -             tgt_tsc_khz = kvm_scale_tsc(v, tgt_tsc_khz);
> > +             tgt_tsc_khz = kvm_scale_tsc_l1(v, tgt_tsc_khz);
> > 
> >       if (unlikely(vcpu->hw_tsc_khz != tgt_tsc_khz)) {
> >               kvm_get_time_scale(NSEC_PER_SEC, tgt_tsc_khz * 1000LL,
> > @@ -3235,7 +3247,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
> >               if (msr_info->host_initiated) {
> >                       kvm_synchronize_tsc(vcpu, data);
> >               } else {
> > -                     u64 adj = kvm_compute_tsc_offset(vcpu, data) - vcpu->arch.l1_tsc_offset;
> > +                     u64 adj = kvm_compute_tsc_offset_l1(vcpu, data) - vcpu->arch.l1_tsc_offset;
> >                       adjust_tsc_offset_guest(vcpu, adj);
> >                       vcpu->arch.ia32_tsc_adjust_msr += adj;
> >               }
> > @@ -3537,10 +3549,13 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
> >                * return L1's TSC value to ensure backwards-compatible
> >                * behavior for migration.
> >                */
> > -             u64 tsc_offset = msr_info->host_initiated ? vcpu->arch.l1_tsc_offset :
> > -                                                         vcpu->arch.tsc_offset;
> > -
> > -             msr_info->data = kvm_scale_tsc(vcpu, rdtsc()) + tsc_offset;
> > +             if (msr_info->host_initiated) {
> 
> Unnecessary curly braces.
> 
> > +                     msr_info->data = kvm_scale_tsc_l1(vcpu, rdtsc()) +
> > +                                      vcpu->arch.l1_tsc_offset;
> > +             } else {
> > +                     msr_info->data = kvm_scale_tsc(vcpu, rdtsc()) +
> > +                                      vcpu->arch.tsc_offset;
> > +             }
> >               break;
> >       }
> >       case MSR_MTRRcap:
> > @@ -4123,7 +4138,7 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
> >                       mark_tsc_unstable("KVM discovered backwards TSC");
> > 
> >               if (kvm_check_tsc_unstable()) {
> > -                     u64 offset = kvm_compute_tsc_offset(vcpu,
> > +                     u64 offset = kvm_compute_tsc_offset_l1(vcpu,
> >                                               vcpu->arch.last_guest_tsc);
> >                       kvm_vcpu_write_tsc_offset(vcpu, offset);
> >                       vcpu->arch.tsc_catchup = 1;
> > --
> > 2.17.1
> >
Sean Christopherson May 19, 2021, 3:40 p.m. UTC | #3
On Wed, May 19, 2021, Stamatis, Ilias wrote:
> On Tue, 2021-05-18 at 23:04 +0000, Sean Christopherson wrote:
> > On Wed, May 12, 2021, Ilias Stamatis wrote:
> > > diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> > > index 07cf5d7ece38..84af1af7a2cc 100644
> > > --- a/arch/x86/kvm/x86.c
> > > +++ b/arch/x86/kvm/x86.c
> > > @@ -2319,18 +2319,30 @@ u64 kvm_scale_tsc(struct kvm_vcpu *vcpu, u64 tsc)
> > >  }
> > >  EXPORT_SYMBOL_GPL(kvm_scale_tsc);
> > > 
> > > -static u64 kvm_compute_tsc_offset(struct kvm_vcpu *vcpu, u64 target_tsc)
> > > +u64 kvm_scale_tsc_l1(struct kvm_vcpu *vcpu, u64 tsc)
> > > +{
> > > +     u64 _tsc = tsc;
> > > +     u64 ratio = vcpu->arch.l1_tsc_scaling_ratio;
> > > +
> > > +     if (ratio != kvm_default_tsc_scaling_ratio)
> > > +             _tsc = __scale_tsc(ratio, tsc);
> > > +
> > > +     return _tsc;
> > > +}
> > 
> > Just make the ratio a param.  This is complete copy+paste of kvm_scale_tsc(),
> > with 3 characters added.  And all of the callers are already in an L1-specific
> > function or have L1 vs. L2 awareness.  IMO, that makes the code less magical, too,
> > as I don't have to dive into a helper to see that it reads l1_tsc_scaling_ratio
> > versus tsc_scaling_ratio.
> > 
> 
> That's how I did it initially but changed it into a separate function after
> receiving feedback on v1. I'm neutral, I don't mind changing it back.

Ah, I see the conundrum.  The vendor code isn't straightforward because of all
the enabling checks against vmcs12 controls.

Given that, I don't terribly mind the callbacks, but I do think the connection
between the computation and the VMWRITE needs to be more explicit.

Poking around the code, the other thing that would help would be to get rid of
the awful decache_tsc_multiplier().  That helper was added to paper over the
completely broken logic of commit ff2c3a180377 ("KVM: VMX: Setup TSC scaling
ratio when a vcpu is loaded").  Its use in vmx_vcpu_load_vmcs() is basically
"write the VMCS if we forgot to earlier", which is all kinds of wrong.

If we get rid of that stupidity as prep work at the beginning of this series,
and have the "setters" return the computed value, the nested VMX code can
consume the value directly instead of having the subtle dependency on the helpers.

	vmcs_write64(TSC_OFFSET, kvm_calc_l2_tsc_offset(vcpu));

	if (kvm_has_tsc_control)
		vmcs_write64(TSC_MULTIPLIER, kvm_calc_l2_tsc_multiplier(vcpu));


Side topic, the checks against the vmcs12 controls are wrong.  Specifically,
when checking a secondary execution control, KVM needs to first check that the
secondary control is enabled in the primary control.  But, we have helpers for that.
The primary control should use its helper, too.  And while you're at it, drop
the local variable in the getter.  I.e.:

diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 3c4eb14a1e86..8735f2d71e17 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -1801,13 +1801,12 @@ static u64 vmx_get_l2_tsc_offset(struct kvm_vcpu *vcpu)
 static u64 vmx_get_l2_tsc_multiplier(struct kvm_vcpu *vcpu)
 {
        struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
-       u64 multiplier = kvm_default_tsc_scaling_ratio;

-       if (vmcs12->cpu_based_vm_exec_control & CPU_BASED_USE_TSC_OFFSETTING &&
-           vmcs12->secondary_vm_exec_control & SECONDARY_EXEC_TSC_SCALING)
-               multiplier = vmcs12->tsc_multiplier;
+       if (nested_cpu_has(vmcs12, CPU_BASED_USE_TSC_OFFSETTING) &&
+           nested_cpu_has2(vmcs12, SECONDARY_EXEC_TSC_SCALING))
+               return vmcs12->tsc_multiplier;

-       return multiplier;
+       return kvm_default_tsc_scaling_ratio;
 }

Side topic #2: I now see why the x86.c helpers skip the math if the multiplier
is kvm_default_tsc_scaling_ratio.
Ilias Stamatis May 20, 2021, 6:27 p.m. UTC | #4
On Wed, 2021-05-19 at 15:40 +0000, Sean Christopherson wrote:
> On Wed, May 19, 2021, Stamatis, Ilias wrote:
> > On Tue, 2021-05-18 at 23:04 +0000, Sean Christopherson wrote:
> > > On Wed, May 12, 2021, Ilias Stamatis wrote:
> > > > diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> > > > index 07cf5d7ece38..84af1af7a2cc 100644
> > > > --- a/arch/x86/kvm/x86.c
> > > > +++ b/arch/x86/kvm/x86.c
> > > > @@ -2319,18 +2319,30 @@ u64 kvm_scale_tsc(struct kvm_vcpu *vcpu, u64 tsc)
> > > >  }
> > > >  EXPORT_SYMBOL_GPL(kvm_scale_tsc);
> > > > 
> > > > -static u64 kvm_compute_tsc_offset(struct kvm_vcpu *vcpu, u64 target_tsc)
> > > > +u64 kvm_scale_tsc_l1(struct kvm_vcpu *vcpu, u64 tsc)
> > > > +{
> > > > +     u64 _tsc = tsc;
> > > > +     u64 ratio = vcpu->arch.l1_tsc_scaling_ratio;
> > > > +
> > > > +     if (ratio != kvm_default_tsc_scaling_ratio)
> > > > +             _tsc = __scale_tsc(ratio, tsc);
> > > > +
> > > > +     return _tsc;
> > > > +}
> > > 
> > > Just make the ratio a param.  This is complete copy+paste of kvm_scale_tsc(),
> > > with 3 characters added.  And all of the callers are already in an L1-specific
> > > function or have L1 vs. L2 awareness.  IMO, that makes the code less magical, too,
> > > as I don't have to dive into a helper to see that it reads l1_tsc_scaling_ratio
> > > versus tsc_scaling_ratio.
> > > 
> > 
> > That's how I did it initially but changed it into a separate function after
> > receiving feedback on v1. I'm neutral, I don't mind changing it back.
> 
> Ah, I see the conundrum.  The vendor code isn't straightforward because of all
> the enabling checks against vmcs12 controls.
> 
> Given that, I don't terribly mind the callbacks, but I do think the connection
> between the computation and the VMWRITE needs to be more explicit.
> 
> Poking around the code, the other thing that would help would be to get rid of
> the awful decache_tsc_multiplier().  That helper was added to paper over the
> completely broken logic of commit ff2c3a180377 ("KVM: VMX: Setup TSC scaling
> ratio when a vcpu is loaded").  Its use in vmx_vcpu_load_vmcs() is basically
> "write the VMCS if we forgot to earlier", which is all kinds of wrong.
> 

I am going to add a patch that removes decache_tsc_multiplier() and 
vmx->current_tsc_ratio as the latter is useless since vcpu->arch.tsc_scaling_ratio 
is already the current ratio. And without it decache_tsc_multiplier() becomes
a one-liner that is pointless to have; we can do vmcs_write64() directly.

Nevertheless, I am not going to move the code outside of vmx_vcpu_load_vmcs().
Granted, a better place for setting the multiplier in hardware would be
set_tsc_khz(). But this function is inside x86.c so it would require yet
another vendor callback to be added, move the svm code too, etc, etc.

Much more refactoring can be done in KVM code in general but I don't think it
has to be part of this series. I am going to send the v3 patches tomorrow. 

Ilias
diff mbox series

Patch

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 7dfc609eacd6..be59197e5eb7 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1789,6 +1789,7 @@  static inline bool kvm_is_supported_user_return_msr(u32 msr)
 }
 
 u64 kvm_scale_tsc(struct kvm_vcpu *vcpu, u64 tsc);
+u64 kvm_scale_tsc_l1(struct kvm_vcpu *vcpu, u64 tsc);
 u64 kvm_read_l1_tsc(struct kvm_vcpu *vcpu, u64 host_tsc);
 
 unsigned long kvm_get_linear_rip(struct kvm_vcpu *vcpu);
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 07cf5d7ece38..84af1af7a2cc 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -2319,18 +2319,30 @@  u64 kvm_scale_tsc(struct kvm_vcpu *vcpu, u64 tsc)
 }
 EXPORT_SYMBOL_GPL(kvm_scale_tsc);
 
-static u64 kvm_compute_tsc_offset(struct kvm_vcpu *vcpu, u64 target_tsc)
+u64 kvm_scale_tsc_l1(struct kvm_vcpu *vcpu, u64 tsc)
+{
+	u64 _tsc = tsc;
+	u64 ratio = vcpu->arch.l1_tsc_scaling_ratio;
+
+	if (ratio != kvm_default_tsc_scaling_ratio)
+		_tsc = __scale_tsc(ratio, tsc);
+
+	return _tsc;
+}
+EXPORT_SYMBOL_GPL(kvm_scale_tsc_l1);
+
+static u64 kvm_compute_tsc_offset_l1(struct kvm_vcpu *vcpu, u64 target_tsc)
 {
 	u64 tsc;
 
-	tsc = kvm_scale_tsc(vcpu, rdtsc());
+	tsc = kvm_scale_tsc_l1(vcpu, rdtsc());
 
 	return target_tsc - tsc;
 }
 
 u64 kvm_read_l1_tsc(struct kvm_vcpu *vcpu, u64 host_tsc)
 {
-	return vcpu->arch.l1_tsc_offset + kvm_scale_tsc(vcpu, host_tsc);
+	return vcpu->arch.l1_tsc_offset + kvm_scale_tsc_l1(vcpu, host_tsc);
 }
 EXPORT_SYMBOL_GPL(kvm_read_l1_tsc);
 
@@ -2363,7 +2375,7 @@  static void kvm_synchronize_tsc(struct kvm_vcpu *vcpu, u64 data)
 	bool synchronizing = false;
 
 	raw_spin_lock_irqsave(&kvm->arch.tsc_write_lock, flags);
-	offset = kvm_compute_tsc_offset(vcpu, data);
+	offset = kvm_compute_tsc_offset_l1(vcpu, data);
 	ns = get_kvmclock_base_ns();
 	elapsed = ns - kvm->arch.last_tsc_nsec;
 
@@ -2402,7 +2414,7 @@  static void kvm_synchronize_tsc(struct kvm_vcpu *vcpu, u64 data)
 		} else {
 			u64 delta = nsec_to_cycles(vcpu, elapsed);
 			data += delta;
-			offset = kvm_compute_tsc_offset(vcpu, data);
+			offset = kvm_compute_tsc_offset_l1(vcpu, data);
 		}
 		matched = true;
 		already_matched = (vcpu->arch.this_tsc_generation == kvm->arch.cur_tsc_generation);
@@ -2463,7 +2475,7 @@  static inline void adjust_tsc_offset_host(struct kvm_vcpu *vcpu, s64 adjustment)
 {
 	if (vcpu->arch.l1_tsc_scaling_ratio != kvm_default_tsc_scaling_ratio)
 		WARN_ON(adjustment < 0);
-	adjustment = kvm_scale_tsc(vcpu, (u64) adjustment);
+	adjustment = kvm_scale_tsc_l1(vcpu, (u64) adjustment);
 	adjust_tsc_offset_guest(vcpu, adjustment);
 }
 
@@ -2846,7 +2858,7 @@  static int kvm_guest_time_update(struct kvm_vcpu *v)
 	/* With all the info we got, fill in the values */
 
 	if (kvm_has_tsc_control)
-		tgt_tsc_khz = kvm_scale_tsc(v, tgt_tsc_khz);
+		tgt_tsc_khz = kvm_scale_tsc_l1(v, tgt_tsc_khz);
 
 	if (unlikely(vcpu->hw_tsc_khz != tgt_tsc_khz)) {
 		kvm_get_time_scale(NSEC_PER_SEC, tgt_tsc_khz * 1000LL,
@@ -3235,7 +3247,7 @@  int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 		if (msr_info->host_initiated) {
 			kvm_synchronize_tsc(vcpu, data);
 		} else {
-			u64 adj = kvm_compute_tsc_offset(vcpu, data) - vcpu->arch.l1_tsc_offset;
+			u64 adj = kvm_compute_tsc_offset_l1(vcpu, data) - vcpu->arch.l1_tsc_offset;
 			adjust_tsc_offset_guest(vcpu, adj);
 			vcpu->arch.ia32_tsc_adjust_msr += adj;
 		}
@@ -3537,10 +3549,13 @@  int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 		 * return L1's TSC value to ensure backwards-compatible
 		 * behavior for migration.
 		 */
-		u64 tsc_offset = msr_info->host_initiated ? vcpu->arch.l1_tsc_offset :
-							    vcpu->arch.tsc_offset;
-
-		msr_info->data = kvm_scale_tsc(vcpu, rdtsc()) + tsc_offset;
+		if (msr_info->host_initiated) {
+			msr_info->data = kvm_scale_tsc_l1(vcpu, rdtsc()) +
+					 vcpu->arch.l1_tsc_offset;
+		} else {
+			msr_info->data = kvm_scale_tsc(vcpu, rdtsc()) +
+					 vcpu->arch.tsc_offset;
+		}
 		break;
 	}
 	case MSR_MTRRcap:
@@ -4123,7 +4138,7 @@  void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
 			mark_tsc_unstable("KVM discovered backwards TSC");
 
 		if (kvm_check_tsc_unstable()) {
-			u64 offset = kvm_compute_tsc_offset(vcpu,
+			u64 offset = kvm_compute_tsc_offset_l1(vcpu,
 						vcpu->arch.last_guest_tsc);
 			kvm_vcpu_write_tsc_offset(vcpu, offset);
 			vcpu->arch.tsc_catchup = 1;