diff mbox series

[RFC,v3,02/21] KVM: x86: Improve accuracy of KVM clock when TSC scaling is in force

Message ID 20240522001817.619072-3-dwmw2@infradead.org (mailing list archive)
State New, archived
Headers show
Series Cleaning up the KVM clock mess | expand

Commit Message

David Woodhouse May 22, 2024, 12:16 a.m. UTC
From: David Woodhouse <dwmw@amazon.co.uk>

The kvm_guest_time_update() function scales the host TSC frequency to
the guest's using kvm_scale_tsc() and the v->arch.l1_tsc_scaling_ratio
scaling ratio previously calculated for that vCPU. Then calcuates the
scaling factors for the KVM clock itself based on that guest TSC
frequency.

However, it uses kHz as the unit when scaling, and then multiplies by
1000 only at the end.

With a host TSC frequency of 3000MHz and a guest set to 2500MHz, the
result of kvm_scale_tsc() will actually come out at 2,499,999kHz. So
the KVM clock advertised to the guest is based on a frequency of
2,499,999,000 Hz.

By using Hz as the unit from the beginning, the KVM clock would be based
on a more accurate frequency of 2,499,999,999 Hz in this example.

Fixes: 78db6a503796 ("KVM: x86: rewrite handling of scaled TSC for kvmclock")
Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
Reviewed-by: Paul Durrant <paul@xen.org>
---
 arch/x86/include/asm/kvm_host.h |  2 +-
 arch/x86/kvm/x86.c              | 17 +++++++++--------
 arch/x86/kvm/xen.c              |  2 +-
 3 files changed, 11 insertions(+), 10 deletions(-)

Comments

Sean Christopherson Aug. 13, 2024, 5:50 p.m. UTC | #1
On Wed, May 22, 2024, David Woodhouse wrote:
> From: David Woodhouse <dwmw@amazon.co.uk>
> 
> The kvm_guest_time_update() function scales the host TSC frequency to
> the guest's using kvm_scale_tsc() and the v->arch.l1_tsc_scaling_ratio
> scaling ratio previously calculated for that vCPU. Then calcuates the
> scaling factors for the KVM clock itself based on that guest TSC
> frequency.
> 
> However, it uses kHz as the unit when scaling, and then multiplies by
> 1000 only at the end.
> 
> With a host TSC frequency of 3000MHz and a guest set to 2500MHz, the
> result of kvm_scale_tsc() will actually come out at 2,499,999kHz. So
> the KVM clock advertised to the guest is based on a frequency of
> 2,499,999,000 Hz.
> 
> By using Hz as the unit from the beginning, the KVM clock would be based
> on a more accurate frequency of 2,499,999,999 Hz in this example.
> 
> Fixes: 78db6a503796 ("KVM: x86: rewrite handling of scaled TSC for kvmclock")
> Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
> Reviewed-by: Paul Durrant <paul@xen.org>
> ---
>  arch/x86/include/asm/kvm_host.h |  2 +-
>  arch/x86/kvm/x86.c              | 17 +++++++++--------
>  arch/x86/kvm/xen.c              |  2 +-
>  3 files changed, 11 insertions(+), 10 deletions(-)
> 
> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> index 01c69840647e..8440c4081727 100644
> --- a/arch/x86/include/asm/kvm_host.h
> +++ b/arch/x86/include/asm/kvm_host.h
> @@ -887,7 +887,7 @@ struct kvm_vcpu_arch {
>  
>  	gpa_t time;
>  	struct pvclock_vcpu_time_info hv_clock;
> -	unsigned int hw_tsc_khz;
> +	unsigned int hw_tsc_hz;

Isn't there an overflow issue here?  The local variable is a 64-bit value, but
kvm_vcpu_arch.hw_tsc_hz is a 32-bit value.  And unless I'm having an even worse
review week than I thought, a guest TSC frequency > 4Ghz will get truncated.

>  	struct gfn_to_pfn_cache pv_time;
>  	/* set guest stopped flag in pvclock flags field */
>  	bool pvclock_set_guest_stopped_request;
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index 2d2619d3eee4..23281c508c27 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -3215,7 +3215,8 @@ static void kvm_setup_guest_pvclock(struct kvm_vcpu *v,
>  
>  static int kvm_guest_time_update(struct kvm_vcpu *v)
>  {
> -	unsigned long flags, tgt_tsc_khz;
> +	unsigned long flags;
> +	uint64_t tgt_tsc_hz;

s/uint64_t/u64 for kernel code.  There are more than a few uses of uint64_t in
KVM, but u64 is far and away the dominant flavor.

> diff --git a/arch/x86/kvm/xen.c b/arch/x86/kvm/xen.c
> index 5a83a8154b79..014048c22652 100644
> --- a/arch/x86/kvm/xen.c
> +++ b/arch/x86/kvm/xen.c
> @@ -2273,7 +2273,7 @@ void kvm_xen_update_tsc_info(struct kvm_vcpu *vcpu)
>  
>  	entry = kvm_find_cpuid_entry_index(vcpu, function, 2);
>  	if (entry)
> -		entry->eax = vcpu->arch.hw_tsc_khz;
> +		entry->eax = vcpu->arch.hw_tsc_hz / 1000;

And if hw_tsc_hz is a u64, this will need to use div_u64() to play nice with
32-bit kernels.
diff mbox series

Patch

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 01c69840647e..8440c4081727 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -887,7 +887,7 @@  struct kvm_vcpu_arch {
 
 	gpa_t time;
 	struct pvclock_vcpu_time_info hv_clock;
-	unsigned int hw_tsc_khz;
+	unsigned int hw_tsc_hz;
 	struct gfn_to_pfn_cache pv_time;
 	/* set guest stopped flag in pvclock flags field */
 	bool pvclock_set_guest_stopped_request;
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 2d2619d3eee4..23281c508c27 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -3215,7 +3215,8 @@  static void kvm_setup_guest_pvclock(struct kvm_vcpu *v,
 
 static int kvm_guest_time_update(struct kvm_vcpu *v)
 {
-	unsigned long flags, tgt_tsc_khz;
+	unsigned long flags;
+	uint64_t tgt_tsc_hz;
 	unsigned seq;
 	struct kvm_vcpu_arch *vcpu = &v->arch;
 	struct kvm_arch *ka = &v->kvm->arch;
@@ -3252,8 +3253,8 @@  static int kvm_guest_time_update(struct kvm_vcpu *v)
 
 	/* Keep irq disabled to prevent changes to the clock */
 	local_irq_save(flags);
-	tgt_tsc_khz = get_cpu_tsc_khz();
-	if (unlikely(tgt_tsc_khz == 0)) {
+	tgt_tsc_hz = get_cpu_tsc_khz() * 1000LL;
+	if (unlikely(tgt_tsc_hz == 0)) {
 		local_irq_restore(flags);
 		kvm_make_request(KVM_REQ_CLOCK_UPDATE, v);
 		return 1;
@@ -3288,14 +3289,14 @@  static int kvm_guest_time_update(struct kvm_vcpu *v)
 	/* With all the info we got, fill in the values */
 
 	if (kvm_caps.has_tsc_control)
-		tgt_tsc_khz = kvm_scale_tsc(tgt_tsc_khz,
-					    v->arch.l1_tsc_scaling_ratio);
+		tgt_tsc_hz = kvm_scale_tsc(tgt_tsc_hz,
+					   v->arch.l1_tsc_scaling_ratio);
 
-	if (unlikely(vcpu->hw_tsc_khz != tgt_tsc_khz)) {
-		kvm_get_time_scale(NSEC_PER_SEC, tgt_tsc_khz * 1000LL,
+	if (unlikely(vcpu->hw_tsc_hz != tgt_tsc_hz)) {
+		kvm_get_time_scale(NSEC_PER_SEC, tgt_tsc_hz,
 				   &vcpu->hv_clock.tsc_shift,
 				   &vcpu->hv_clock.tsc_to_system_mul);
-		vcpu->hw_tsc_khz = tgt_tsc_khz;
+		vcpu->hw_tsc_hz = tgt_tsc_hz;
 		kvm_xen_update_tsc_info(v);
 	}
 
diff --git a/arch/x86/kvm/xen.c b/arch/x86/kvm/xen.c
index 5a83a8154b79..014048c22652 100644
--- a/arch/x86/kvm/xen.c
+++ b/arch/x86/kvm/xen.c
@@ -2273,7 +2273,7 @@  void kvm_xen_update_tsc_info(struct kvm_vcpu *vcpu)
 
 	entry = kvm_find_cpuid_entry_index(vcpu, function, 2);
 	if (entry)
-		entry->eax = vcpu->arch.hw_tsc_khz;
+		entry->eax = vcpu->arch.hw_tsc_hz / 1000;
 }
 
 void kvm_xen_init_vm(struct kvm *kvm)