diff mbox series

[v2,09/16] KVM: x86/xen: setup pvclock updates

Message ID 20201206110327.175629-10-dwmw2@infradead.org (mailing list archive)
State New, archived
Headers show
Series KVM: Add Xen hypercall and shared info pages | expand

Commit Message

David Woodhouse Dec. 6, 2020, 11:03 a.m. UTC
From: Joao Martins <joao.m.martins@oracle.com>

This means when we set shared_info page GPA, and request a master
clock update. This will trigger all vcpus to update their respective
shared pvclock data with guests. We follow a similar approach
as Hyper-V and KVM and adjust it accordingly.

Note however that Xen differs a little on how pvclock pages are set up.
Specifically KVM assumes 4KiB page alignment and pvclock data starts in
the beginning of the page. Whereas Xen you can place that information
anywhere in the page.

Signed-off-by: Joao Martins <joao.m.martins@oracle.com>
Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
---
 arch/x86/kvm/x86.c |  2 ++
 arch/x86/kvm/xen.c | 59 ++++++++++++++++++++++++++++++++++++++++++++++
 arch/x86/kvm/xen.h |  1 +
 3 files changed, 62 insertions(+)

Comments

David Woodhouse Dec. 12, 2020, 12:01 a.m. UTC | #1
On Sun, 2020-12-06 at 11:03 +0000, David Woodhouse wrote:
> +       hva = READ_ONCE(v->kvm->arch.xen.shinfo);
> +       if (!hva)
> +               goto out;
> +
> +       offset = v->vcpu_id * sizeof(struct vcpu_info);
> +       offset += offsetof(struct vcpu_info, time);
> +

Hm, now there's two days of my life I want back.

Looks so innocuous, doesn't it? But v->vcpu_id is very much not what we
want there.

That's the APIC ID. But we don't want that; we want the ACPI ID (which
correlates to the Xen vcpu_id that Xen also puts into CPUID leaf
0x40000004. Or at least I *hope* it correlates since Linux uses the one
from CPUID for the boot CPU and the ACPI number for the rest of the
CPUS).

KVM doesn't *have* the ACPI ID. Not unless we want to go grubbing
around in what the VMM put in CPUID leaf 0x40000004... which might be
0x40000204 if Hyper-V is enabled. We don't want to be doing that.

So I think we ditch this indexing in the kernel, and require that the
VMM explicitly register the vcpu_info address for *every* vCPU, even
where the *guest* hasn't explicitly done so, and it ends up being in
the shared_info page.
diff mbox series

Patch

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 4a960629687c..e9191dfffbbb 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -2719,6 +2719,8 @@  static int kvm_guest_time_update(struct kvm_vcpu *v)
 
 	if (vcpu->pv_time_enabled)
 		kvm_setup_pvclock_page(v);
+	if (ka->xen.shinfo)
+		kvm_xen_setup_pvclock_page(v);
 	if (v == kvm_get_vcpu(v->kvm, 0))
 		kvm_hv_setup_tsc_page(v->kvm, &vcpu->hv_clock);
 	return 0;
diff --git a/arch/x86/kvm/xen.c b/arch/x86/kvm/xen.c
index c156ed1ef972..5373273e1be9 100644
--- a/arch/x86/kvm/xen.c
+++ b/arch/x86/kvm/xen.c
@@ -55,9 +55,68 @@  static int kvm_xen_shared_info_init(struct kvm *kvm, gfn_t gfn)
 	if (ret)
 		return ret;
 
+	kvm_make_all_cpus_request(kvm, KVM_REQ_MASTERCLOCK_UPDATE);
+
 	return 0;
 }
 
+void kvm_xen_setup_pvclock_page(struct kvm_vcpu *v)
+{
+	struct kvm_vcpu_arch *vcpu = &v->arch;
+	struct pvclock_vcpu_time_info *guest_hv_clock;
+	unsigned int offset;
+	void *hva;
+	int idx;
+
+	if (v->vcpu_id >= MAX_VIRT_CPUS)
+		return;
+
+	BUILD_BUG_ON(offsetof(struct shared_info, vcpu_info) != 0);
+	BUILD_BUG_ON(offsetof(struct compat_shared_info, vcpu_info) != 0);
+	BUILD_BUG_ON(sizeof(struct vcpu_info) != sizeof(struct compat_vcpu_info));
+	BUILD_BUG_ON(offsetof(struct vcpu_info, time) !=
+		     offsetof(struct compat_vcpu_info, time));
+
+	idx = srcu_read_lock(&v->kvm->srcu);
+	hva = READ_ONCE(v->kvm->arch.xen.shinfo);
+	if (!hva)
+		goto out;
+
+	offset = v->vcpu_id * sizeof(struct vcpu_info);
+	offset += offsetof(struct vcpu_info, time);
+
+	guest_hv_clock = hva + offset;
+
+	if (guest_hv_clock->version & 1)
+		++guest_hv_clock->version;  /* first time write, random junk */
+
+	vcpu->hv_clock.version = guest_hv_clock->version + 1;
+	guest_hv_clock->version = vcpu->hv_clock.version;
+
+	smp_wmb();
+
+	/* retain PVCLOCK_GUEST_STOPPED if set in guest copy */
+	vcpu->hv_clock.flags |= (guest_hv_clock->flags & PVCLOCK_GUEST_STOPPED);
+
+	if (vcpu->pvclock_set_guest_stopped_request) {
+		vcpu->hv_clock.flags |= PVCLOCK_GUEST_STOPPED;
+		vcpu->pvclock_set_guest_stopped_request = false;
+	}
+
+	trace_kvm_pvclock_update(v->vcpu_id, &vcpu->hv_clock);
+
+	*guest_hv_clock = vcpu->hv_clock;
+
+	smp_wmb();
+
+	vcpu->hv_clock.version++;
+
+	guest_hv_clock->version = vcpu->hv_clock.version;
+
+ out:
+	srcu_read_unlock(&v->kvm->srcu, idx);
+}
+
 int kvm_xen_hvm_set_attr(struct kvm *kvm, struct kvm_xen_hvm_attr *data)
 {
 	int r = -ENOENT;
diff --git a/arch/x86/kvm/xen.h b/arch/x86/kvm/xen.h
index cd3c52b62068..950a364f5b22 100644
--- a/arch/x86/kvm/xen.h
+++ b/arch/x86/kvm/xen.h
@@ -9,6 +9,7 @@ 
 #ifndef __ARCH_X86_KVM_XEN_H__
 #define __ARCH_X86_KVM_XEN_H__
 
+void kvm_xen_setup_pvclock_page(struct kvm_vcpu *vcpu);
 int kvm_xen_hvm_set_attr(struct kvm *kvm, struct kvm_xen_hvm_attr *data);
 int kvm_xen_hvm_get_attr(struct kvm *kvm, struct kvm_xen_hvm_attr *data);
 int kvm_xen_hypercall(struct kvm_vcpu *vcpu);