
[v0,02/15] KVM: x86/xen: Use gfn_to_pfn_cache for runstate area

Message ID 20220210002721.273608-3-dwmw2@infradead.org (mailing list archive)
State New, archived
Series KVM: Add Xen event channel acceleration

Commit Message

David Woodhouse Feb. 10, 2022, 12:27 a.m. UTC
From: David Woodhouse <dwmw@amazon.co.uk>

Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
---
 arch/x86/include/asm/kvm_host.h |   3 +-
 arch/x86/kvm/x86.c              |   1 +
 arch/x86/kvm/xen.c              | 111 ++++++++++++++++----------------
 arch/x86/kvm/xen.h              |   6 +-
 4 files changed, 62 insertions(+), 59 deletions(-)

Comments

Sean Christopherson Feb. 18, 2022, 7:57 p.m. UTC | #1
On Thu, Feb 10, 2022, David Woodhouse wrote:
> From: David Woodhouse <dwmw@amazon.co.uk>

RFC, or lazy? :-D

> Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
> ---
> -		r = kvm_gfn_to_hva_cache_init(vcpu->kvm,
> +		r = kvm_gfn_to_pfn_cache_init(vcpu->kvm,
>  					      &vcpu->arch.xen.runstate_cache,
> -					      data->u.gpa,
> -					      sizeof(struct vcpu_runstate_info));
> -		if (!r) {
> -			vcpu->arch.xen.runstate_set = true;
> -		}
> +					      NULL, false, true, data->u.gpa,
> +					      sizeof(struct vcpu_runstate_info),
> +					      false);

Now that I'm all too aware of the subtle importance of @guest_uses_pa and
@kernel_map, before the pfn cache code gains more users, can you slot this in at
the beginning of the series?  It'd be quite easy to mess up and pass "false, false"
and completely mishandle cases where the PFN is mapped into the guest.
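
For illustration, here is a before/after view of a typical call site under this
change (a sketch only: the call shown is the shared_info cache from the hunk
below, and the enum values are the ones defined later in the patch):

	/* Before: two anonymous booleans, easy to transpose or to pass as "false, false" */
	ret = kvm_gfn_to_pfn_cache_init(kvm, gpc, NULL, false, true,
					gpa, PAGE_SIZE, false);

	/* After: the usage is named at the call site and sanity-checked at init time */
	ret = kvm_gfn_to_pfn_cache_init(kvm, gpc, NULL, KVM_HOST_USES_PFN,
					gpa, PAGE_SIZE, false);

A cache whose PFN is mapped into the guest would instead pass KVM_GUEST_USES_PFN,
or KVM_GUEST_AND_HOST_USE_PFN if KVM also dereferences the kernel mapping.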


From: Sean Christopherson <seanjc@google.com>
Date: Fri, 18 Feb 2022 11:45:47 -0800
Subject: [PATCH] KVM: Use enum to track if cached PFN will be used in guest
 and/or host

Replace the guest_uses_pa and kernel_map booleans in the PFN cache code
with a unified enum/bitmask.  Using explicit names makes it easier to
review and audit call sites.

Opportunistically add a WARN to prevent passing garbage; instantiating a
cache without declaring its usage is either buggy or pointless.

Signed-off-by: Sean Christopherson <seanjc@google.com>
---
 arch/x86/kvm/xen.c        |  2 +-
 include/linux/kvm_host.h  | 11 +++++------
 include/linux/kvm_types.h | 10 ++++++++--
 virt/kvm/pfncache.c       | 14 +++++++-------
 4 files changed, 21 insertions(+), 16 deletions(-)

diff --git a/arch/x86/kvm/xen.c b/arch/x86/kvm/xen.c
index 4aa0f2b31665..5be1c9227105 100644
--- a/arch/x86/kvm/xen.c
+++ b/arch/x86/kvm/xen.c
@@ -39,7 +39,7 @@ static int kvm_xen_shared_info_init(struct kvm *kvm, gfn_t gfn)
 	}

 	do {
-		ret = kvm_gfn_to_pfn_cache_init(kvm, gpc, NULL, false, true,
+		ret = kvm_gfn_to_pfn_cache_init(kvm, gpc, NULL, KVM_HOST_USES_PFN,
 						gpa, PAGE_SIZE, false);
 		if (ret)
 			goto out;
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index f11039944c08..d044e328046a 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -1222,9 +1222,9 @@ void kvm_vcpu_mark_page_dirty(struct kvm_vcpu *vcpu, gfn_t gfn);
  * @gpc:	   struct gfn_to_pfn_cache object.
  * @vcpu:	   vCPU to be used for marking pages dirty and to be woken on
  *		   invalidation.
- * @guest_uses_pa: indicates that the resulting host physical PFN is used while
- *		   @vcpu is IN_GUEST_MODE so invalidations should wake it.
- * @kernel_map:    requests a kernel virtual mapping (kmap / memremap).
+ * @usage:	   indicates if the resulting host physical PFN is used while
+ *		   the @vcpu is IN_GUEST_MODE and/or if the PFN is used directly
+ *		   by KVM (and thus needs a kernel virtual mapping).
  * @gpa:	   guest physical address to map.
+ * @len:	   sanity check; the range being accessed must fit within a single page.
  * @dirty:         mark the cache dirty immediately.
@@ -1240,9 +1240,8 @@ void kvm_vcpu_mark_page_dirty(struct kvm_vcpu *vcpu, gfn_t gfn);
  * to ensure that the cache is valid before accessing the target page.
  */
 int kvm_gfn_to_pfn_cache_init(struct kvm *kvm, struct gfn_to_pfn_cache *gpc,
-			      struct kvm_vcpu *vcpu, bool guest_uses_pa,
-			      bool kernel_map, gpa_t gpa, unsigned long len,
-			      bool dirty);
+			      struct kvm_vcpu *vcpu, enum pfn_cache_usage usage,
+			      gpa_t gpa, unsigned long len, bool dirty);

 /**
  * kvm_gfn_to_pfn_cache_check - check validity of a gfn_to_pfn_cache.
diff --git a/include/linux/kvm_types.h b/include/linux/kvm_types.h
index dceac12c1ce5..784f37cbf33e 100644
--- a/include/linux/kvm_types.h
+++ b/include/linux/kvm_types.h
@@ -18,6 +18,7 @@ struct kvm_memslots;

 enum kvm_mr_change;

+#include <linux/bits.h>
 #include <linux/types.h>
 #include <linux/spinlock_types.h>

@@ -46,6 +47,12 @@ typedef u64            hfn_t;

 typedef hfn_t kvm_pfn_t;

+enum pfn_cache_usage {
+	KVM_GUEST_USES_PFN = BIT(0),
+	KVM_HOST_USES_PFN  = BIT(1),
+	KVM_GUEST_AND_HOST_USE_PFN = KVM_GUEST_USES_PFN | KVM_HOST_USES_PFN,
+};
+
 struct gfn_to_hva_cache {
 	u64 generation;
 	gpa_t gpa;
@@ -64,11 +71,10 @@ struct gfn_to_pfn_cache {
 	rwlock_t lock;
 	void *khva;
 	kvm_pfn_t pfn;
+	enum pfn_cache_usage usage;
 	bool active;
 	bool valid;
 	bool dirty;
-	bool kernel_map;
-	bool guest_uses_pa;
 };

 #ifdef KVM_ARCH_NR_OBJS_PER_MEMORY_CACHE
diff --git a/virt/kvm/pfncache.c b/virt/kvm/pfncache.c
index ce878f4be4da..9b3a192cb18c 100644
--- a/virt/kvm/pfncache.c
+++ b/virt/kvm/pfncache.c
@@ -42,7 +42,7 @@ void gfn_to_pfn_cache_invalidate_start(struct kvm *kvm, unsigned long start,
 			 * If a guest vCPU could be using the physical address,
 			 * it needs to be woken.
 			 */
-			if (gpc->guest_uses_pa) {
+			if (gpc->usage & KVM_GUEST_USES_PFN) {
 				if (!wake_vcpus) {
 					wake_vcpus = true;
 					bitmap_zero(vcpu_bitmap, KVM_MAX_VCPUS);
@@ -219,7 +219,7 @@ int kvm_gfn_to_pfn_cache_refresh(struct kvm *kvm, struct gfn_to_pfn_cache *gpc,
 			goto map_done;
 		}

-		if (gpc->kernel_map) {
+		if (gpc->usage & KVM_HOST_USES_PFN) {
 			if (new_pfn == old_pfn) {
 				new_khva = old_khva;
 				old_pfn = KVM_PFN_ERR_FAULT;
@@ -299,10 +299,11 @@ EXPORT_SYMBOL_GPL(kvm_gfn_to_pfn_cache_unmap);


 int kvm_gfn_to_pfn_cache_init(struct kvm *kvm, struct gfn_to_pfn_cache *gpc,
-			      struct kvm_vcpu *vcpu, bool guest_uses_pa,
-			      bool kernel_map, gpa_t gpa, unsigned long len,
-			      bool dirty)
+			      struct kvm_vcpu *vcpu, enum pfn_cache_usage usage,
+			      gpa_t gpa, unsigned long len, bool dirty)
 {
+	WARN_ON_ONCE(!usage || (usage & KVM_GUEST_AND_HOST_USE_PFN) != usage);
+
 	if (!gpc->active) {
 		rwlock_init(&gpc->lock);

@@ -310,8 +311,7 @@ int kvm_gfn_to_pfn_cache_init(struct kvm *kvm, struct gfn_to_pfn_cache *gpc,
 		gpc->pfn = KVM_PFN_ERR_FAULT;
 		gpc->uhva = KVM_HVA_ERR_BAD;
 		gpc->vcpu = vcpu;
-		gpc->kernel_map = kernel_map;
-		gpc->guest_uses_pa = guest_uses_pa;
+		gpc->usage = usage;
 		gpc->valid = false;
 		gpc->active = true;


base-commit: f6ae04ddb347f526b4620d1053690ecf1f87d77f
--
David Woodhouse Feb. 18, 2022, 9:45 p.m. UTC | #2
On Fri, 2022-02-18 at 19:57 +0000, Sean Christopherson wrote:
> On Thu, Feb 10, 2022, David Woodhouse wrote:
> > From: David Woodhouse <dwmw@amazon.co.uk>
> 
> RFC, or lazy? :-D
> 

The important parts are in place. It still needs me to finish the
selftests, but you don't need those to start heckling it. I also
realised after posting that I hadn't finished the save/restore of
pending oneshot timers, which is now implemented in the version at
https://git.infradead.org/users/dwmw2/linux.git/shortlog/refs/heads/xen-evtchn-kernel

> > Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
> > ---
> > -		r = kvm_gfn_to_hva_cache_init(vcpu->kvm,
> > +		r = kvm_gfn_to_pfn_cache_init(vcpu->kvm,
> >  					      &vcpu->arch.xen.runstate_cache,
> > -					      data->u.gpa,
> > -					      sizeof(struct vcpu_runstate_info));
> > -		if (!r) {
> > -			vcpu->arch.xen.runstate_set = true;
> > -		}
> > +					      NULL, false, true, data->u.gpa,
> > +					      sizeof(struct vcpu_runstate_info),
> > +					      false);
> 
> Now that I'm all too aware of the subtle importance of @guest_uses_pa and
> @kernel_map, before the pfn cache code gains more users, can you slot this in at
> the beginning of the series?  It'd be quite easy to mess up and pass "false, false"
> and completely mishandle cases where the PFN is mapped into the guest.

Looks good to me; I'll roll it in. Thanks.
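
Condensed from the kvm_xen_update_runstate_guest() hunks in the patch below, the
access pattern for the pfn-cached runstate area looks roughly like this (a
simplified sketch; the field writes, write barriers and BUILD_BUG_ON checks are
elided):

	read_lock_irqsave(&gpc->lock, flags);
	while (!kvm_gfn_to_pfn_cache_check(v->kvm, gpc, gpc->gpa, user_len)) {
		read_unlock_irqrestore(&gpc->lock, flags);

		/* The refresh may sleep, which is not allowed from kvm_sched_out() */
		if (state == RUNSTATE_runnable)
			return;

		if (kvm_gfn_to_pfn_cache_refresh(v->kvm, gpc, gpc->gpa,
						 user_len, false))
			return;

		read_lock_irqsave(&gpc->lock, flags);
	}

	/* ... write the runstate data through gpc->khva ... */

	read_unlock_irqrestore(&gpc->lock, flags);

	mark_page_dirty_in_slot(v->kvm, gpc->memslot, gpc->gpa >> PAGE_SHIFT);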

Patch

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 6e7c545bc7ee..1e73053fd2bf 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -603,10 +603,9 @@  struct kvm_vcpu_xen {
 	u32 current_runstate;
 	bool vcpu_info_set;
 	bool vcpu_time_info_set;
-	bool runstate_set;
 	struct gfn_to_hva_cache vcpu_info_cache;
 	struct gfn_to_hva_cache vcpu_time_info_cache;
-	struct gfn_to_hva_cache runstate_cache;
+	struct gfn_to_pfn_cache runstate_cache;
 	u64 last_steal;
 	u64 runstate_entry_time;
 	u64 runstate_times[4];
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 74b53a16f38a..5d0191bf30b3 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -11195,6 +11195,7 @@  void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
 	free_cpumask_var(vcpu->arch.wbinvd_dirty_mask);
 	fpu_free_guest_fpstate(&vcpu->arch.guest_fpu);
 
+	kvm_xen_destroy_vcpu(vcpu);
 	kvm_hv_vcpu_uninit(vcpu);
 	kvm_pmu_destroy(vcpu);
 	kfree(vcpu->arch.mce_banks);
diff --git a/arch/x86/kvm/xen.c b/arch/x86/kvm/xen.c
index 39b319f428bc..5d40d6521440 100644
--- a/arch/x86/kvm/xen.c
+++ b/arch/x86/kvm/xen.c
@@ -133,27 +133,37 @@  static void kvm_xen_update_runstate(struct kvm_vcpu *v, int state)
 void kvm_xen_update_runstate_guest(struct kvm_vcpu *v, int state)
 {
 	struct kvm_vcpu_xen *vx = &v->arch.xen;
-	struct gfn_to_hva_cache *ghc = &vx->runstate_cache;
-	struct kvm_memslots *slots = kvm_memslots(v->kvm);
-	bool atomic = (state == RUNSTATE_runnable);
-	uint64_t state_entry_time;
-	int __user *user_state;
-	uint64_t __user *user_times;
+	struct gfn_to_pfn_cache *gpc = &vx->runstate_cache;
+	uint64_t *user_times;
+	unsigned long flags;
+	size_t user_len;
+	int *user_state;
 
 	kvm_xen_update_runstate(v, state);
 
-	if (!vx->runstate_set)
+	if (!vx->runstate_cache.active)
 		return;
 
-	if (unlikely(slots->generation != ghc->generation || kvm_is_error_hva(ghc->hva)) &&
-	    kvm_gfn_to_hva_cache_init(v->kvm, ghc, ghc->gpa, ghc->len))
-		return;
+	if (IS_ENABLED(CONFIG_64BIT) && v->kvm->arch.xen.long_mode)
+		user_len = sizeof(struct vcpu_runstate_info);
+	else
+		user_len = sizeof(struct compat_vcpu_runstate_info);
 
-	/* We made sure it fits in a single page */
-	BUG_ON(!ghc->memslot);
+	read_lock_irqsave(&gpc->lock, flags);
+	while (!kvm_gfn_to_pfn_cache_check(v->kvm, gpc, gpc->gpa,
+					   user_len)) {
+		read_unlock_irqrestore(&gpc->lock, flags);
 
-	if (atomic)
-		pagefault_disable();
+		/* When invoked from kvm_sched_out() we cannot sleep */
+		if (state == RUNSTATE_runnable)
+			return;
+
+		if (kvm_gfn_to_pfn_cache_refresh(v->kvm, gpc, gpc->gpa,
+						 user_len, false))
+			return;
+
+		read_lock_irqsave(&gpc->lock, flags);
+	}
 
 	/*
 	 * The only difference between 32-bit and 64-bit versions of the
@@ -167,37 +177,32 @@  void kvm_xen_update_runstate_guest(struct kvm_vcpu *v, int state)
 	 */
 	BUILD_BUG_ON(offsetof(struct vcpu_runstate_info, state) != 0);
 	BUILD_BUG_ON(offsetof(struct compat_vcpu_runstate_info, state) != 0);
-	user_state = (int __user *)ghc->hva;
-
 	BUILD_BUG_ON(sizeof(struct compat_vcpu_runstate_info) != 0x2c);
-
-	user_times = (uint64_t __user *)(ghc->hva +
-					 offsetof(struct compat_vcpu_runstate_info,
-						  state_entry_time));
 #ifdef CONFIG_X86_64
 	BUILD_BUG_ON(offsetof(struct vcpu_runstate_info, state_entry_time) !=
 		     offsetof(struct compat_vcpu_runstate_info, state_entry_time) + 4);
 	BUILD_BUG_ON(offsetof(struct vcpu_runstate_info, time) !=
 		     offsetof(struct compat_vcpu_runstate_info, time) + 4);
-
-	if (v->kvm->arch.xen.long_mode)
-		user_times = (uint64_t __user *)(ghc->hva +
-						 offsetof(struct vcpu_runstate_info,
-							  state_entry_time));
 #endif
+
+	user_state = gpc->khva;
+
+	if (IS_ENABLED(CONFIG_64BIT) && v->kvm->arch.xen.long_mode)
+		user_times = gpc->khva + offsetof(struct vcpu_runstate_info,
+						  state_entry_time);
+	else
+		user_times = gpc->khva + offsetof(struct compat_vcpu_runstate_info,
+						  state_entry_time);
+
 	/*
 	 * First write the updated state_entry_time to the guest area.
 	 */
-	state_entry_time = vx->runstate_entry_time;
-	state_entry_time |= XEN_RUNSTATE_UPDATE;
-
 	BUILD_BUG_ON(sizeof_field(struct vcpu_runstate_info, state_entry_time) !=
-		     sizeof(state_entry_time));
+		     sizeof(user_times[0]));
 	BUILD_BUG_ON(sizeof_field(struct compat_vcpu_runstate_info, state_entry_time) !=
-		     sizeof(state_entry_time));
+		     sizeof(user_times[0]));
 
-	if (__put_user(state_entry_time, user_times))
-		goto out;
+	user_times[0] = vx->runstate_entry_time | XEN_RUNSTATE_UPDATE;
 	smp_wmb();
 
 	/*
@@ -209,8 +214,7 @@  void kvm_xen_update_runstate_guest(struct kvm_vcpu *v, int state)
 	BUILD_BUG_ON(sizeof_field(struct compat_vcpu_runstate_info, state) !=
 		     sizeof(vx->current_runstate));
 
-	if (__put_user(vx->current_runstate, user_state))
-		goto out;
+	*user_state = vx->current_runstate;
 
 	/*
 	 * Write the actual runstate times immediately after the
@@ -225,23 +229,19 @@  void kvm_xen_update_runstate_guest(struct kvm_vcpu *v, int state)
 	BUILD_BUG_ON(sizeof_field(struct vcpu_runstate_info, time) !=
 		     sizeof(vx->runstate_times));
 
-	if (__copy_to_user(user_times + 1, vx->runstate_times, sizeof(vx->runstate_times)))
-		goto out;
+	memcpy(user_times + 1, vx->runstate_times, sizeof(vx->runstate_times));
 	smp_wmb();
 
 	/*
 	 * Finally, clear the XEN_RUNSTATE_UPDATE bit in the guest's
 	 * runstate_entry_time field.
 	 */
-	state_entry_time &= ~XEN_RUNSTATE_UPDATE;
-	__put_user(state_entry_time, user_times);
+	user_times[0] &= ~XEN_RUNSTATE_UPDATE;
 	smp_wmb();
 
- out:
-	mark_page_dirty_in_slot(v->kvm, ghc->memslot, ghc->gpa >> PAGE_SHIFT);
+	read_unlock_irqrestore(&gpc->lock, flags);
 
-	if (atomic)
-		pagefault_enable();
+	mark_page_dirty_in_slot(v->kvm, gpc->memslot, gpc->gpa >> PAGE_SHIFT);
 }
 
 int __kvm_xen_has_interrupt(struct kvm_vcpu *v)
@@ -504,24 +504,17 @@  int kvm_xen_vcpu_set_attr(struct kvm_vcpu *vcpu, struct kvm_xen_vcpu_attr *data)
 			break;
 		}
 		if (data->u.gpa == GPA_INVALID) {
-			vcpu->arch.xen.runstate_set = false;
+			kvm_gfn_to_pfn_cache_destroy(vcpu->kvm,
+						     &vcpu->arch.xen.runstate_cache);
 			r = 0;
 			break;
 		}
 
-		/* It must fit within a single page */
-		if ((data->u.gpa & ~PAGE_MASK) + sizeof(struct vcpu_runstate_info) > PAGE_SIZE) {
-			r = -EINVAL;
-			break;
-		}
-
-		r = kvm_gfn_to_hva_cache_init(vcpu->kvm,
+		r = kvm_gfn_to_pfn_cache_init(vcpu->kvm,
 					      &vcpu->arch.xen.runstate_cache,
-					      data->u.gpa,
-					      sizeof(struct vcpu_runstate_info));
-		if (!r) {
-			vcpu->arch.xen.runstate_set = true;
-		}
+					      NULL, false, true, data->u.gpa,
+					      sizeof(struct vcpu_runstate_info),
+					      false);
 		break;
 
 	case KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_CURRENT:
@@ -656,7 +649,7 @@  int kvm_xen_vcpu_get_attr(struct kvm_vcpu *vcpu, struct kvm_xen_vcpu_attr *data)
 			r = -EOPNOTSUPP;
 			break;
 		}
-		if (vcpu->arch.xen.runstate_set) {
+		if (vcpu->arch.xen.runstate_cache.active) {
 			data->u.gpa = vcpu->arch.xen.runstate_cache.gpa;
 			r = 0;
 		}
@@ -1054,3 +1047,9 @@  int kvm_xen_setup_evtchn(struct kvm *kvm,
 
 	return 0;
 }
+
+void kvm_xen_destroy_vcpu(struct kvm_vcpu *vcpu)
+{
+	kvm_gfn_to_pfn_cache_destroy(vcpu->kvm,
+				     &vcpu->arch.xen.runstate_cache);
+}
diff --git a/arch/x86/kvm/xen.h b/arch/x86/kvm/xen.h
index adbcc9ed59db..54b2bf4c3001 100644
--- a/arch/x86/kvm/xen.h
+++ b/arch/x86/kvm/xen.h
@@ -23,7 +23,7 @@  int kvm_xen_write_hypercall_page(struct kvm_vcpu *vcpu, u64 data);
 int kvm_xen_hvm_config(struct kvm *kvm, struct kvm_xen_hvm_config *xhc);
 void kvm_xen_init_vm(struct kvm *kvm);
 void kvm_xen_destroy_vm(struct kvm *kvm);
-
+void kvm_xen_destroy_vcpu(struct kvm_vcpu *vcpu);
 int kvm_xen_set_evtchn_fast(struct kvm_kernel_irq_routing_entry *e,
 			    struct kvm *kvm);
 int kvm_xen_setup_evtchn(struct kvm *kvm,
@@ -65,6 +65,10 @@  static inline void kvm_xen_destroy_vm(struct kvm *kvm)
 {
 }
 
+static inline void kvm_xen_destroy_vcpu(struct kvm_vcpu *vcpu)
+{
+}
+
 static inline bool kvm_xen_msr_enabled(struct kvm *kvm)
 {
 	return false;