diff mbox series

[v4,10/12] KVM: x86/mmu: fine-grained gfn zap when guest MTRRs are honored

Message ID 20230714065530.20748-1-yan.y.zhao@intel.com (mailing list archive)
State New, archived
Series KVM: x86/mmu: refine memtype related mmu zap

Commit Message

Yan Zhao July 14, 2023, 6:55 a.m. UTC
When guest MTRRs are honored and CR0.CD toggles, rather than blindly zapping
everything, find fine-grained ranges to zap according to guest MTRRs.

Fine-grained, precise zap ranges reduce the traversal footprint during the
zap and increase the chances for concurrent vCPUs to find and skip
duplicate ranges to zap.

Opportunistically fix a typo in a nearby comment.

Suggested-by: Sean Christopherson <seanjc@google.com>
Co-developed-by: Sean Christopherson <seanjc@google.com>
Signed-off-by: Sean Christopherson <seanjc@google.com>
Signed-off-by: Yan Zhao <yan.y.zhao@intel.com>
---
 arch/x86/kvm/mtrr.c | 164 +++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 162 insertions(+), 2 deletions(-)
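
To make the zap-range selection concrete, below is a small user-space sketch
(illustrative only; the struct and function names are hypothetical and not part
of the patch) of the decision the series implements: if the CR0.CD memtype
differs from the guest default MTRR type (or IPAT is forced), everything is
zapped; otherwise only the MTRR ranges whose type differs from the default are
zapped.

#include <stdint.h>
#include <stdio.h>

/* Illustrative only: a variable MTRR range with a memory type. */
struct demo_var_mtrr {
	uint64_t start_gfn;
	uint64_t end_gfn;	/* exclusive */
	uint8_t type;		/* 0 = UC, 6 = WB, ... */
};

/*
 * Decide which GFN ranges would be zapped on a CR0.CD toggle: zap
 * everything when the CD memtype differs from the default MTRR type
 * (or IPAT is set), otherwise zap only the non-default-type ranges.
 */
static void demo_pick_zap_ranges(const struct demo_var_mtrr *var, int nr,
				 uint8_t default_type, uint8_t cd_type,
				 int cd_ipat)
{
	if (cd_type != default_type || cd_ipat) {
		printf("zap all GFNs\n");
		return;
	}
	for (int i = 0; i < nr; i++) {
		if (var[i].type != default_type)
			printf("zap GFNs [%llx, %llx)\n",
			       (unsigned long long)var[i].start_gfn,
			       (unsigned long long)var[i].end_gfn);
	}
}

int main(void)
{
	/* Default type WB (6), one UC (0) range for an MMIO-like region. */
	struct demo_var_mtrr var[] = {
		{ 0x0,     0x80000, 6 },	/* WB, matches default: skip */
		{ 0x80000, 0x81000, 0 },	/* UC: must be zapped */
	};

	/* Quirk on, CD=1 -> CD=0: old memtype WB equals the WB default. */
	demo_pick_zap_ranges(var, 2, /*default*/6, /*cd_type*/6, /*cd_ipat*/0);
	return 0;
}

With a WB default and a single UC variable range, as above, only that UC range
is zapped; that is the reduced zap footprint the commit message refers to.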

Comments

Sean Christopherson Aug. 25, 2023, 11:13 p.m. UTC | #1
On Fri, Jul 14, 2023, Yan Zhao wrote:
>  void kvm_honors_guest_mtrrs_zap_on_cd_toggle(struct kvm_vcpu *vcpu)
>  {
> -	return kvm_mtrr_zap_gfn_range(vcpu, gpa_to_gfn(0), gpa_to_gfn(~0ULL));
> +	struct kvm_mtrr *mtrr_state = &vcpu->arch.mtrr_state;
> +	bool mtrr_enabled = mtrr_is_enabled(mtrr_state);
> +	u8 default_mtrr_type;
> +	bool cd_ipat;
> +	u8 cd_type;
> +
> +	kvm_honors_guest_mtrrs_get_cd_memtype(vcpu, &cd_type, &cd_ipat);
> +
> +	default_mtrr_type = mtrr_enabled ? mtrr_default_type(mtrr_state) :
> +			    mtrr_disabled_type(vcpu);
> +
> +	if (cd_type != default_mtrr_type || cd_ipat)
> +		return kvm_mtrr_zap_gfn_range(vcpu, gpa_to_gfn(0), gpa_to_gfn(~0ULL));

Why does this use the MTRR version but the failure path does not?  Ah, because
trying to allocate in the failure path will likely fail to allocate memory.  With
a statically sized array, we can just special case the 0 => -1 case.  Actually,
we can do that regardless, it just doesn't need a dedicated flag if we use an
array.

Using the MTRR version on failure (array is full) means that other vCPUs can see
that everything is being zapped and go straight to waiting.
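
A minimal sketch of that statically sized array (hypothetical names, plain C
rather than KVM code, and with locking assumed to be handled by the caller)
might look like the following, where a single 0 => -1 entry doubles as the
"zap everything" case:

#include <stdint.h>

#define DEMO_MAX_ZAP_RANGES 16

struct demo_zap_range {
	uint64_t start_gfn;
	uint64_t end_gfn;
};

struct demo_zap_list {
	struct demo_zap_range ranges[DEMO_MAX_ZAP_RANGES];
	int nr;
};

/* Collapse the list to a single 0 => -1 entry, i.e. zap everything. */
static void demo_zap_all(struct demo_zap_list *list)
{
	list->ranges[0].start_gfn = 0;
	list->ranges[0].end_gfn = ~0ULL;
	list->nr = 1;
}

/*
 * Add a range; if the array is already full, fall back to the
 * full-range entry instead of failing, so waiters still make progress.
 */
static void demo_add_zap_range(struct demo_zap_list *list,
			       uint64_t start_gfn, uint64_t end_gfn)
{
	if (list->nr == 1 && list->ranges[0].start_gfn == 0 &&
	    list->ranges[0].end_gfn == ~0ULL)
		return;	/* already zapping everything */

	if (list->nr >= DEMO_MAX_ZAP_RANGES) {
		demo_zap_all(list);
		return;
	}

	list->ranges[list->nr].start_gfn = start_gfn;
	list->ranges[list->nr].end_gfn = end_gfn;
	list->nr++;
}

The nice property is that an overflowing array and a deliberate full zap share
the same representation, so other vCPUs inspecting the list can see that
everything is covered and go straight to waiting.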

> +
> +	/*
> +	 * If mtrr is not enabled, it will go to zap all above if the default

Pronouns again.  Maybe this?

	/*
	 * The default MTRR type has already been checked above, if MTRRs are
	 * disabled there are no other MTRR types to consider.
	 */

> +	 * type does not equal to cd_type;
> +	 * Or it has no need to zap if the default type equals to cd_type.
> +	 */
> +	if (mtrr_enabled) {

To save some indentation:

	if (!mtrr_enabled)
		return;


> +		if (prepare_zaplist_fixed_mtrr_of_non_type(vcpu, default_mtrr_type))
> +			goto fail;
> +
> +		if (prepare_zaplist_var_mtrr_of_non_type(vcpu, default_mtrr_type))
> +			goto fail;
> +
> +		kvm_zap_or_wait_mtrr_zap_list(vcpu->kvm);
> +	}
> +	return;
> +fail:
> +	kvm_clear_mtrr_zap_list(vcpu->kvm);
> +	/* resort to zapping all on failure*/
> +	kvm_zap_gfn_range(vcpu->kvm, gpa_to_gfn(0), gpa_to_gfn(~0ULL));
> +	return;
>  }
> -- 
> 2.17.1
>
Yan Zhao Sept. 4, 2023, 8:37 a.m. UTC | #2
On Fri, Aug 25, 2023 at 04:13:35PM -0700, Sean Christopherson wrote:
> On Fri, Jul 14, 2023, Yan Zhao wrote:
> >  void kvm_honors_guest_mtrrs_zap_on_cd_toggle(struct kvm_vcpu *vcpu)
> >  {
> > -	return kvm_mtrr_zap_gfn_range(vcpu, gpa_to_gfn(0), gpa_to_gfn(~0ULL));
> > +	struct kvm_mtrr *mtrr_state = &vcpu->arch.mtrr_state;
> > +	bool mtrr_enabled = mtrr_is_enabled(mtrr_state);
> > +	u8 default_mtrr_type;
> > +	bool cd_ipat;
> > +	u8 cd_type;
> > +
> > +	kvm_honors_guest_mtrrs_get_cd_memtype(vcpu, &cd_type, &cd_ipat);
> > +
> > +	default_mtrr_type = mtrr_enabled ? mtrr_default_type(mtrr_state) :
> > +			    mtrr_disabled_type(vcpu);
> > +
> > +	if (cd_type != default_mtrr_type || cd_ipat)
> > +		return kvm_mtrr_zap_gfn_range(vcpu, gpa_to_gfn(0), gpa_to_gfn(~0ULL));
> 
> Why does this use the MTRR version but the failure path does not?  Ah, because
> trying to allocate in the failure path will likely fail to allocate memory.  With
> a statically sized array, we can just special case the 0 => -1 case.  Actually,
> we can do that regardless, it just doesn't need a dedicated flag if we use an
> array.
> 
> Using the MTRR version on failure (array is full) means that other vCPUs can see
> that everything is being zapped and go straight to waiting.
Yes, will convert to using a statically sized array in the next version.

> 
> > +
> > +	/*
> > +	 * If mtrr is not enabled, it will go to zap all above if the default
> 
> Pronouns again.  Maybe this?
> 
> 	/*
> 	 * The default MTRR type has already been checked above, if MTRRs are
> 	 * disabled there are no other MTRR types to consider.
> 	 */
Yes, better :)

> > +	 * type does not equal to cd_type;
> > +	 * Or it has no need to zap if the default type equals to cd_type.
> > +	 */
> > +	if (mtrr_enabled) {
> 
> To save some indentation:
> 
> 	if (!mtrr_enabled)
> 		return;
> 
Got it! 

> > +		if (prepare_zaplist_fixed_mtrr_of_non_type(vcpu, default_mtrr_type))
> > +			goto fail;
> > +
> > +		if (prepare_zaplist_var_mtrr_of_non_type(vcpu, default_mtrr_type))
> > +			goto fail;
> > +
> > +		kvm_zap_or_wait_mtrr_zap_list(vcpu->kvm);
> > +	}
> > +	return;
> > +fail:
> > +	kvm_clear_mtrr_zap_list(vcpu->kvm);
> > +	/* resort to zapping all on failure*/
> > +	kvm_zap_gfn_range(vcpu->kvm, gpa_to_gfn(0), gpa_to_gfn(~0ULL));
> > +	return;
> >  }
> > -- 
> > 2.17.1
> >
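
For reference, a hedged sketch (an assumption, not the actual v5 posting) of
the v4 function with the two control-flow suggestions above folded in, while
still keeping the v4 list-based helpers rather than the suggested static array:

/* Sketch only: v4 logic with an early return when MTRRs are disabled. */
void kvm_honors_guest_mtrrs_zap_on_cd_toggle(struct kvm_vcpu *vcpu)
{
	struct kvm_mtrr *mtrr_state = &vcpu->arch.mtrr_state;
	bool mtrr_enabled = mtrr_is_enabled(mtrr_state);
	u8 default_mtrr_type;
	bool cd_ipat;
	u8 cd_type;

	kvm_honors_guest_mtrrs_get_cd_memtype(vcpu, &cd_type, &cd_ipat);

	default_mtrr_type = mtrr_enabled ? mtrr_default_type(mtrr_state) :
			    mtrr_disabled_type(vcpu);

	/* CD memtype differs from the default MTRR type: zap everything. */
	if (cd_type != default_mtrr_type || cd_ipat) {
		kvm_mtrr_zap_gfn_range(vcpu, gpa_to_gfn(0), gpa_to_gfn(~0ULL));
		return;
	}

	/*
	 * The default MTRR type has already been checked above; if MTRRs
	 * are disabled there are no other MTRR types to consider.
	 */
	if (!mtrr_enabled)
		return;

	if (prepare_zaplist_fixed_mtrr_of_non_type(vcpu, default_mtrr_type) ||
	    prepare_zaplist_var_mtrr_of_non_type(vcpu, default_mtrr_type)) {
		/* Resort to zapping everything on allocation failure. */
		kvm_clear_mtrr_zap_list(vcpu->kvm);
		kvm_zap_gfn_range(vcpu->kvm, gpa_to_gfn(0), gpa_to_gfn(~0ULL));
		return;
	}

	kvm_zap_or_wait_mtrr_zap_list(vcpu->kvm);
}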

Patch

diff --git a/arch/x86/kvm/mtrr.c b/arch/x86/kvm/mtrr.c
index 996a274cee40..9fdbdbf874a8 100644
--- a/arch/x86/kvm/mtrr.c
+++ b/arch/x86/kvm/mtrr.c
@@ -179,7 +179,7 @@  static struct fixed_mtrr_segment fixed_seg_table[] = {
 	{
 		.start = 0xc0000,
 		.end = 0x100000,
-		.range_shift = 12, /* 12K */
+		.range_shift = 12, /* 4K */
 		.range_start = 24,
 	}
 };
@@ -747,6 +747,19 @@  struct mtrr_zap_range {
 	struct list_head node;
 };
 
+static void kvm_clear_mtrr_zap_list(struct kvm *kvm)
+{
+	struct list_head *head = &kvm->arch.mtrr_zap_list;
+	struct mtrr_zap_range *tmp, *n;
+
+	spin_lock(&kvm->arch.mtrr_zap_list_lock);
+	list_for_each_entry_safe(tmp, n, head, node) {
+		list_del(&tmp->node);
+		kfree(tmp);
+	}
+	spin_unlock(&kvm->arch.mtrr_zap_list_lock);
+}
+
 /*
  * Add @range into kvm->arch.mtrr_zap_list and sort the list in
  * "length" ascending + "start" descending order, so that
@@ -795,6 +808,67 @@  static void kvm_add_mtrr_zap_list(struct kvm *kvm, struct mtrr_zap_range *range)
 	spin_unlock(&kvm->arch.mtrr_zap_list_lock);
 }
 
+/*
+ * Fixed ranges cover only 256 pages in total.
+ * Balancing the overhead of zapping multiple ranges against the
+ * chance of finding duplicated ranges, just add the fixed MTRR
+ * ranges as a whole to the MTRR zap list if the memory type of
+ * any of them is not the specified type.
+ */
+static int prepare_zaplist_fixed_mtrr_of_non_type(struct kvm_vcpu *vcpu, u8 type)
+{
+	struct kvm_mtrr *mtrr_state = &vcpu->arch.mtrr_state;
+	struct mtrr_zap_range *range;
+	int index, seg_end;
+	u8 mem_type;
+
+	for (index = 0; index < KVM_NR_FIXED_MTRR_REGION; index++) {
+		mem_type = mtrr_state->fixed_ranges[index];
+
+		if (mem_type == type)
+			continue;
+
+		range = kmalloc(sizeof(*range), GFP_KERNEL_ACCOUNT);
+		if (!range)
+			return -ENOMEM;
+
+		seg_end = ARRAY_SIZE(fixed_seg_table) - 1;
+		range->start = gpa_to_gfn(fixed_seg_table[0].start);
+		range->end = gpa_to_gfn(fixed_seg_table[seg_end].end);
+		kvm_add_mtrr_zap_list(vcpu->kvm, range);
+		break;
+	}
+	return 0;
+}
+
+/*
+ * Add variable MTRR ranges to the MTRR zap list
+ * if their memory type does not equal @type.
+ */
+static int prepare_zaplist_var_mtrr_of_non_type(struct kvm_vcpu *vcpu, u8 type)
+{
+	struct kvm_mtrr *mtrr_state = &vcpu->arch.mtrr_state;
+	struct mtrr_zap_range *range;
+	struct kvm_mtrr_range *tmp;
+	u8 mem_type;
+
+	list_for_each_entry(tmp, &mtrr_state->head, node) {
+		mem_type = tmp->base & 0xff;
+		if (mem_type == type)
+			continue;
+
+		range = kmalloc(sizeof(*range), GFP_KERNEL_ACCOUNT);
+		if (!range)
+			return -ENOMEM;
+
+		var_mtrr_range(tmp, &range->start, &range->end);
+		range->start = gpa_to_gfn(range->start);
+		range->end = gpa_to_gfn(range->end);
+		kvm_add_mtrr_zap_list(vcpu->kvm, range);
+	}
+	return 0;
+}
+
 static void kvm_zap_mtrr_zap_list(struct kvm *kvm)
 {
 	struct list_head *head = &kvm->arch.mtrr_zap_list;
@@ -853,7 +927,93 @@  static void kvm_mtrr_zap_gfn_range(struct kvm_vcpu *vcpu,
 	kvm_zap_gfn_range(vcpu->kvm, gfn_start, gfn_end);
 }
 
+/*
+ * Zap SPTEs when guest MTRRs are honored and CR0.CD toggles,
+ * in a fine-grained way according to guest MTRRs.
+ * As guest MTRRs are per-vCPU, they are unchanged across this function.
+ *
+ * When CR0.CD=1, the TDP memtype is WB or UC + IPAT;
+ * when CR0.CD=0, the TDP memtype is determined by guest MTRRs.
+ *
+ * On a CR0.CD toggle, as guest MTRRs remain unchanged,
+ * - if the old and new memtypes are equal, nothing needs to be done;
+ * - if the guest default MTRR type equals the memtype when CR0.CD=1,
+ *   only MTRR ranges of non-default memtype need to be zapped;
+ * - if the guest default MTRR type does not equal the memtype when
+ *   CR0.CD=1, everything is zapped because the memtypes for almost
+ *   all guest memory are outdated.
+ * _____________________________________________________________________
+ *| quirk on             | CD=1 to CD=0         | CD=0 to CD=1          |
+ *|                      | old memtype = WB     | new memtype = WB      |
+ *|----------------------|----------------------|-----------------------|
+ *| MTRR enabled         | new memtype =        | old memtype =         |
+ *|                      | guest MTRR type      | guest MTRR type       |
+ *|    ------------------|----------------------|-----------------------|
+ *|    | if default MTRR | zap non-WB guest     | zap non-WB guest      |
+ *|    | type == WB      | MTRR ranges          | MTRR ranges           |
+ *|    |-----------------|----------------------|-----------------------|
+ *|    | if default MTRR | zap all              | zap all               |
+ *|    | type != WB      | as almost all guest MTRR ranges are non-WB   |
+ *|----------------------|----------------------------------------------|
+ *| MTRR disabled        | new memtype = UC     | old memtype = UC      |
+ *| (w/ FEATURE_MTRR)    | zap all              | zap all               |
+ *|----------------------|----------------------|-----------------------|
+ *| MTRR disabled        | new memtype = WB     | old memtype = WB      |
+ *| (w/o FEATURE_MTRR)   | do nothing           | do nothing            |
+ *|______________________|______________________|_______________________|
+ *
+ * _____________________________________________________________________
+ *| quirk off     | CD=1 to CD=0             | CD=0 to CD=1             |
+ *|               | old memtype = UC + IPAT  | new memtype = UC + IPAT  |
+ *|---------------|--------------------------|--------------------------|
+ *| MTRR enabled  | new memtype = guest MTRR | old memtype = guest MTRR |
+ *|               | type (!= UC + IPAT)      | type (!= UC + IPAT)      |
+ *|               | zap all                  | zap all                  |
+ *|---------------|------------------------- |--------------------------|
+ *| MTRR disabled | new memtype = UC         | old memtype = UC         |
+ *| (w/           | (!= UC + IPAT)           | (!= UC + IPAT)           |
+ *| FEATURE_MTRR) | zap all                  | zap all                  |
+ *|---------------|--------------------------|--------------------------|
+ *| MTRR disabled | new memtype = WB         | old memtype = WB         |
+ *| (w/o          | (!= UC + IPAT)           | (!= UC + IPAT)           |
+ *| FEATURE_MTRR) | zap all                  | zap all                  |
+ *|_______________|__________________________|__________________________|
+ *
+ */
 void kvm_honors_guest_mtrrs_zap_on_cd_toggle(struct kvm_vcpu *vcpu)
 {
-	return kvm_mtrr_zap_gfn_range(vcpu, gpa_to_gfn(0), gpa_to_gfn(~0ULL));
+	struct kvm_mtrr *mtrr_state = &vcpu->arch.mtrr_state;
+	bool mtrr_enabled = mtrr_is_enabled(mtrr_state);
+	u8 default_mtrr_type;
+	bool cd_ipat;
+	u8 cd_type;
+
+	kvm_honors_guest_mtrrs_get_cd_memtype(vcpu, &cd_type, &cd_ipat);
+
+	default_mtrr_type = mtrr_enabled ? mtrr_default_type(mtrr_state) :
+			    mtrr_disabled_type(vcpu);
+
+	if (cd_type != default_mtrr_type || cd_ipat)
+		return kvm_mtrr_zap_gfn_range(vcpu, gpa_to_gfn(0), gpa_to_gfn(~0ULL));
+
+	/*
+	 * If mtrr is not enabled, it will go to zap all above if the default
+	 * type does not equal to cd_type;
+	 * Or it has no need to zap if the default type equals to cd_type.
+	 */
+	if (mtrr_enabled) {
+		if (prepare_zaplist_fixed_mtrr_of_non_type(vcpu, default_mtrr_type))
+			goto fail;
+
+		if (prepare_zaplist_var_mtrr_of_non_type(vcpu, default_mtrr_type))
+			goto fail;
+
+		kvm_zap_or_wait_mtrr_zap_list(vcpu->kvm);
+	}
+	return;
+fail:
+	kvm_clear_mtrr_zap_list(vcpu->kvm);
+	/* resort to zapping all on failure*/
+	kvm_zap_gfn_range(vcpu->kvm, gpa_to_gfn(0), gpa_to_gfn(~0ULL));
+	return;
 }