diff mbox series

[v4,3/6] KVM: x86: nSVM: support PAUSE filtering when L0 doesn't intercept PAUSE

Message ID 20220322174050.241850-4-mlevitsk@redhat.com (mailing list archive)
State New, archived
Headers show
Series nSVM/SVM features | expand

Commit Message

Maxim Levitsky March 22, 2022, 5:40 p.m. UTC
Allow L1 to use PAUSE filtering if L0 doesn't use it.

Signed-off-by: Maxim Levitsky <mlevitsk@redhat.com>
---
 arch/x86/kvm/svm/nested.c | 26 ++++++++++++++++++++++++++
 arch/x86/kvm/svm/svm.c    | 22 +++++++++++++++++++---
 arch/x86/kvm/svm/svm.h    |  2 ++
 3 files changed, 47 insertions(+), 3 deletions(-)

Comments

Paolo Bonzini March 24, 2022, 6:24 p.m. UTC | #1
On 3/22/22 18:40, Maxim Levitsky wrote:
> Allow L1 to use PAUSE filtering if L0 doesn't use it.
> 
> Signed-off-by: Maxim Levitsky<mlevitsk@redhat.com>

Can you enlarge the commit message to explain the logic in 
nested_vmcb02_prepare_control?

Thanks,

Paolo
Maxim Levitsky March 27, 2022, 3:14 p.m. UTC | #2
On Thu, 2022-03-24 at 19:24 +0100, Paolo Bonzini wrote:
> On 3/22/22 18:40, Maxim Levitsky wrote:
> > Allow L1 to use PAUSE filtering if L0 doesn't use it.
> > 
> > Signed-off-by: Maxim Levitsky<mlevitsk@redhat.com>
> 
> Can you enlarge the commit message to explain the logic in 
> nested_vmcb02_prepare_control?

No problem, I will do that in the next version.

How about this:

KVM: x86: nSVM: support nested PAUSE filtering when possible           
 
Expose the pause filtering and threshold in the guest CPUID 
and support PAUSE filtering when possible:

- If L0 doesn't intercept PAUSE
  (cpu_pm=on, or kvm_amd's pause_filter_count parameter is 0),
  then allow L1 to have full control over PAUSE filtering.
 
- Otherwise, if L1 doesn't intercept PAUSE,
  use KVM's PAUSE thresholds, and update them even
  when running nested.
 
- Otherwise, ignore both host and guest PAUSE thresholds,
  because it is not really possible to merge them correctly.

  It is expected that in this case, the userspace hypervisor (e.g. QEMU)
  will not enable this feature in the guest CPUID, to avoid
  having the guest update both thresholds pointlessly.


Best regards,
	Maxim Levitsky

> 
> Thanks,
> 
> Paolo
>
diff mbox series

Patch

diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c
index c1baa3a68ce6..0a0b4b26c91e 100644
--- a/arch/x86/kvm/svm/nested.c
+++ b/arch/x86/kvm/svm/nested.c
@@ -667,6 +667,29 @@  static void nested_vmcb02_prepare_control(struct vcpu_svm *svm)
 	if (!nested_vmcb_needs_vls_intercept(svm))
 		vmcb02->control.virt_ext |= VIRTUAL_VMLOAD_VMSAVE_ENABLE_MASK;
 
+	if (kvm_pause_in_guest(svm->vcpu.kvm)) {
+		/* use guest values since host doesn't use them */
+		vmcb02->control.pause_filter_count =
+				svm->pause_filter_enabled ?
+				svm->nested.ctl.pause_filter_count : 0;
+
+		vmcb02->control.pause_filter_thresh =
+				svm->pause_threshold_enabled ?
+				svm->nested.ctl.pause_filter_thresh : 0;
+
+	} else if (!vmcb12_is_intercept(&svm->nested.ctl, INTERCEPT_PAUSE)) {
+		/* use host values when guest doesn't use them */
+		vmcb02->control.pause_filter_count = vmcb01->control.pause_filter_count;
+		vmcb02->control.pause_filter_thresh = vmcb01->control.pause_filter_thresh;
+	} else {
+		/*
+		 * Intercept every PAUSE otherwise and
+		 * ignore both host and guest values
+		 */
+		vmcb02->control.pause_filter_count = 0;
+		vmcb02->control.pause_filter_thresh = 0;
+	}
+
 	nested_svm_transition_tlb_flush(vcpu);
 
 	/* Enter Guest-Mode */
@@ -927,6 +950,9 @@  int nested_svm_vmexit(struct vcpu_svm *svm)
 	vmcb12->control.event_inj         = svm->nested.ctl.event_inj;
 	vmcb12->control.event_inj_err     = svm->nested.ctl.event_inj_err;
 
+	if (!kvm_pause_in_guest(vcpu->kvm) && vmcb02->control.pause_filter_count)
+		vmcb01->control.pause_filter_count = vmcb02->control.pause_filter_count;
+
 	nested_svm_copy_common_state(svm->nested.vmcb02.ptr, svm->vmcb01.ptr);
 
 	svm_switch_vmcb(svm, &svm->vmcb01);
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index ec9a1dabdcc3..4c23cb1895ab 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -910,6 +910,9 @@  static void grow_ple_window(struct kvm_vcpu *vcpu)
 	struct vmcb_control_area *control = &svm->vmcb->control;
 	int old = control->pause_filter_count;
 
+	if (kvm_pause_in_guest(vcpu->kvm) || !old)
+		return;
+
 	control->pause_filter_count = __grow_ple_window(old,
 							pause_filter_count,
 							pause_filter_count_grow,
@@ -928,6 +931,9 @@  static void shrink_ple_window(struct kvm_vcpu *vcpu)
 	struct vmcb_control_area *control = &svm->vmcb->control;
 	int old = control->pause_filter_count;
 
+	if (kvm_pause_in_guest(vcpu->kvm) || !old)
+		return;
+
 	control->pause_filter_count =
 				__shrink_ple_window(old,
 						    pause_filter_count,
@@ -2984,7 +2990,6 @@  static int interrupt_window_interception(struct kvm_vcpu *vcpu)
 static int pause_interception(struct kvm_vcpu *vcpu)
 {
 	bool in_kernel;
-
 	/*
 	 * CPL is not made available for an SEV-ES guest, therefore
 	 * vcpu->arch.preempted_in_kernel can never be true.  Just
@@ -2992,8 +2997,7 @@  static int pause_interception(struct kvm_vcpu *vcpu)
 	 */
 	in_kernel = !sev_es_guest(vcpu->kvm) && svm_get_cpl(vcpu) == 0;
 
-	if (!kvm_pause_in_guest(vcpu->kvm))
-		grow_ple_window(vcpu);
+	grow_ple_window(vcpu);
 
 	kvm_vcpu_on_spin(vcpu, in_kernel);
 	return kvm_skip_emulated_instruction(vcpu);
@@ -4020,6 +4024,12 @@  static void svm_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu)
 
 	svm->v_vmload_vmsave_enabled = vls && guest_cpuid_has(vcpu, X86_FEATURE_V_VMSAVE_VMLOAD);
 
+	svm->pause_filter_enabled = kvm_cpu_cap_has(X86_FEATURE_PAUSEFILTER) &&
+			guest_cpuid_has(vcpu, X86_FEATURE_PAUSEFILTER);
+
+	svm->pause_threshold_enabled = kvm_cpu_cap_has(X86_FEATURE_PFTHRESHOLD) &&
+			guest_cpuid_has(vcpu, X86_FEATURE_PFTHRESHOLD);
+
 	svm_recalc_instruction_intercepts(vcpu, svm);
 
 	/* For sev guests, the memory encryption bit is not reserved in CR3.  */
@@ -4773,6 +4783,12 @@  static __init void svm_set_cpu_caps(void)
 		if (lbrv)
 			kvm_cpu_cap_set(X86_FEATURE_LBRV);
 
+		if (boot_cpu_has(X86_FEATURE_PAUSEFILTER))
+			kvm_cpu_cap_set(X86_FEATURE_PAUSEFILTER);
+
+		if (boot_cpu_has(X86_FEATURE_PFTHRESHOLD))
+			kvm_cpu_cap_set(X86_FEATURE_PFTHRESHOLD);
+
 		/* Nested VM can receive #VMEXIT instead of triggering #GP */
 		kvm_cpu_cap_set(X86_FEATURE_SVME_ADDR_CHK);
 	}
diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h
index aaf46b1fbf76..9895fd6a7310 100644
--- a/arch/x86/kvm/svm/svm.h
+++ b/arch/x86/kvm/svm/svm.h
@@ -237,6 +237,8 @@  struct vcpu_svm {
 	bool tsc_scaling_enabled          : 1;
 	bool v_vmload_vmsave_enabled      : 1;
 	bool lbrv_enabled                 : 1;
+	bool pause_filter_enabled         : 1;
+	bool pause_threshold_enabled      : 1;
 
 	u32 ldr_reg;
 	u32 dfr_reg;