diff mbox series

[v2,2/9] KVM: x86/pmu: Don't release vLBR caused by vPMI

Message ID 20230921082957.44628-3-xiong.y.zhang@intel.com (mailing list archive)
State New, archived
Headers show
Series Upgrade intel vPMU version to 5 | expand

Commit Message

Zhang, Xiong Y Sept. 21, 2023, 8:29 a.m. UTC
To emulate the Freeze_LBR_On_PMI feature, KVM PMU disables guest LBR at PMI
injection. When guest LBR is disabled, the vLBR event may be released two
vCPU sched-ins later. Once the vLBR event is released, KVM will create a new
vLBR event when the guest accesses LBR MSRs again. First, this adds overhead
at vLBR event release and re-creation. Second, this may change guest
LBR content, as vLBR event creation may cause the host LBR driver to reset
the hw LBR facility.

This commit avoids the vLBR release for Freeze_LBR_On_PMI emulation.
It changes the boolean lbr_desc->in_use into the enum lbr_desc->state so
that it can express more LBR states. KVM sets lbr_desc->state to
FREEZE_ON_PMI at vPMI injection, so that the vLBR release function can
check this state and avoid releasing the vLBR. When the guest re-enables
LBR at the end of the guest PMI handler, KVM will set lbr_desc->state
to IN_USE.

Signed-off-by: Xiong Zhang <xiong.y.zhang@intel.com>
---
 arch/x86/kvm/vmx/pmu_intel.c | 8 +++++---
 arch/x86/kvm/vmx/vmx.c       | 2 +-
 arch/x86/kvm/vmx/vmx.h       | 8 +++++++-
 3 files changed, 13 insertions(+), 5 deletions(-)

Comments

Mi, Dapeng Sept. 25, 2023, 8:22 a.m. UTC | #1
On 9/21/2023 4:29 PM, Xiong Zhang wrote:
> To emulate Freeze_LBR_On_PMI feature, KVM PMU disables guest LBR at PMI
> injection. When guest LBR is disabled, vLBR event may be released at two
> vCPU sched-in later. once vLBR event is released, KVM will create a new
> vLBR event when guest access LBR MSRs again. First this adds overhead
> at vLBR event release and re-creation. Second this may changes guest
> LBR contend as vLBR event creation may cause host LBR drvier reset
> hw LBR facility.
>
> This commit avoids the vLBR release for Freeze_LBR_On_PMI emulation.
> It changes boolean lbr_desc->in_use into enum lbr_desc->state, so
> it could express more LBR states, KVM sets lbr_desc->state as
> FREEZE_ON_PMI at vPMI injection, so that vLBR release function could
> check this state to avoid vLBR release. When guest enables LBR
> at the end of the guest PMI handler, KVM will set lbr_desc->state
> as IN_USE.
>
> Signed-off-by: Xiong Zhang <xiong.y.zhang@intel.com>
> ---
>   arch/x86/kvm/vmx/pmu_intel.c | 8 +++++---
>   arch/x86/kvm/vmx/vmx.c       | 2 +-
>   arch/x86/kvm/vmx/vmx.h       | 8 +++++++-
>   3 files changed, 13 insertions(+), 5 deletions(-)
>
> diff --git a/arch/x86/kvm/vmx/pmu_intel.c b/arch/x86/kvm/vmx/pmu_intel.c
> index 76d7bd8e4fc6..c8d46c3d1ab6 100644
> --- a/arch/x86/kvm/vmx/pmu_intel.c
> +++ b/arch/x86/kvm/vmx/pmu_intel.c
> @@ -628,7 +628,7 @@ static void intel_pmu_init(struct kvm_vcpu *vcpu)
>   	lbr_desc->records.nr = 0;
>   	lbr_desc->event = NULL;
>   	lbr_desc->msr_passthrough = false;
> -	lbr_desc->in_use = FALSE;
> +	lbr_desc->state = LBR_STATE_PREDICT_FREE;
>   }
>   
>   static void intel_pmu_reset(struct kvm_vcpu *vcpu)
> @@ -671,6 +671,7 @@ static void intel_pmu_legacy_freezing_lbrs_on_pmi(struct kvm_vcpu *vcpu)
>   	if (data & DEBUGCTLMSR_FREEZE_LBRS_ON_PMI) {
>   		data &= ~DEBUGCTLMSR_LBR;
>   		vmcs_write64(GUEST_IA32_DEBUGCTL, data);
> +		vcpu_to_lbr_desc(vcpu)->state = LBR_STATE_FREEZE_ON_PMI;
>   	}
>   }
>   
> @@ -765,9 +766,10 @@ static void intel_pmu_cleanup(struct kvm_vcpu *vcpu)
>   	struct lbr_desc *lbr_desc = vcpu_to_lbr_desc(vcpu);
>   
>   	if (!(vmcs_read64(GUEST_IA32_DEBUGCTL) & DEBUGCTLMSR_LBR)) {
> -		if (!lbr_desc->in_use)
> +		if (lbr_desc->state == LBR_STATE_PREDICT_FREE)
>   			intel_pmu_release_guest_lbr_event(vcpu);
> -		lbr_desc->in_use = false;
> +		else if (lbr_desc->state == LBR_STATE_IN_USE)
> +			lbr_desc->state = LBR_STATE_PREDICT_FREE;
>   	}
>   }


I'm concerned that there is a risk the vLBR event can never be
released. Based on the logic, the state must first be set to
LBR_STATE_IN_USE and only then can it be freed in the next cycle. Assuming
the guest PMI handler doesn't enable LBR again for some reason, the LBR
state would stay in LBR_STATE_FREEZE_ON_PMI, the vLBR event
would not be released, and the LBR stack would not be reset. The next time
the guest enables LBR, it may see stale LBR data. I think we
could add a backup mechanism to ensure the vLBR event can be freed if vLBR
is disabled after several schedule slots.

>   
> diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
> index 4056e19266b5..565df8eeb78b 100644
> --- a/arch/x86/kvm/vmx/vmx.c
> +++ b/arch/x86/kvm/vmx/vmx.c
> @@ -2242,7 +2242,7 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
>   		if (intel_pmu_lbr_is_enabled(vcpu) && (data & DEBUGCTLMSR_LBR)) {
>   			struct lbr_desc *lbr_desc = vcpu_to_lbr_desc(vcpu);
>   
> -			lbr_desc->in_use = true;
> +			lbr_desc->state = LBR_STATE_IN_USE;
>   			if (!lbr_desc->event)
>   				intel_pmu_create_guest_lbr_event(vcpu);
>   		}
> diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h
> index 547edeb52d09..0cb68a319fc8 100644
> --- a/arch/x86/kvm/vmx/vmx.h
> +++ b/arch/x86/kvm/vmx/vmx.h
> @@ -93,6 +93,12 @@ union vmx_exit_reason {
>   	u32 full;
>   };
>   
> +enum lbr_state {
> +	LBR_STATE_PREDICT_FREE = 0,
> +	LBR_STATE_IN_USE = 1,
> +	LBR_STATE_FREEZE_ON_PMI = 2
> +};
> +
>   struct lbr_desc {
>   	/* Basic info about guest LBR records. */
>   	struct x86_pmu_lbr records;
> @@ -108,7 +114,7 @@ struct lbr_desc {
>   	/* True if LBRs are marked as not intercepted in the MSR bitmap */
>   	bool msr_passthrough;
>   
> -	bool in_use;
> +	enum lbr_state state;
>   };
>   
>   /*
Zhang, Xiong Y Sept. 27, 2023, 2:16 a.m. UTC | #2
> On 9/21/2023 4:29 PM, Xiong Zhang wrote:
> > To emulate Freeze_LBR_On_PMI feature, KVM PMU disables guest LBR at
> > PMI injection. When guest LBR is disabled, vLBR event may be released
> > at two vCPU sched-in later. once vLBR event is released, KVM will
> > create a new vLBR event when guest access LBR MSRs again. First this
> > adds overhead at vLBR event release and re-creation. Second this may
> > changes guest LBR contend as vLBR event creation may cause host LBR
> > drvier reset hw LBR facility.
> >
> > This commit avoids the vLBR release for Freeze_LBR_On_PMI emulation.
> > It changes boolean lbr_desc->in_use into enum lbr_desc->state, so it
> > could express more LBR states, KVM sets lbr_desc->state as
> > FREEZE_ON_PMI at vPMI injection, so that vLBR release function could
> > check this state to avoid vLBR release. When guest enables LBR at the
> > end of the guest PMI handler, KVM will set lbr_desc->state as IN_USE.
> >
> > Signed-off-by: Xiong Zhang <xiong.y.zhang@intel.com>
> > ---
> >   arch/x86/kvm/vmx/pmu_intel.c | 8 +++++---
> >   arch/x86/kvm/vmx/vmx.c       | 2 +-
> >   arch/x86/kvm/vmx/vmx.h       | 8 +++++++-
> >   3 files changed, 13 insertions(+), 5 deletions(-)
> >
> > diff --git a/arch/x86/kvm/vmx/pmu_intel.c
> > b/arch/x86/kvm/vmx/pmu_intel.c index 76d7bd8e4fc6..c8d46c3d1ab6 100644
> > --- a/arch/x86/kvm/vmx/pmu_intel.c
> > +++ b/arch/x86/kvm/vmx/pmu_intel.c
> > @@ -628,7 +628,7 @@ static void intel_pmu_init(struct kvm_vcpu *vcpu)
> >   	lbr_desc->records.nr = 0;
> >   	lbr_desc->event = NULL;
> >   	lbr_desc->msr_passthrough = false;
> > -	lbr_desc->in_use = FALSE;
> > +	lbr_desc->state = LBR_STATE_PREDICT_FREE;
> >   }
> >
> >   static void intel_pmu_reset(struct kvm_vcpu *vcpu) @@ -671,6 +671,7
> > @@ static void intel_pmu_legacy_freezing_lbrs_on_pmi(struct kvm_vcpu
> *vcpu)
> >   	if (data & DEBUGCTLMSR_FREEZE_LBRS_ON_PMI) {
> >   		data &= ~DEBUGCTLMSR_LBR;
> >   		vmcs_write64(GUEST_IA32_DEBUGCTL, data);
> > +		vcpu_to_lbr_desc(vcpu)->state = LBR_STATE_FREEZE_ON_PMI;
> >   	}
> >   }
> >
> > @@ -765,9 +766,10 @@ static void intel_pmu_cleanup(struct kvm_vcpu *vcpu)
> >   	struct lbr_desc *lbr_desc = vcpu_to_lbr_desc(vcpu);
> >
> >   	if (!(vmcs_read64(GUEST_IA32_DEBUGCTL) & DEBUGCTLMSR_LBR)) {
> > -		if (!lbr_desc->in_use)
> > +		if (lbr_desc->state == LBR_STATE_PREDICT_FREE)
> >   			intel_pmu_release_guest_lbr_event(vcpu);
> > -		lbr_desc->in_use = false;
> > +		else if (lbr_desc->state == LBR_STATE_IN_USE)
> > +			lbr_desc->state = LBR_STATE_PREDICT_FREE;
> >   	}
> >   }
> 
> 
> I'm concerning whether there is a risk that the vLBR event can't be released
> forever. Base on the logic, the state must be set to LBR_STATE_IN_USE firstly
> and then could be freed in next cycle. Assuming the guest PMI handler doesn't
> enable LBR again as some reason, the LBR state would stay in
> LBR_STATE_FREEZE_ON_PMI state and the vLBR event would not be released
> and LBR stack would not be reset. 
>when guest enables LBR in next time, guest
> may see the stale LBR data. 
The processor won't reset the LBR stack by itself; the LBR driver resets the LBR
stack. So the guest LBR driver could reset vLBR before enabling it.

>I think we may add a backup mechanism to ensure
> the vLBR event can be freed if vLBR is disabled after several schedule slots.
Without this commit, a disabled vLBR will be released within the next two schedule
slots. This commit is an optimization so that a vLBR disabled by KVM's
lbr_freeze_on_pmi emulation is never released during the guest PMI handler.
Compared with adding the suggested backup mechanism, I prefer to drop this
optimization.

thanks
> > diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index
> > 4056e19266b5..565df8eeb78b 100644
> > --- a/arch/x86/kvm/vmx/vmx.c
> > +++ b/arch/x86/kvm/vmx/vmx.c
> > @@ -2242,7 +2242,7 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu,
> struct msr_data *msr_info)
> >   		if (intel_pmu_lbr_is_enabled(vcpu) && (data &
> DEBUGCTLMSR_LBR)) {
> >   			struct lbr_desc *lbr_desc = vcpu_to_lbr_desc(vcpu);
> >
> > -			lbr_desc->in_use = true;
> > +			lbr_desc->state = LBR_STATE_IN_USE;
> >   			if (!lbr_desc->event)
> >   				intel_pmu_create_guest_lbr_event(vcpu);
> >   		}
> > diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h index
> > 547edeb52d09..0cb68a319fc8 100644
> > --- a/arch/x86/kvm/vmx/vmx.h
> > +++ b/arch/x86/kvm/vmx/vmx.h
> > @@ -93,6 +93,12 @@ union vmx_exit_reason {
> >   	u32 full;
> >   };
> >
> > +enum lbr_state {
> > +	LBR_STATE_PREDICT_FREE = 0,
> > +	LBR_STATE_IN_USE = 1,
> > +	LBR_STATE_FREEZE_ON_PMI = 2
> > +};
> > +
> >   struct lbr_desc {
> >   	/* Basic info about guest LBR records. */
> >   	struct x86_pmu_lbr records;
> > @@ -108,7 +114,7 @@ struct lbr_desc {
> >   	/* True if LBRs are marked as not intercepted in the MSR bitmap */
> >   	bool msr_passthrough;
> >
> > -	bool in_use;
> > +	enum lbr_state state;
> >   };
> >
> >   /*
diff mbox series

Patch

diff --git a/arch/x86/kvm/vmx/pmu_intel.c b/arch/x86/kvm/vmx/pmu_intel.c
index 76d7bd8e4fc6..c8d46c3d1ab6 100644
--- a/arch/x86/kvm/vmx/pmu_intel.c
+++ b/arch/x86/kvm/vmx/pmu_intel.c
@@ -628,7 +628,7 @@  static void intel_pmu_init(struct kvm_vcpu *vcpu)
 	lbr_desc->records.nr = 0;
 	lbr_desc->event = NULL;
 	lbr_desc->msr_passthrough = false;
-	lbr_desc->in_use = FALSE;
+	lbr_desc->state = LBR_STATE_PREDICT_FREE;
 }
 
 static void intel_pmu_reset(struct kvm_vcpu *vcpu)
@@ -671,6 +671,7 @@  static void intel_pmu_legacy_freezing_lbrs_on_pmi(struct kvm_vcpu *vcpu)
 	if (data & DEBUGCTLMSR_FREEZE_LBRS_ON_PMI) {
 		data &= ~DEBUGCTLMSR_LBR;
 		vmcs_write64(GUEST_IA32_DEBUGCTL, data);
+		vcpu_to_lbr_desc(vcpu)->state = LBR_STATE_FREEZE_ON_PMI;
 	}
 }
 
@@ -765,9 +766,10 @@  static void intel_pmu_cleanup(struct kvm_vcpu *vcpu)
 	struct lbr_desc *lbr_desc = vcpu_to_lbr_desc(vcpu);
 
 	if (!(vmcs_read64(GUEST_IA32_DEBUGCTL) & DEBUGCTLMSR_LBR)) {
-		if (!lbr_desc->in_use)
+		if (lbr_desc->state == LBR_STATE_PREDICT_FREE)
 			intel_pmu_release_guest_lbr_event(vcpu);
-		lbr_desc->in_use = false;
+		else if (lbr_desc->state == LBR_STATE_IN_USE)
+			lbr_desc->state = LBR_STATE_PREDICT_FREE;
 	}
 }
 
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 4056e19266b5..565df8eeb78b 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -2242,7 +2242,7 @@  static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 		if (intel_pmu_lbr_is_enabled(vcpu) && (data & DEBUGCTLMSR_LBR)) {
 			struct lbr_desc *lbr_desc = vcpu_to_lbr_desc(vcpu);
 
-			lbr_desc->in_use = true;
+			lbr_desc->state = LBR_STATE_IN_USE;
 			if (!lbr_desc->event)
 				intel_pmu_create_guest_lbr_event(vcpu);
 		}
diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h
index 547edeb52d09..0cb68a319fc8 100644
--- a/arch/x86/kvm/vmx/vmx.h
+++ b/arch/x86/kvm/vmx/vmx.h
@@ -93,6 +93,12 @@  union vmx_exit_reason {
 	u32 full;
 };
 
+enum lbr_state {
+	LBR_STATE_PREDICT_FREE = 0,
+	LBR_STATE_IN_USE = 1,
+	LBR_STATE_FREEZE_ON_PMI = 2
+};
+
 struct lbr_desc {
 	/* Basic info about guest LBR records. */
 	struct x86_pmu_lbr records;
@@ -108,7 +114,7 @@  struct lbr_desc {
 	/* True if LBRs are marked as not intercepted in the MSR bitmap */
 	bool msr_passthrough;
 
-	bool in_use;
+	enum lbr_state state;
 };
 
 /*