
[RFC,39/41] KVM: x86/pmu: Implement emulated counter increment for passthrough PMU

Message ID 20240126085444.324918-40-xiong.y.zhang@linux.intel.com (mailing list archive)
State New, archived
Series KVM: x86/pmu: Introduce passthrough vPMU

Commit Message

Xiong Zhang Jan. 26, 2024, 8:54 a.m. UTC
From: Mingwei Zhang <mizhang@google.com>

Implement emulated counter increment for the passthrough PMU under KVM_REQ_PMU.
Defer the counter increment to the KVM_REQ_PMU handler because counter
increment requests come from kvm_pmu_trigger_event(), which can be triggered
either within or outside of the KVM_RUN inner loop. This means the counter
increment could happen before or after the PMU context switch.

Processing the counter increments in one place keeps the implementation simple.

Signed-off-by: Mingwei Zhang <mizhang@google.com>
---
 arch/x86/include/asm/kvm_host.h |  2 ++
 arch/x86/kvm/pmu.c              | 52 ++++++++++++++++++++++++++++++++-
 arch/x86/kvm/pmu.h              |  1 +
 arch/x86/kvm/x86.c              |  8 +++--
 4 files changed, 60 insertions(+), 3 deletions(-)
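
Condensed from the diff below for orientation only (all names are the patch's own; the authoritative code is in the patch itself), the deferral has a producer half in kvm_pmu_trigger_event() and a consumer half in the KVM_REQ_PMU handler:

	/*
	 * Producer, kvm_pmu_trigger_event() path: may run inside or outside
	 * the KVM_RUN inner loop, so it only records the increment and
	 * raises a request.
	 */
	pmc->emulated_counter += 1;
	__set_bit(pmc->idx, pmu->incremented_pmc_idx);
	kvm_make_request(KVM_REQ_PMU, vcpu);

	/*
	 * Consumer, vcpu_enter_guest(): the pending request is handled at one
	 * fixed point before entering the guest, so all deferred increments
	 * are applied in a single place.
	 */
	if (kvm_check_request(KVM_REQ_PMU, vcpu)) {
		if (is_passthrough_pmu_enabled(vcpu))
			kvm_passthrough_pmu_handle_event(vcpu);
		else
			kvm_pmu_handle_event(vcpu);
	}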

Comments

Sean Christopherson April 11, 2024, 11:12 p.m. UTC | #1
On Fri, Jan 26, 2024, Xiong Zhang wrote:
> From: Mingwei Zhang <mizhang@google.com>
> 
> Implement emulated counter increment for the passthrough PMU under KVM_REQ_PMU.
> Defer the counter increment to the KVM_REQ_PMU handler because counter
> increment requests come from kvm_pmu_trigger_event(), which can be triggered
> either within or outside of the KVM_RUN inner loop. This means the counter
> increment could happen before or after the PMU context switch.
> 
> Processing the counter increments in one place keeps the implementation simple.
> 
> Signed-off-by: Mingwei Zhang <mizhang@google.com>
> ---
>  arch/x86/include/asm/kvm_host.h |  2 ++
>  arch/x86/kvm/pmu.c              | 52 ++++++++++++++++++++++++++++++++-
>  arch/x86/kvm/pmu.h              |  1 +
>  arch/x86/kvm/x86.c              |  8 +++--
>  4 files changed, 60 insertions(+), 3 deletions(-)
> 
> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> index 869de0d81055..9080319751de 100644
> --- a/arch/x86/include/asm/kvm_host.h
> +++ b/arch/x86/include/asm/kvm_host.h
> @@ -532,6 +532,7 @@ struct kvm_pmu {
>  	u64 fixed_ctr_ctrl_mask;
>  	u64 global_ctrl;
>  	u64 global_status;
> +	u64 synthesized_overflow;

There's no reason for this to be a per-PMU field, it's only ever used in
kvm_passthrough_pmu_handle_event().
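
For illustration, a minimal sketch of the posted handler with the per-PMU field replaced by a local (a hypothetical rework of the code in this patch, not something from the series):

	void kvm_passthrough_pmu_handle_event(struct kvm_vcpu *vcpu)
	{
		struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
		u64 synthesized_overflow = 0;	/* local, no pmu->synthesized_overflow */
		int bit;

		for_each_set_bit(bit, pmu->incremented_pmc_idx, X86_PMC_IDX_MAX) {
			struct kvm_pmc *pmc = static_call(kvm_x86_pmu_pmc_idx_to_pmc)(pmu, bit);

			if (kvm_passthrough_pmu_incr_counter(pmc)) {
				/* BIT_ULL() on the local replaces the __set_bit() cast dance. */
				synthesized_overflow |= BIT_ULL(pmc->idx);

				if (pmc->eventsel & ARCH_PERFMON_EVENTSEL_INT)
					kvm_make_request(KVM_REQ_PMI, vcpu);
			}
		}
		bitmap_zero(pmu->incremented_pmc_idx, X86_PMC_IDX_MAX);
		pmu->global_status |= synthesized_overflow;
	}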

>  	u64 counter_bitmask[2];
>  	u64 global_ctrl_mask;
>  	u64 global_status_mask;
> @@ -550,6 +551,7 @@ struct kvm_pmu {
>  		atomic64_t __reprogram_pmi;
>  	};
>  	DECLARE_BITMAP(all_valid_pmc_idx, X86_PMC_IDX_MAX);
> +	DECLARE_BITMAP(incremented_pmc_idx, X86_PMC_IDX_MAX);
>  	DECLARE_BITMAP(pmc_in_use, X86_PMC_IDX_MAX);
>  
>  	u64 ds_area;
> diff --git a/arch/x86/kvm/pmu.c b/arch/x86/kvm/pmu.c
> index 7b0bac1ac4bf..9e62e96fe48a 100644
> --- a/arch/x86/kvm/pmu.c
> +++ b/arch/x86/kvm/pmu.c
> @@ -449,6 +449,26 @@ static bool kvm_passthrough_pmu_incr_counter(struct kvm_pmc *pmc)
>  	return false;
>  }
>  
> +void kvm_passthrough_pmu_handle_event(struct kvm_vcpu *vcpu)

Huh.  Why do we call the existing helper kvm_pmu_handle_event()?  It's not handling
an event, it's reprogramming counters.

Can you add a patch to clean that up?  It doesn't matter terribly with the existing
code, but once kvm_handle_guest_pmi() exists, the name becomes quite confusing,
e.g. I was expecting this to be the handler for guest PMIs.
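
For instance, the suggested cleanup patch could be little more than a rename, with the callers in pmu.h and x86.c updated to match (the new name below is only a placeholder, not one taken from the series):

	-void kvm_pmu_handle_event(struct kvm_vcpu *vcpu)
	+void kvm_pmu_reprogram_counters(struct kvm_vcpu *vcpu)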

> +{
> +	struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
> +	int bit;
> +
> +	for_each_set_bit(bit, pmu->incremented_pmc_idx, X86_PMC_IDX_MAX) {

I don't love the "incremented_pmc_idx" name.  It's specifically for emulated
events, that should ideally be clear in the name.

And does tracking the emulated counters actually buy anything?  Iterating
over all PMCs and checking emulated_counter doesn't seem like it'd be measurably
slow, especially not when this path is likely writing multiple MSRs.

Wait, why use that and not reprogram_pmi?


> +		struct kvm_pmc *pmc = static_call(kvm_x86_pmu_pmc_idx_to_pmc)(pmu, bit);
> +
> +		if (kvm_passthrough_pmu_incr_counter(pmc)) {

kvm_passthrough_pmu_incr_counter() is *super* misleading.  (a) it's not an
"increment" in standard x86 and kernel terminology, which is "Increment by 1",
and (b) it's not actually bumping the count, it's simply moving an *already*
incremented count from emulated_counter to pmc->counter.

To avoid bikeshedding, and because boolean returns are no fun, just open code it.

		if (!pmc->emulated_counter)
			continue;

		pmc->counter += pmc->emulated_counter;
		pmc->emulated_counter = 0;
		pmc->counter &= pmc_bitmask(pmc);

		/* comment goes here */
		if (pmc->counter)
			continue;

		if (pmc->eventsel & ARCH_PERFMON_EVENTSEL_INT)
			kvm_make_request(KVM_REQ_PMI, vcpu);

		pmu->global_status |= BIT_ULL(pmc->idx);
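
Pulling those suggestions together, and taking the earlier question about iterating all PMCs instead of a dedicated bitmap at face value, the handler might end up roughly like this (a sketch assembled from the review comments above, not code from the series or from what was eventually applied):

	void kvm_passthrough_pmu_handle_event(struct kvm_vcpu *vcpu)
	{
		struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
		int bit;

		for_each_set_bit(bit, pmu->all_valid_pmc_idx, X86_PMC_IDX_MAX) {
			struct kvm_pmc *pmc = static_call(kvm_x86_pmu_pmc_idx_to_pmc)(pmu, bit);

			if (!pmc || !pmc->emulated_counter)
				continue;

			pmc->counter += pmc->emulated_counter;
			pmc->emulated_counter = 0;
			pmc->counter &= pmc_bitmask(pmc);

			/* Counter wrapped back to zero, i.e. overflowed. */
			if (pmc->counter)
				continue;

			if (pmc->eventsel & ARCH_PERFMON_EVENTSEL_INT)
				kvm_make_request(KVM_REQ_PMI, vcpu);

			pmu->global_status |= BIT_ULL(pmc->idx);
		}
	}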

> diff --git a/arch/x86/kvm/pmu.h b/arch/x86/kvm/pmu.h
> index 6f44fe056368..0fc37a06fe48 100644
> --- a/arch/x86/kvm/pmu.h
> +++ b/arch/x86/kvm/pmu.h
> @@ -277,6 +277,7 @@ static inline bool is_passthrough_pmu_enabled(struct kvm_vcpu *vcpu)
>  
>  void kvm_pmu_deliver_pmi(struct kvm_vcpu *vcpu);
>  void kvm_pmu_handle_event(struct kvm_vcpu *vcpu);
> +void kvm_passthrough_pmu_handle_event(struct kvm_vcpu *vcpu);
>  int kvm_pmu_rdpmc(struct kvm_vcpu *vcpu, unsigned pmc, u64 *data);
>  bool kvm_pmu_is_valid_rdpmc_ecx(struct kvm_vcpu *vcpu, unsigned int idx);
>  bool kvm_pmu_is_valid_msr(struct kvm_vcpu *vcpu, u32 msr);
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index fe7da1a16c3b..1bbf312cbd73 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -10726,8 +10726,12 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
>  		}
>  		if (kvm_check_request(KVM_REQ_STEAL_UPDATE, vcpu))
>  			record_steal_time(vcpu);
> -		if (kvm_check_request(KVM_REQ_PMU, vcpu))
> -			kvm_pmu_handle_event(vcpu);
> +		if (kvm_check_request(KVM_REQ_PMU, vcpu)) {
> +			if (is_passthrough_pmu_enabled(vcpu))
> +				kvm_passthrough_pmu_handle_event(vcpu);
> +			else
> +				kvm_pmu_handle_event(vcpu);

This seems like a detail that belongs in the PMU code.  E.g. if we get to a point
where the two PMU flavors can share code (and I think we can/should), then there's
no need or desire for two separate APIs.
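
One way to read that, mirroring how this patch already wraps kvm_pmu_trigger_event(), is to keep a single entry point and branch inside pmu.c (a sketch only; the double-underscore names are placeholders):

	/* pmu.c: one external API, internal dispatch on the PMU flavor. */
	void kvm_pmu_handle_event(struct kvm_vcpu *vcpu)
	{
		if (is_passthrough_pmu_enabled(vcpu))
			__kvm_passthrough_pmu_handle_event(vcpu);
		else
			__kvm_pmu_handle_event(vcpu);
	}

	/* x86.c then stays untouched: */
	if (kvm_check_request(KVM_REQ_PMU, vcpu))
		kvm_pmu_handle_event(vcpu);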

> +		}
>  		if (kvm_check_request(KVM_REQ_PMI, vcpu))
>  			kvm_pmu_deliver_pmi(vcpu);
>  #ifdef CONFIG_KVM_SMM
> -- 
> 2.34.1
>
Sean Christopherson April 11, 2024, 11:17 p.m. UTC | #2
On Thu, Apr 11, 2024, Sean Christopherson wrote:
> > +	struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
> > +	int bit;
> > +
> > +	for_each_set_bit(bit, pmu->incremented_pmc_idx, X86_PMC_IDX_MAX) {
> 
> I don't love the "incremented_pmc_idx" name.  It's specifically for emulated
> events, that should ideally be clear in the name.
> 
> And does tracking the emulated counters actually buy anything?  Iterating
> over all PMCs and checking emulated_counter doesn't seem like it'd be measurably
> slow, especially not when this path is likely writing multiple MSRs.
> 
> Wait, why use that and not reprogram_pmi?

If the name is a sticking point, just rename it to something generic, e.g.
dirty_pmcs or something.

Patch

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 869de0d81055..9080319751de 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -532,6 +532,7 @@  struct kvm_pmu {
 	u64 fixed_ctr_ctrl_mask;
 	u64 global_ctrl;
 	u64 global_status;
+	u64 synthesized_overflow;
 	u64 counter_bitmask[2];
 	u64 global_ctrl_mask;
 	u64 global_status_mask;
@@ -550,6 +551,7 @@  struct kvm_pmu {
 		atomic64_t __reprogram_pmi;
 	};
 	DECLARE_BITMAP(all_valid_pmc_idx, X86_PMC_IDX_MAX);
+	DECLARE_BITMAP(incremented_pmc_idx, X86_PMC_IDX_MAX);
 	DECLARE_BITMAP(pmc_in_use, X86_PMC_IDX_MAX);
 
 	u64 ds_area;
diff --git a/arch/x86/kvm/pmu.c b/arch/x86/kvm/pmu.c
index 7b0bac1ac4bf..9e62e96fe48a 100644
--- a/arch/x86/kvm/pmu.c
+++ b/arch/x86/kvm/pmu.c
@@ -449,6 +449,26 @@  static bool kvm_passthrough_pmu_incr_counter(struct kvm_pmc *pmc)
 	return false;
 }
 
+void kvm_passthrough_pmu_handle_event(struct kvm_vcpu *vcpu)
+{
+	struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
+	int bit;
+
+	for_each_set_bit(bit, pmu->incremented_pmc_idx, X86_PMC_IDX_MAX) {
+		struct kvm_pmc *pmc = static_call(kvm_x86_pmu_pmc_idx_to_pmc)(pmu, bit);
+
+		if (kvm_passthrough_pmu_incr_counter(pmc)) {
+			__set_bit(pmc->idx, (unsigned long *)&pmc_to_pmu(pmc)->synthesized_overflow);
+
+			if (pmc->eventsel & ARCH_PERFMON_EVENTSEL_INT)
+				kvm_make_request(KVM_REQ_PMI, vcpu);
+		}
+	}
+	bitmap_zero(pmu->incremented_pmc_idx, X86_PMC_IDX_MAX);
+	pmu->global_status |= pmu->synthesized_overflow;
+	pmu->synthesized_overflow = 0;
+}
+
 void kvm_pmu_handle_event(struct kvm_vcpu *vcpu)
 {
 	struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
@@ -748,7 +768,29 @@  static inline bool cpl_is_matched(struct kvm_pmc *pmc)
 	return (static_call(kvm_x86_get_cpl)(pmc->vcpu) == 0) ? select_os : select_user;
 }
 
-void kvm_pmu_trigger_event(struct kvm_vcpu *vcpu, u64 perf_hw_id)
+static void __kvm_passthrough_pmu_trigger_event(struct kvm_vcpu *vcpu, u64 perf_hw_id)
+{
+	struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
+	struct kvm_pmc *pmc;
+	int i;
+
+	for_each_set_bit(i, pmu->all_valid_pmc_idx, X86_PMC_IDX_MAX) {
+		pmc = static_call(kvm_x86_pmu_pmc_idx_to_pmc)(pmu, i);
+
+		if (!pmc || !pmc_speculative_in_use(pmc) ||
+		    !check_pmu_event_filter(pmc))
+			continue;
+
+		/* Ignore checks for edge detect, pin control, invert and CMASK bits */
+		if (eventsel_match_perf_hw_id(pmc, perf_hw_id) && cpl_is_matched(pmc)) {
+			pmc->emulated_counter += 1;
+			__set_bit(pmc->idx, pmu->incremented_pmc_idx);
+			kvm_make_request(KVM_REQ_PMU, vcpu);
+		}
+	}
+}
+
+static void __kvm_pmu_trigger_event(struct kvm_vcpu *vcpu, u64 perf_hw_id)
 {
 	struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
 	struct kvm_pmc *pmc;
@@ -765,6 +807,14 @@  void kvm_pmu_trigger_event(struct kvm_vcpu *vcpu, u64 perf_hw_id)
 			kvm_pmu_incr_counter(pmc);
 	}
 }
+
+void kvm_pmu_trigger_event(struct kvm_vcpu *vcpu, u64 perf_hw_id)
+{
+	if (is_passthrough_pmu_enabled(vcpu))
+		__kvm_passthrough_pmu_trigger_event(vcpu, perf_hw_id);
+	else
+		__kvm_pmu_trigger_event(vcpu, perf_hw_id);
+}
 EXPORT_SYMBOL_GPL(kvm_pmu_trigger_event);
 
 static bool is_masked_filter_valid(const struct kvm_x86_pmu_event_filter *filter)
diff --git a/arch/x86/kvm/pmu.h b/arch/x86/kvm/pmu.h
index 6f44fe056368..0fc37a06fe48 100644
--- a/arch/x86/kvm/pmu.h
+++ b/arch/x86/kvm/pmu.h
@@ -277,6 +277,7 @@  static inline bool is_passthrough_pmu_enabled(struct kvm_vcpu *vcpu)
 
 void kvm_pmu_deliver_pmi(struct kvm_vcpu *vcpu);
 void kvm_pmu_handle_event(struct kvm_vcpu *vcpu);
+void kvm_passthrough_pmu_handle_event(struct kvm_vcpu *vcpu);
 int kvm_pmu_rdpmc(struct kvm_vcpu *vcpu, unsigned pmc, u64 *data);
 bool kvm_pmu_is_valid_rdpmc_ecx(struct kvm_vcpu *vcpu, unsigned int idx);
 bool kvm_pmu_is_valid_msr(struct kvm_vcpu *vcpu, u32 msr);
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index fe7da1a16c3b..1bbf312cbd73 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -10726,8 +10726,12 @@  static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
 		}
 		if (kvm_check_request(KVM_REQ_STEAL_UPDATE, vcpu))
 			record_steal_time(vcpu);
-		if (kvm_check_request(KVM_REQ_PMU, vcpu))
-			kvm_pmu_handle_event(vcpu);
+		if (kvm_check_request(KVM_REQ_PMU, vcpu)) {
+			if (is_passthrough_pmu_enabled(vcpu))
+				kvm_passthrough_pmu_handle_event(vcpu);
+			else
+				kvm_pmu_handle_event(vcpu);
+		}
 		if (kvm_check_request(KVM_REQ_PMI, vcpu))
 			kvm_pmu_deliver_pmi(vcpu);
 #ifdef CONFIG_KVM_SMM