Message ID | 20240126085444.324918-40-xiong.y.zhang@linux.intel.com
State      | New, archived
Series     | KVM: x86/pmu: Introduce passthrough vPMU
On Fri, Jan 26, 2024, Xiong Zhang wrote:
> From: Mingwei Zhang <mizhang@google.com>
>
> Implement emulated counter increment for passthrough PMU under KVM_REQ_PMU.
> Defer the counter increment to KVM_REQ_PMU handler because counter
> increment requests come from kvm_pmu_trigger_event() which can be triggered
> within the KVM_RUN inner loop or outside of the inner loop. This means the
> counter increment could happen before or after PMU context switch.
>
> So process counter increment in one place makes the implementation simple.
>
> Signed-off-by: Mingwei Zhang <mizhang@google.com>
> ---
>  arch/x86/include/asm/kvm_host.h |  2 ++
>  arch/x86/kvm/pmu.c              | 52 ++++++++++++++++++++++++++++++++-
>  arch/x86/kvm/pmu.h              |  1 +
>  arch/x86/kvm/x86.c              |  8 +++--
>  4 files changed, 60 insertions(+), 3 deletions(-)
>
> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> index 869de0d81055..9080319751de 100644
> --- a/arch/x86/include/asm/kvm_host.h
> +++ b/arch/x86/include/asm/kvm_host.h
> @@ -532,6 +532,7 @@ struct kvm_pmu {
>  	u64 fixed_ctr_ctrl_mask;
>  	u64 global_ctrl;
>  	u64 global_status;
> +	u64 synthesized_overflow;

There's no reason for this to be a per-PMU field, it's only ever used in
kvm_passthrough_pmu_handle_event().

>  	u64 counter_bitmask[2];
>  	u64 global_ctrl_mask;
>  	u64 global_status_mask;
> @@ -550,6 +551,7 @@ struct kvm_pmu {
>  		atomic64_t __reprogram_pmi;
>  	};
>  	DECLARE_BITMAP(all_valid_pmc_idx, X86_PMC_IDX_MAX);
> +	DECLARE_BITMAP(incremented_pmc_idx, X86_PMC_IDX_MAX);
>  	DECLARE_BITMAP(pmc_in_use, X86_PMC_IDX_MAX);
>
>  	u64 ds_area;
> diff --git a/arch/x86/kvm/pmu.c b/arch/x86/kvm/pmu.c
> index 7b0bac1ac4bf..9e62e96fe48a 100644
> --- a/arch/x86/kvm/pmu.c
> +++ b/arch/x86/kvm/pmu.c
> @@ -449,6 +449,26 @@ static bool kvm_passthrough_pmu_incr_counter(struct kvm_pmc *pmc)
>  	return false;
>  }
>
> +void kvm_passthrough_pmu_handle_event(struct kvm_vcpu *vcpu)

Huh. Why do we call the existing helper kvm_pmu_handle_event()? It's not
handling an event, it's reprogramming counters. Can you add a patch to clean
that up? It doesn't matter terribly with the existing code, but once
kvm_handle_guest_pmi() exists, the name becomes quite confusing, e.g. I was
expecting this to be the handler for guest PMIs.

> +{
> +	struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
> +	int bit;
> +
> +	for_each_set_bit(bit, pmu->incremented_pmc_idx, X86_PMC_IDX_MAX) {

I don't love the "incremented_pmc_idx" name. It's specifically for emulated
events, that should ideally be clear in the name.

And does tracking the emulated counters actually buy anything? Iterating
over all PMCs and checking emulated_counter doesn't seem like it'd be measurably
slow, especially not when this path is likely writing multiple MSRs.

Wait, why use that and not reprogram_pmi?

> +		struct kvm_pmc *pmc = static_call(kvm_x86_pmu_pmc_idx_to_pmc)(pmu, bit);
> +
> +		if (kvm_passthrough_pmu_incr_counter(pmc)) {

kvm_passthrough_pmu_incr_counter() is *super* misleading. (a) it's not an
"increment" in standard x86 and kernel terminology, which is "Increment by 1",
and (b) it's not actually bumping the count, it's simply moving an *already*
incremented count from emulated_count to the pmc->counter.

To avoid bikeshedding, and because boolean returns are no fun, just open code it.
		if (!pmc->emulated_counter)
			continue;

		pmc->counter += pmc->emulated_counter;
		pmc->emulated_counter = 0;
		pmc->counter &= pmc_bitmask(pmc);

		/* comment goes here */
		if (pmc->counter)
			continue;

		if (pmc->eventsel & ARCH_PERFMON_EVENTSEL_INT)
			kvm_make_request(KVM_REQ_PMI, vcpu);

		pmu->global_status |= BIT_ULL(pmc->idx);

> diff --git a/arch/x86/kvm/pmu.h b/arch/x86/kvm/pmu.h
> index 6f44fe056368..0fc37a06fe48 100644
> --- a/arch/x86/kvm/pmu.h
> +++ b/arch/x86/kvm/pmu.h
> @@ -277,6 +277,7 @@ static inline bool is_passthrough_pmu_enabled(struct kvm_vcpu *vcpu)
>
>  void kvm_pmu_deliver_pmi(struct kvm_vcpu *vcpu);
>  void kvm_pmu_handle_event(struct kvm_vcpu *vcpu);
> +void kvm_passthrough_pmu_handle_event(struct kvm_vcpu *vcpu);
>  int kvm_pmu_rdpmc(struct kvm_vcpu *vcpu, unsigned pmc, u64 *data);
>  bool kvm_pmu_is_valid_rdpmc_ecx(struct kvm_vcpu *vcpu, unsigned int idx);
>  bool kvm_pmu_is_valid_msr(struct kvm_vcpu *vcpu, u32 msr);
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index fe7da1a16c3b..1bbf312cbd73 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -10726,8 +10726,12 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
>  	}
>  	if (kvm_check_request(KVM_REQ_STEAL_UPDATE, vcpu))
>  		record_steal_time(vcpu);
> -	if (kvm_check_request(KVM_REQ_PMU, vcpu))
> -		kvm_pmu_handle_event(vcpu);
> +	if (kvm_check_request(KVM_REQ_PMU, vcpu)) {
> +		if (is_passthrough_pmu_enabled(vcpu))
> +			kvm_passthrough_pmu_handle_event(vcpu);
> +		else
> +			kvm_pmu_handle_event(vcpu);

This seems like a detail that belongs in the PMU code. E.g. if we get to a
point where the two PMU flavors can share code (and I think we can/should),
then there's no need or desire for two separate APIs.

> +	}
>  	if (kvm_check_request(KVM_REQ_PMI, vcpu))
>  		kvm_pmu_deliver_pmi(vcpu);
>  #ifdef CONFIG_KVM_SMM
> --
> 2.34.1
>
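Pulling those comments together, below is a minimal sketch, not code from the
series, of what the KVM_REQ_PMU path could look like with the incremented_pmc_idx
bitmap, the synthesized_overflow field, and the boolean helper all dropped. The
function name kvm_pmu_handle_emulated_events() is made up here purely for
illustration.

void kvm_pmu_handle_emulated_events(struct kvm_vcpu *vcpu)
{
	struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
	struct kvm_pmc *pmc;
	int bit;

	/* Walk every valid PMC; no dedicated "incremented"/"dirty" bitmap. */
	for_each_set_bit(bit, pmu->all_valid_pmc_idx, X86_PMC_IDX_MAX) {
		pmc = static_call(kvm_x86_pmu_pmc_idx_to_pmc)(pmu, bit);
		if (!pmc || !pmc->emulated_counter)
			continue;

		/* Fold the already-accumulated emulated count into the PMC. */
		pmc->counter += pmc->emulated_counter;
		pmc->emulated_counter = 0;
		pmc->counter &= pmc_bitmask(pmc);

		/* Wrapping to zero is the overflow condition. */
		if (pmc->counter)
			continue;

		/* Record overflow directly; no per-PMU synthesized_overflow. */
		pmu->global_status |= BIT_ULL(pmc->idx);
		if (pmc->eventsel & ARCH_PERFMON_EVENTSEL_INT)
			kvm_make_request(KVM_REQ_PMI, vcpu);
	}
}

The overflow check mirrors the open-coded snippet above: the emulated count only
surfaces as an overflow when the masked counter wraps to zero.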
On Thu, Apr 11, 2024, Sean Christopherson wrote:
> > +	struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
> > +	int bit;
> > +
> > +	for_each_set_bit(bit, pmu->incremented_pmc_idx, X86_PMC_IDX_MAX) {
>
> I don't love the "incremented_pmc_idx" name. It's specifically for emulated
> events, that should ideally be clear in the name.
>
> And does tracking the emulated counters actually buy anything? Iterating
> over all PMCs and checking emulated_counter doesn't seem like it'd be measurably
> slow, especially not when this path is likely writing multiple MSRs.
>
> Wait, why use that and not reprogram_pmi?

If the name is a sticking point, just rename it to something generic, e.g.
dirty_pmcs or something.
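If the bitmap is kept at all, the rename is mechanical. A rough sketch, purely
illustrative: "dirty_pmcs" comes from the suggestion above, and
kvm_pmu_flush_emulated_counter() is a hypothetical helper, not an existing
function.

	/* In struct kvm_pmu, a generic name instead of incremented_pmc_idx: */
	DECLARE_BITMAP(dirty_pmcs, X86_PMC_IDX_MAX);

	/* Producer side, e.g. the emulated-event path in kvm_pmu_trigger_event(): */
	pmc->emulated_counter++;
	__set_bit(pmc->idx, pmu->dirty_pmcs);
	kvm_make_request(KVM_REQ_PMU, vcpu);

	/* Consumer side, the KVM_REQ_PMU handler: drain and clear in one place. */
	for_each_set_bit(bit, pmu->dirty_pmcs, X86_PMC_IDX_MAX)
		kvm_pmu_flush_emulated_counter(static_call(kvm_x86_pmu_pmc_idx_to_pmc)(pmu, bit));
	bitmap_zero(pmu->dirty_pmcs, X86_PMC_IDX_MAX);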
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 869de0d81055..9080319751de 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -532,6 +532,7 @@ struct kvm_pmu {
 	u64 fixed_ctr_ctrl_mask;
 	u64 global_ctrl;
 	u64 global_status;
+	u64 synthesized_overflow;
 	u64 counter_bitmask[2];
 	u64 global_ctrl_mask;
 	u64 global_status_mask;
@@ -550,6 +551,7 @@ struct kvm_pmu {
 		atomic64_t __reprogram_pmi;
 	};
 	DECLARE_BITMAP(all_valid_pmc_idx, X86_PMC_IDX_MAX);
+	DECLARE_BITMAP(incremented_pmc_idx, X86_PMC_IDX_MAX);
 	DECLARE_BITMAP(pmc_in_use, X86_PMC_IDX_MAX);
 
 	u64 ds_area;
diff --git a/arch/x86/kvm/pmu.c b/arch/x86/kvm/pmu.c
index 7b0bac1ac4bf..9e62e96fe48a 100644
--- a/arch/x86/kvm/pmu.c
+++ b/arch/x86/kvm/pmu.c
@@ -449,6 +449,26 @@ static bool kvm_passthrough_pmu_incr_counter(struct kvm_pmc *pmc)
 	return false;
 }
 
+void kvm_passthrough_pmu_handle_event(struct kvm_vcpu *vcpu)
+{
+	struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
+	int bit;
+
+	for_each_set_bit(bit, pmu->incremented_pmc_idx, X86_PMC_IDX_MAX) {
+		struct kvm_pmc *pmc = static_call(kvm_x86_pmu_pmc_idx_to_pmc)(pmu, bit);
+
+		if (kvm_passthrough_pmu_incr_counter(pmc)) {
+			__set_bit(pmc->idx, (unsigned long *)&pmc_to_pmu(pmc)->synthesized_overflow);
+
+			if (pmc->eventsel & ARCH_PERFMON_EVENTSEL_INT)
+				kvm_make_request(KVM_REQ_PMI, vcpu);
+		}
+	}
+	bitmap_zero(pmu->incremented_pmc_idx, X86_PMC_IDX_MAX);
+	pmu->global_status |= pmu->synthesized_overflow;
+	pmu->synthesized_overflow = 0;
+}
+
 void kvm_pmu_handle_event(struct kvm_vcpu *vcpu)
 {
 	struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
@@ -748,7 +768,29 @@ static inline bool cpl_is_matched(struct kvm_pmc *pmc)
 	return (static_call(kvm_x86_get_cpl)(pmc->vcpu) == 0) ? select_os : select_user;
 }
 
-void kvm_pmu_trigger_event(struct kvm_vcpu *vcpu, u64 perf_hw_id)
+static void __kvm_passthrough_pmu_trigger_event(struct kvm_vcpu *vcpu, u64 perf_hw_id)
+{
+	struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
+	struct kvm_pmc *pmc;
+	int i;
+
+	for_each_set_bit(i, pmu->all_valid_pmc_idx, X86_PMC_IDX_MAX) {
+		pmc = static_call(kvm_x86_pmu_pmc_idx_to_pmc)(pmu, i);
+
+		if (!pmc || !pmc_speculative_in_use(pmc) ||
+		    !check_pmu_event_filter(pmc))
+			continue;
+
+		/* Ignore checks for edge detect, pin control, invert and CMASK bits */
+		if (eventsel_match_perf_hw_id(pmc, perf_hw_id) && cpl_is_matched(pmc)) {
+			pmc->emulated_counter += 1;
+			__set_bit(pmc->idx, pmu->incremented_pmc_idx);
+			kvm_make_request(KVM_REQ_PMU, vcpu);
+		}
+	}
+}
+
+static void __kvm_pmu_trigger_event(struct kvm_vcpu *vcpu, u64 perf_hw_id)
 {
 	struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
 	struct kvm_pmc *pmc;
@@ -765,6 +807,14 @@ void kvm_pmu_trigger_event(struct kvm_vcpu *vcpu, u64 perf_hw_id)
 		kvm_pmu_incr_counter(pmc);
 	}
 }
+
+void kvm_pmu_trigger_event(struct kvm_vcpu *vcpu, u64 perf_hw_id)
+{
+	if (is_passthrough_pmu_enabled(vcpu))
+		__kvm_passthrough_pmu_trigger_event(vcpu, perf_hw_id);
+	else
+		__kvm_pmu_trigger_event(vcpu, perf_hw_id);
+}
 EXPORT_SYMBOL_GPL(kvm_pmu_trigger_event);
 
 static bool is_masked_filter_valid(const struct kvm_x86_pmu_event_filter *filter)
diff --git a/arch/x86/kvm/pmu.h b/arch/x86/kvm/pmu.h
index 6f44fe056368..0fc37a06fe48 100644
--- a/arch/x86/kvm/pmu.h
+++ b/arch/x86/kvm/pmu.h
@@ -277,6 +277,7 @@ static inline bool is_passthrough_pmu_enabled(struct kvm_vcpu *vcpu)
 
 void kvm_pmu_deliver_pmi(struct kvm_vcpu *vcpu);
 void kvm_pmu_handle_event(struct kvm_vcpu *vcpu);
+void kvm_passthrough_pmu_handle_event(struct kvm_vcpu *vcpu);
 int kvm_pmu_rdpmc(struct kvm_vcpu *vcpu, unsigned pmc, u64 *data);
 bool kvm_pmu_is_valid_rdpmc_ecx(struct kvm_vcpu *vcpu, unsigned int idx);
 bool kvm_pmu_is_valid_msr(struct kvm_vcpu *vcpu, u32 msr);
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index fe7da1a16c3b..1bbf312cbd73 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -10726,8 +10726,12 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
 	}
 	if (kvm_check_request(KVM_REQ_STEAL_UPDATE, vcpu))
 		record_steal_time(vcpu);
-	if (kvm_check_request(KVM_REQ_PMU, vcpu))
-		kvm_pmu_handle_event(vcpu);
+	if (kvm_check_request(KVM_REQ_PMU, vcpu)) {
+		if (is_passthrough_pmu_enabled(vcpu))
+			kvm_passthrough_pmu_handle_event(vcpu);
+		else
+			kvm_pmu_handle_event(vcpu);
+	}
 	if (kvm_check_request(KVM_REQ_PMI, vcpu))
 		kvm_pmu_deliver_pmi(vcpu);
 #ifdef CONFIG_KVM_SMM
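On the vcpu_enter_guest() hunk above, the review point that the passthrough
check belongs in the PMU code could look roughly like the sketch below
(illustrative only, not part of the series): the branch moves into pmu.c and
x86.c keeps a single call site, mirroring how the patch already dispatches
inside kvm_pmu_trigger_event() rather than at its callers.

/* arch/x86/kvm/pmu.c -- single entry point for KVM_REQ_PMU (sketch). */
void kvm_pmu_handle_event(struct kvm_vcpu *vcpu)
{
	if (is_passthrough_pmu_enabled(vcpu)) {
		kvm_passthrough_pmu_handle_event(vcpu);
		return;
	}

	/* ... existing reprogramming loop for the emulated vPMU ... */
}

/* arch/x86/kvm/x86.c -- vcpu_enter_guest() is then left untouched: */
	if (kvm_check_request(KVM_REQ_PMU, vcpu))
		kvm_pmu_handle_event(vcpu);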