Message ID | 20210827005718.585190-8-seanjc@google.com (mailing list archive) |
---|---|
State | New, archived |
Series | [01/15] KVM: x86: Register perf callbacks after calling vendor's hardware_setup() |
On Thu, Aug 26, 2021 at 05:57:10PM -0700, Sean Christopherson wrote:
> diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h
> index 5cedc0e8a5d5..4c5ba4128b38 100644
> --- a/arch/x86/kvm/x86.h
> +++ b/arch/x86/kvm/x86.h
> @@ -395,9 +395,10 @@ static inline void kvm_unregister_perf_callbacks(void)
>
>  DECLARE_PER_CPU(struct kvm_vcpu *, current_vcpu);
>
> -static inline void kvm_before_interrupt(struct kvm_vcpu *vcpu)
> +static inline void kvm_before_interrupt(struct kvm_vcpu *vcpu, bool is_nmi)
>  {
>  	__this_cpu_write(current_vcpu, vcpu);
> +	WRITE_ONCE(vcpu->arch.handling_nmi_from_guest, is_nmi);
>
>  	kvm_register_perf_callbacks();
>  }
> @@ -406,6 +407,7 @@ static inline void kvm_after_interrupt(struct kvm_vcpu *vcpu)
>  {
>  	kvm_unregister_perf_callbacks();
>
> +	WRITE_ONCE(vcpu->arch.handling_nmi_from_guest, false);
>  	__this_cpu_write(current_vcpu, NULL);
>  }

Does this rely on kvm_{,un}register_perf_callback() being a function
call and thus implying a sequence point to order the stores?
On Fri, Aug 27, 2021, Peter Zijlstra wrote:
> On Thu, Aug 26, 2021 at 05:57:10PM -0700, Sean Christopherson wrote:
> > diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h
> > index 5cedc0e8a5d5..4c5ba4128b38 100644
> > --- a/arch/x86/kvm/x86.h
> > +++ b/arch/x86/kvm/x86.h
> > @@ -395,9 +395,10 @@ static inline void kvm_unregister_perf_callbacks(void)
> >
> >  DECLARE_PER_CPU(struct kvm_vcpu *, current_vcpu);
> >
> > -static inline void kvm_before_interrupt(struct kvm_vcpu *vcpu)
> > +static inline void kvm_before_interrupt(struct kvm_vcpu *vcpu, bool is_nmi)
> >  {
> >  	__this_cpu_write(current_vcpu, vcpu);
> > +	WRITE_ONCE(vcpu->arch.handling_nmi_from_guest, is_nmi);
> >
> >  	kvm_register_perf_callbacks();
> >  }
> > @@ -406,6 +407,7 @@ static inline void kvm_after_interrupt(struct kvm_vcpu *vcpu)
> >  {
> >  	kvm_unregister_perf_callbacks();
> >
> > +	WRITE_ONCE(vcpu->arch.handling_nmi_from_guest, false);
> >  	__this_cpu_write(current_vcpu, NULL);
> >  }
>
> Does this rely on kvm_{,un}register_perf_callback() being a function
> call and thus implying a sequence point to order the stores?

No, I'm just terrible at remembering which macros provide what ordering
guarantees, i.e. I was thinking WRITE_ONCE provided guarantees against
compiler reordering.
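For readers following the ordering question: WRITE_ONCE() prevents store tearing and reordering against other *_ONCE() accesses, but by itself it does not order the store against plain stores. Below is a minimal sketch, not part of the patch, of how the helper could make the ordering explicit if it were required; barrier() is the kernel's compiler barrier, and the surrounding names match the hunk quoted above.

/*
 * Sketch only, not from the patch: a compiler barrier would guarantee
 * both stores are emitted before the perf callbacks are registered.
 */
static inline void kvm_before_interrupt(struct kvm_vcpu *vcpu, bool is_nmi)
{
	__this_cpu_write(current_vcpu, vcpu);
	WRITE_ONCE(vcpu->arch.handling_nmi_from_guest, is_nmi);
	barrier();	/* order the stores before registering callbacks */

	kvm_register_perf_callbacks();
}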
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 1ea4943a73d7..465b35736d9b 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -763,6 +763,7 @@ struct kvm_vcpu_arch {
 	unsigned nmi_pending; /* NMI queued after currently running handler */
 	bool nmi_injected;    /* Trying to inject an NMI this entry */
 	bool smi_pending;    /* SMI queued after currently running handler */
+	bool handling_nmi_from_guest;

 	struct kvm_mtrr mtrr_state;
 	u64 pat;
@@ -1874,8 +1875,6 @@ int kvm_skip_emulated_instruction(struct kvm_vcpu *vcpu);
 int kvm_complete_insn_gp(struct kvm_vcpu *vcpu, int err);
 void __kvm_request_immediate_exit(struct kvm_vcpu *vcpu);

-int kvm_is_in_guest(void);
-
 void __user *__x86_set_memory_region(struct kvm *kvm, int id, gpa_t gpa,
 				     u32 size);
 bool kvm_vcpu_is_reset_bsp(struct kvm_vcpu *vcpu);
diff --git a/arch/x86/kvm/pmu.c b/arch/x86/kvm/pmu.c
index 0772bad9165c..2b8934b452ea 100644
--- a/arch/x86/kvm/pmu.c
+++ b/arch/x86/kvm/pmu.c
@@ -87,7 +87,7 @@ static void kvm_perf_overflow_intr(struct perf_event *perf_event,
 	 * woken up. So we should wake it, but this is impossible from
 	 * NMI context. Do it from irq work instead.
 	 */
-	if (!kvm_is_in_guest())
+	if (!pmc->vcpu->arch.handling_nmi_from_guest)
 		irq_work_queue(&pmc_to_pmu(pmc)->irq_work);
 	else
 		kvm_make_request(KVM_REQ_PMI, pmc->vcpu);
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index 1a70e11f0487..3fc6767e5fd8 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -3843,7 +3843,7 @@ static __no_kcsan fastpath_t svm_vcpu_run(struct kvm_vcpu *vcpu)
 	}

 	if (unlikely(svm->vmcb->control.exit_code == SVM_EXIT_NMI))
-		kvm_before_interrupt(vcpu);
+		kvm_before_interrupt(vcpu, true);

 	kvm_load_host_xsave_state(vcpu);
 	stgi();
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index f19d72136f77..f08980ef7c44 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -6344,7 +6344,7 @@ void vmx_do_interrupt_nmi_irqoff(unsigned long entry);
 static void handle_interrupt_nmi_irqoff(struct kvm_vcpu *vcpu,
 					unsigned long entry)
 {
-	kvm_before_interrupt(vcpu);
+	kvm_before_interrupt(vcpu, entry == (unsigned long)asm_exc_nmi_noist);
 	vmx_do_interrupt_nmi_irqoff(entry);
 	kvm_after_interrupt(vcpu);
 }
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index bc4ee6ea7752..d4d91944fde7 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -8267,7 +8267,7 @@ static void kvm_timer_init(void)
 DEFINE_PER_CPU(struct kvm_vcpu *, current_vcpu);
 EXPORT_PER_CPU_SYMBOL_GPL(current_vcpu);

-int kvm_is_in_guest(void)
+static int kvm_is_in_guest(void)
 {
 	return __this_cpu_read(current_vcpu) != NULL;
 }
@@ -9678,7 +9678,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
 	 * interrupts on processors that implement an interrupt shadow, the
 	 * stat.exits increment will do nicely.
 	 */
-	kvm_before_interrupt(vcpu);
+	kvm_before_interrupt(vcpu, false);
 	local_irq_enable();
 	++vcpu->stat.exits;
 	local_irq_disable();
diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h
index 5cedc0e8a5d5..4c5ba4128b38 100644
--- a/arch/x86/kvm/x86.h
+++ b/arch/x86/kvm/x86.h
@@ -395,9 +395,10 @@ static inline void kvm_unregister_perf_callbacks(void)

 DECLARE_PER_CPU(struct kvm_vcpu *, current_vcpu);

-static inline void kvm_before_interrupt(struct kvm_vcpu *vcpu)
+static inline void kvm_before_interrupt(struct kvm_vcpu *vcpu, bool is_nmi)
 {
 	__this_cpu_write(current_vcpu, vcpu);
+	WRITE_ONCE(vcpu->arch.handling_nmi_from_guest, is_nmi);

 	kvm_register_perf_callbacks();
 }
@@ -406,6 +407,7 @@ static inline void kvm_after_interrupt(struct kvm_vcpu *vcpu)
 {
 	kvm_unregister_perf_callbacks();

+	WRITE_ONCE(vcpu->arch.handling_nmi_from_guest, false);
 	__this_cpu_write(current_vcpu, NULL);
 }
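To make the intended call pattern concrete, here is a hypothetical caller (example_forward_guest_nmi is illustrative, not a call site added by the patch) showing how the before/after helpers bracket the window in which a guest-induced NMI is handled on the host:

/* Hypothetical usage sketch; not code from this patch. */
static void example_forward_guest_nmi(struct kvm_vcpu *vcpu)
{
	kvm_before_interrupt(vcpu, true);	/* true: the exit was an NMI, not an IRQ */
	/*
	 * The host NMI handler runs here; perf's PMC overflow callback can
	 * observe vcpu->arch.handling_nmi_from_guest == true.
	 */
	kvm_after_interrupt(vcpu);		/* clears the flag, unregisters callbacks */
}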
Add a dedicated flag to detect the case where KVM's PMC overflow
callback was originally invoked in response to an NMI that arrived
while the guest was running.

Using current_vcpu is less precise as IRQs also set current_vcpu
(though presumably KVM's callback should not be reached in that case),
and more importantly, this will allow dropping current_vcpu as the perf
callbacks can switch to kvm_running_vcpu now that the perf callbacks
are precisely registered, i.e. kvm_running_vcpu doesn't need to be used
to detect if a PMI arrived in the guest.

Fixes: dd60d217062f ("KVM: x86: Fix perf timer mode IP reporting")
Signed-off-by: Sean Christopherson <seanjc@google.com>
---
 arch/x86/include/asm/kvm_host.h | 3 +--
 arch/x86/kvm/pmu.c              | 2 +-
 arch/x86/kvm/svm/svm.c          | 2 +-
 arch/x86/kvm/vmx/vmx.c          | 2 +-
 arch/x86/kvm/x86.c              | 4 ++--
 arch/x86/kvm/x86.h              | 4 +++-
 6 files changed, 9 insertions(+), 8 deletions(-)
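As a sketch of the direction described in the changelog (hypothetical, not part of this patch): once the perf callbacks are registered only around guest-induced NMIs/IRQs, the in-guest check could key off KVM's existing kvm_get_running_vcpu() helper rather than the dedicated current_vcpu per-CPU variable:

/* Hypothetical follow-up sketch, not part of this patch. */
static int kvm_is_in_guest(void)
{
	return kvm_get_running_vcpu() != NULL;
}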