Message ID | 1587468026-15753-2-git-send-email-wanpengli@tencent.com (mailing list archive) |
---|---|
State | New, archived |
Series | KVM: VMX: Tscdeadline timer emulation fastpath |
On 21/04/20 13:20, Wanpeng Li wrote:
> +	case MSR_IA32_TSCDEADLINE:
> +		if (!kvm_x86_ops.event_needs_reinjection(vcpu)) {
> +			data = kvm_read_edx_eax(vcpu);
> +			if (!handle_fastpath_set_tscdeadline(vcpu, data))
> +				ret = EXIT_FASTPATH_CONT_RUN;
> +		}
> 		break;

Can you explain the event_needs_reinjection case?  Also, does this break
AMD which does not implement the callback?

> +
> +	reg = kvm_lapic_get_reg(apic, APIC_LVTT);
> +	if (kvm_apic_hw_enabled(apic) && !(reg & APIC_LVT_MASKED)) {
> +		vector = reg & APIC_VECTOR_MASK;
> +		kvm_lapic_clear_vector(vector, apic->regs + APIC_TMR);
> +
> +		if (vcpu->arch.apicv_active) {
> +			if (pi_test_and_set_pir(vector, &vmx->pi_desc))
> +				return;
> +
> +			if (pi_test_and_set_on(&vmx->pi_desc))
> +				return;
> +
> +			vmx_sync_pir_to_irr(vcpu);
> +		} else {
> +			kvm_lapic_set_irr(vector, apic);
> +			kvm_queue_interrupt(vcpu, kvm_cpu_get_interrupt(vcpu), false);
> +			vmx_inject_irq(vcpu);
> +		}
> +	}

This is mostly a copy of

	if (kvm_x86_ops.deliver_posted_interrupt(vcpu, vector)) {
		kvm_lapic_set_irr(vector, apic);
		kvm_make_request(KVM_REQ_EVENT, vcpu);
		kvm_vcpu_kick(vcpu);
	}
	break;

(is it required to do vmx_sync_pir_to_irr?).  So you should not special
case LVTT and move this code to lapic.c instead.  But even before that...

>
> +
> +	if (kvm_start_hv_timer(apic)) {
> +		if (kvm_check_request(KVM_REQ_PENDING_TIMER, vcpu)) {
> +			if (kvm_x86_ops.interrupt_allowed(vcpu)) {
> +				kvm_clear_request(KVM_REQ_PENDING_TIMER, vcpu);
> +				kvm_x86_ops.fast_deliver_interrupt(vcpu);
> +				atomic_set(&apic->lapic_timer.pending, 0);
> +				apic->lapic_timer.tscdeadline = 0;
> +				return 0;
> +			}
> +			return 1;
>

Is it actually common that the timer is set back in time and therefore
this code is executed?

Paolo
On Tue, 21 Apr 2020 at 19:37, Paolo Bonzini <pbonzini@redhat.com> wrote:
>
> On 21/04/20 13:20, Wanpeng Li wrote:
> > +	case MSR_IA32_TSCDEADLINE:
> > +		if (!kvm_x86_ops.event_needs_reinjection(vcpu)) {
> > +			data = kvm_read_edx_eax(vcpu);
> > +			if (!handle_fastpath_set_tscdeadline(vcpu, data))
> > +				ret = EXIT_FASTPATH_CONT_RUN;
> > +		}
> > 		break;
>
> Can you explain the event_needs_reinjection case?  Also, does this break

This is used to catch the case where a vmexit occurred while another event
was being delivered to guest software. I moved the
vmx_exit_handlers_fastpath() call after vmx_complete_interrupts(), which
will decode such an event and make kvm_event_needs_reinjection() return
true.

> AMD which does not implement the callback?

For now I add the tscdeadline msr emulation and vmx-preemption timer
fastpath pair only for the Intel platform.

>
> > +
> > +	reg = kvm_lapic_get_reg(apic, APIC_LVTT);
> > +	if (kvm_apic_hw_enabled(apic) && !(reg & APIC_LVT_MASKED)) {
> > +		vector = reg & APIC_VECTOR_MASK;
> > +		kvm_lapic_clear_vector(vector, apic->regs + APIC_TMR);
> > +
> > +		if (vcpu->arch.apicv_active) {
> > +			if (pi_test_and_set_pir(vector, &vmx->pi_desc))
> > +				return;
> > +
> > +			if (pi_test_and_set_on(&vmx->pi_desc))
> > +				return;
> > +
> > +			vmx_sync_pir_to_irr(vcpu);
> > +		} else {
> > +			kvm_lapic_set_irr(vector, apic);
> > +			kvm_queue_interrupt(vcpu, kvm_cpu_get_interrupt(vcpu), false);
> > +			vmx_inject_irq(vcpu);
> > +		}
> > +	}
>
> This is mostly a copy of
>
> 	if (kvm_x86_ops.deliver_posted_interrupt(vcpu, vector)) {
> 		kvm_lapic_set_irr(vector, apic);
> 		kvm_make_request(KVM_REQ_EVENT, vcpu);
> 		kvm_vcpu_kick(vcpu);
> 	}
> 	break;
>
> (is it required to do vmx_sync_pir_to_irr?).  So you should not special

I observe that sending the notification vector as in
kvm_x86_ops.deliver_posted_interrupt() is ~900 cycles worse than
vmx_sync_pir_to_irr() in my case. It needs to wait for guest vmentry, then
for the physical cpu to ack the notification vector, read the
posted-interrupt descriptor, etc. For the non-APICv part, the original
copy needs to wait for inject_pending_event() to do this work.

> case LVTT and move this code to lapic.c instead.  But even before that...
>
> >
> > +
> > +	if (kvm_start_hv_timer(apic)) {
> > +		if (kvm_check_request(KVM_REQ_PENDING_TIMER, vcpu)) {
> > +			if (kvm_x86_ops.interrupt_allowed(vcpu)) {
> > +				kvm_clear_request(KVM_REQ_PENDING_TIMER, vcpu);
> > +				kvm_x86_ops.fast_deliver_interrupt(vcpu);
> > +				atomic_set(&apic->lapic_timer.pending, 0);
> > +				apic->lapic_timer.tscdeadline = 0;
> > +				return 0;
> > +			}
> > +			return 1;
>
> Is it actually common that the timer is set back in time and therefore
> this code is executed?

It is used to handle the already-expired timer.

Wanpeng
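For reference, the ordering described above, excerpted from the vmx_vcpu_run()
hunk in the patch below with explanatory comments added (simplified; not the
complete function):

	/* Decode IDT_VECTORING_INFO_FIELD first, so that an event cut off by
	 * this vmexit is recorded for re-injection and
	 * kvm_event_needs_reinjection() is accurate in the fastpath. */
	vmx->idt_vectoring_info = vmcs_read32(IDT_VECTORING_INFO_FIELD);

	vmx_recover_nmi_blocking(vmx);
	vmx_complete_interrupts(vmx);

	/* Only now is the MSR-write fastpath allowed to run ... */
	exit_fastpath = vmx_exit_handlers_fastpath(vcpu);
	if (exit_fastpath == EXIT_FASTPATH_CONT_RUN)
		/* ... and, if it armed the timer, re-enter the guest directly. */
		goto continue_vmx_vcpu_run;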
On 22/04/20 02:48, Wanpeng Li wrote:
> On Tue, 21 Apr 2020 at 19:37, Paolo Bonzini <pbonzini@redhat.com> wrote:
>> On 21/04/20 13:20, Wanpeng Li wrote:
>>> +	case MSR_IA32_TSCDEADLINE:
>>> +		if (!kvm_x86_ops.event_needs_reinjection(vcpu)) {
>>> +			data = kvm_read_edx_eax(vcpu);
>>> +			if (!handle_fastpath_set_tscdeadline(vcpu, data))
>>> +				ret = EXIT_FASTPATH_CONT_RUN;
>>> +		}
>>> 		break;
>> Can you explain the event_needs_reinjection case?  Also, does this break
> This is used to catch the case where a vmexit occurred while another event
> was being delivered to guest software. I moved the
> vmx_exit_handlers_fastpath() call after vmx_complete_interrupts(), which
> will decode such an event and make kvm_event_needs_reinjection() return
> true.

This doesn't need a callback; kvm_event_needs_reinjection() should be
enough.

You should also check other conditions such as

	vcpu->mode == EXITING_GUEST_MODE || kvm_request_pending(vcpu)
	|| need_resched() || signal_pending(current)

before doing CONT_RUN.

>> AMD which does not implement the callback?
> For now I add the tscdeadline msr emulation and vmx-preemption timer
> fastpath pair only for the Intel platform.

But this would cause a crash if you run on AMD.

Paolo
On Wed, 22 Apr 2020 at 16:38, Paolo Bonzini <pbonzini@redhat.com> wrote:
>
> On 22/04/20 02:48, Wanpeng Li wrote:
> > On Tue, 21 Apr 2020 at 19:37, Paolo Bonzini <pbonzini@redhat.com> wrote:
> >> On 21/04/20 13:20, Wanpeng Li wrote:
> >>> +	case MSR_IA32_TSCDEADLINE:
> >>> +		if (!kvm_x86_ops.event_needs_reinjection(vcpu)) {
> >>> +			data = kvm_read_edx_eax(vcpu);
> >>> +			if (!handle_fastpath_set_tscdeadline(vcpu, data))
> >>> +				ret = EXIT_FASTPATH_CONT_RUN;
> >>> +		}
> >>> 		break;
> >> Can you explain the event_needs_reinjection case?  Also, does this break
> > This is used to catch the case where a vmexit occurred while another event
> > was being delivered to guest software. I moved the
> > vmx_exit_handlers_fastpath() call after vmx_complete_interrupts(), which
> > will decode such an event and make kvm_event_needs_reinjection() return
> > true.
>
> This doesn't need a callback; kvm_event_needs_reinjection() should be
> enough.
>
> You should also check other conditions such as
>
> 	vcpu->mode == EXITING_GUEST_MODE || kvm_request_pending(vcpu)
> 	|| need_resched() || signal_pending(current)
>
> before doing CONT_RUN.

Agreed.

>
> >> AMD which does not implement the callback?
> > For now I add the tscdeadline msr emulation and vmx-preemption timer
> > fastpath pair only for the Intel platform.
>
> But this would cause a crash if you run on AMD.

Yes. :)

Wanpeng
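Putting the two review points above together, a minimal sketch of the kind of
guard that could gate EXIT_FASTPATH_CONT_RUN; the helper name is hypothetical
and this is not part of the posted patch:

	/* Hypothetical helper combining the checks suggested in this thread. */
	static bool fastpath_can_continue_run(struct kvm_vcpu *vcpu)
	{
		/* An event awaiting re-injection must take the slow path. */
		if (kvm_event_needs_reinjection(vcpu))
			return false;

		/* Something wants us back in the main vcpu loop or in userspace. */
		if (vcpu->mode == EXITING_GUEST_MODE || kvm_request_pending(vcpu))
			return false;

		/* Give the CPU back to the host when it needs it. */
		if (need_resched() || signal_pending(current))
			return false;

		return true;
	}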
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index f26df2c..1626ce3 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -188,6 +188,7 @@ enum {
 enum exit_fastpath_completion {
 	EXIT_FASTPATH_NONE,
 	EXIT_FASTPATH_SKIP_EMUL_INS,
+	EXIT_FASTPATH_CONT_RUN,
 };
 
 struct x86_emulate_ctxt;
@@ -1265,6 +1266,8 @@ struct kvm_x86_ops {
 
 	bool (*apic_init_signal_blocked)(struct kvm_vcpu *vcpu);
 	int (*enable_direct_tlbflush)(struct kvm_vcpu *vcpu);
+	void (*fast_deliver_interrupt)(struct kvm_vcpu *vcpu);
+	bool (*event_needs_reinjection)(struct kvm_vcpu *vcpu);
 };
 
 struct kvm_x86_init_ops {
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 38f7dc9..9e54301 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -91,6 +91,7 @@ static inline int __apic_test_and_clear_vector(int vec, void *bitmap)
 }
 
 struct static_key_deferred apic_hw_disabled __read_mostly;
+EXPORT_SYMBOL_GPL(apic_hw_disabled);
 struct static_key_deferred apic_sw_disabled __read_mostly;
 
 static inline int apic_enabled(struct kvm_lapic *apic)
@@ -311,21 +312,6 @@ static inline int apic_lvt_enabled(struct kvm_lapic *apic, int lvt_type)
 	return !(kvm_lapic_get_reg(apic, lvt_type) & APIC_LVT_MASKED);
 }
 
-static inline int apic_lvtt_oneshot(struct kvm_lapic *apic)
-{
-	return apic->lapic_timer.timer_mode == APIC_LVT_TIMER_ONESHOT;
-}
-
-static inline int apic_lvtt_period(struct kvm_lapic *apic)
-{
-	return apic->lapic_timer.timer_mode == APIC_LVT_TIMER_PERIODIC;
-}
-
-static inline int apic_lvtt_tscdeadline(struct kvm_lapic *apic)
-{
-	return apic->lapic_timer.timer_mode == APIC_LVT_TIMER_TSCDEADLINE;
-}
-
 static inline int apic_lvt_nmi_mode(u32 lvt_val)
 {
 	return (lvt_val & (APIC_MODE_MASK | APIC_LVT_MASKED)) == APIC_DM_NMI;
@@ -1781,7 +1767,7 @@ static void cancel_hv_timer(struct kvm_lapic *apic)
 	apic->lapic_timer.hv_timer_in_use = false;
 }
 
-static bool start_hv_timer(struct kvm_lapic *apic)
+bool kvm_start_hv_timer(struct kvm_lapic *apic)
 {
 	struct kvm_timer *ktimer = &apic->lapic_timer;
 	struct kvm_vcpu *vcpu = apic->vcpu;
@@ -1847,7 +1833,7 @@ static void restart_apic_timer(struct kvm_lapic *apic)
 	if (!apic_lvtt_period(apic) && atomic_read(&apic->lapic_timer.pending))
 		goto out;
 
-	if (!start_hv_timer(apic))
+	if (!kvm_start_hv_timer(apic))
 		start_sw_timer(apic);
 out:
 	preempt_enable();
diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h
index 7f15f9e..4c917fd 100644
--- a/arch/x86/kvm/lapic.h
+++ b/arch/x86/kvm/lapic.h
@@ -251,6 +251,7 @@ void kvm_lapic_expired_hv_timer(struct kvm_vcpu *vcpu);
 bool kvm_lapic_hv_timer_in_use(struct kvm_vcpu *vcpu);
 void kvm_lapic_restart_hv_timer(struct kvm_vcpu *vcpu);
 bool kvm_can_post_timer_interrupt(struct kvm_vcpu *vcpu);
+bool kvm_start_hv_timer(struct kvm_lapic *apic);
 
 static inline enum lapic_mode kvm_apic_mode(u64 apic_base)
 {
@@ -262,4 +263,19 @@ static inline u8 kvm_xapic_id(struct kvm_lapic *apic)
 	return kvm_lapic_get_reg(apic, APIC_ID) >> 24;
 }
 
+static inline int apic_lvtt_oneshot(struct kvm_lapic *apic)
+{
+	return apic->lapic_timer.timer_mode == APIC_LVT_TIMER_ONESHOT;
+}
+
+static inline int apic_lvtt_period(struct kvm_lapic *apic)
+{
+	return apic->lapic_timer.timer_mode == APIC_LVT_TIMER_PERIODIC;
+}
+
+static inline int apic_lvtt_tscdeadline(struct kvm_lapic *apic)
+{
+	return apic->lapic_timer.timer_mode == APIC_LVT_TIMER_TSCDEADLINE;
+}
+
 #endif
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 766303b..7688e40 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -6559,6 +6559,54 @@ void vmx_update_host_rsp(struct vcpu_vmx *vmx, unsigned long host_rsp)
 	}
 }
 
+static bool vmx_event_needs_reinjection(struct kvm_vcpu *vcpu)
+{
+	return (to_vmx(vcpu)->idt_vectoring_info & VECTORING_INFO_VALID_MASK) &&
+		kvm_event_needs_reinjection(vcpu);
+}
+
+static void vmx_fast_deliver_interrupt(struct kvm_vcpu *vcpu)
+{
+	u32 reg;
+	int vector;
+	struct kvm_lapic *apic = vcpu->arch.apic;
+	struct vcpu_vmx *vmx = to_vmx(vcpu);
+
+	reg = kvm_lapic_get_reg(apic, APIC_LVTT);
+	if (kvm_apic_hw_enabled(apic) && !(reg & APIC_LVT_MASKED)) {
+		vector = reg & APIC_VECTOR_MASK;
+		kvm_lapic_clear_vector(vector, apic->regs + APIC_TMR);
+
+		if (vcpu->arch.apicv_active) {
+			if (pi_test_and_set_pir(vector, &vmx->pi_desc))
+				return;
+
+			if (pi_test_and_set_on(&vmx->pi_desc))
+				return;
+
+			vmx_sync_pir_to_irr(vcpu);
+		} else {
+			kvm_lapic_set_irr(vector, apic);
+			kvm_queue_interrupt(vcpu, kvm_cpu_get_interrupt(vcpu), false);
+			vmx_inject_irq(vcpu);
+		}
+	}
+}
+
+static enum exit_fastpath_completion vmx_exit_handlers_fastpath(struct kvm_vcpu *vcpu)
+{
+	if (!is_guest_mode(vcpu)) {
+		switch (to_vmx(vcpu)->exit_reason) {
+		case EXIT_REASON_MSR_WRITE:
+			return handle_fastpath_set_msr_irqoff(vcpu);
+		default:
+			return EXIT_FASTPATH_NONE;
+		}
+	}
+
+	return EXIT_FASTPATH_NONE;
+}
+
 bool __vmx_vcpu_run(struct vcpu_vmx *vmx, unsigned long *regs, bool launched);
 
 static enum exit_fastpath_completion vmx_vcpu_run(struct kvm_vcpu *vcpu)
@@ -6566,6 +6614,7 @@ static enum exit_fastpath_completion vmx_vcpu_run(struct kvm_vcpu *vcpu)
 	enum exit_fastpath_completion exit_fastpath;
 	struct vcpu_vmx *vmx = to_vmx(vcpu);
 	unsigned long cr3, cr4;
+continue_vmx_vcpu_run:
 
 	/* Record the guest's net vcpu time for enforced NMI injections. */
 	if (unlikely(!enable_vnmi &&
@@ -6733,17 +6782,16 @@ static enum exit_fastpath_completion vmx_vcpu_run(struct kvm_vcpu *vcpu)
 	if (unlikely(vmx->exit_reason & VMX_EXIT_REASONS_FAILED_VMENTRY))
 		return EXIT_FASTPATH_NONE;
 
-	if (!is_guest_mode(vcpu) && vmx->exit_reason == EXIT_REASON_MSR_WRITE)
-		exit_fastpath = handle_fastpath_set_msr_irqoff(vcpu);
-	else
-		exit_fastpath = EXIT_FASTPATH_NONE;
-
 	vmx->loaded_vmcs->launched = 1;
 	vmx->idt_vectoring_info = vmcs_read32(IDT_VECTORING_INFO_FIELD);
 
 	vmx_recover_nmi_blocking(vmx);
 	vmx_complete_interrupts(vmx);
 
+	exit_fastpath = vmx_exit_handlers_fastpath(vcpu);
+	if (exit_fastpath == EXIT_FASTPATH_CONT_RUN)
+		goto continue_vmx_vcpu_run;
+
 	return exit_fastpath;
 }
@@ -7885,6 +7933,8 @@ static struct kvm_x86_ops vmx_x86_ops __initdata = {
 	.nested_get_evmcs_version = NULL,
 	.need_emulation_on_page_fault = vmx_need_emulation_on_page_fault,
 	.apic_init_signal_blocked = vmx_apic_init_signal_blocked,
+	.fast_deliver_interrupt = vmx_fast_deliver_interrupt,
+	.event_needs_reinjection = vmx_event_needs_reinjection,
 };
 
 static __init int hardware_setup(void)
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 59958ce..9c6733d 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -1609,27 +1609,70 @@ static int handle_fastpath_set_x2apic_icr_irqoff(struct kvm_vcpu *vcpu, u64 data)
 	return 1;
 }
 
+static int handle_fastpath_set_tscdeadline(struct kvm_vcpu *vcpu, u64 data)
+{
+	struct kvm_lapic *apic = vcpu->arch.apic;
+
+	if (!lapic_in_kernel(vcpu) || apic_lvtt_oneshot(apic) ||
+		apic_lvtt_period(apic))
+		return 0;
+
+	if (!kvm_x86_ops.set_hv_timer ||
+		kvm_mwait_in_guest(vcpu->kvm) ||
+		kvm_can_post_timer_interrupt(vcpu))
+		return 1;
+
+	hrtimer_cancel(&apic->lapic_timer.timer);
+	apic->lapic_timer.tscdeadline = data;
+	atomic_set(&apic->lapic_timer.pending, 0);
+
+	if (kvm_start_hv_timer(apic)) {
+		if (kvm_check_request(KVM_REQ_PENDING_TIMER, vcpu)) {
+			if (kvm_x86_ops.interrupt_allowed(vcpu)) {
+				kvm_clear_request(KVM_REQ_PENDING_TIMER, vcpu);
+				kvm_x86_ops.fast_deliver_interrupt(vcpu);
+				atomic_set(&apic->lapic_timer.pending, 0);
+				apic->lapic_timer.tscdeadline = 0;
+				return 0;
+			}
+			return 1;
+		}
+		return 0;
+	}
+
+	return 1;
+}
+
 enum exit_fastpath_completion handle_fastpath_set_msr_irqoff(struct kvm_vcpu *vcpu)
 {
 	u32 msr = kvm_rcx_read(vcpu);
 	u64 data;
-	int ret = 0;
+	int ret = EXIT_FASTPATH_NONE;
 
 	switch (msr) {
 	case APIC_BASE_MSR + (APIC_ICR >> 4):
 		data = kvm_read_edx_eax(vcpu);
-		ret = handle_fastpath_set_x2apic_icr_irqoff(vcpu, data);
+		if (!handle_fastpath_set_x2apic_icr_irqoff(vcpu, data))
+			ret = EXIT_FASTPATH_SKIP_EMUL_INS;
+		break;
+	case MSR_IA32_TSCDEADLINE:
+		if (!kvm_x86_ops.event_needs_reinjection(vcpu)) {
+			data = kvm_read_edx_eax(vcpu);
+			if (!handle_fastpath_set_tscdeadline(vcpu, data))
+				ret = EXIT_FASTPATH_CONT_RUN;
+		}
 		break;
 	default:
-		return EXIT_FASTPATH_NONE;
+		ret = EXIT_FASTPATH_NONE;
 	}
 
-	if (!ret) {
+	if (ret != EXIT_FASTPATH_NONE) {
 		trace_kvm_msr_write(msr, data);
-		return EXIT_FASTPATH_SKIP_EMUL_INS;
+		if (ret == EXIT_FASTPATH_CONT_RUN)
+			kvm_skip_emulated_instruction(vcpu);
 	}
 
-	return EXIT_FASTPATH_NONE;
+	return ret;
 }
 
 EXPORT_SYMBOL_GPL(handle_fastpath_set_msr_irqoff);
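For context, the guest operation this fastpath short-circuits is a WRMSR to
IA32_TSC_DEADLINE (MSR 0x6e0) while the local APIC timer is in TSC-deadline
mode. A minimal ring-0 guest-side sketch, with illustrative helper names that
are not part of the patch:

	#include <stdint.h>

	#define MSR_IA32_TSC_DEADLINE	0x6e0	/* architectural TSC-deadline MSR */

	/* WRMSR takes the MSR index in ECX and the 64-bit value in EDX:EAX,
	 * which is why the host fastpath fetches it with kvm_read_edx_eax(). */
	static inline void wrmsr(uint32_t msr, uint64_t val)
	{
		asm volatile("wrmsr"
			     :: "c"(msr), "a"((uint32_t)val), "d"((uint32_t)(val >> 32))
			     : "memory");
	}

	static inline uint64_t rdtsc(void)
	{
		uint32_t lo, hi;

		asm volatile("rdtsc" : "=a"(lo), "=d"(hi));
		return ((uint64_t)hi << 32) | lo;
	}

	/* Arm the TSC-deadline timer 'delta' TSC cycles from now; under KVM this
	 * write vmexits and, with this patch, is handled in the fastpath. */
	static inline void arm_tsc_deadline(uint64_t delta)
	{
		wrmsr(MSR_IA32_TSC_DEADLINE, rdtsc() + delta);
	}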