| Message ID | 1354968271-21335-3-git-send-email-yang.z.zhang@intel.com (mailing list archive) |
|---|---|
| State | New, archived |
On Sat, Dec 08, 2012 at 08:04:30PM +0800, Yang Zhang wrote:
> Virtual interrupt delivery avoids the need for KVM to inject vAPIC
> interrupts manually; that is fully taken care of by the hardware. This
> needs some special awareness in the existing interrupt injection path:
>
> - for a pending interrupt, instead of direct injection, we may need to
>   update architecture-specific indicators before resuming to the guest.
>
> - A pending interrupt that is masked by the ISR should also be
>   considered in the above update action, since hardware will decide
>   when to inject it at the right time. The current has_interrupt and
>   get_interrupt only return a valid vector from the injection p.o.v.
>
> Signed-off-by: Kevin Tian <kevin.tian@intel.com>
> Signed-off-by: Yang Zhang <yang.z.zhang@intel.com>
> ---
>  arch/x86/include/asm/kvm_host.h |    4 +
>  arch/x86/include/asm/vmx.h      |   11 ++++
>  arch/x86/kvm/irq.c              |   79 +++++++++++++++++++++------
>  arch/x86/kvm/lapic.c            |  101 +++++++++++++++++++++++++++++++---
>  arch/x86/kvm/lapic.h            |   11 ++++
>  arch/x86/kvm/svm.c              |   19 ++++++
>  arch/x86/kvm/vmx.c              |  116 +++++++++++++++++++++++++++++++++++++-
>  arch/x86/kvm/x86.c              |   18 ++--
>  virt/kvm/ioapic.c               |   35 ++++++++++++
>  virt/kvm/ioapic.h               |    1 +
>  10 files changed, 358 insertions(+), 37 deletions(-)
>
> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> index dc87b65..7e26d1a 100644
> --- a/arch/x86/include/asm/kvm_host.h
> +++ b/arch/x86/include/asm/kvm_host.h
> @@ -697,6 +697,9 @@ struct kvm_x86_ops {
>          void (*enable_nmi_window)(struct kvm_vcpu *vcpu);
>          void (*enable_irq_window)(struct kvm_vcpu *vcpu);
>          void (*update_cr8_intercept)(struct kvm_vcpu *vcpu, int tpr, int irr);
> +        int (*has_virtual_interrupt_delivery)(struct kvm_vcpu *vcpu);
> +        void (*update_irq)(struct kvm_vcpu *vcpu);
> +        void (*update_eoi_exitmap)(struct kvm_vcpu *vcpu, int vector, bool set);
>          int (*set_tss_addr)(struct kvm *kvm, unsigned int addr);
>          int (*get_tdp_level)(void);
>          u64 (*get_mt_mask)(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio);
> @@ -991,6 +994,7 @@ int kvm_age_hva(struct kvm *kvm, unsigned long hva);
>  int kvm_test_age_hva(struct kvm *kvm, unsigned long hva);
>  void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte);
>  int cpuid_maxphyaddr(struct kvm_vcpu *vcpu);
> +int kvm_cpu_has_injectable_intr(struct kvm_vcpu *v);
>  int kvm_cpu_has_interrupt(struct kvm_vcpu *vcpu);
>  int kvm_arch_interrupt_allowed(struct kvm_vcpu *vcpu);
>  int kvm_cpu_get_interrupt(struct kvm_vcpu *v);
> diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h
> index 21101b6..1003341 100644
> --- a/arch/x86/include/asm/vmx.h
> +++ b/arch/x86/include/asm/vmx.h
> @@ -62,6 +62,7 @@
>  #define EXIT_REASON_MCE_DURING_VMENTRY  41
>  #define EXIT_REASON_TPR_BELOW_THRESHOLD 43
>  #define EXIT_REASON_APIC_ACCESS         44
> +#define EXIT_REASON_EOI_INDUCED         45
>  #define EXIT_REASON_EPT_VIOLATION       48
>  #define EXIT_REASON_EPT_MISCONFIG       49
>  #define EXIT_REASON_WBINVD              54
> @@ -143,6 +144,7 @@
>  #define SECONDARY_EXEC_WBINVD_EXITING        0x00000040
>  #define SECONDARY_EXEC_UNRESTRICTED_GUEST    0x00000080
>  #define SECONDARY_EXEC_APIC_REGISTER_VIRT    0x00000100
> +#define SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY 0x00000200
>  #define SECONDARY_EXEC_PAUSE_LOOP_EXITING    0x00000400
>  #define SECONDARY_EXEC_ENABLE_INVPCID        0x00001000
>
> @@ -180,6 +182,7 @@ enum vmcs_field {
>          GUEST_GS_SELECTOR   = 0x0000080a,
>          GUEST_LDTR_SELECTOR = 0x0000080c,
>          GUEST_TR_SELECTOR   = 0x0000080e,
> +        GUEST_INTR_STATUS   = 0x00000810,
>          HOST_ES_SELECTOR    = 0x00000c00,
>          HOST_CS_SELECTOR    = 0x00000c02,
>          HOST_SS_SELECTOR    = 0x00000c04,
> @@ -207,6 +210,14 @@ enum vmcs_field {
>          APIC_ACCESS_ADDR_HIGH       = 0x00002015,
>          EPT_POINTER                 = 0x0000201a,
>          EPT_POINTER_HIGH            = 0x0000201b,
> +        EOI_EXIT_BITMAP0            = 0x0000201c,
> +        EOI_EXIT_BITMAP0_HIGH       = 0x0000201d,
> +        EOI_EXIT_BITMAP1            = 0x0000201e,
> +        EOI_EXIT_BITMAP1_HIGH       = 0x0000201f,
> +        EOI_EXIT_BITMAP2            = 0x00002020,
> +        EOI_EXIT_BITMAP2_HIGH       = 0x00002021,
> +        EOI_EXIT_BITMAP3            = 0x00002022,
> +        EOI_EXIT_BITMAP3_HIGH       = 0x00002023,
>          GUEST_PHYSICAL_ADDRESS      = 0x00002400,
>          GUEST_PHYSICAL_ADDRESS_HIGH = 0x00002401,
>          VMCS_LINK_POINTER           = 0x00002800,
> diff --git a/arch/x86/kvm/irq.c b/arch/x86/kvm/irq.c
> index 7e06ba1..5cbc631 100644
> --- a/arch/x86/kvm/irq.c
> +++ b/arch/x86/kvm/irq.c
> @@ -38,50 +38,95 @@ int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu)
>  EXPORT_SYMBOL(kvm_cpu_has_pending_timer);
>
>  /*
> + * check if there is an injectable interrupt:
> + * a. when virtual interrupt delivery is enabled,
> + *    interrupts from the apic will be handled by hardware,
> + *    so we don't need to check them here.
> + * b. without virtual interrupt delivery, handle
> + *    it the old way: check the apic first, then the pic.
> + */

The old way is incorrect, and after the patch guests with APICv and
without it will behave differently. The old way should be fixed
separately, though. I'll look into it.

> +int kvm_cpu_has_injectable_intr(struct kvm_vcpu *v)
> +{
> +        if (!irqchip_in_kernel(v->kvm))
> +                return v->arch.interrupt.pending;
> +
> +        if (kvm_apic_vid_enabled(v))
> +                return kvm_cpu_has_extint(v); /* non-APIC */
> +        else if (kvm_apic_has_interrupt(v) == -1) /* LAPIC */
> +                return kvm_cpu_has_extint(v); /* non-APIC */
> +
> +        return 1;
> +}
> +EXPORT_SYMBOL_GPL(kvm_cpu_has_injectable_intr);

Why export it?

> +
> +/*
>   * check if there is pending interrupt without
>   * intack.
>   */
>  int kvm_cpu_has_interrupt(struct kvm_vcpu *v)
>  {
> -        struct kvm_pic *s;
> -
>          if (!irqchip_in_kernel(v->kvm))
>                  return v->arch.interrupt.pending;
>
> -        if (kvm_apic_has_interrupt(v) == -1) { /* LAPIC */
> -                if (kvm_apic_accept_pic_intr(v)) {
> -                        s = pic_irqchip(v->kvm); /* PIC */
> -                        return s->output;
> -                } else
> -                        return 0;
> -        }
> +        if (kvm_apic_has_interrupt(v) == -1) /* LAPIC */
> +                return kvm_cpu_has_extint(v); /* non-APIC */
>          return 1;
>  }
>  EXPORT_SYMBOL_GPL(kvm_cpu_has_interrupt);
>
>  /*
> + * check if there is a pending interrupt from
> + * a non-APIC source, without intack.
> + */
> +int kvm_cpu_has_extint(struct kvm_vcpu *v)

static, drop from lapic.h.

> +{
> +        struct kvm_pic *s;
> +
> +        if (kvm_apic_accept_pic_intr(v)) {
> +                s = pic_irqchip(v->kvm); /* PIC */
> +                return s->output;
> +        } else
> +                return 0;
> +}
> +
> +/*
>   * Read pending interrupt vector and intack.
>   */
>  int kvm_cpu_get_interrupt(struct kvm_vcpu *v)
>  {
> -        struct kvm_pic *s;
>          int vector;
>
>          if (!irqchip_in_kernel(v->kvm))
>                  return v->arch.interrupt.nr;
>
> -        vector = kvm_get_apic_interrupt(v); /* APIC */
> -        if (vector == -1) {
> -                if (kvm_apic_accept_pic_intr(v)) {
> -                        s = pic_irqchip(v->kvm);
> -                        s->output = 0; /* PIC */
> -                        vector = kvm_pic_read_irq(v->kvm);
> -                }
> +        if (kvm_apic_vid_enabled(v))
> +                vector = kvm_cpu_get_extint(v); /* non-APIC */
> +        else {
> +                vector = kvm_get_apic_interrupt(v); /* APIC */
> +                if (vector == -1)
> +                        vector = kvm_cpu_get_extint(v); /* non-APIC */
>          }
>          return vector;
>  }
>  EXPORT_SYMBOL_GPL(kvm_cpu_get_interrupt);
>
> +/*
> + * Read the pending interrupt (from a non-APIC source)
> + * vector and intack.
> + */
> +int kvm_cpu_get_extint(struct kvm_vcpu *v)

static, drop from lapic.h.
> +{
> +        struct kvm_pic *s;
> +        int vector = -1;
> +
> +        if (kvm_apic_accept_pic_intr(v)) {
> +                s = pic_irqchip(v->kvm);
> +                s->output = 0; /* PIC */
> +                vector = kvm_pic_read_irq(v->kvm);
> +        }
> +        return vector;
> +}
> +
>  void kvm_inject_pending_timer_irqs(struct kvm_vcpu *vcpu)
>  {
>          kvm_inject_apic_timer_irqs(vcpu);
> diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
> index 0664c13..2109a6a 100644
> --- a/arch/x86/kvm/lapic.c
> +++ b/arch/x86/kvm/lapic.c
> @@ -236,12 +236,14 @@ static inline void kvm_apic_set_id(struct kvm_lapic *apic, u8 id)
>  {
>          apic_set_reg(apic, APIC_ID, id << 24);
>          recalculate_apic_map(apic->vcpu->kvm);
> +        ioapic_update_eoi_exitmap(apic->vcpu->kvm);
>  }
>
>  static inline void kvm_apic_set_ldr(struct kvm_lapic *apic, u32 id)
>  {
>          apic_set_reg(apic, APIC_LDR, id);
>          recalculate_apic_map(apic->vcpu->kvm);
> +        ioapic_update_eoi_exitmap(apic->vcpu->kvm);
>  }
>
>  static inline int apic_lvt_enabled(struct kvm_lapic *apic, int lvt_type)
> @@ -398,6 +400,7 @@ int kvm_lapic_find_highest_irr(struct kvm_vcpu *vcpu)
>
>          return highest_irr;
>  }
> +EXPORT_SYMBOL_GPL(kvm_lapic_find_highest_irr);
>

Maybe we should pass the highest irr to update_irq() instead of
exporting the function.

>  static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode,
>                               int vector, int level, int trig_mode);
> @@ -577,6 +580,64 @@ int kvm_apic_match_dest(struct kvm_vcpu *vcpu, struct kvm_lapic *source,
>          return result;
>  }
>
> +static void kvm_apic_update_eoi_exitmap(struct kvm_vcpu *vcpu,
> +                                        int vector, bool set)
> +{
> +        kvm_x86_ops->update_eoi_exitmap(vcpu, vector, set);
> +}
> +
> +void kvm_update_eoi_exitmap(struct kvm *kvm, struct kvm_lapic_irq *irq)
> +{
> +        struct kvm_vcpu *vcpu;
> +        struct kvm_lapic **dst;
> +        struct kvm_apic_map *map;
> +        unsigned long bitmap = 1;
> +        bool set;
> +        int i;
> +        /* KVM only support 255 vcpu*/

Let's use KVM_MAX_VCPUS instead.
> +        unsigned long vcpu_map[4];

DECLARE_BITMAP(vcpu_map, KVM_MAX_VCPUS)

> +
> +        bitmap_zero(vcpu_map, 255);
> +
> +        rcu_read_lock();
> +        map = rcu_dereference(kvm->arch.apic_map);
> +
> +        if (unlikely(!map)) {
> +                bitmap_fill(vcpu_map, 255);
> +                goto out;
> +        }
> +
> +        if (irq->dest_mode == 0) { /* physical mode */
> +                if (irq->delivery_mode == APIC_DM_LOWEST ||
> +                                irq->dest_id == 0xff) {
> +                        bitmap_fill(vcpu_map, 255);
> +                        goto out;
> +                }
> +                dst = &map->phys_map[irq->dest_id & 0xff];
> +        } else {
> +                u32 mda = irq->dest_id << (32 - map->ldr_bits);
> +
> +                dst = map->logical_map[apic_cluster_id(map, mda)];
> +
> +                bitmap = apic_logical_id(map, mda);
> +        }
> +
> +        for_each_set_bit(i, &bitmap, 16) {
> +                if (!dst[i])
> +                        continue;
> +                set_bit(dst[i]->vcpu->vcpu_id, vcpu_map);
> +        }
> +
> +out:
> +        rcu_read_unlock();
> +        kvm_for_each_vcpu(i, vcpu, kvm) {
> +                if (!kvm_apic_present(vcpu))
> +                        continue;
> +                set = test_bit(vcpu->vcpu_id, vcpu_map);
> +                kvm_apic_update_eoi_exitmap(vcpu, irq->vector, set);
> +        }
> +
> +}
>  bool kvm_irq_delivery_to_apic_fast(struct kvm *kvm, struct kvm_lapic *src,
>                  struct kvm_lapic_irq *irq, int *r)
>  {
> @@ -740,6 +801,19 @@ int kvm_apic_compare_prio(struct kvm_vcpu *vcpu1, struct kvm_vcpu *vcpu2)
>          return vcpu1->arch.apic_arb_prio - vcpu2->arch.apic_arb_prio;
>  }
>
> +static void kvm_ioapic_send_eoi(struct kvm_lapic *apic, int vector)
> +{
> +        if (!(kvm_apic_get_reg(apic, APIC_SPIV) & APIC_SPIV_DIRECTED_EOI) &&
> +            kvm_ioapic_handles_vector(apic->vcpu->kvm, vector)) {
> +                int trigger_mode;
> +                if (apic_test_vector(vector, apic->regs + APIC_TMR))
> +                        trigger_mode = IOAPIC_LEVEL_TRIG;
> +                else
> +                        trigger_mode = IOAPIC_EDGE_TRIG;
> +                kvm_ioapic_update_eoi(apic->vcpu->kvm, vector, trigger_mode);
> +        }
> +}
> +
>  static int apic_set_eoi(struct kvm_lapic *apic)
>  {
>          int vector = apic_find_highest_isr(apic);
> @@ -756,19 +830,26 @@ static int apic_set_eoi(struct kvm_lapic *apic)
>          apic_clear_isr(vector, apic);
>          apic_update_ppr(apic);
>
> -        if (!(kvm_apic_get_reg(apic, APIC_SPIV) & APIC_SPIV_DIRECTED_EOI) &&
> -            kvm_ioapic_handles_vector(apic->vcpu->kvm, vector)) {
> -                int trigger_mode;
> -                if (apic_test_vector(vector, apic->regs + APIC_TMR))
> -                        trigger_mode = IOAPIC_LEVEL_TRIG;
> -                else
> -                        trigger_mode = IOAPIC_EDGE_TRIG;
> -                kvm_ioapic_update_eoi(apic->vcpu->kvm, vector, trigger_mode);
> -        }
> +        kvm_ioapic_send_eoi(apic, vector);
>          kvm_make_request(KVM_REQ_EVENT, apic->vcpu);
>          return vector;
>  }
>
> +/*
> + * this interface assumes a trap-like exit, which has already finished
> + * desired side effect including vISR and vPPR update.
> + */
> +void kvm_apic_set_eoi_accelerated(struct kvm_vcpu *vcpu, int vector)
> +{
> +        struct kvm_lapic *apic = vcpu->arch.apic;
> +
> +        trace_kvm_eoi(apic, vector);
> +
> +        kvm_ioapic_send_eoi(apic, vector);
> +        kvm_make_request(KVM_REQ_EVENT, apic->vcpu);
> +}
> +EXPORT_SYMBOL_GPL(kvm_apic_set_eoi_accelerated);
> +
>  static void apic_send_ipi(struct kvm_lapic *apic)
>  {
>          u32 icr_low = kvm_apic_get_reg(apic, APIC_ICR);
> @@ -1071,6 +1152,7 @@ static int apic_reg_write(struct kvm_lapic *apic, u32 reg, u32 val)
>                  if (!apic_x2apic_mode(apic)) {
>                          apic_set_reg(apic, APIC_DFR, val | 0x0FFFFFFF);
>                          recalculate_apic_map(apic->vcpu->kvm);
> +                        ioapic_update_eoi_exitmap(apic->vcpu->kvm);
>                  } else
>                          ret = 1;
>                  break;
> @@ -1318,6 +1400,7 @@ void kvm_lapic_set_base(struct kvm_vcpu *vcpu, u64 value)
>                  else
>                          static_key_slow_inc(&apic_hw_disabled.key);
>                  recalculate_apic_map(vcpu->kvm);
> +                ioapic_update_eoi_exitmap(apic->vcpu->kvm);
>          }
>
>          if (!kvm_vcpu_is_bsp(apic->vcpu))
> diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h
> index 9a8ee22..10e3f66 100644
> --- a/arch/x86/kvm/lapic.h
> +++ b/arch/x86/kvm/lapic.h
> @@ -39,6 +39,9 @@ void kvm_free_lapic(struct kvm_vcpu *vcpu);
>  int kvm_apic_has_interrupt(struct kvm_vcpu *vcpu);
>  int kvm_apic_accept_pic_intr(struct kvm_vcpu *vcpu);
>  int kvm_get_apic_interrupt(struct kvm_vcpu *vcpu);
> +int kvm_cpu_has_extint(struct kvm_vcpu *v);
> +int kvm_cpu_get_extint(struct kvm_vcpu *v);
> +int kvm_apic_get_highest_irr(struct kvm_vcpu *vcpu);
>  void kvm_lapic_reset(struct kvm_vcpu *vcpu);
>  u64 kvm_lapic_get_cr8(struct kvm_vcpu *vcpu);
>  void kvm_lapic_set_tpr(struct kvm_vcpu *vcpu, unsigned long cr8);
> @@ -55,6 +58,8 @@ int kvm_apic_local_deliver(struct kvm_lapic *apic, int lvt_type);
>  bool kvm_irq_delivery_to_apic_fast(struct kvm *kvm, struct kvm_lapic *src,
>                  struct kvm_lapic_irq *irq, int *r);
>
> +void kvm_update_eoi_exitmap(struct kvm *kvm, struct kvm_lapic_irq *irq);

Should provide an empty function in the ia64 code.
> +
>  u64 kvm_get_apic_base(struct kvm_vcpu *vcpu);
>  void kvm_set_apic_base(struct kvm_vcpu *vcpu, u64 data);
>  void kvm_apic_post_state_restore(struct kvm_vcpu *vcpu,
> @@ -65,6 +70,7 @@ u64 kvm_get_lapic_tscdeadline_msr(struct kvm_vcpu *vcpu);
>  void kvm_set_lapic_tscdeadline_msr(struct kvm_vcpu *vcpu, u64 data);
>
>  void kvm_apic_write_nodecode(struct kvm_vcpu *vcpu, u32 offset);
> +void kvm_apic_set_eoi_accelerated(struct kvm_vcpu *vcpu, int vector);
>
>  void kvm_lapic_set_vapic_addr(struct kvm_vcpu *vcpu, gpa_t vapic_addr);
>  void kvm_lapic_sync_from_vapic(struct kvm_vcpu *vcpu);
> @@ -126,4 +132,9 @@ static inline int kvm_lapic_enabled(struct kvm_vcpu *vcpu)
>          return kvm_apic_present(vcpu) && kvm_apic_sw_enabled(vcpu->arch.apic);
>  }
>
> +static inline bool kvm_apic_vid_enabled(struct kvm_vcpu *vcpu)
> +{
> +        return kvm_x86_ops->has_virtual_interrupt_delivery(vcpu);
> +}
> +
>  #endif
> diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
> index dcb7952..38d7d38 100644
> --- a/arch/x86/kvm/svm.c
> +++ b/arch/x86/kvm/svm.c
> @@ -3573,6 +3573,22 @@ static void update_cr8_intercept(struct kvm_vcpu *vcpu, int tpr, int irr)
>                  set_cr_intercept(svm, INTERCEPT_CR8_WRITE);
>  }
>
> +static int svm_has_virtual_interrupt_delivery(struct kvm_vcpu *vcpu)
> +{
> +        return 0;
> +}
> +
> +static void svm_update_irq(struct kvm_vcpu *vcpu)
> +{
> +        return ;
> +}
> +
> +static void svm_update_eoi_exitmap(struct kvm_vcpu *vcpu, int vector,
> +                int trig_mode, int always_set)
> +{
> +        return ;
> +}
> +
>  static int svm_nmi_allowed(struct kvm_vcpu *vcpu)
>  {
>          struct vcpu_svm *svm = to_svm(vcpu);
> @@ -4292,6 +4308,9 @@ static struct kvm_x86_ops svm_x86_ops = {
>          .enable_nmi_window = enable_nmi_window,
>          .enable_irq_window = enable_irq_window,
>          .update_cr8_intercept = update_cr8_intercept,
> +        .has_virtual_interrupt_delivery = svm_has_virtual_interrupt_delivery,
> +        .update_irq = svm_update_irq;
> +        .update_eoi_exitmap = svm_update_eoi_exitmap;
>
>          .set_tss_addr = svm_set_tss_addr,
>          .get_tdp_level = get_npt_level,
> diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
> index 4838e4f..cd4148b 100644
> --- a/arch/x86/kvm/vmx.c
> +++ b/arch/x86/kvm/vmx.c
> @@ -432,6 +432,9 @@ struct vcpu_vmx {
>
>          bool rdtscp_enabled;
>
> +        u8 eoi_exitmap_changed;
> +        u32 eoi_exit_bitmap[8];
> +
>          /* Support for a guest hypervisor (nested VMX) */
>          struct nested_vmx nested;
>  };
> @@ -770,6 +773,12 @@ static inline bool cpu_has_vmx_apic_register_virt(void)
>                  SECONDARY_EXEC_APIC_REGISTER_VIRT;
>  }
>
> +static inline bool cpu_has_vmx_virtual_intr_delivery(void)
> +{
> +        return vmcs_config.cpu_based_2nd_exec_ctrl &
> +                SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY;
> +}
> +
>  static inline bool cpu_has_vmx_flexpriority(void)
>  {
>          return cpu_has_vmx_tpr_shadow() &&
> @@ -2508,7 +2517,8 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf)
>                          SECONDARY_EXEC_PAUSE_LOOP_EXITING |
>                          SECONDARY_EXEC_RDTSCP |
>                          SECONDARY_EXEC_ENABLE_INVPCID |
> -                        SECONDARY_EXEC_APIC_REGISTER_VIRT;
> +                        SECONDARY_EXEC_APIC_REGISTER_VIRT |
> +                        SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY;
>                  if (adjust_vmx_controls(min2, opt2,
>                                          MSR_IA32_VMX_PROCBASED_CTLS2,
>                                          &_cpu_based_2nd_exec_control) < 0)
> @@ -2522,7 +2532,8 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf)
>
>          if (!(_cpu_based_exec_control & CPU_BASED_TPR_SHADOW))
>                  _cpu_based_2nd_exec_control &= ~(
> -                                SECONDARY_EXEC_APIC_REGISTER_VIRT);
> +                                SECONDARY_EXEC_APIC_REGISTER_VIRT |
> +                                SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY);
>
>          if (_cpu_based_2nd_exec_control & SECONDARY_EXEC_ENABLE_EPT) {
>                  /* CR3 accesses and invlpg don't need to cause VM Exits when EPT
> @@ -2721,9 +2732,13 @@ static __init int hardware_setup(void)
>          if (!cpu_has_vmx_ple())
>                  ple_gap = 0;
>
> -        if (!cpu_has_vmx_apic_register_virt())
> +        if (!cpu_has_vmx_apic_register_virt() ||
> +            !cpu_has_vmx_virtual_intr_delivery())
>                  enable_apicv_reg_vid = 0;
>
> +        if (enable_apicv_reg_vid)
> +                kvm_x86_ops->update_cr8_intercept = NULL;
> +
>          if (nested)
>                  nested_vmx_setup_ctls_msrs();
>
> @@ -3838,7 +3853,8 @@ static u32 vmx_secondary_exec_control(struct vcpu_vmx *vmx)
>          if (!ple_gap)
>                  exec_control &= ~SECONDARY_EXEC_PAUSE_LOOP_EXITING;
>          if (!enable_apicv_reg_vid)
> -                exec_control &= ~SECONDARY_EXEC_APIC_REGISTER_VIRT;
> +                exec_control &= ~(SECONDARY_EXEC_APIC_REGISTER_VIRT |
> +                                  SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY);
>          return exec_control;
>  }
>
> @@ -3883,6 +3899,15 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx)
>                                  vmx_secondary_exec_control(vmx));
>          }
>
> +        if (enable_apicv_reg_vid) {
> +                vmcs_write64(EOI_EXIT_BITMAP0, 0);
> +                vmcs_write64(EOI_EXIT_BITMAP1, 0);
> +                vmcs_write64(EOI_EXIT_BITMAP2, 0);
> +                vmcs_write64(EOI_EXIT_BITMAP3, 0);
> +
> +                vmcs_write16(GUEST_INTR_STATUS, 0);
> +        }
> +
>          if (ple_gap) {
>                  vmcs_write32(PLE_GAP, ple_gap);
>                  vmcs_write32(PLE_WINDOW, ple_window);
> @@ -4806,6 +4831,16 @@ static int handle_apic_access(struct kvm_vcpu *vcpu)
>          return emulate_instruction(vcpu, 0) == EMULATE_DONE;
>  }
>
> +static int handle_apic_eoi_induced(struct kvm_vcpu *vcpu)
> +{
> +        unsigned long exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
> +        int vector = exit_qualification & 0xff;
> +
> +        /* EOI-induced VM exit is trap-like and thus no need to adjust IP */
> +        kvm_apic_set_eoi_accelerated(vcpu, vector);
> +        return 1;
> +}
> +
>  static int handle_apic_write(struct kvm_vcpu *vcpu)
>  {
>          unsigned long exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
> @@ -5756,6 +5791,7 @@ static int (*const kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu) = {
>          [EXIT_REASON_TPR_BELOW_THRESHOLD]     = handle_tpr_below_threshold,
>          [EXIT_REASON_APIC_ACCESS]             = handle_apic_access,
>          [EXIT_REASON_APIC_WRITE]              = handle_apic_write,
> +        [EXIT_REASON_EOI_INDUCED]             = handle_apic_eoi_induced,
>          [EXIT_REASON_WBINVD]                  = handle_wbinvd,
>          [EXIT_REASON_XSETBV]                  = handle_xsetbv,
>          [EXIT_REASON_TASK_SWITCH]             = handle_task_switch,
> @@ -6105,6 +6141,75 @@ static void update_cr8_intercept(struct kvm_vcpu *vcpu, int tpr, int irr)
>          vmcs_write32(TPR_THRESHOLD, irr);
>  }
>
> +static int vmx_has_virtual_interrupt_delivery(struct kvm_vcpu *vcpu)
> +{
> +        return irqchip_in_kernel(vcpu->kvm) && enable_apicv_reg_vid;

No need to check irqchip_in_kernel() here. Callers check it.

> +}
> +
> +static void vmx_update_rvi(int vector)
> +{
> +        u16 status;
> +        u8 old;
> +
> +        status = vmcs_read16(GUEST_INTR_STATUS);
> +        old = (u8)status & 0xff;
> +        if ((u8)vector != old) {
> +                status &= ~0xff;
> +                status |= (u8)vector;
> +                vmcs_write16(GUEST_INTR_STATUS, status);
> +        }
> +}
> +
> +static void vmx_update_irq(struct kvm_vcpu *vcpu)
> +{
> +        int vector;
> +        struct vcpu_vmx *vmx = to_vmx(vcpu);
> +
> +        if (!enable_apicv_reg_vid)
> +                return ;
> +
> +        vector = kvm_lapic_find_highest_irr(vcpu);
> +        if (vector == -1)
> +                return;
> +
> +        vmx_update_rvi(vector);
> +
> +        if (vmx->eoi_exitmap_changed) {

As I said in previous reviews, define a new request bit.
When the bitmap is recalculated, do
kvm_make_request(vcpu, KVM_REQ_EOIBITMAP); on vcpu entry do

  if (kvm_check_request(vcpu, KVM_REQ_EOIBITMAP))
          update_eoibitmap(vcpu);

> +                int index;
> +                for_each_set_bit(index,
> +                        (unsigned long *)(&vmx->eoi_exitmap_changed), 8)
> +                        vmcs_write32(EOI_EXIT_BITMAP0 + index,
> +                                vmx->eoi_exit_bitmap[index]);
> +                vmx->eoi_exitmap_changed = 0;
> +        }
> +}
> +
> +static void vmx_update_eoi_exitmap(struct kvm_vcpu *vcpu,
> +                int vector, bool set)

The caller passes a u32 for vector.

> +{
> +        struct vcpu_vmx *vmx = to_vmx(vcpu);
> +        int index, changed;
> +
> +        if (!enable_apicv_reg_vid)
> +                return ;
> +
> +        if (WARN_ONCE((vector < 0) || (vector > 255),
> +                "KVM VMX: vector (%d) out of range\n", vector))
> +                return;
> +
> +        index = vector >> 5;
> +
> +        if (set)
> +                changed = !test_and_set_bit(vector,
> +                        (unsigned long *)&vmx->eoi_exit_bitmap);
> +        else
> +                changed = !test_and_clear_bit(vector,
> +                        (unsigned long *)&vmx->eoi_exit_bitmap);
> +
> +        if (changed)
> +                vmx->eoi_exitmap_changed |= 1 << index;

EOI exit bitmap changes should be rare enough that we need not track
them. If the function is called, update the VMCS on the next entry.

> +}
> +
>  static void vmx_complete_atomic_exit(struct vcpu_vmx *vmx)
>  {
>          u32 exit_intr_info;
> @@ -7365,6 +7470,9 @@ static struct kvm_x86_ops vmx_x86_ops = {
>          .enable_nmi_window = enable_nmi_window,
>          .enable_irq_window = enable_irq_window,
>          .update_cr8_intercept = update_cr8_intercept,
> +        .has_virtual_interrupt_delivery = vmx_has_virtual_interrupt_delivery,
> +        .update_irq = vmx_update_irq,
> +        .update_eoi_exitmap = vmx_update_eoi_exitmap,
>
>          .set_tss_addr = vmx_set_tss_addr,
>          .get_tdp_level = get_ept_level,
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index 3bdaf29..408445b 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -5534,12 +5534,13 @@ static void inject_pending_event(struct kvm_vcpu *vcpu)
>                          vcpu->arch.nmi_injected = true;
>                          kvm_x86_ops->set_nmi(vcpu);
>                  }
> -        } else if (kvm_cpu_has_interrupt(vcpu)) {
> -                if (kvm_x86_ops->interrupt_allowed(vcpu)) {
> -                        kvm_queue_interrupt(vcpu, kvm_cpu_get_interrupt(vcpu),
> -                                            false);
> -                        kvm_x86_ops->set_irq(vcpu);
> -                }
> +        } else if (kvm_cpu_has_injectable_intr(vcpu) &&
> +                   kvm_x86_ops->interrupt_allowed(vcpu)) {
> +                int vector;
> +                vector = kvm_cpu_get_interrupt(vcpu);
> +
> +                kvm_queue_interrupt(vcpu, vector, false);

Why introduce the vector variable here? Leave it as it was.

> +                kvm_x86_ops->set_irq(vcpu);
>          }
>  }
>
> @@ -5663,10 +5664,13 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
>                  /* enable NMI/IRQ window open exits if needed */
>                  if (vcpu->arch.nmi_pending)
>                          kvm_x86_ops->enable_nmi_window(vcpu);
> -                else if (kvm_cpu_has_interrupt(vcpu) || req_int_win)
> +                else if (kvm_cpu_has_injectable_intr(vcpu) || req_int_win)
>                          kvm_x86_ops->enable_irq_window(vcpu);
>
>                  if (kvm_lapic_enabled(vcpu)) {
> +                        /* update architecture specific hints for APIC
> +                         * virtual interrupt delivery */
> +                        kvm_x86_ops->update_irq(vcpu);
>                          update_cr8_intercept(vcpu);
>                          kvm_lapic_sync_to_vapic(vcpu);
>                  }
> diff --git a/virt/kvm/ioapic.c b/virt/kvm/ioapic.c
> index cfb7e4d..081225a 100644
> --- a/virt/kvm/ioapic.c
> +++ b/virt/kvm/ioapic.c
> @@ -115,6 +115,40 @@ static void update_handled_vectors(struct kvm_ioapic *ioapic)
>          smp_wmb();
>  }
>
> +void _ioapic_update_eoi_exitmap(struct kvm_ioapic *ioapic, int pin)
> +{

Better make it ioapic_update_eoi_exitmap_one() or something. The
underscore is undescriptive.
> + union kvm_ioapic_redirect_entry *e; > + > + e = &ioapic->redirtbl[pin]; > + > + /* PIT is a special case: which is edge trig but have EOI hook. > + * Always set the eoi exit bitmap for PIT interrupt*/ No hacks please. Check that ack notifier is register for gsi. > + if (e->fields.mask != 0x1 && !e->fields.mask please. > + (e->fields.trig_mode == IOAPIC_LEVEL_TRIG || (pin == 2))) { > + struct kvm_lapic_irq irqe; > + > + irqe.dest_id = e->fields.dest_id; > + irqe.vector = e->fields.vector; > + irqe.dest_mode = e->fields.dest_mode; > + irqe.delivery_mode = e->fields.delivery_mode << 8; > + kvm_update_eoi_exitmap(ioapic->kvm, &irqe); > + } > +} > + > +void ioapic_update_eoi_exitmap(struct kvm *kvm) > +{ > + struct kvm_ioapic *ioapic = kvm->arch.vioapic; > + union kvm_ioapic_redirect_entry *e; > + int index = 0; > + > + while (index < IOAPIC_NUM_PINS) { this if for() loop. > + e = &ioapic->redirtbl[index]; > + if (e->fields.vector >= 0 && e->fields.vector <= 255) vector field is u8, the check is always true. Better move mask check here. > + _ioapic_update_eoi_exitmap(ioapic, index); > + index++; > + } > +} > + > static void ioapic_write_indirect(struct kvm_ioapic *ioapic, u32 val) > { > unsigned index; > @@ -156,6 +190,7 @@ static void ioapic_write_indirect(struct kvm_ioapic *ioapic, u32 val) > if (e->fields.trig_mode == IOAPIC_LEVEL_TRIG > && ioapic->irr & (1 << index)) > ioapic_service(ioapic, index); > + _ioapic_update_eoi_exitmap(ioapic, index); > break; > } > } > diff --git a/virt/kvm/ioapic.h b/virt/kvm/ioapic.h > index a30abfe..e8d67cf 100644 > --- a/virt/kvm/ioapic.h > +++ b/virt/kvm/ioapic.h > @@ -82,5 +82,6 @@ int kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src, > struct kvm_lapic_irq *irq); > int kvm_get_ioapic(struct kvm *kvm, struct kvm_ioapic_state *state); > int kvm_set_ioapic(struct kvm *kvm, struct kvm_ioapic_state *state); > +void ioapic_update_eoi_exitmap(struct kvm *kvm); > > #endif > -- > 1.7.1 -- Gleb. -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Gleb Natapov wrote on 2012-12-09:
> On Sat, Dec 08, 2012 at 08:04:30PM +0800, Yang Zhang wrote:
>> diff --git a/virt/kvm/ioapic.c b/virt/kvm/ioapic.c
>> index cfb7e4d..081225a 100644
>> --- a/virt/kvm/ioapic.c
>> +++ b/virt/kvm/ioapic.c
>> @@ -115,6 +115,40 @@ static void update_handled_vectors(struct kvm_ioapic *ioapic)
>>          smp_wmb();
>>  }
>> +void _ioapic_update_eoi_exitmap(struct kvm_ioapic *ioapic, int pin)
>> +{
> Better make it ioapic_update_eoi_exitmap_one() or something. The
> underscore is undescriptive.
>
>> +        union kvm_ioapic_redirect_entry *e;
>> +
>> +        e = &ioapic->redirtbl[pin];
>> +
>> +        /* PIT is a special case: which is edge trig but have EOI hook.
>> +         * Always set the eoi exit bitmap for PIT interrupt*/
> No hacks please. Check that an ack notifier is registered for the gsi.

Do you mean doing this in kvm_register_irq_ack_notifier()? The problem
is that we cannot get the vector when calling this function, because it
is called during device initialization and the guest is not running at
that time.

Best regards,
Yang
On Mon, Dec 10, 2012 at 01:34:02AM +0000, Zhang, Yang Z wrote:
> Gleb Natapov wrote on 2012-12-09:
>> On Sat, Dec 08, 2012 at 08:04:30PM +0800, Yang Zhang wrote:
>>> diff --git a/virt/kvm/ioapic.c b/virt/kvm/ioapic.c
>>> index cfb7e4d..081225a 100644
>>> --- a/virt/kvm/ioapic.c
>>> +++ b/virt/kvm/ioapic.c
>>> @@ -115,6 +115,40 @@ static void update_handled_vectors(struct kvm_ioapic *ioapic)
>>>          smp_wmb();
>>>  }
>>> +void _ioapic_update_eoi_exitmap(struct kvm_ioapic *ioapic, int pin)
>>> +{
>> Better make it ioapic_update_eoi_exitmap_one() or something. The
>> underscore is undescriptive.
>>
>>> +        union kvm_ioapic_redirect_entry *e;
>>> +
>>> +        e = &ioapic->redirtbl[pin];
>>> +
>>> +        /* PIT is a special case: which is edge trig but have EOI hook.
>>> +         * Always set the eoi exit bitmap for PIT interrupt*/
>> No hacks please. Check that an ack notifier is registered for the gsi.
> Do you mean doing this in kvm_register_irq_ack_notifier()? The problem
> is that we cannot get the vector when calling this function, because it
> is called during device initialization and the guest is not running at
> that time.
>
Call ioapic_update_eoi_exitmap() in kvm_register_irq_ack_notifier(), and
check that the gsi (pin) has a notifier registered in
ioapic_update_eoi_exitmap().

--
        Gleb.
Gleb Natapov wrote on 2012-12-10:
> On Mon, Dec 10, 2012 at 01:34:02AM +0000, Zhang, Yang Z wrote:
>> Gleb Natapov wrote on 2012-12-09:
>>> On Sat, Dec 08, 2012 at 08:04:30PM +0800, Yang Zhang wrote:
>>>> diff --git a/virt/kvm/ioapic.c b/virt/kvm/ioapic.c
>>>> index cfb7e4d..081225a 100644
>>>> --- a/virt/kvm/ioapic.c
>>>> +++ b/virt/kvm/ioapic.c
>>>> @@ -115,6 +115,40 @@ static void update_handled_vectors(struct kvm_ioapic *ioapic)
>>>>          smp_wmb();
>>>>  }
>>>> +void _ioapic_update_eoi_exitmap(struct kvm_ioapic *ioapic, int pin)
>>>> +{
>>> Better make it ioapic_update_eoi_exitmap_one() or something. The
>>> underscore is undescriptive.
>>>
>>>> +        union kvm_ioapic_redirect_entry *e;
>>>> +
>>>> +        e = &ioapic->redirtbl[pin];
>>>> +
>>>> +        /* PIT is a special case: which is edge trig but have EOI hook.
>>>> +         * Always set the eoi exit bitmap for PIT interrupt*/
>>> No hacks please. Check that an ack notifier is registered for the gsi.
>> Do you mean doing this in kvm_register_irq_ack_notifier()? The problem
>> is that we cannot get the vector when calling this function, because it
>> is called during device initialization and the guest is not running at
>> that time.
>>
> Call ioapic_update_eoi_exitmap() in kvm_register_irq_ack_notifier(), and
> check that the gsi (pin) has a notifier registered in
> ioapic_update_eoi_exitmap().
>
Ok.

Best regards,
Yang
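To make the agreed direction concrete, a rough sketch follows. The
kvm_irq_has_notifier() helper for querying whether an ack notifier is
registered for a pin is an assumption (nothing in this thread defines
it), and ioapic_update_eoi_exitmap_one() is the rename suggested above:

        /* Sketch only: kvm_irq_has_notifier() is an assumed helper. */
        void ioapic_update_eoi_exitmap(struct kvm *kvm)
        {
                struct kvm_ioapic *ioapic = kvm->arch.vioapic;
                int index;

                for (index = 0; index < IOAPIC_NUM_PINS; index++) {
                        union kvm_ioapic_redirect_entry *e =
                                        &ioapic->redirtbl[index];

                        /* Unmasked level-triggered pins, plus pins with
                         * an EOI hook (e.g. the PIT), need an EOI exit. */
                        if (!e->fields.mask &&
                            (e->fields.trig_mode == IOAPIC_LEVEL_TRIG ||
                             kvm_irq_has_notifier(kvm, KVM_IRQCHIP_IOAPIC,
                                                  index)))
                                ioapic_update_eoi_exitmap_one(ioapic, index);
                }
        }

kvm_register_irq_ack_notifier() would then call
ioapic_update_eoi_exitmap() after adding the notifier, so a pin gains
its bitmap entry as soon as an EOI hook appears, even if the
redirection table was programmed earlier.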