Message ID | 1361281183-22759-3-git-send-email-yang.z.zhang@intel.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Hi Marcelo, Can you help to review this patch? Many thanks if you can review it quickly. Zhang, Yang Z wrote on 2013-02-19: > From: Yang Zhang <yang.z.zhang@Intel.com> > > Posted Interrupt allows APIC interrupts to inject into guest directly > without any vmexit. > > - When delivering an interrupt to guest, if target vcpu is running, > update Posted-interrupt requests bitmap and send a notification event > to the vcpu. Then the vcpu will handle this interrupt automatically, > without any software involvement. > - If target vcpu is not running or there is already a notification event > pending in the vcpu, do nothing. The interrupt will be handled by > the next vm entry > Signed-off-by: Yang Zhang <yang.z.zhang@Intel.com> > --- > arch/x86/include/asm/entry_arch.h | 4 + > arch/x86/include/asm/hw_irq.h | 1 + > arch/x86/include/asm/irq_vectors.h | 5 + > arch/x86/include/asm/kvm_host.h | 3 + arch/x86/include/asm/vmx.h > | 4 + arch/x86/kernel/entry_64.S | 5 + > arch/x86/kernel/irq.c | 20 +++++ > arch/x86/kernel/irqinit.c | 4 + arch/x86/kvm/lapic.c > | 19 ++++- arch/x86/kvm/lapic.h | 1 + > arch/x86/kvm/svm.c | 13 +++ arch/x86/kvm/vmx.c > | 157 +++++++++++++++++++++++++++++++----- arch/x86/kvm/x86.c > | 1 + 13 files changed, 214 insertions(+), 23 > deletions(-) > diff --git a/arch/x86/include/asm/entry_arch.h > b/arch/x86/include/asm/entry_arch.h index 40afa00..9bd4eca 100644 --- > a/arch/x86/include/asm/entry_arch.h +++ > b/arch/x86/include/asm/entry_arch.h @@ -19,6 +19,10 @@ > BUILD_INTERRUPT(reboot_interrupt,REBOOT_VECTOR) > > BUILD_INTERRUPT(x86_platform_ipi, X86_PLATFORM_IPI_VECTOR) > +#ifdef CONFIG_HAVE_KVM +BUILD_INTERRUPT(kvm_posted_intr_ipi, > POSTED_INTR_VECTOR) +#endif + > /* > * every pentium local APIC has two 'local interrupts', with a > * soft-definable vector attached to both interrupts, one of > diff --git a/arch/x86/include/asm/hw_irq.h b/arch/x86/include/asm/hw_irq.h > index eb92a6e..cebef02 100644 > --- a/arch/x86/include/asm/hw_irq.h > +++ 
b/arch/x86/include/asm/hw_irq.h > @@ -28,6 +28,7 @@ > /* Interrupt handlers registered during init_IRQ */ extern void > apic_timer_interrupt(void); extern void x86_platform_ipi(void); +extern > void kvm_posted_intr_ipi(void); extern void error_interrupt(void); > extern void irq_work_interrupt(void); > diff --git a/arch/x86/include/asm/irq_vectors.h > b/arch/x86/include/asm/irq_vectors.h index 1508e51..774dc9f 100644 --- > a/arch/x86/include/asm/irq_vectors.h +++ > b/arch/x86/include/asm/irq_vectors.h @@ -102,6 +102,11 @@ > */ > #define X86_PLATFORM_IPI_VECTOR 0xf7 > +/* Vector for KVM to deliver posted interrupt IPI */ > +#ifdef CONFIG_HAVE_KVM > +#define POSTED_INTR_VECTOR 0xf2 > +#endif > + > /* > * IRQ work vector: > */ > diff --git a/arch/x86/include/asm/kvm_host.h > b/arch/x86/include/asm/kvm_host.h index b8388e9..79da55e 100644 --- > a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h > @@ -704,6 +704,9 @@ struct kvm_x86_ops { > void (*hwapic_isr_update)(struct kvm *kvm, int isr); > void (*load_eoi_exitmap)(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap); > void (*set_virtual_x2apic_mode)(struct kvm_vcpu *vcpu, bool set); > + bool (*deliver_posted_interrupt)(struct kvm_vcpu *vcpu, int vector, > + int *result, bool *delivered); > + void (*sync_pir_to_irr)(struct kvm_vcpu *vcpu); > int (*set_tss_addr)(struct kvm *kvm, unsigned int addr); > int (*get_tdp_level)(void); > u64 (*get_mt_mask)(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio); > diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h > index 5c9dbad..ce8ac80 100644 > --- a/arch/x86/include/asm/vmx.h > +++ b/arch/x86/include/asm/vmx.h > @@ -158,6 +158,7 @@ > #define PIN_BASED_EXT_INTR_MASK 0x00000001 > #define PIN_BASED_NMI_EXITING 0x00000008 > #define PIN_BASED_VIRTUAL_NMIS 0x00000020 > +#define PIN_BASED_POSTED_INTR 0x00000080 > > #define VM_EXIT_SAVE_DEBUG_CONTROLS 0x00000002 #define > VM_EXIT_HOST_ADDR_SPACE_SIZE 0x00000200 @@ -180,6 +181,7 @@ > /* VMCS Encodings */ 
enum vmcs_field { VIRTUAL_PROCESSOR_ID > = 0x00000000, + POSTED_INTR_NV = 0x00000002, > GUEST_ES_SELECTOR = 0x00000800, GUEST_CS_SELECTOR > = 0x00000802, GUEST_SS_SELECTOR = 0x00000804, @@ > -214,6 +216,8 @@ enum vmcs_field { VIRTUAL_APIC_PAGE_ADDR_HIGH = > 0x00002013, APIC_ACCESS_ADDR = 0x00002014, APIC_ACCESS_ADDR_HIGH = > 0x00002015, > + POSTED_INTR_DESC_ADDR = 0x00002016, > + POSTED_INTR_DESC_ADDR_HIGH = 0x00002017, > EPT_POINTER = 0x0000201a, > EPT_POINTER_HIGH = 0x0000201b, > EOI_EXIT_BITMAP0 = 0x0000201c, > diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S > index 70641af..b409846 100644 > --- a/arch/x86/kernel/entry_64.S > +++ b/arch/x86/kernel/entry_64.S > @@ -1177,6 +1177,11 @@ apicinterrupt LOCAL_TIMER_VECTOR \ > apicinterrupt X86_PLATFORM_IPI_VECTOR \ > x86_platform_ipi smp_x86_platform_ipi > +#ifdef CONFIG_HAVE_KVM > +apicinterrupt POSTED_INTR_VECTOR \ > + kvm_posted_intr_ipi smp_posted_intr_ipi > +#endif > + > apicinterrupt THRESHOLD_APIC_VECTOR \ > threshold_interrupt smp_threshold_interrupt > apicinterrupt THERMAL_APIC_VECTOR \ > diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c > index e4595f1..da74d65 100644 > --- a/arch/x86/kernel/irq.c > +++ b/arch/x86/kernel/irq.c > @@ -228,6 +228,26 @@ void smp_x86_platform_ipi(struct pt_regs *regs) > set_irq_regs(old_regs); > } > +#ifdef CONFIG_HAVE_KVM > +/* > + * Handler for POSTED_INTERRUPT_VECTOR. 
> + */ > +void smp_posted_intr_ipi(struct pt_regs *regs) > +{ > + struct pt_regs *old_regs = set_irq_regs(regs); > + > + ack_APIC_irq(); > + > + irq_enter(); > + > + exit_idle(); > + > + irq_exit(); > + > + set_irq_regs(old_regs); > +} > +#endif > + > EXPORT_SYMBOL_GPL(vector_used_by_percpu_irq); > > #ifdef CONFIG_HOTPLUG_CPU > diff --git a/arch/x86/kernel/irqinit.c b/arch/x86/kernel/irqinit.c > index 6e03b0d..2329a54 100644 > --- a/arch/x86/kernel/irqinit.c > +++ b/arch/x86/kernel/irqinit.c > @@ -205,6 +205,10 @@ static void __init apic_intr_init(void) > > /* IPI for X86 platform specific use */ > alloc_intr_gate(X86_PLATFORM_IPI_VECTOR, x86_platform_ipi); > +#ifdef CONFIG_HAVE_KVM + /* IPI for KVM to deliver posted interrupt */ > + alloc_intr_gate(POSTED_INTR_VECTOR, kvm_posted_intr_ipi); +#endif > > /* IPI vectors for APIC spurious and error interrupts */ > alloc_intr_gate(SPURIOUS_APIC_VECTOR, spurious_interrupt); > diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c > index 02b51dd..ebc32bb 100644 > --- a/arch/x86/kvm/lapic.c > +++ b/arch/x86/kvm/lapic.c > @@ -357,6 +357,12 @@ static u8 count_vectors(void *bitmap) > return count; > } > +int kvm_apic_test_irr(int vec, struct kvm_lapic *apic) > +{ > + return apic_test_vector(vec, apic->regs + APIC_IRR); > +} > +EXPORT_SYMBOL_GPL(kvm_apic_test_irr); > + > static inline int apic_test_and_set_irr(int vec, struct kvm_lapic > *apic) { apic->irr_pending = true; > @@ -379,6 +385,7 @@ static inline int apic_find_highest_irr(struct kvm_lapic > *apic) > if (!apic->irr_pending) > return -1; > + kvm_x86_ops->sync_pir_to_irr(apic->vcpu); > result = apic_search_irr(apic); > ASSERT(result == -1 || result >= 16); > @@ -685,6 +692,7 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int > delivery_mode, > { > int result = 0; > struct kvm_vcpu *vcpu = apic->vcpu; > + bool delivered = false; > > switch (delivery_mode) { > case APIC_DM_LOWEST: > @@ -700,7 +708,10 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int 
> delivery_mode, > } else > apic_clear_vector(vector, apic->regs + APIC_TMR); > - result = !apic_test_and_set_irr(vector, apic); > + if (!kvm_x86_ops->deliver_posted_interrupt(vcpu, vector, > + &result, &delivered)) > + result = !apic_test_and_set_irr(vector, apic); > + > trace_kvm_apic_accept_irq(vcpu->vcpu_id, delivery_mode, > trig_mode, vector, !result); > if (!result) { > @@ -710,8 +721,10 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int > delivery_mode, > break; > } > - kvm_make_request(KVM_REQ_EVENT, vcpu); - kvm_vcpu_kick(vcpu); + if > (!delivered) { + kvm_make_request(KVM_REQ_EVENT, vcpu); > + kvm_vcpu_kick(vcpu); + } > break; > > case APIC_DM_REMRD: > diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h > index 1676d34..1a7016c 100644 > --- a/arch/x86/kvm/lapic.h > +++ b/arch/x86/kvm/lapic.h > @@ -157,5 +157,6 @@ static inline u16 apic_logical_id(struct kvm_apic_map > *map, u32 ldr) > void kvm_calculate_eoi_exitmap(struct kvm_vcpu *vcpu, > struct kvm_lapic_irq *irq, > u64 *eoi_bitmap); > +int kvm_apic_test_irr(int vec, struct kvm_lapic *apic); > > #endif > diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c > index a7d60d7..9e705e3 100644 > --- a/arch/x86/kvm/svm.c > +++ b/arch/x86/kvm/svm.c > @@ -3591,6 +3591,17 @@ static void svm_hwapic_isr_update(struct kvm *kvm, > int isr) > return; > } > +static void svm_sync_pir_to_irr(struct kvm_vcpu *vcpu) > +{ > + return; > +} > + > +static bool svm_deliver_posted_interrupt(struct kvm_vcpu *vcpu, > + int vector, int *result, bool *delivered) > +{ > + return false; > +} > + > static int svm_nmi_allowed(struct kvm_vcpu *vcpu) { struct vcpu_svm > *svm = to_svm(vcpu); @@ -4319,6 +4330,8 @@ static struct kvm_x86_ops > svm_x86_ops = { .vm_has_apicv = svm_vm_has_apicv, .load_eoi_exitmap = > svm_load_eoi_exitmap, .hwapic_isr_update = svm_hwapic_isr_update, > + .sync_pir_to_irr = svm_sync_pir_to_irr, > + .deliver_posted_interrupt = svm_deliver_posted_interrupt, > > .set_tss_addr = svm_set_tss_addr, > 
.get_tdp_level = get_npt_level, > diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c > index 436b134..2fdf537 100644 > --- a/arch/x86/kvm/vmx.c > +++ b/arch/x86/kvm/vmx.c > @@ -84,7 +84,8 @@ module_param(vmm_exclusive, bool, S_IRUGO); > static bool __read_mostly fasteoi = 1; > module_param(fasteoi, bool, S_IRUGO); > -static bool __read_mostly enable_apicv_reg_vid; > +static bool __read_mostly enable_apicv = 1; > +module_param(enable_apicv, bool, S_IRUGO); > > /* > * If nested=1, nested virtualization is supported, i.e., guests may use > @@ -365,6 +366,36 @@ struct nested_vmx { > struct page *apic_access_page; > }; > +#define POSTED_INTR_ON 0 +/* Posted-Interrupt Descriptor */ +struct > pi_desc { + u32 pir[8]; /* Posted interrupt requested */ + union { > + struct { + u8 on:1, + rsvd:7; + } control; + u32 rsvd[8]; > + } u; +} __aligned(64); + +static bool pi_test_and_set_on(struct > pi_desc *pi_desc) +{ + return test_and_set_bit(POSTED_INTR_ON, > + (unsigned long *)&pi_desc->u.control); +} + +static bool > pi_test_and_clear_on(struct pi_desc *pi_desc) +{ + return > test_and_clear_bit(POSTED_INTR_ON, + (unsigned long > *)&pi_desc->u.control); +} + +static int pi_test_and_set_pir(int vector, > struct pi_desc *pi_desc) +{ + return test_and_set_bit(vector, (unsigned > long *)pi_desc->pir); +} + > struct vcpu_vmx { > struct kvm_vcpu vcpu; > unsigned long host_rsp; > @@ -429,6 +460,9 @@ struct vcpu_vmx { > > bool rdtscp_enabled; > + /* Posted interrupt descriptor */ > + struct pi_desc pi_desc; > + > /* Support for a guest hypervisor (nested VMX) */ > struct nested_vmx nested; > }; > @@ -783,6 +817,18 @@ static inline bool > cpu_has_vmx_virtual_intr_delivery(void) > SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY; > } > +static inline bool cpu_has_vmx_posted_intr(void) +{ + return > vmcs_config.pin_based_exec_ctrl & PIN_BASED_POSTED_INTR; +} + +static > inline bool cpu_has_vmx_apicv(void) +{ + return > cpu_has_vmx_apic_register_virt() && > + cpu_has_vmx_virtual_intr_delivery() && + 
cpu_has_vmx_posted_intr(); > +} + > static inline bool cpu_has_vmx_flexpriority(void) > { > return cpu_has_vmx_tpr_shadow() && > @@ -2530,12 +2576,6 @@ static __init int setup_vmcs_config(struct vmcs_config > *vmcs_conf) > u32 _vmexit_control = 0; > u32 _vmentry_control = 0; > - min = PIN_BASED_EXT_INTR_MASK | PIN_BASED_NMI_EXITING; > - opt = PIN_BASED_VIRTUAL_NMIS; > - if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_PINBASED_CTLS, > - &_pin_based_exec_control) < 0) > - return -EIO; > - > min = CPU_BASED_HLT_EXITING | > #ifdef CONFIG_X86_64 > CPU_BASED_CR8_LOAD_EXITING | > @@ -2612,6 +2652,17 @@ static __init int setup_vmcs_config(struct vmcs_config > *vmcs_conf) > &_vmexit_control) < 0) > return -EIO; > + min = PIN_BASED_EXT_INTR_MASK | PIN_BASED_NMI_EXITING; > + opt = PIN_BASED_VIRTUAL_NMIS | PIN_BASED_POSTED_INTR; > + if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_PINBASED_CTLS, > + &_pin_based_exec_control) < 0) > + return -EIO; > + > + if (!(_cpu_based_2nd_exec_control & > + SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY) || > + !(_vmexit_control & VM_EXIT_ACK_INTR_ON_EXIT)) > + _pin_based_exec_control &= ~PIN_BASED_POSTED_INTR; > + > min = 0; opt = VM_ENTRY_LOAD_IA32_PAT; if (adjust_vmx_controls(min, > opt, MSR_IA32_VMX_ENTRY_CTLS, @@ -2790,11 +2841,10 @@ static __init int > hardware_setup(void) if (!cpu_has_vmx_ple()) ple_gap = 0; > - if (!cpu_has_vmx_apic_register_virt() || > - !cpu_has_vmx_virtual_intr_delivery()) - enable_apicv_reg_vid = 0; > + if (!cpu_has_vmx_apicv()) + enable_apicv = 0; > > - if (enable_apicv_reg_vid) > + if (enable_apicv) > kvm_x86_ops->update_cr8_intercept = NULL; > else > kvm_x86_ops->hwapic_irr_update = NULL; > @@ -3871,6 +3921,62 @@ static void > vmx_disable_intercept_msr_write_x2apic(u32 msr) > msr, MSR_TYPE_W); > } > +static int vmx_vm_has_apicv(struct kvm *kvm) +{ + return enable_apicv > && irqchip_in_kernel(kvm); +} + +static bool > vmx_deliver_posted_interrupt(struct kvm_vcpu *vcpu, + int vector, int > *result, bool *delivered) +{ + 
struct vcpu_vmx *vmx = to_vmx(vcpu); + > + if (!vmx_vm_has_apicv(vcpu->kvm)) + return false; + + if > (kvm_apic_test_irr(vector, vcpu->arch.apic)) + goto out; + else { > + *result = !pi_test_and_set_pir(vector, &vmx->pi_desc); + if > (!*result) + goto out; + } + + if (!pi_test_and_set_on(&vmx->pi_desc) > && + (vcpu->mode == IN_GUEST_MODE)) { > + kvm_make_request(KVM_REQ_EVENT, vcpu); > + apic->send_IPI_mask(get_cpu_mask(vcpu->cpu), + > POSTED_INTR_VECTOR); + *delivered = true; + } +out: + return true; +} + > +static void vmx_sync_pir_to_irr(struct kvm_vcpu *vcpu) +{ + struct > vcpu_vmx *vmx = to_vmx(vcpu); + struct kvm_lapic *apic = > vcpu->arch.apic; + unsigned int i, old, new, ret_val, irr_offset, > pir_val; + + if (!vmx_vm_has_apicv(vcpu->kvm) || > + !pi_test_and_clear_on(&vmx->pi_desc)) + return; + + for (i = 0; i > <= 7; i++) { + pir_val = xchg(&vmx->pi_desc.pir[i], 0); + if (pir_val) > { + irr_offset = APIC_IRR + i * 0x10; + do { + old = > kvm_apic_get_reg(apic, irr_offset); + new = old | pir_val; > + ret_val = cmpxchg((u32 *)(apic->regs + + irr_offset), old, > new); + } while (unlikely(ret_val != old)); + } + } +} + > /* > * Set up the vmcs's constant host-state fields, i.e., host-state fields that > * will not change in the lifetime of the guest. 
> @@ -3931,6 +4037,15 @@ static void set_cr4_guest_host_mask(struct vcpu_vmx > *vmx) > vmcs_writel(CR4_GUEST_HOST_MASK, > ~vmx->vcpu.arch.cr4_guest_owned_bits); } > +static u32 vmx_pin_based_exec_ctrl(struct vcpu_vmx *vmx) > +{ > + u32 pin_based_exec_ctrl = vmcs_config.pin_based_exec_ctrl; > + > + if (!vmx_vm_has_apicv(vmx->vcpu.kvm)) > + pin_based_exec_ctrl &= ~PIN_BASED_POSTED_INTR; > + return pin_based_exec_ctrl; > +} > + > static u32 vmx_exec_control(struct vcpu_vmx *vmx) { u32 exec_control = > vmcs_config.cpu_based_exec_ctrl; @@ -3948,11 +4063,6 @@ static u32 > vmx_exec_control(struct vcpu_vmx *vmx) return exec_control; } > -static int vmx_vm_has_apicv(struct kvm *kvm) > -{ > - return enable_apicv_reg_vid && irqchip_in_kernel(kvm); > -} > - > static u32 vmx_secondary_exec_control(struct vcpu_vmx *vmx) { u32 > exec_control = vmcs_config.cpu_based_2nd_exec_ctrl; @@ -4008,8 +4118,7 > @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx) > vmcs_write64(VMCS_LINK_POINTER, -1ull); /* 22.3.1.5 */ > > /* Control */ > - vmcs_write32(PIN_BASED_VM_EXEC_CONTROL, > - vmcs_config.pin_based_exec_ctrl); > + vmcs_write32(PIN_BASED_VM_EXEC_CONTROL, vmx_pin_based_exec_ctrl(vmx)); > > vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, > vmx_exec_control(vmx)); > > @@ -4018,13 +4127,16 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx) > vmx_secondary_exec_control(vmx)); > } > - if (enable_apicv_reg_vid) { > + if (vmx_vm_has_apicv(vmx->vcpu.kvm)) { > vmcs_write64(EOI_EXIT_BITMAP0, 0); > vmcs_write64(EOI_EXIT_BITMAP1, 0); > vmcs_write64(EOI_EXIT_BITMAP2, 0); > vmcs_write64(EOI_EXIT_BITMAP3, 0); > > vmcs_write16(GUEST_INTR_STATUS, 0); > + > + vmcs_write64(POSTED_INTR_NV, POSTED_INTR_VECTOR); > + vmcs_write64(POSTED_INTR_DESC_ADDR, __pa((&vmx->pi_desc))); > } > > if (ple_gap) { @@ -4174,6 +4286,9 @@ static int vmx_vcpu_reset(struct > kvm_vcpu *vcpu) vmcs_write64(APIC_ACCESS_ADDR, > page_to_phys(vmx->vcpu.kvm->arch.apic_access_page)); > + if (vmx_vm_has_apicv(vcpu->kvm)) > + memset(&vmx->pi_desc, 
0, sizeof(struct pi_desc)); > + > if (vmx->vpid != 0) > vmcs_write16(VIRTUAL_PROCESSOR_ID, vmx->vpid); > @@ -7650,6 +7765,8 @@ static struct kvm_x86_ops vmx_x86_ops = { > .load_eoi_exitmap = vmx_load_eoi_exitmap, > .hwapic_irr_update = vmx_hwapic_irr_update, > .hwapic_isr_update = vmx_hwapic_isr_update, > + .sync_pir_to_irr = vmx_sync_pir_to_irr, > + .deliver_posted_interrupt = vmx_deliver_posted_interrupt, > > .set_tss_addr = vmx_set_tss_addr, .get_tdp_level = get_ept_level, @@ > -7753,7 +7870,7 @@ static int __init vmx_init(void) > memcpy(vmx_msr_bitmap_longmode_x2apic, vmx_msr_bitmap_longmode, > PAGE_SIZE); > - if (enable_apicv_reg_vid) { > + if (enable_apicv) { > for (msr = 0x800; msr <= 0x8ff; msr++) > vmx_disable_intercept_msr_read_x2apic(msr); > diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c > index f1fa37e..62f8c94 100644 > --- a/arch/x86/kvm/x86.c > +++ b/arch/x86/kvm/x86.c > @@ -2679,6 +2679,7 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) > static int kvm_vcpu_ioctl_get_lapic(struct kvm_vcpu *vcpu, > struct kvm_lapic_state *s) { + kvm_x86_ops->sync_pir_to_irr(vcpu); > memcpy(s->regs, vcpu->arch.apic->regs, sizeof *s); > > return 0; > -- > 1.7.1 Best regards, Yang -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Thu, Feb 21, 2013 at 06:04:52AM +0000, Zhang, Yang Z wrote: > Hi Marcelo, > > Can you help to review this patch? Many thanks if you can review it quickly. > The patch has only been on the list for 2 days. Be patient. > Zhang, Yang Z wrote on 2013-02-19: > > From: Yang Zhang <yang.z.zhang@Intel.com> > > > > Posted Interrupt allows APIC interrupts to inject into guest directly > > without any vmexit. > > > > - When delivering an interrupt to guest, if target vcpu is running, > > update Posted-interrupt requests bitmap and send a notification event > > to the vcpu. Then the vcpu will handle this interrupt automatically, > > without any software involvement. > > - If target vcpu is not running or there is already a notification event > > pending in the vcpu, do nothing. The interrupt will be handled by > > the next vm entry > > Signed-off-by: Yang Zhang <yang.z.zhang@Intel.com> > > --- > > arch/x86/include/asm/entry_arch.h | 4 + > > arch/x86/include/asm/hw_irq.h | 1 + > > arch/x86/include/asm/irq_vectors.h | 5 + > > arch/x86/include/asm/kvm_host.h | 3 + arch/x86/include/asm/vmx.h > > | 4 + arch/x86/kernel/entry_64.S | 5 + > > arch/x86/kernel/irq.c | 20 +++++ > > arch/x86/kernel/irqinit.c | 4 + arch/x86/kvm/lapic.c > > | 19 ++++- arch/x86/kvm/lapic.h | 1 + > > arch/x86/kvm/svm.c | 13 +++ arch/x86/kvm/vmx.c > > | 157 +++++++++++++++++++++++++++++++----- arch/x86/kvm/x86.c > > | 1 + 13 files changed, 214 insertions(+), 23 > > deletions(-) > > diff --git a/arch/x86/include/asm/entry_arch.h > > b/arch/x86/include/asm/entry_arch.h index 40afa00..9bd4eca 100644 --- > > a/arch/x86/include/asm/entry_arch.h +++ > > b/arch/x86/include/asm/entry_arch.h @@ -19,6 +19,10 @@ > > BUILD_INTERRUPT(reboot_interrupt,REBOOT_VECTOR) > > > > BUILD_INTERRUPT(x86_platform_ipi, X86_PLATFORM_IPI_VECTOR) > > +#ifdef CONFIG_HAVE_KVM +BUILD_INTERRUPT(kvm_posted_intr_ipi, > > POSTED_INTR_VECTOR) +#endif + > > /* > > * every pentium local APIC has two 'local interrupts', with a > > * soft-definable vector attached to 
both interrupts, one of > > diff --git a/arch/x86/include/asm/hw_irq.h b/arch/x86/include/asm/hw_irq.h > > index eb92a6e..cebef02 100644 > > --- a/arch/x86/include/asm/hw_irq.h > > +++ b/arch/x86/include/asm/hw_irq.h > > @@ -28,6 +28,7 @@ > > /* Interrupt handlers registered during init_IRQ */ extern void > > apic_timer_interrupt(void); extern void x86_platform_ipi(void); +extern > > void kvm_posted_intr_ipi(void); extern void error_interrupt(void); > > extern void irq_work_interrupt(void); > > diff --git a/arch/x86/include/asm/irq_vectors.h > > b/arch/x86/include/asm/irq_vectors.h index 1508e51..774dc9f 100644 --- > > a/arch/x86/include/asm/irq_vectors.h +++ > > b/arch/x86/include/asm/irq_vectors.h @@ -102,6 +102,11 @@ > > */ > > #define X86_PLATFORM_IPI_VECTOR 0xf7 > > +/* Vector for KVM to deliver posted interrupt IPI */ > > +#ifdef CONFIG_HAVE_KVM > > +#define POSTED_INTR_VECTOR 0xf2 > > +#endif > > + > > /* > > * IRQ work vector: > > */ > > diff --git a/arch/x86/include/asm/kvm_host.h > > b/arch/x86/include/asm/kvm_host.h index b8388e9..79da55e 100644 --- > > a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h > > @@ -704,6 +704,9 @@ struct kvm_x86_ops { > > void (*hwapic_isr_update)(struct kvm *kvm, int isr); > > void (*load_eoi_exitmap)(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap); > > void (*set_virtual_x2apic_mode)(struct kvm_vcpu *vcpu, bool set); > > + bool (*deliver_posted_interrupt)(struct kvm_vcpu *vcpu, int vector, > > + int *result, bool *delivered); > > + void (*sync_pir_to_irr)(struct kvm_vcpu *vcpu); > > int (*set_tss_addr)(struct kvm *kvm, unsigned int addr); > > int (*get_tdp_level)(void); > > u64 (*get_mt_mask)(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio); > > diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h > > index 5c9dbad..ce8ac80 100644 > > --- a/arch/x86/include/asm/vmx.h > > +++ b/arch/x86/include/asm/vmx.h > > @@ -158,6 +158,7 @@ > > #define PIN_BASED_EXT_INTR_MASK 0x00000001 > > #define 
PIN_BASED_NMI_EXITING 0x00000008 > > #define PIN_BASED_VIRTUAL_NMIS 0x00000020 > > +#define PIN_BASED_POSTED_INTR 0x00000080 > > > > #define VM_EXIT_SAVE_DEBUG_CONTROLS 0x00000002 #define > > VM_EXIT_HOST_ADDR_SPACE_SIZE 0x00000200 @@ -180,6 +181,7 @@ > > /* VMCS Encodings */ enum vmcs_field { VIRTUAL_PROCESSOR_ID > > = 0x00000000, + POSTED_INTR_NV = 0x00000002, > > GUEST_ES_SELECTOR = 0x00000800, GUEST_CS_SELECTOR > > = 0x00000802, GUEST_SS_SELECTOR = 0x00000804, @@ > > -214,6 +216,8 @@ enum vmcs_field { VIRTUAL_APIC_PAGE_ADDR_HIGH = > > 0x00002013, APIC_ACCESS_ADDR = 0x00002014, APIC_ACCESS_ADDR_HIGH = > > 0x00002015, > > + POSTED_INTR_DESC_ADDR = 0x00002016, > > + POSTED_INTR_DESC_ADDR_HIGH = 0x00002017, > > EPT_POINTER = 0x0000201a, > > EPT_POINTER_HIGH = 0x0000201b, > > EOI_EXIT_BITMAP0 = 0x0000201c, > > diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S > > index 70641af..b409846 100644 > > --- a/arch/x86/kernel/entry_64.S > > +++ b/arch/x86/kernel/entry_64.S > > @@ -1177,6 +1177,11 @@ apicinterrupt LOCAL_TIMER_VECTOR \ > > apicinterrupt X86_PLATFORM_IPI_VECTOR \ > > x86_platform_ipi smp_x86_platform_ipi > > +#ifdef CONFIG_HAVE_KVM > > +apicinterrupt POSTED_INTR_VECTOR \ > > + kvm_posted_intr_ipi smp_posted_intr_ipi > > +#endif > > + > > apicinterrupt THRESHOLD_APIC_VECTOR \ > > threshold_interrupt smp_threshold_interrupt > > apicinterrupt THERMAL_APIC_VECTOR \ > > diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c > > index e4595f1..da74d65 100644 > > --- a/arch/x86/kernel/irq.c > > +++ b/arch/x86/kernel/irq.c > > @@ -228,6 +228,26 @@ void smp_x86_platform_ipi(struct pt_regs *regs) > > set_irq_regs(old_regs); > > } > > +#ifdef CONFIG_HAVE_KVM > > +/* > > + * Handler for POSTED_INTERRUPT_VECTOR. 
> > + */ > > +void smp_posted_intr_ipi(struct pt_regs *regs) > > +{ > > + struct pt_regs *old_regs = set_irq_regs(regs); > > + > > + ack_APIC_irq(); > > + > > + irq_enter(); > > + > > + exit_idle(); > > + > > + irq_exit(); > > + > > + set_irq_regs(old_regs); > > +} > > +#endif > > + > > EXPORT_SYMBOL_GPL(vector_used_by_percpu_irq); > > > > #ifdef CONFIG_HOTPLUG_CPU > > diff --git a/arch/x86/kernel/irqinit.c b/arch/x86/kernel/irqinit.c > > index 6e03b0d..2329a54 100644 > > --- a/arch/x86/kernel/irqinit.c > > +++ b/arch/x86/kernel/irqinit.c > > @@ -205,6 +205,10 @@ static void __init apic_intr_init(void) > > > > /* IPI for X86 platform specific use */ > > alloc_intr_gate(X86_PLATFORM_IPI_VECTOR, x86_platform_ipi); > > +#ifdef CONFIG_HAVE_KVM + /* IPI for KVM to deliver posted interrupt */ > > + alloc_intr_gate(POSTED_INTR_VECTOR, kvm_posted_intr_ipi); +#endif > > > > /* IPI vectors for APIC spurious and error interrupts */ > > alloc_intr_gate(SPURIOUS_APIC_VECTOR, spurious_interrupt); > > diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c > > index 02b51dd..ebc32bb 100644 > > --- a/arch/x86/kvm/lapic.c > > +++ b/arch/x86/kvm/lapic.c > > @@ -357,6 +357,12 @@ static u8 count_vectors(void *bitmap) > > return count; > > } > > +int kvm_apic_test_irr(int vec, struct kvm_lapic *apic) > > +{ > > + return apic_test_vector(vec, apic->regs + APIC_IRR); > > +} > > +EXPORT_SYMBOL_GPL(kvm_apic_test_irr); > > + > > static inline int apic_test_and_set_irr(int vec, struct kvm_lapic > > *apic) { apic->irr_pending = true; > > @@ -379,6 +385,7 @@ static inline int apic_find_highest_irr(struct kvm_lapic > > *apic) > > if (!apic->irr_pending) > > return -1; > > + kvm_x86_ops->sync_pir_to_irr(apic->vcpu); > > result = apic_search_irr(apic); > > ASSERT(result == -1 || result >= 16); > > @@ -685,6 +692,7 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int > > delivery_mode, > > { > > int result = 0; > > struct kvm_vcpu *vcpu = apic->vcpu; > > + bool delivered = false; > > > > 
switch (delivery_mode) { > > case APIC_DM_LOWEST: > > @@ -700,7 +708,10 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int > > delivery_mode, > > } else > > apic_clear_vector(vector, apic->regs + APIC_TMR); > > - result = !apic_test_and_set_irr(vector, apic); > > + if (!kvm_x86_ops->deliver_posted_interrupt(vcpu, vector, > > + &result, &delivered)) > > + result = !apic_test_and_set_irr(vector, apic); > > + > > trace_kvm_apic_accept_irq(vcpu->vcpu_id, delivery_mode, > > trig_mode, vector, !result); > > if (!result) { > > @@ -710,8 +721,10 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int > > delivery_mode, > > break; > > } > > - kvm_make_request(KVM_REQ_EVENT, vcpu); - kvm_vcpu_kick(vcpu); + if > > (!delivered) { + kvm_make_request(KVM_REQ_EVENT, vcpu); > > + kvm_vcpu_kick(vcpu); + } > > break; > > > > case APIC_DM_REMRD: > > diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h > > index 1676d34..1a7016c 100644 > > --- a/arch/x86/kvm/lapic.h > > +++ b/arch/x86/kvm/lapic.h > > @@ -157,5 +157,6 @@ static inline u16 apic_logical_id(struct kvm_apic_map > > *map, u32 ldr) > > void kvm_calculate_eoi_exitmap(struct kvm_vcpu *vcpu, > > struct kvm_lapic_irq *irq, > > u64 *eoi_bitmap); > > +int kvm_apic_test_irr(int vec, struct kvm_lapic *apic); > > > > #endif > > diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c > > index a7d60d7..9e705e3 100644 > > --- a/arch/x86/kvm/svm.c > > +++ b/arch/x86/kvm/svm.c > > @@ -3591,6 +3591,17 @@ static void svm_hwapic_isr_update(struct kvm *kvm, > > int isr) > > return; > > } > > +static void svm_sync_pir_to_irr(struct kvm_vcpu *vcpu) > > +{ > > + return; > > +} > > + > > +static bool svm_deliver_posted_interrupt(struct kvm_vcpu *vcpu, > > + int vector, int *result, bool *delivered) > > +{ > > + return false; > > +} > > + > > static int svm_nmi_allowed(struct kvm_vcpu *vcpu) { struct vcpu_svm > > *svm = to_svm(vcpu); @@ -4319,6 +4330,8 @@ static struct kvm_x86_ops > > svm_x86_ops = { .vm_has_apicv = 
svm_vm_has_apicv, .load_eoi_exitmap = > > svm_load_eoi_exitmap, .hwapic_isr_update = svm_hwapic_isr_update, > > + .sync_pir_to_irr = svm_sync_pir_to_irr, > > + .deliver_posted_interrupt = svm_deliver_posted_interrupt, > > > > .set_tss_addr = svm_set_tss_addr, > > .get_tdp_level = get_npt_level, > > diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c > > index 436b134..2fdf537 100644 > > --- a/arch/x86/kvm/vmx.c > > +++ b/arch/x86/kvm/vmx.c > > @@ -84,7 +84,8 @@ module_param(vmm_exclusive, bool, S_IRUGO); > > static bool __read_mostly fasteoi = 1; > > module_param(fasteoi, bool, S_IRUGO); > > -static bool __read_mostly enable_apicv_reg_vid; > > +static bool __read_mostly enable_apicv = 1; > > +module_param(enable_apicv, bool, S_IRUGO); > > > > /* > > * If nested=1, nested virtualization is supported, i.e., guests may use > > @@ -365,6 +366,36 @@ struct nested_vmx { > > struct page *apic_access_page; > > }; > > +#define POSTED_INTR_ON 0 +/* Posted-Interrupt Descriptor */ +struct > > pi_desc { + u32 pir[8]; /* Posted interrupt requested */ + union { > > + struct { + u8 on:1, + rsvd:7; + } control; + u32 rsvd[8]; > > + } u; +} __aligned(64); + +static bool pi_test_and_set_on(struct > > pi_desc *pi_desc) +{ + return test_and_set_bit(POSTED_INTR_ON, > > + (unsigned long *)&pi_desc->u.control); +} + +static bool > > pi_test_and_clear_on(struct pi_desc *pi_desc) +{ + return > > test_and_clear_bit(POSTED_INTR_ON, + (unsigned long > > *)&pi_desc->u.control); +} + +static int pi_test_and_set_pir(int vector, > > struct pi_desc *pi_desc) +{ + return test_and_set_bit(vector, (unsigned > > long *)pi_desc->pir); +} + > > struct vcpu_vmx { > > struct kvm_vcpu vcpu; > > unsigned long host_rsp; > > @@ -429,6 +460,9 @@ struct vcpu_vmx { > > > > bool rdtscp_enabled; > > + /* Posted interrupt descriptor */ > > + struct pi_desc pi_desc; > > + > > /* Support for a guest hypervisor (nested VMX) */ > > struct nested_vmx nested; > > }; > > @@ -783,6 +817,18 @@ static inline bool > > 
cpu_has_vmx_virtual_intr_delivery(void) > > SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY; > > } > > +static inline bool cpu_has_vmx_posted_intr(void) +{ + return > > vmcs_config.pin_based_exec_ctrl & PIN_BASED_POSTED_INTR; +} + +static > > inline bool cpu_has_vmx_apicv(void) +{ + return > > cpu_has_vmx_apic_register_virt() && > > + cpu_has_vmx_virtual_intr_delivery() && + cpu_has_vmx_posted_intr(); > > +} + > > static inline bool cpu_has_vmx_flexpriority(void) > > { > > return cpu_has_vmx_tpr_shadow() && > > @@ -2530,12 +2576,6 @@ static __init int setup_vmcs_config(struct vmcs_config > > *vmcs_conf) > > u32 _vmexit_control = 0; > > u32 _vmentry_control = 0; > > - min = PIN_BASED_EXT_INTR_MASK | PIN_BASED_NMI_EXITING; > > - opt = PIN_BASED_VIRTUAL_NMIS; > > - if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_PINBASED_CTLS, > > - &_pin_based_exec_control) < 0) > > - return -EIO; > > - > > min = CPU_BASED_HLT_EXITING | > > #ifdef CONFIG_X86_64 > > CPU_BASED_CR8_LOAD_EXITING | > > @@ -2612,6 +2652,17 @@ static __init int setup_vmcs_config(struct vmcs_config > > *vmcs_conf) > > &_vmexit_control) < 0) > > return -EIO; > > + min = PIN_BASED_EXT_INTR_MASK | PIN_BASED_NMI_EXITING; > > + opt = PIN_BASED_VIRTUAL_NMIS | PIN_BASED_POSTED_INTR; > > + if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_PINBASED_CTLS, > > + &_pin_based_exec_control) < 0) > > + return -EIO; > > + > > + if (!(_cpu_based_2nd_exec_control & > > + SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY) || > > + !(_vmexit_control & VM_EXIT_ACK_INTR_ON_EXIT)) > > + _pin_based_exec_control &= ~PIN_BASED_POSTED_INTR; > > + > > min = 0; opt = VM_ENTRY_LOAD_IA32_PAT; if (adjust_vmx_controls(min, > > opt, MSR_IA32_VMX_ENTRY_CTLS, @@ -2790,11 +2841,10 @@ static __init int > > hardware_setup(void) if (!cpu_has_vmx_ple()) ple_gap = 0; > > - if (!cpu_has_vmx_apic_register_virt() || > > - !cpu_has_vmx_virtual_intr_delivery()) - enable_apicv_reg_vid = 0; > > + if (!cpu_has_vmx_apicv()) + enable_apicv = 0; > > > > - if (enable_apicv_reg_vid) > > + 
if (enable_apicv) > > kvm_x86_ops->update_cr8_intercept = NULL; > > else > > kvm_x86_ops->hwapic_irr_update = NULL; > > @@ -3871,6 +3921,62 @@ static void > > vmx_disable_intercept_msr_write_x2apic(u32 msr) > > msr, MSR_TYPE_W); > > } > > +static int vmx_vm_has_apicv(struct kvm *kvm) +{ + return enable_apicv > > && irqchip_in_kernel(kvm); +} + +static bool > > vmx_deliver_posted_interrupt(struct kvm_vcpu *vcpu, + int vector, int > > *result, bool *delivered) +{ + struct vcpu_vmx *vmx = to_vmx(vcpu); + > > + if (!vmx_vm_has_apicv(vcpu->kvm)) + return false; + + if > > (kvm_apic_test_irr(vector, vcpu->arch.apic)) + goto out; + else { > > + *result = !pi_test_and_set_pir(vector, &vmx->pi_desc); + if > > (!*result) + goto out; + } + + if (!pi_test_and_set_on(&vmx->pi_desc) > > && + (vcpu->mode == IN_GUEST_MODE)) { > > + kvm_make_request(KVM_REQ_EVENT, vcpu); > > + apic->send_IPI_mask(get_cpu_mask(vcpu->cpu), + > > POSTED_INTR_VECTOR); + *delivered = true; + } +out: + return true; +} + > > +static void vmx_sync_pir_to_irr(struct kvm_vcpu *vcpu) +{ + struct > > vcpu_vmx *vmx = to_vmx(vcpu); + struct kvm_lapic *apic = > > vcpu->arch.apic; + unsigned int i, old, new, ret_val, irr_offset, > > pir_val; + + if (!vmx_vm_has_apicv(vcpu->kvm) || > > + !pi_test_and_clear_on(&vmx->pi_desc)) + return; + + for (i = 0; i > > <= 7; i++) { + pir_val = xchg(&vmx->pi_desc.pir[i], 0); + if (pir_val) > > { + irr_offset = APIC_IRR + i * 0x10; + do { + old = > > kvm_apic_get_reg(apic, irr_offset); + new = old | pir_val; > > + ret_val = cmpxchg((u32 *)(apic->regs + + irr_offset), old, > > new); + } while (unlikely(ret_val != old)); + } + } +} + > > /* > > * Set up the vmcs's constant host-state fields, i.e., host-state fields that > > * will not change in the lifetime of the guest. 
> > @@ -3931,6 +4037,15 @@ static void set_cr4_guest_host_mask(struct vcpu_vmx > > *vmx) > > vmcs_writel(CR4_GUEST_HOST_MASK, > > ~vmx->vcpu.arch.cr4_guest_owned_bits); } > > +static u32 vmx_pin_based_exec_ctrl(struct vcpu_vmx *vmx) > > +{ > > + u32 pin_based_exec_ctrl = vmcs_config.pin_based_exec_ctrl; > > + > > + if (!vmx_vm_has_apicv(vmx->vcpu.kvm)) > > + pin_based_exec_ctrl &= ~PIN_BASED_POSTED_INTR; > > + return pin_based_exec_ctrl; > > +} > > + > > static u32 vmx_exec_control(struct vcpu_vmx *vmx) { u32 exec_control = > > vmcs_config.cpu_based_exec_ctrl; @@ -3948,11 +4063,6 @@ static u32 > > vmx_exec_control(struct vcpu_vmx *vmx) return exec_control; } > > -static int vmx_vm_has_apicv(struct kvm *kvm) > > -{ > > - return enable_apicv_reg_vid && irqchip_in_kernel(kvm); > > -} > > - > > static u32 vmx_secondary_exec_control(struct vcpu_vmx *vmx) { u32 > > exec_control = vmcs_config.cpu_based_2nd_exec_ctrl; @@ -4008,8 +4118,7 > > @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx) > > vmcs_write64(VMCS_LINK_POINTER, -1ull); /* 22.3.1.5 */ > > > > /* Control */ > > - vmcs_write32(PIN_BASED_VM_EXEC_CONTROL, > > - vmcs_config.pin_based_exec_ctrl); > > + vmcs_write32(PIN_BASED_VM_EXEC_CONTROL, vmx_pin_based_exec_ctrl(vmx)); > > > > vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, > > vmx_exec_control(vmx)); > > > > @@ -4018,13 +4127,16 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx) > > vmx_secondary_exec_control(vmx)); > > } > > - if (enable_apicv_reg_vid) { > > + if (vmx_vm_has_apicv(vmx->vcpu.kvm)) { > > vmcs_write64(EOI_EXIT_BITMAP0, 0); > > vmcs_write64(EOI_EXIT_BITMAP1, 0); > > vmcs_write64(EOI_EXIT_BITMAP2, 0); > > vmcs_write64(EOI_EXIT_BITMAP3, 0); > > > > vmcs_write16(GUEST_INTR_STATUS, 0); > > + > > + vmcs_write64(POSTED_INTR_NV, POSTED_INTR_VECTOR); > > + vmcs_write64(POSTED_INTR_DESC_ADDR, __pa((&vmx->pi_desc))); > > } > > > > if (ple_gap) { @@ -4174,6 +4286,9 @@ static int vmx_vcpu_reset(struct > > kvm_vcpu *vcpu) vmcs_write64(APIC_ACCESS_ADDR, > > 
page_to_phys(vmx->vcpu.kvm->arch.apic_access_page)); > > + if (vmx_vm_has_apicv(vcpu->kvm)) > > + memset(&vmx->pi_desc, 0, sizeof(struct pi_desc)); > > + > > if (vmx->vpid != 0) > > vmcs_write16(VIRTUAL_PROCESSOR_ID, vmx->vpid); > > @@ -7650,6 +7765,8 @@ static struct kvm_x86_ops vmx_x86_ops = { > > .load_eoi_exitmap = vmx_load_eoi_exitmap, > > .hwapic_irr_update = vmx_hwapic_irr_update, > > .hwapic_isr_update = vmx_hwapic_isr_update, > > + .sync_pir_to_irr = vmx_sync_pir_to_irr, > > + .deliver_posted_interrupt = vmx_deliver_posted_interrupt, > > > > .set_tss_addr = vmx_set_tss_addr, .get_tdp_level = get_ept_level, @@ > > -7753,7 +7870,7 @@ static int __init vmx_init(void) > > memcpy(vmx_msr_bitmap_longmode_x2apic, vmx_msr_bitmap_longmode, > > PAGE_SIZE); > > - if (enable_apicv_reg_vid) { > > + if (enable_apicv) { > > for (msr = 0x800; msr <= 0x8ff; msr++) > > vmx_disable_intercept_msr_read_x2apic(msr); > > diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c > > index f1fa37e..62f8c94 100644 > > --- a/arch/x86/kvm/x86.c > > +++ b/arch/x86/kvm/x86.c > > @@ -2679,6 +2679,7 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) > > static int kvm_vcpu_ioctl_get_lapic(struct kvm_vcpu *vcpu, > > struct kvm_lapic_state *s) { + kvm_x86_ops->sync_pir_to_irr(vcpu); > > memcpy(s->regs, vcpu->arch.apic->regs, sizeof *s); > > > > return 0; > > -- > > 1.7.1 > > > Best regards, > Yang > -- Gleb. -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
diff --git a/arch/x86/include/asm/entry_arch.h b/arch/x86/include/asm/entry_arch.h index 40afa00..9bd4eca 100644 --- a/arch/x86/include/asm/entry_arch.h +++ b/arch/x86/include/asm/entry_arch.h @@ -19,6 +19,10 @@ BUILD_INTERRUPT(reboot_interrupt,REBOOT_VECTOR) BUILD_INTERRUPT(x86_platform_ipi, X86_PLATFORM_IPI_VECTOR) +#ifdef CONFIG_HAVE_KVM +BUILD_INTERRUPT(kvm_posted_intr_ipi, POSTED_INTR_VECTOR) +#endif + /* * every pentium local APIC has two 'local interrupts', with a * soft-definable vector attached to both interrupts, one of diff --git a/arch/x86/include/asm/hw_irq.h b/arch/x86/include/asm/hw_irq.h index eb92a6e..cebef02 100644 --- a/arch/x86/include/asm/hw_irq.h +++ b/arch/x86/include/asm/hw_irq.h @@ -28,6 +28,7 @@ /* Interrupt handlers registered during init_IRQ */ extern void apic_timer_interrupt(void); extern void x86_platform_ipi(void); +extern void kvm_posted_intr_ipi(void); extern void error_interrupt(void); extern void irq_work_interrupt(void); diff --git a/arch/x86/include/asm/irq_vectors.h b/arch/x86/include/asm/irq_vectors.h index 1508e51..774dc9f 100644 --- a/arch/x86/include/asm/irq_vectors.h +++ b/arch/x86/include/asm/irq_vectors.h @@ -102,6 +102,11 @@ */ #define X86_PLATFORM_IPI_VECTOR 0xf7 +/* Vector for KVM to deliver posted interrupt IPI */ +#ifdef CONFIG_HAVE_KVM +#define POSTED_INTR_VECTOR 0xf2 +#endif + /* * IRQ work vector: */ diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index b8388e9..79da55e 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -704,6 +704,9 @@ struct kvm_x86_ops { void (*hwapic_isr_update)(struct kvm *kvm, int isr); void (*load_eoi_exitmap)(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap); void (*set_virtual_x2apic_mode)(struct kvm_vcpu *vcpu, bool set); + bool (*deliver_posted_interrupt)(struct kvm_vcpu *vcpu, int vector, + int *result, bool *delivered); + void (*sync_pir_to_irr)(struct kvm_vcpu *vcpu); int (*set_tss_addr)(struct kvm *kvm, unsigned int 
addr); int (*get_tdp_level)(void); u64 (*get_mt_mask)(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio); diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h index 5c9dbad..ce8ac80 100644 --- a/arch/x86/include/asm/vmx.h +++ b/arch/x86/include/asm/vmx.h @@ -158,6 +158,7 @@ #define PIN_BASED_EXT_INTR_MASK 0x00000001 #define PIN_BASED_NMI_EXITING 0x00000008 #define PIN_BASED_VIRTUAL_NMIS 0x00000020 +#define PIN_BASED_POSTED_INTR 0x00000080 #define VM_EXIT_SAVE_DEBUG_CONTROLS 0x00000002 #define VM_EXIT_HOST_ADDR_SPACE_SIZE 0x00000200 @@ -180,6 +181,7 @@ /* VMCS Encodings */ enum vmcs_field { VIRTUAL_PROCESSOR_ID = 0x00000000, + POSTED_INTR_NV = 0x00000002, GUEST_ES_SELECTOR = 0x00000800, GUEST_CS_SELECTOR = 0x00000802, GUEST_SS_SELECTOR = 0x00000804, @@ -214,6 +216,8 @@ enum vmcs_field { VIRTUAL_APIC_PAGE_ADDR_HIGH = 0x00002013, APIC_ACCESS_ADDR = 0x00002014, APIC_ACCESS_ADDR_HIGH = 0x00002015, + POSTED_INTR_DESC_ADDR = 0x00002016, + POSTED_INTR_DESC_ADDR_HIGH = 0x00002017, EPT_POINTER = 0x0000201a, EPT_POINTER_HIGH = 0x0000201b, EOI_EXIT_BITMAP0 = 0x0000201c, diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index 70641af..b409846 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S @@ -1177,6 +1177,11 @@ apicinterrupt LOCAL_TIMER_VECTOR \ apicinterrupt X86_PLATFORM_IPI_VECTOR \ x86_platform_ipi smp_x86_platform_ipi +#ifdef CONFIG_HAVE_KVM +apicinterrupt POSTED_INTR_VECTOR \ + kvm_posted_intr_ipi smp_posted_intr_ipi +#endif + apicinterrupt THRESHOLD_APIC_VECTOR \ threshold_interrupt smp_threshold_interrupt apicinterrupt THERMAL_APIC_VECTOR \ diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c index e4595f1..da74d65 100644 --- a/arch/x86/kernel/irq.c +++ b/arch/x86/kernel/irq.c @@ -228,6 +228,26 @@ void smp_x86_platform_ipi(struct pt_regs *regs) set_irq_regs(old_regs); } +#ifdef CONFIG_HAVE_KVM +/* + * Handler for POSTED_INTR_VECTOR. 
+ */ +void smp_posted_intr_ipi(struct pt_regs *regs) +{ + struct pt_regs *old_regs = set_irq_regs(regs); + + ack_APIC_irq(); + + irq_enter(); + + exit_idle(); + + irq_exit(); + + set_irq_regs(old_regs); +} +#endif + EXPORT_SYMBOL_GPL(vector_used_by_percpu_irq); #ifdef CONFIG_HOTPLUG_CPU diff --git a/arch/x86/kernel/irqinit.c b/arch/x86/kernel/irqinit.c index 6e03b0d..2329a54 100644 --- a/arch/x86/kernel/irqinit.c +++ b/arch/x86/kernel/irqinit.c @@ -205,6 +205,10 @@ static void __init apic_intr_init(void) /* IPI for X86 platform specific use */ alloc_intr_gate(X86_PLATFORM_IPI_VECTOR, x86_platform_ipi); +#ifdef CONFIG_HAVE_KVM + /* IPI for KVM to deliver posted interrupt */ + alloc_intr_gate(POSTED_INTR_VECTOR, kvm_posted_intr_ipi); +#endif /* IPI vectors for APIC spurious and error interrupts */ alloc_intr_gate(SPURIOUS_APIC_VECTOR, spurious_interrupt); diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index 02b51dd..ebc32bb 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -357,6 +357,12 @@ static u8 count_vectors(void *bitmap) return count; } +int kvm_apic_test_irr(int vec, struct kvm_lapic *apic) +{ + return apic_test_vector(vec, apic->regs + APIC_IRR); +} +EXPORT_SYMBOL_GPL(kvm_apic_test_irr); + static inline int apic_test_and_set_irr(int vec, struct kvm_lapic *apic) { apic->irr_pending = true; @@ -379,6 +385,7 @@ static inline int apic_find_highest_irr(struct kvm_lapic *apic) if (!apic->irr_pending) return -1; + kvm_x86_ops->sync_pir_to_irr(apic->vcpu); result = apic_search_irr(apic); ASSERT(result == -1 || result >= 16); @@ -685,6 +692,7 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode, { int result = 0; struct kvm_vcpu *vcpu = apic->vcpu; + bool delivered = false; switch (delivery_mode) { case APIC_DM_LOWEST: @@ -700,7 +708,10 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode, } else apic_clear_vector(vector, apic->regs + APIC_TMR); - result = !apic_test_and_set_irr(vector, apic); + if 
(!kvm_x86_ops->deliver_posted_interrupt(vcpu, vector, + &result, &delivered)) + result = !apic_test_and_set_irr(vector, apic); + trace_kvm_apic_accept_irq(vcpu->vcpu_id, delivery_mode, trig_mode, vector, !result); if (!result) { @@ -710,8 +721,10 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode, break; } - kvm_make_request(KVM_REQ_EVENT, vcpu); - kvm_vcpu_kick(vcpu); + if (!delivered) { + kvm_make_request(KVM_REQ_EVENT, vcpu); + kvm_vcpu_kick(vcpu); + } break; case APIC_DM_REMRD: diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h index 1676d34..1a7016c 100644 --- a/arch/x86/kvm/lapic.h +++ b/arch/x86/kvm/lapic.h @@ -157,5 +157,6 @@ static inline u16 apic_logical_id(struct kvm_apic_map *map, u32 ldr) void kvm_calculate_eoi_exitmap(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq, u64 *eoi_bitmap); +int kvm_apic_test_irr(int vec, struct kvm_lapic *apic); #endif diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index a7d60d7..9e705e3 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -3591,6 +3591,17 @@ static void svm_hwapic_isr_update(struct kvm *kvm, int isr) return; } +static void svm_sync_pir_to_irr(struct kvm_vcpu *vcpu) +{ + return; +} + +static bool svm_deliver_posted_interrupt(struct kvm_vcpu *vcpu, + int vector, int *result, bool *delivered) +{ + return false; +} + static int svm_nmi_allowed(struct kvm_vcpu *vcpu) { struct vcpu_svm *svm = to_svm(vcpu); @@ -4319,6 +4330,8 @@ static struct kvm_x86_ops svm_x86_ops = { .vm_has_apicv = svm_vm_has_apicv, .load_eoi_exitmap = svm_load_eoi_exitmap, .hwapic_isr_update = svm_hwapic_isr_update, + .sync_pir_to_irr = svm_sync_pir_to_irr, + .deliver_posted_interrupt = svm_deliver_posted_interrupt, .set_tss_addr = svm_set_tss_addr, .get_tdp_level = get_npt_level, diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 436b134..2fdf537 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -84,7 +84,8 @@ module_param(vmm_exclusive, bool, S_IRUGO); static bool __read_mostly 
fasteoi = 1; module_param(fasteoi, bool, S_IRUGO); -static bool __read_mostly enable_apicv_reg_vid; +static bool __read_mostly enable_apicv = 1; +module_param(enable_apicv, bool, S_IRUGO); /* * If nested=1, nested virtualization is supported, i.e., guests may use @@ -365,6 +366,36 @@ struct nested_vmx { struct page *apic_access_page; }; +#define POSTED_INTR_ON 0 +/* Posted-Interrupt Descriptor */ +struct pi_desc { + u32 pir[8]; /* Posted interrupt requested */ + union { + struct { + u8 on:1, + rsvd:7; + } control; + u32 rsvd[8]; + } u; +} __aligned(64); + +static bool pi_test_and_set_on(struct pi_desc *pi_desc) +{ + return test_and_set_bit(POSTED_INTR_ON, + (unsigned long *)&pi_desc->u.control); +} + +static bool pi_test_and_clear_on(struct pi_desc *pi_desc) +{ + return test_and_clear_bit(POSTED_INTR_ON, + (unsigned long *)&pi_desc->u.control); +} + +static int pi_test_and_set_pir(int vector, struct pi_desc *pi_desc) +{ + return test_and_set_bit(vector, (unsigned long *)pi_desc->pir); +} + struct vcpu_vmx { struct kvm_vcpu vcpu; unsigned long host_rsp; @@ -429,6 +460,9 @@ struct vcpu_vmx { bool rdtscp_enabled; + /* Posted interrupt descriptor */ + struct pi_desc pi_desc; + /* Support for a guest hypervisor (nested VMX) */ struct nested_vmx nested; }; @@ -783,6 +817,18 @@ static inline bool cpu_has_vmx_virtual_intr_delivery(void) SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY; } +static inline bool cpu_has_vmx_posted_intr(void) +{ + return vmcs_config.pin_based_exec_ctrl & PIN_BASED_POSTED_INTR; +} + +static inline bool cpu_has_vmx_apicv(void) +{ + return cpu_has_vmx_apic_register_virt() && + cpu_has_vmx_virtual_intr_delivery() && + cpu_has_vmx_posted_intr(); +} + static inline bool cpu_has_vmx_flexpriority(void) { return cpu_has_vmx_tpr_shadow() && @@ -2530,12 +2576,6 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf) u32 _vmexit_control = 0; u32 _vmentry_control = 0; - min = PIN_BASED_EXT_INTR_MASK | PIN_BASED_NMI_EXITING; - opt = PIN_BASED_VIRTUAL_NMIS; 
- if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_PINBASED_CTLS, - &_pin_based_exec_control) < 0) - return -EIO; - min = CPU_BASED_HLT_EXITING | #ifdef CONFIG_X86_64 CPU_BASED_CR8_LOAD_EXITING | @@ -2612,6 +2652,17 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf) &_vmexit_control) < 0) return -EIO; + min = PIN_BASED_EXT_INTR_MASK | PIN_BASED_NMI_EXITING; + opt = PIN_BASED_VIRTUAL_NMIS | PIN_BASED_POSTED_INTR; + if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_PINBASED_CTLS, + &_pin_based_exec_control) < 0) + return -EIO; + + if (!(_cpu_based_2nd_exec_control & + SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY) || + !(_vmexit_control & VM_EXIT_ACK_INTR_ON_EXIT)) + _pin_based_exec_control &= ~PIN_BASED_POSTED_INTR; + min = 0; opt = VM_ENTRY_LOAD_IA32_PAT; if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_ENTRY_CTLS, @@ -2790,11 +2841,10 @@ static __init int hardware_setup(void) if (!cpu_has_vmx_ple()) ple_gap = 0; - if (!cpu_has_vmx_apic_register_virt() || - !cpu_has_vmx_virtual_intr_delivery()) - enable_apicv_reg_vid = 0; + if (!cpu_has_vmx_apicv()) + enable_apicv = 0; - if (enable_apicv_reg_vid) + if (enable_apicv) kvm_x86_ops->update_cr8_intercept = NULL; else kvm_x86_ops->hwapic_irr_update = NULL; @@ -3871,6 +3921,62 @@ static void vmx_disable_intercept_msr_write_x2apic(u32 msr) msr, MSR_TYPE_W); } +static int vmx_vm_has_apicv(struct kvm *kvm) +{ + return enable_apicv && irqchip_in_kernel(kvm); +} + +static bool vmx_deliver_posted_interrupt(struct kvm_vcpu *vcpu, + int vector, int *result, bool *delivered) +{ + struct vcpu_vmx *vmx = to_vmx(vcpu); + + if (!vmx_vm_has_apicv(vcpu->kvm)) + return false; + + if (kvm_apic_test_irr(vector, vcpu->arch.apic)) + goto out; + else { + *result = !pi_test_and_set_pir(vector, &vmx->pi_desc); + if (!*result) + goto out; + } + + if (!pi_test_and_set_on(&vmx->pi_desc) && + (vcpu->mode == IN_GUEST_MODE)) { + kvm_make_request(KVM_REQ_EVENT, vcpu); + apic->send_IPI_mask(get_cpu_mask(vcpu->cpu), + POSTED_INTR_VECTOR); + 
*delivered = true; + } +out: + return true; +} + +static void vmx_sync_pir_to_irr(struct kvm_vcpu *vcpu) +{ + struct vcpu_vmx *vmx = to_vmx(vcpu); + struct kvm_lapic *apic = vcpu->arch.apic; + unsigned int i, old, new, ret_val, irr_offset, pir_val; + + if (!vmx_vm_has_apicv(vcpu->kvm) || + !pi_test_and_clear_on(&vmx->pi_desc)) + return; + + for (i = 0; i <= 7; i++) { + pir_val = xchg(&vmx->pi_desc.pir[i], 0); + if (pir_val) { + irr_offset = APIC_IRR + i * 0x10; + do { + old = kvm_apic_get_reg(apic, irr_offset); + new = old | pir_val; + ret_val = cmpxchg((u32 *)(apic->regs + + irr_offset), old, new); + } while (unlikely(ret_val != old)); + } + } +} + /* * Set up the vmcs's constant host-state fields, i.e., host-state fields that * will not change in the lifetime of the guest. @@ -3931,6 +4037,15 @@ static void set_cr4_guest_host_mask(struct vcpu_vmx *vmx) vmcs_writel(CR4_GUEST_HOST_MASK, ~vmx->vcpu.arch.cr4_guest_owned_bits); } +static u32 vmx_pin_based_exec_ctrl(struct vcpu_vmx *vmx) +{ + u32 pin_based_exec_ctrl = vmcs_config.pin_based_exec_ctrl; + + if (!vmx_vm_has_apicv(vmx->vcpu.kvm)) + pin_based_exec_ctrl &= ~PIN_BASED_POSTED_INTR; + return pin_based_exec_ctrl; +} + static u32 vmx_exec_control(struct vcpu_vmx *vmx) { u32 exec_control = vmcs_config.cpu_based_exec_ctrl; @@ -3948,11 +4063,6 @@ static u32 vmx_exec_control(struct vcpu_vmx *vmx) return exec_control; } -static int vmx_vm_has_apicv(struct kvm *kvm) -{ - return enable_apicv_reg_vid && irqchip_in_kernel(kvm); -} - static u32 vmx_secondary_exec_control(struct vcpu_vmx *vmx) { u32 exec_control = vmcs_config.cpu_based_2nd_exec_ctrl; @@ -4008,8 +4118,7 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx) vmcs_write64(VMCS_LINK_POINTER, -1ull); /* 22.3.1.5 */ /* Control */ - vmcs_write32(PIN_BASED_VM_EXEC_CONTROL, - vmcs_config.pin_based_exec_ctrl); + vmcs_write32(PIN_BASED_VM_EXEC_CONTROL, vmx_pin_based_exec_ctrl(vmx)); vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, vmx_exec_control(vmx)); @@ -4018,13 +4127,16 @@ 
static int vmx_vcpu_setup(struct vcpu_vmx *vmx) vmx_secondary_exec_control(vmx)); } - if (enable_apicv_reg_vid) { + if (vmx_vm_has_apicv(vmx->vcpu.kvm)) { vmcs_write64(EOI_EXIT_BITMAP0, 0); vmcs_write64(EOI_EXIT_BITMAP1, 0); vmcs_write64(EOI_EXIT_BITMAP2, 0); vmcs_write64(EOI_EXIT_BITMAP3, 0); vmcs_write16(GUEST_INTR_STATUS, 0); + + vmcs_write64(POSTED_INTR_NV, POSTED_INTR_VECTOR); + vmcs_write64(POSTED_INTR_DESC_ADDR, __pa((&vmx->pi_desc))); } if (ple_gap) { @@ -4174,6 +4286,9 @@ static int vmx_vcpu_reset(struct kvm_vcpu *vcpu) vmcs_write64(APIC_ACCESS_ADDR, page_to_phys(vmx->vcpu.kvm->arch.apic_access_page)); + if (vmx_vm_has_apicv(vcpu->kvm)) + memset(&vmx->pi_desc, 0, sizeof(struct pi_desc)); + if (vmx->vpid != 0) vmcs_write16(VIRTUAL_PROCESSOR_ID, vmx->vpid); @@ -7650,6 +7765,8 @@ static struct kvm_x86_ops vmx_x86_ops = { .load_eoi_exitmap = vmx_load_eoi_exitmap, .hwapic_irr_update = vmx_hwapic_irr_update, .hwapic_isr_update = vmx_hwapic_isr_update, + .sync_pir_to_irr = vmx_sync_pir_to_irr, + .deliver_posted_interrupt = vmx_deliver_posted_interrupt, .set_tss_addr = vmx_set_tss_addr, .get_tdp_level = get_ept_level, @@ -7753,7 +7870,7 @@ static int __init vmx_init(void) memcpy(vmx_msr_bitmap_longmode_x2apic, vmx_msr_bitmap_longmode, PAGE_SIZE); - if (enable_apicv_reg_vid) { + if (enable_apicv) { for (msr = 0x800; msr <= 0x8ff; msr++) vmx_disable_intercept_msr_read_x2apic(msr); diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index f1fa37e..62f8c94 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -2679,6 +2679,7 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) static int kvm_vcpu_ioctl_get_lapic(struct kvm_vcpu *vcpu, struct kvm_lapic_state *s) { + kvm_x86_ops->sync_pir_to_irr(vcpu); memcpy(s->regs, vcpu->arch.apic->regs, sizeof *s); return 0;