diff mbox

[v3,2/2] KVM: VMX: Add Posted Interrupt supporting

Message ID 1361281183-22759-3-git-send-email-yang.z.zhang@intel.com (mailing list archive)
State New, archived
Headers show

Commit Message

Zhang, Yang Z Feb. 19, 2013, 1:39 p.m. UTC
From: Yang Zhang <yang.z.zhang@Intel.com>

Posted Interrupt allows APIC interrupts to inject into guest directly
without any vmexit.

- When delivering an interrupt to guest, if target vcpu is running,
  update Posted-interrupt requests bitmap and send a notification event
  to the vcpu. Then the vcpu will handle this interrupt automatically,
  without any software involvement.

- If target vcpu is not running or there is already a notification event
  pending in the vcpu, do nothing. The interrupt will be handled by the
  next vm entry.

Signed-off-by: Yang Zhang <yang.z.zhang@Intel.com>
---
 arch/x86/include/asm/entry_arch.h  |    4 +
 arch/x86/include/asm/hw_irq.h      |    1 +
 arch/x86/include/asm/irq_vectors.h |    5 +
 arch/x86/include/asm/kvm_host.h    |    3 +
 arch/x86/include/asm/vmx.h         |    4 +
 arch/x86/kernel/entry_64.S         |    5 +
 arch/x86/kernel/irq.c              |   20 +++++
 arch/x86/kernel/irqinit.c          |    4 +
 arch/x86/kvm/lapic.c               |   19 ++++-
 arch/x86/kvm/lapic.h               |    1 +
 arch/x86/kvm/svm.c                 |   13 +++
 arch/x86/kvm/vmx.c                 |  157 +++++++++++++++++++++++++++++++-----
 arch/x86/kvm/x86.c                 |    1 +
 13 files changed, 214 insertions(+), 23 deletions(-)

Comments

Zhang, Yang Z Feb. 21, 2013, 6:04 a.m. UTC | #1
Hi Marcelo,

Can you help to review this patch? Many thanks if you can review it quickly.

Zhang, Yang Z wrote on 2013-02-19:
> From: Yang Zhang <yang.z.zhang@Intel.com>
> 
> Posted Interrupt allows APIC interrupts to inject into guest directly
> without any vmexit.
> 
> - When delivering a interrupt to guest, if target vcpu is running,
>   update Posted-interrupt requests bitmap and send a notification event
>   to the vcpu. Then the vcpu will handle this interrupt automatically,
>   without any software involvemnt.
> - If target vcpu is not running or there already a notification event
>   pending in the vcpu, do nothing. The interrupt will be handled by
>   next vm entry
> Signed-off-by: Yang Zhang <yang.z.zhang@Intel.com>
> ---
>  arch/x86/include/asm/entry_arch.h  |    4 +
>  arch/x86/include/asm/hw_irq.h      |    1 +
>  arch/x86/include/asm/irq_vectors.h |    5 +
>  arch/x86/include/asm/kvm_host.h    |    3 + arch/x86/include/asm/vmx.h 
>         |    4 + arch/x86/kernel/entry_64.S         |    5 +
>  arch/x86/kernel/irq.c              |   20 +++++
>  arch/x86/kernel/irqinit.c          |    4 + arch/x86/kvm/lapic.c       
>         |   19 ++++- arch/x86/kvm/lapic.h               |    1 +
>  arch/x86/kvm/svm.c                 |   13 +++ arch/x86/kvm/vmx.c       
>           |  157 +++++++++++++++++++++++++++++++----- arch/x86/kvm/x86.c
>                  |    1 + 13 files changed, 214 insertions(+), 23
>  deletions(-)
> diff --git a/arch/x86/include/asm/entry_arch.h
> b/arch/x86/include/asm/entry_arch.h index 40afa00..9bd4eca 100644 ---
> a/arch/x86/include/asm/entry_arch.h +++
> b/arch/x86/include/asm/entry_arch.h @@ -19,6 +19,10 @@
> BUILD_INTERRUPT(reboot_interrupt,REBOOT_VECTOR)
> 
>  BUILD_INTERRUPT(x86_platform_ipi, X86_PLATFORM_IPI_VECTOR)
> +#ifdef CONFIG_HAVE_KVM +BUILD_INTERRUPT(kvm_posted_intr_ipi,
> POSTED_INTR_VECTOR) +#endif +
>  /*
>   * every pentium local APIC has two 'local interrupts', with a
>   * soft-definable vector attached to both interrupts, one of
> diff --git a/arch/x86/include/asm/hw_irq.h b/arch/x86/include/asm/hw_irq.h
> index eb92a6e..cebef02 100644
> --- a/arch/x86/include/asm/hw_irq.h
> +++ b/arch/x86/include/asm/hw_irq.h
> @@ -28,6 +28,7 @@
>  /* Interrupt handlers registered during init_IRQ */ extern void
>  apic_timer_interrupt(void); extern void x86_platform_ipi(void); +extern
>  void kvm_posted_intr_ipi(void); extern void error_interrupt(void);
>  extern void irq_work_interrupt(void);
> diff --git a/arch/x86/include/asm/irq_vectors.h
> b/arch/x86/include/asm/irq_vectors.h index 1508e51..774dc9f 100644 ---
> a/arch/x86/include/asm/irq_vectors.h +++
> b/arch/x86/include/asm/irq_vectors.h @@ -102,6 +102,11 @@
>   */
>  #define X86_PLATFORM_IPI_VECTOR		0xf7
> +/* Vector for KVM to deliver posted interrupt IPI */
> +#ifdef CONFIG_HAVE_KVM
> +#define POSTED_INTR_VECTOR		0xf2
> +#endif
> +
>  /*
>   * IRQ work vector:
>   */
> diff --git a/arch/x86/include/asm/kvm_host.h
> b/arch/x86/include/asm/kvm_host.h index b8388e9..79da55e 100644 ---
> a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h
> @@ -704,6 +704,9 @@ struct kvm_x86_ops {
>  	void (*hwapic_isr_update)(struct kvm *kvm, int isr);
>  	void (*load_eoi_exitmap)(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap);
>  	void (*set_virtual_x2apic_mode)(struct kvm_vcpu *vcpu, bool set);
> +	bool (*deliver_posted_interrupt)(struct kvm_vcpu *vcpu, int vector,
> +					int *result, bool *delivered);
> +	void (*sync_pir_to_irr)(struct kvm_vcpu *vcpu);
>  	int (*set_tss_addr)(struct kvm *kvm, unsigned int addr);
>  	int (*get_tdp_level)(void);
>  	u64 (*get_mt_mask)(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio);
> diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h
> index 5c9dbad..ce8ac80 100644
> --- a/arch/x86/include/asm/vmx.h
> +++ b/arch/x86/include/asm/vmx.h
> @@ -158,6 +158,7 @@
>  #define PIN_BASED_EXT_INTR_MASK                 0x00000001
>  #define PIN_BASED_NMI_EXITING                   0x00000008
>  #define PIN_BASED_VIRTUAL_NMIS                  0x00000020
> +#define PIN_BASED_POSTED_INTR                   0x00000080
> 
>  #define VM_EXIT_SAVE_DEBUG_CONTROLS             0x00000002 #define
>  VM_EXIT_HOST_ADDR_SPACE_SIZE            0x00000200 @@ -180,6 +181,7 @@
>  /* VMCS Encodings */ enum vmcs_field { 	VIRTUAL_PROCESSOR_ID           
>  = 0x00000000, +	POSTED_INTR_NV                  = 0x00000002,
>  	GUEST_ES_SELECTOR               = 0x00000800, 	GUEST_CS_SELECTOR      
>          = 0x00000802, 	GUEST_SS_SELECTOR               = 0x00000804, @@
>  -214,6 +216,8 @@ enum vmcs_field { 	VIRTUAL_APIC_PAGE_ADDR_HIGH     =
>  0x00002013, 	APIC_ACCESS_ADDR		= 0x00002014, 	APIC_ACCESS_ADDR_HIGH		=
>  0x00002015,
> +	POSTED_INTR_DESC_ADDR           = 0x00002016,
> +	POSTED_INTR_DESC_ADDR_HIGH      = 0x00002017,
>  	EPT_POINTER                     = 0x0000201a,
>  	EPT_POINTER_HIGH                = 0x0000201b,
>  	EOI_EXIT_BITMAP0                = 0x0000201c,
> diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
> index 70641af..b409846 100644
> --- a/arch/x86/kernel/entry_64.S
> +++ b/arch/x86/kernel/entry_64.S
> @@ -1177,6 +1177,11 @@ apicinterrupt LOCAL_TIMER_VECTOR \
>  apicinterrupt X86_PLATFORM_IPI_VECTOR \
>  	x86_platform_ipi smp_x86_platform_ipi
> +#ifdef CONFIG_HAVE_KVM
> +apicinterrupt POSTED_INTR_VECTOR \
> +	kvm_posted_intr_ipi smp_posted_intr_ipi
> +#endif
> +
>  apicinterrupt THRESHOLD_APIC_VECTOR \
>  	threshold_interrupt smp_threshold_interrupt
>  apicinterrupt THERMAL_APIC_VECTOR \
> diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c
> index e4595f1..da74d65 100644
> --- a/arch/x86/kernel/irq.c
> +++ b/arch/x86/kernel/irq.c
> @@ -228,6 +228,26 @@ void smp_x86_platform_ipi(struct pt_regs *regs)
>  	set_irq_regs(old_regs);
>  }
> +#ifdef CONFIG_HAVE_KVM
> +/*
> + * Handler for POSTED_INTERRUPT_VECTOR.
> + */
> +void smp_posted_intr_ipi(struct pt_regs *regs)
> +{
> +	struct pt_regs *old_regs = set_irq_regs(regs);
> +
> +	ack_APIC_irq();
> +
> +	irq_enter();
> +
> +	exit_idle();
> +
> +	irq_exit();
> +
> +	set_irq_regs(old_regs);
> +}
> +#endif
> +
>  EXPORT_SYMBOL_GPL(vector_used_by_percpu_irq);
>  
>  #ifdef CONFIG_HOTPLUG_CPU
> diff --git a/arch/x86/kernel/irqinit.c b/arch/x86/kernel/irqinit.c
> index 6e03b0d..2329a54 100644
> --- a/arch/x86/kernel/irqinit.c
> +++ b/arch/x86/kernel/irqinit.c
> @@ -205,6 +205,10 @@ static void __init apic_intr_init(void)
> 
>  	/* IPI for X86 platform specific use */
>  	alloc_intr_gate(X86_PLATFORM_IPI_VECTOR, x86_platform_ipi);
> +#ifdef CONFIG_HAVE_KVM +	/* IPI for KVM to deliver posted interrupt */
> +	alloc_intr_gate(POSTED_INTR_VECTOR, kvm_posted_intr_ipi); +#endif
> 
>  	/* IPI vectors for APIC spurious and error interrupts */
>  	alloc_intr_gate(SPURIOUS_APIC_VECTOR, spurious_interrupt);
> diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
> index 02b51dd..ebc32bb 100644
> --- a/arch/x86/kvm/lapic.c
> +++ b/arch/x86/kvm/lapic.c
> @@ -357,6 +357,12 @@ static u8 count_vectors(void *bitmap)
>  	return count;
>  }
> +int kvm_apic_test_irr(int vec, struct kvm_lapic *apic)
> +{
> +	return apic_test_vector(vec, apic->regs + APIC_IRR);
> +}
> +EXPORT_SYMBOL_GPL(kvm_apic_test_irr);
> +
>  static inline int apic_test_and_set_irr(int vec, struct kvm_lapic
>  *apic) { 	apic->irr_pending = true;
> @@ -379,6 +385,7 @@ static inline int apic_find_highest_irr(struct kvm_lapic
> *apic)
>  	if (!apic->irr_pending)
>  		return -1;
> +	kvm_x86_ops->sync_pir_to_irr(apic->vcpu);
>  	result = apic_search_irr(apic);
>  	ASSERT(result == -1 || result >= 16);
> @@ -685,6 +692,7 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int
> delivery_mode,
>  {
>  	int result = 0;
>  	struct kvm_vcpu *vcpu = apic->vcpu;
> +	bool delivered = false;
> 
>  	switch (delivery_mode) {
>  	case APIC_DM_LOWEST:
> @@ -700,7 +708,10 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int
> delivery_mode,
>  		} else
>  			apic_clear_vector(vector, apic->regs + APIC_TMR);
> -		result = !apic_test_and_set_irr(vector, apic);
> +		if (!kvm_x86_ops->deliver_posted_interrupt(vcpu, vector,
> +						&result, &delivered))
> +			result = !apic_test_and_set_irr(vector, apic);
> +
>  		trace_kvm_apic_accept_irq(vcpu->vcpu_id, delivery_mode,
>  					  trig_mode, vector, !result);
>  		if (!result) {
> @@ -710,8 +721,10 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int
> delivery_mode,
>  			break;
>  		}
> -		kvm_make_request(KVM_REQ_EVENT, vcpu); -		kvm_vcpu_kick(vcpu); +		if
> (!delivered) { +			kvm_make_request(KVM_REQ_EVENT, vcpu);
> +			kvm_vcpu_kick(vcpu); +		}
>  		break;
>  
>  	case APIC_DM_REMRD:
> diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h
> index 1676d34..1a7016c 100644
> --- a/arch/x86/kvm/lapic.h
> +++ b/arch/x86/kvm/lapic.h
> @@ -157,5 +157,6 @@ static inline u16 apic_logical_id(struct kvm_apic_map
> *map, u32 ldr)
>  void kvm_calculate_eoi_exitmap(struct kvm_vcpu *vcpu,
>  				struct kvm_lapic_irq *irq,
>  				u64 *eoi_bitmap);
> +int kvm_apic_test_irr(int vec, struct kvm_lapic *apic);
> 
>  #endif
> diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
> index a7d60d7..9e705e3 100644
> --- a/arch/x86/kvm/svm.c
> +++ b/arch/x86/kvm/svm.c
> @@ -3591,6 +3591,17 @@ static void svm_hwapic_isr_update(struct kvm *kvm,
> int isr)
>  	return;
>  }
> +static void svm_sync_pir_to_irr(struct kvm_vcpu *vcpu)
> +{
> +	return;
> +}
> +
> +static bool svm_deliver_posted_interrupt(struct kvm_vcpu *vcpu,
> +		int vector, int *result, bool *delivered)
> +{
> +	return false;
> +}
> +
>  static int svm_nmi_allowed(struct kvm_vcpu *vcpu) { 	struct vcpu_svm
>  *svm = to_svm(vcpu); @@ -4319,6 +4330,8 @@ static struct kvm_x86_ops
>  svm_x86_ops = { 	.vm_has_apicv = svm_vm_has_apicv, 	.load_eoi_exitmap =
>  svm_load_eoi_exitmap, 	.hwapic_isr_update = svm_hwapic_isr_update,
> +	.sync_pir_to_irr = svm_sync_pir_to_irr,
> +	.deliver_posted_interrupt = svm_deliver_posted_interrupt,
> 
>  	.set_tss_addr = svm_set_tss_addr,
>  	.get_tdp_level = get_npt_level,
> diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
> index 436b134..2fdf537 100644
> --- a/arch/x86/kvm/vmx.c
> +++ b/arch/x86/kvm/vmx.c
> @@ -84,7 +84,8 @@ module_param(vmm_exclusive, bool, S_IRUGO);
>  static bool __read_mostly fasteoi = 1;
>  module_param(fasteoi, bool, S_IRUGO);
> -static bool __read_mostly enable_apicv_reg_vid;
> +static bool __read_mostly enable_apicv = 1;
> +module_param(enable_apicv, bool, S_IRUGO);
> 
>  /*
>   * If nested=1, nested virtualization is supported, i.e., guests may use
> @@ -365,6 +366,36 @@ struct nested_vmx {
>  	struct page *apic_access_page;
>  };
> +#define POSTED_INTR_ON  0 +/* Posted-Interrupt Descriptor */ +struct
> pi_desc { +	u32 pir[8];     /* Posted interrupt requested */ +	union {
> +		struct { +			u8  on:1, +			    rsvd:7; +		} control; +		u32 rsvd[8];
> +	} u; +} __aligned(64); + +static bool pi_test_and_set_on(struct
> pi_desc *pi_desc) +{ +	return test_and_set_bit(POSTED_INTR_ON,
> +			(unsigned long *)&pi_desc->u.control); +} + +static bool
> pi_test_and_clear_on(struct pi_desc *pi_desc) +{ +	return
> test_and_clear_bit(POSTED_INTR_ON, +			(unsigned long
> *)&pi_desc->u.control); +} + +static int pi_test_and_set_pir(int vector,
> struct pi_desc *pi_desc) +{ +	return test_and_set_bit(vector, (unsigned
> long *)pi_desc->pir); +} +
>  struct vcpu_vmx {
>  	struct kvm_vcpu       vcpu;
>  	unsigned long         host_rsp;
> @@ -429,6 +460,9 @@ struct vcpu_vmx {
> 
>  	bool rdtscp_enabled;
> +	/* Posted interrupt descriptor */
> +	struct pi_desc pi_desc;
> +
>  	/* Support for a guest hypervisor (nested VMX) */
>  	struct nested_vmx nested;
>  };
> @@ -783,6 +817,18 @@ static inline bool
> cpu_has_vmx_virtual_intr_delivery(void)
>  		SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY;
>  }
> +static inline bool cpu_has_vmx_posted_intr(void) +{ +	return
> vmcs_config.pin_based_exec_ctrl & PIN_BASED_POSTED_INTR; +} + +static
> inline bool cpu_has_vmx_apicv(void) +{ +	return
> cpu_has_vmx_apic_register_virt() &&
> +		cpu_has_vmx_virtual_intr_delivery() && +		cpu_has_vmx_posted_intr();
> +} +
>  static inline bool cpu_has_vmx_flexpriority(void)
>  {
>  	return cpu_has_vmx_tpr_shadow() &&
> @@ -2530,12 +2576,6 @@ static __init int setup_vmcs_config(struct vmcs_config
> *vmcs_conf)
>  	u32 _vmexit_control = 0;
>  	u32 _vmentry_control = 0;
> -	min = PIN_BASED_EXT_INTR_MASK | PIN_BASED_NMI_EXITING;
> -	opt = PIN_BASED_VIRTUAL_NMIS;
> -	if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_PINBASED_CTLS,
> -				&_pin_based_exec_control) < 0)
> -		return -EIO;
> -
>  	min = CPU_BASED_HLT_EXITING |
>  #ifdef CONFIG_X86_64
>  	      CPU_BASED_CR8_LOAD_EXITING |
> @@ -2612,6 +2652,17 @@ static __init int setup_vmcs_config(struct vmcs_config
> *vmcs_conf)
>  				&_vmexit_control) < 0)
>  		return -EIO;
> +	min = PIN_BASED_EXT_INTR_MASK | PIN_BASED_NMI_EXITING;
> +	opt = PIN_BASED_VIRTUAL_NMIS | PIN_BASED_POSTED_INTR;
> +	if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_PINBASED_CTLS,
> +				&_pin_based_exec_control) < 0)
> +		return -EIO;
> +
> +	if (!(_cpu_based_2nd_exec_control &
> +		SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY) ||
> +		!(_vmexit_control & VM_EXIT_ACK_INTR_ON_EXIT))
> +		_pin_based_exec_control &= ~PIN_BASED_POSTED_INTR;
> +
>  	min = 0; 	opt = VM_ENTRY_LOAD_IA32_PAT; 	if (adjust_vmx_controls(min,
>  opt, MSR_IA32_VMX_ENTRY_CTLS, @@ -2790,11 +2841,10 @@ static __init int
>  hardware_setup(void) 	if (!cpu_has_vmx_ple()) 		ple_gap = 0;
> -	if (!cpu_has_vmx_apic_register_virt() ||
> -				!cpu_has_vmx_virtual_intr_delivery()) -		enable_apicv_reg_vid = 0;
> +	if (!cpu_has_vmx_apicv()) +		enable_apicv = 0;
> 
> -	if (enable_apicv_reg_vid)
> +	if (enable_apicv)
>  		kvm_x86_ops->update_cr8_intercept = NULL;
>  	else
>  		kvm_x86_ops->hwapic_irr_update = NULL;
> @@ -3871,6 +3921,62 @@ static void
> vmx_disable_intercept_msr_write_x2apic(u32 msr)
>  			msr, MSR_TYPE_W);
>  }
> +static int vmx_vm_has_apicv(struct kvm *kvm) +{ +	return enable_apicv
> && irqchip_in_kernel(kvm); +} + +static bool
> vmx_deliver_posted_interrupt(struct kvm_vcpu *vcpu, +		int vector, int
> *result, bool *delivered) +{ +	struct vcpu_vmx *vmx = to_vmx(vcpu); +
> +	if (!vmx_vm_has_apicv(vcpu->kvm)) +		return false; + +	if
> (kvm_apic_test_irr(vector, vcpu->arch.apic)) +		goto out; +	else {
> +		*result = !pi_test_and_set_pir(vector, &vmx->pi_desc); +		if
> (!*result) +			goto out; +	} + +	if (!pi_test_and_set_on(&vmx->pi_desc)
> && +			(vcpu->mode == IN_GUEST_MODE)) {
> +		kvm_make_request(KVM_REQ_EVENT, vcpu);
> +		apic->send_IPI_mask(get_cpu_mask(vcpu->cpu), +				   
> POSTED_INTR_VECTOR); +		*delivered = true; +	} +out: +	return true; +} +
> +static void vmx_sync_pir_to_irr(struct kvm_vcpu *vcpu) +{ +	struct
> vcpu_vmx *vmx = to_vmx(vcpu); +	struct kvm_lapic *apic =
> vcpu->arch.apic; +	unsigned int i, old, new, ret_val, irr_offset,
> pir_val; + +	if (!vmx_vm_has_apicv(vcpu->kvm) ||
> +			!pi_test_and_clear_on(&vmx->pi_desc)) +		return; + +	for (i = 0; i
> <= 7; i++) { +		pir_val = xchg(&vmx->pi_desc.pir[i], 0); +		if (pir_val)
> { +			irr_offset = APIC_IRR + i * 0x10; +			do { +				old =
> kvm_apic_get_reg(apic, irr_offset); +				new = old | pir_val;
> +				ret_val = cmpxchg((u32 *)(apic->regs + +						irr_offset), old,
> new); +			} while (unlikely(ret_val != old)); +		} +	} +} +
>  /*
>   * Set up the vmcs's constant host-state fields, i.e., host-state fields that
>   * will not change in the lifetime of the guest.
> @@ -3931,6 +4037,15 @@ static void set_cr4_guest_host_mask(struct vcpu_vmx
> *vmx)
>  	vmcs_writel(CR4_GUEST_HOST_MASK,
>  ~vmx->vcpu.arch.cr4_guest_owned_bits); }
> +static u32 vmx_pin_based_exec_ctrl(struct vcpu_vmx *vmx)
> +{
> +	u32 pin_based_exec_ctrl = vmcs_config.pin_based_exec_ctrl;
> +
> +	if (!vmx_vm_has_apicv(vmx->vcpu.kvm))
> +		pin_based_exec_ctrl &= ~PIN_BASED_POSTED_INTR;
> +	return pin_based_exec_ctrl;
> +}
> +
>  static u32 vmx_exec_control(struct vcpu_vmx *vmx) { 	u32 exec_control =
>  vmcs_config.cpu_based_exec_ctrl; @@ -3948,11 +4063,6 @@ static u32
>  vmx_exec_control(struct vcpu_vmx *vmx) 	return exec_control; }
> -static int vmx_vm_has_apicv(struct kvm *kvm)
> -{
> -	return enable_apicv_reg_vid && irqchip_in_kernel(kvm);
> -}
> -
>  static u32 vmx_secondary_exec_control(struct vcpu_vmx *vmx) { 	u32
>  exec_control = vmcs_config.cpu_based_2nd_exec_ctrl; @@ -4008,8 +4118,7
>  @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx)
>  	vmcs_write64(VMCS_LINK_POINTER, -1ull); /* 22.3.1.5 */
>  
>  	/* Control */
> -	vmcs_write32(PIN_BASED_VM_EXEC_CONTROL,
> -		vmcs_config.pin_based_exec_ctrl);
> +	vmcs_write32(PIN_BASED_VM_EXEC_CONTROL, vmx_pin_based_exec_ctrl(vmx));
> 
>  	vmcs_write32(CPU_BASED_VM_EXEC_CONTROL,
> vmx_exec_control(vmx));
> 
> @@ -4018,13 +4127,16 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx)
>  				vmx_secondary_exec_control(vmx));
>  	}
> -	if (enable_apicv_reg_vid) {
> +	if (vmx_vm_has_apicv(vmx->vcpu.kvm)) {
>  		vmcs_write64(EOI_EXIT_BITMAP0, 0);
>  		vmcs_write64(EOI_EXIT_BITMAP1, 0);
>  		vmcs_write64(EOI_EXIT_BITMAP2, 0);
>  		vmcs_write64(EOI_EXIT_BITMAP3, 0);
>  
>  		vmcs_write16(GUEST_INTR_STATUS, 0);
> +
> +		vmcs_write64(POSTED_INTR_NV, POSTED_INTR_VECTOR);
> +		vmcs_write64(POSTED_INTR_DESC_ADDR, __pa((&vmx->pi_desc)));
>  	}
>  
>  	if (ple_gap) { @@ -4174,6 +4286,9 @@ static int vmx_vcpu_reset(struct
>  kvm_vcpu *vcpu) 		vmcs_write64(APIC_ACCESS_ADDR, 			    
>  page_to_phys(vmx->vcpu.kvm->arch.apic_access_page));
> +	if (vmx_vm_has_apicv(vcpu->kvm))
> +		memset(&vmx->pi_desc, 0, sizeof(struct pi_desc));
> +
>  	if (vmx->vpid != 0)
>  		vmcs_write16(VIRTUAL_PROCESSOR_ID, vmx->vpid);
> @@ -7650,6 +7765,8 @@ static struct kvm_x86_ops vmx_x86_ops = {
>  	.load_eoi_exitmap = vmx_load_eoi_exitmap,
>  	.hwapic_irr_update = vmx_hwapic_irr_update,
>  	.hwapic_isr_update = vmx_hwapic_isr_update,
> +	.sync_pir_to_irr = vmx_sync_pir_to_irr,
> +	.deliver_posted_interrupt = vmx_deliver_posted_interrupt,
> 
>  	.set_tss_addr = vmx_set_tss_addr, 	.get_tdp_level = get_ept_level, @@
>  -7753,7 +7870,7 @@ static int __init vmx_init(void)
>  	memcpy(vmx_msr_bitmap_longmode_x2apic, 			vmx_msr_bitmap_longmode,
>  PAGE_SIZE);
> -	if (enable_apicv_reg_vid) {
> +	if (enable_apicv) {
>  		for (msr = 0x800; msr <= 0x8ff; msr++)
>  			vmx_disable_intercept_msr_read_x2apic(msr);
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index f1fa37e..62f8c94 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -2679,6 +2679,7 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
>  static int kvm_vcpu_ioctl_get_lapic(struct kvm_vcpu *vcpu, 				   
>  struct kvm_lapic_state *s) { +	kvm_x86_ops->sync_pir_to_irr(vcpu);
>  	memcpy(s->regs, vcpu->arch.apic->regs, sizeof *s);
>  
>  	return 0;
> --
> 1.7.1


Best regards,
Yang


--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Gleb Natapov Feb. 21, 2013, 6:22 a.m. UTC | #2
On Thu, Feb 21, 2013 at 06:04:52AM +0000, Zhang, Yang Z wrote:
> Hi Marcelo,
> 
> Can you help to review this patch? Many thanks if you can review it quickly.
> 
The patch is only 2 days on the list. Be patient.

> Zhang, Yang Z wrote on 2013-02-19:
> > From: Yang Zhang <yang.z.zhang@Intel.com>
> > 
> > Posted Interrupt allows APIC interrupts to inject into guest directly
> > without any vmexit.
> > 
> > - When delivering a interrupt to guest, if target vcpu is running,
> >   update Posted-interrupt requests bitmap and send a notification event
> >   to the vcpu. Then the vcpu will handle this interrupt automatically,
> >   without any software involvemnt.
> > - If target vcpu is not running or there already a notification event
> >   pending in the vcpu, do nothing. The interrupt will be handled by
> >   next vm entry
> > Signed-off-by: Yang Zhang <yang.z.zhang@Intel.com>
> > ---
> >  arch/x86/include/asm/entry_arch.h  |    4 +
> >  arch/x86/include/asm/hw_irq.h      |    1 +
> >  arch/x86/include/asm/irq_vectors.h |    5 +
> >  arch/x86/include/asm/kvm_host.h    |    3 + arch/x86/include/asm/vmx.h 
> >         |    4 + arch/x86/kernel/entry_64.S         |    5 +
> >  arch/x86/kernel/irq.c              |   20 +++++
> >  arch/x86/kernel/irqinit.c          |    4 + arch/x86/kvm/lapic.c       
> >         |   19 ++++- arch/x86/kvm/lapic.h               |    1 +
> >  arch/x86/kvm/svm.c                 |   13 +++ arch/x86/kvm/vmx.c       
> >           |  157 +++++++++++++++++++++++++++++++----- arch/x86/kvm/x86.c
> >                  |    1 + 13 files changed, 214 insertions(+), 23
> >  deletions(-)
> > diff --git a/arch/x86/include/asm/entry_arch.h
> > b/arch/x86/include/asm/entry_arch.h index 40afa00..9bd4eca 100644 ---
> > a/arch/x86/include/asm/entry_arch.h +++
> > b/arch/x86/include/asm/entry_arch.h @@ -19,6 +19,10 @@
> > BUILD_INTERRUPT(reboot_interrupt,REBOOT_VECTOR)
> > 
> >  BUILD_INTERRUPT(x86_platform_ipi, X86_PLATFORM_IPI_VECTOR)
> > +#ifdef CONFIG_HAVE_KVM +BUILD_INTERRUPT(kvm_posted_intr_ipi,
> > POSTED_INTR_VECTOR) +#endif +
> >  /*
> >   * every pentium local APIC has two 'local interrupts', with a
> >   * soft-definable vector attached to both interrupts, one of
> > diff --git a/arch/x86/include/asm/hw_irq.h b/arch/x86/include/asm/hw_irq.h
> > index eb92a6e..cebef02 100644
> > --- a/arch/x86/include/asm/hw_irq.h
> > +++ b/arch/x86/include/asm/hw_irq.h
> > @@ -28,6 +28,7 @@
> >  /* Interrupt handlers registered during init_IRQ */ extern void
> >  apic_timer_interrupt(void); extern void x86_platform_ipi(void); +extern
> >  void kvm_posted_intr_ipi(void); extern void error_interrupt(void);
> >  extern void irq_work_interrupt(void);
> > diff --git a/arch/x86/include/asm/irq_vectors.h
> > b/arch/x86/include/asm/irq_vectors.h index 1508e51..774dc9f 100644 ---
> > a/arch/x86/include/asm/irq_vectors.h +++
> > b/arch/x86/include/asm/irq_vectors.h @@ -102,6 +102,11 @@
> >   */
> >  #define X86_PLATFORM_IPI_VECTOR		0xf7
> > +/* Vector for KVM to deliver posted interrupt IPI */
> > +#ifdef CONFIG_HAVE_KVM
> > +#define POSTED_INTR_VECTOR		0xf2
> > +#endif
> > +
> >  /*
> >   * IRQ work vector:
> >   */
> > diff --git a/arch/x86/include/asm/kvm_host.h
> > b/arch/x86/include/asm/kvm_host.h index b8388e9..79da55e 100644 ---
> > a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h
> > @@ -704,6 +704,9 @@ struct kvm_x86_ops {
> >  	void (*hwapic_isr_update)(struct kvm *kvm, int isr);
> >  	void (*load_eoi_exitmap)(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap);
> >  	void (*set_virtual_x2apic_mode)(struct kvm_vcpu *vcpu, bool set);
> > +	bool (*deliver_posted_interrupt)(struct kvm_vcpu *vcpu, int vector,
> > +					int *result, bool *delivered);
> > +	void (*sync_pir_to_irr)(struct kvm_vcpu *vcpu);
> >  	int (*set_tss_addr)(struct kvm *kvm, unsigned int addr);
> >  	int (*get_tdp_level)(void);
> >  	u64 (*get_mt_mask)(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio);
> > diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h
> > index 5c9dbad..ce8ac80 100644
> > --- a/arch/x86/include/asm/vmx.h
> > +++ b/arch/x86/include/asm/vmx.h
> > @@ -158,6 +158,7 @@
> >  #define PIN_BASED_EXT_INTR_MASK                 0x00000001
> >  #define PIN_BASED_NMI_EXITING                   0x00000008
> >  #define PIN_BASED_VIRTUAL_NMIS                  0x00000020
> > +#define PIN_BASED_POSTED_INTR                   0x00000080
> > 
> >  #define VM_EXIT_SAVE_DEBUG_CONTROLS             0x00000002 #define
> >  VM_EXIT_HOST_ADDR_SPACE_SIZE            0x00000200 @@ -180,6 +181,7 @@
> >  /* VMCS Encodings */ enum vmcs_field { 	VIRTUAL_PROCESSOR_ID           
> >  = 0x00000000, +	POSTED_INTR_NV                  = 0x00000002,
> >  	GUEST_ES_SELECTOR               = 0x00000800, 	GUEST_CS_SELECTOR      
> >          = 0x00000802, 	GUEST_SS_SELECTOR               = 0x00000804, @@
> >  -214,6 +216,8 @@ enum vmcs_field { 	VIRTUAL_APIC_PAGE_ADDR_HIGH     =
> >  0x00002013, 	APIC_ACCESS_ADDR		= 0x00002014, 	APIC_ACCESS_ADDR_HIGH		=
> >  0x00002015,
> > +	POSTED_INTR_DESC_ADDR           = 0x00002016,
> > +	POSTED_INTR_DESC_ADDR_HIGH      = 0x00002017,
> >  	EPT_POINTER                     = 0x0000201a,
> >  	EPT_POINTER_HIGH                = 0x0000201b,
> >  	EOI_EXIT_BITMAP0                = 0x0000201c,
> > diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
> > index 70641af..b409846 100644
> > --- a/arch/x86/kernel/entry_64.S
> > +++ b/arch/x86/kernel/entry_64.S
> > @@ -1177,6 +1177,11 @@ apicinterrupt LOCAL_TIMER_VECTOR \
> >  apicinterrupt X86_PLATFORM_IPI_VECTOR \
> >  	x86_platform_ipi smp_x86_platform_ipi
> > +#ifdef CONFIG_HAVE_KVM
> > +apicinterrupt POSTED_INTR_VECTOR \
> > +	kvm_posted_intr_ipi smp_posted_intr_ipi
> > +#endif
> > +
> >  apicinterrupt THRESHOLD_APIC_VECTOR \
> >  	threshold_interrupt smp_threshold_interrupt
> >  apicinterrupt THERMAL_APIC_VECTOR \
> > diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c
> > index e4595f1..da74d65 100644
> > --- a/arch/x86/kernel/irq.c
> > +++ b/arch/x86/kernel/irq.c
> > @@ -228,6 +228,26 @@ void smp_x86_platform_ipi(struct pt_regs *regs)
> >  	set_irq_regs(old_regs);
> >  }
> > +#ifdef CONFIG_HAVE_KVM
> > +/*
> > + * Handler for POSTED_INTERRUPT_VECTOR.
> > + */
> > +void smp_posted_intr_ipi(struct pt_regs *regs)
> > +{
> > +	struct pt_regs *old_regs = set_irq_regs(regs);
> > +
> > +	ack_APIC_irq();
> > +
> > +	irq_enter();
> > +
> > +	exit_idle();
> > +
> > +	irq_exit();
> > +
> > +	set_irq_regs(old_regs);
> > +}
> > +#endif
> > +
> >  EXPORT_SYMBOL_GPL(vector_used_by_percpu_irq);
> >  
> >  #ifdef CONFIG_HOTPLUG_CPU
> > diff --git a/arch/x86/kernel/irqinit.c b/arch/x86/kernel/irqinit.c
> > index 6e03b0d..2329a54 100644
> > --- a/arch/x86/kernel/irqinit.c
> > +++ b/arch/x86/kernel/irqinit.c
> > @@ -205,6 +205,10 @@ static void __init apic_intr_init(void)
> > 
> >  	/* IPI for X86 platform specific use */
> >  	alloc_intr_gate(X86_PLATFORM_IPI_VECTOR, x86_platform_ipi);
> > +#ifdef CONFIG_HAVE_KVM +	/* IPI for KVM to deliver posted interrupt */
> > +	alloc_intr_gate(POSTED_INTR_VECTOR, kvm_posted_intr_ipi); +#endif
> > 
> >  	/* IPI vectors for APIC spurious and error interrupts */
> >  	alloc_intr_gate(SPURIOUS_APIC_VECTOR, spurious_interrupt);
> > diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
> > index 02b51dd..ebc32bb 100644
> > --- a/arch/x86/kvm/lapic.c
> > +++ b/arch/x86/kvm/lapic.c
> > @@ -357,6 +357,12 @@ static u8 count_vectors(void *bitmap)
> >  	return count;
> >  }
> > +int kvm_apic_test_irr(int vec, struct kvm_lapic *apic)
> > +{
> > +	return apic_test_vector(vec, apic->regs + APIC_IRR);
> > +}
> > +EXPORT_SYMBOL_GPL(kvm_apic_test_irr);
> > +
> >  static inline int apic_test_and_set_irr(int vec, struct kvm_lapic
> >  *apic) { 	apic->irr_pending = true;
> > @@ -379,6 +385,7 @@ static inline int apic_find_highest_irr(struct kvm_lapic
> > *apic)
> >  	if (!apic->irr_pending)
> >  		return -1;
> > +	kvm_x86_ops->sync_pir_to_irr(apic->vcpu);
> >  	result = apic_search_irr(apic);
> >  	ASSERT(result == -1 || result >= 16);
> > @@ -685,6 +692,7 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int
> > delivery_mode,
> >  {
> >  	int result = 0;
> >  	struct kvm_vcpu *vcpu = apic->vcpu;
> > +	bool delivered = false;
> > 
> >  	switch (delivery_mode) {
> >  	case APIC_DM_LOWEST:
> > @@ -700,7 +708,10 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int
> > delivery_mode,
> >  		} else
> >  			apic_clear_vector(vector, apic->regs + APIC_TMR);
> > -		result = !apic_test_and_set_irr(vector, apic);
> > +		if (!kvm_x86_ops->deliver_posted_interrupt(vcpu, vector,
> > +						&result, &delivered))
> > +			result = !apic_test_and_set_irr(vector, apic);
> > +
> >  		trace_kvm_apic_accept_irq(vcpu->vcpu_id, delivery_mode,
> >  					  trig_mode, vector, !result);
> >  		if (!result) {
> > @@ -710,8 +721,10 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int
> > delivery_mode,
> >  			break;
> >  		}
> > -		kvm_make_request(KVM_REQ_EVENT, vcpu); -		kvm_vcpu_kick(vcpu); +		if
> > (!delivered) { +			kvm_make_request(KVM_REQ_EVENT, vcpu);
> > +			kvm_vcpu_kick(vcpu); +		}
> >  		break;
> >  
> >  	case APIC_DM_REMRD:
> > diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h
> > index 1676d34..1a7016c 100644
> > --- a/arch/x86/kvm/lapic.h
> > +++ b/arch/x86/kvm/lapic.h
> > @@ -157,5 +157,6 @@ static inline u16 apic_logical_id(struct kvm_apic_map
> > *map, u32 ldr)
> >  void kvm_calculate_eoi_exitmap(struct kvm_vcpu *vcpu,
> >  				struct kvm_lapic_irq *irq,
> >  				u64 *eoi_bitmap);
> > +int kvm_apic_test_irr(int vec, struct kvm_lapic *apic);
> > 
> >  #endif
> > diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
> > index a7d60d7..9e705e3 100644
> > --- a/arch/x86/kvm/svm.c
> > +++ b/arch/x86/kvm/svm.c
> > @@ -3591,6 +3591,17 @@ static void svm_hwapic_isr_update(struct kvm *kvm,
> > int isr)
> >  	return;
> >  }
> > +static void svm_sync_pir_to_irr(struct kvm_vcpu *vcpu)
> > +{
> > +	return;
> > +}
> > +
> > +static bool svm_deliver_posted_interrupt(struct kvm_vcpu *vcpu,
> > +		int vector, int *result, bool *delivered)
> > +{
> > +	return false;
> > +}
> > +
> >  static int svm_nmi_allowed(struct kvm_vcpu *vcpu) { 	struct vcpu_svm
> >  *svm = to_svm(vcpu); @@ -4319,6 +4330,8 @@ static struct kvm_x86_ops
> >  svm_x86_ops = { 	.vm_has_apicv = svm_vm_has_apicv, 	.load_eoi_exitmap =
> >  svm_load_eoi_exitmap, 	.hwapic_isr_update = svm_hwapic_isr_update,
> > +	.sync_pir_to_irr = svm_sync_pir_to_irr,
> > +	.deliver_posted_interrupt = svm_deliver_posted_interrupt,
> > 
> >  	.set_tss_addr = svm_set_tss_addr,
> >  	.get_tdp_level = get_npt_level,
> > diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
> > index 436b134..2fdf537 100644
> > --- a/arch/x86/kvm/vmx.c
> > +++ b/arch/x86/kvm/vmx.c
> > @@ -84,7 +84,8 @@ module_param(vmm_exclusive, bool, S_IRUGO);
> >  static bool __read_mostly fasteoi = 1;
> >  module_param(fasteoi, bool, S_IRUGO);
> > -static bool __read_mostly enable_apicv_reg_vid;
> > +static bool __read_mostly enable_apicv = 1;
> > +module_param(enable_apicv, bool, S_IRUGO);
> > 
> >  /*
> >   * If nested=1, nested virtualization is supported, i.e., guests may use
> > @@ -365,6 +366,36 @@ struct nested_vmx {
> >  	struct page *apic_access_page;
> >  };
> > +#define POSTED_INTR_ON  0 +/* Posted-Interrupt Descriptor */ +struct
> > pi_desc { +	u32 pir[8];     /* Posted interrupt requested */ +	union {
> > +		struct { +			u8  on:1, +			    rsvd:7; +		} control; +		u32 rsvd[8];
> > +	} u; +} __aligned(64); + +static bool pi_test_and_set_on(struct
> > pi_desc *pi_desc) +{ +	return test_and_set_bit(POSTED_INTR_ON,
> > +			(unsigned long *)&pi_desc->u.control); +} + +static bool
> > pi_test_and_clear_on(struct pi_desc *pi_desc) +{ +	return
> > test_and_clear_bit(POSTED_INTR_ON, +			(unsigned long
> > *)&pi_desc->u.control); +} + +static int pi_test_and_set_pir(int vector,
> > struct pi_desc *pi_desc) +{ +	return test_and_set_bit(vector, (unsigned
> > long *)pi_desc->pir); +} +
> >  struct vcpu_vmx {
> >  	struct kvm_vcpu       vcpu;
> >  	unsigned long         host_rsp;
> > @@ -429,6 +460,9 @@ struct vcpu_vmx {
> > 
> >  	bool rdtscp_enabled;
> > +	/* Posted interrupt descriptor */
> > +	struct pi_desc pi_desc;
> > +
> >  	/* Support for a guest hypervisor (nested VMX) */
> >  	struct nested_vmx nested;
> >  };
> > @@ -783,6 +817,18 @@ static inline bool
> > cpu_has_vmx_virtual_intr_delivery(void)
> >  		SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY;
> >  }
> > +static inline bool cpu_has_vmx_posted_intr(void) +{ +	return
> > vmcs_config.pin_based_exec_ctrl & PIN_BASED_POSTED_INTR; +} + +static
> > inline bool cpu_has_vmx_apicv(void) +{ +	return
> > cpu_has_vmx_apic_register_virt() &&
> > +		cpu_has_vmx_virtual_intr_delivery() && +		cpu_has_vmx_posted_intr();
> > +} +
> >  static inline bool cpu_has_vmx_flexpriority(void)
> >  {
> >  	return cpu_has_vmx_tpr_shadow() &&
> > @@ -2530,12 +2576,6 @@ static __init int setup_vmcs_config(struct vmcs_config
> > *vmcs_conf)
> >  	u32 _vmexit_control = 0;
> >  	u32 _vmentry_control = 0;
> > -	min = PIN_BASED_EXT_INTR_MASK | PIN_BASED_NMI_EXITING;
> > -	opt = PIN_BASED_VIRTUAL_NMIS;
> > -	if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_PINBASED_CTLS,
> > -				&_pin_based_exec_control) < 0)
> > -		return -EIO;
> > -
> >  	min = CPU_BASED_HLT_EXITING |
> >  #ifdef CONFIG_X86_64
> >  	      CPU_BASED_CR8_LOAD_EXITING |
> > @@ -2612,6 +2652,17 @@ static __init int setup_vmcs_config(struct vmcs_config
> > *vmcs_conf)
> >  				&_vmexit_control) < 0)
> >  		return -EIO;
> > +	min = PIN_BASED_EXT_INTR_MASK | PIN_BASED_NMI_EXITING;
> > +	opt = PIN_BASED_VIRTUAL_NMIS | PIN_BASED_POSTED_INTR;
> > +	if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_PINBASED_CTLS,
> > +				&_pin_based_exec_control) < 0)
> > +		return -EIO;
> > +
> > +	if (!(_cpu_based_2nd_exec_control &
> > +		SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY) ||
> > +		!(_vmexit_control & VM_EXIT_ACK_INTR_ON_EXIT))
> > +		_pin_based_exec_control &= ~PIN_BASED_POSTED_INTR;
> > +
> >  	min = 0; 	opt = VM_ENTRY_LOAD_IA32_PAT; 	if (adjust_vmx_controls(min,
> >  opt, MSR_IA32_VMX_ENTRY_CTLS, @@ -2790,11 +2841,10 @@ static __init int
> >  hardware_setup(void) 	if (!cpu_has_vmx_ple()) 		ple_gap = 0;
> > -	if (!cpu_has_vmx_apic_register_virt() ||
> > -				!cpu_has_vmx_virtual_intr_delivery()) -		enable_apicv_reg_vid = 0;
> > +	if (!cpu_has_vmx_apicv()) +		enable_apicv = 0;
> > 
> > -	if (enable_apicv_reg_vid)
> > +	if (enable_apicv)
> >  		kvm_x86_ops->update_cr8_intercept = NULL;
> >  	else
> >  		kvm_x86_ops->hwapic_irr_update = NULL;
> > @@ -3871,6 +3921,62 @@ static void
> > vmx_disable_intercept_msr_write_x2apic(u32 msr)
> >  			msr, MSR_TYPE_W);
> >  }
> > +static int vmx_vm_has_apicv(struct kvm *kvm) +{ +	return enable_apicv
> > && irqchip_in_kernel(kvm); +} + +static bool
> > vmx_deliver_posted_interrupt(struct kvm_vcpu *vcpu, +		int vector, int
> > *result, bool *delivered) +{ +	struct vcpu_vmx *vmx = to_vmx(vcpu); +
> > +	if (!vmx_vm_has_apicv(vcpu->kvm)) +		return false; + +	if
> > (kvm_apic_test_irr(vector, vcpu->arch.apic)) +		goto out; +	else {
> > +		*result = !pi_test_and_set_pir(vector, &vmx->pi_desc); +		if
> > (!*result) +			goto out; +	} + +	if (!pi_test_and_set_on(&vmx->pi_desc)
> > && +			(vcpu->mode == IN_GUEST_MODE)) {
> > +		kvm_make_request(KVM_REQ_EVENT, vcpu);
> > +		apic->send_IPI_mask(get_cpu_mask(vcpu->cpu), +				   
> > POSTED_INTR_VECTOR); +		*delivered = true; +	} +out: +	return true; +} +
> > +static void vmx_sync_pir_to_irr(struct kvm_vcpu *vcpu) +{ +	struct
> > vcpu_vmx *vmx = to_vmx(vcpu); +	struct kvm_lapic *apic =
> > vcpu->arch.apic; +	unsigned int i, old, new, ret_val, irr_offset,
> > pir_val; + +	if (!vmx_vm_has_apicv(vcpu->kvm) ||
> > +			!pi_test_and_clear_on(&vmx->pi_desc)) +		return; + +	for (i = 0; i
> > <= 7; i++) { +		pir_val = xchg(&vmx->pi_desc.pir[i], 0); +		if (pir_val)
> > { +			irr_offset = APIC_IRR + i * 0x10; +			do { +				old =
> > kvm_apic_get_reg(apic, irr_offset); +				new = old | pir_val;
> > +				ret_val = cmpxchg((u32 *)(apic->regs + +						irr_offset), old,
> > new); +			} while (unlikely(ret_val != old)); +		} +	} +} +
> >  /*
> >   * Set up the vmcs's constant host-state fields, i.e., host-state fields that
> >   * will not change in the lifetime of the guest.
> > @@ -3931,6 +4037,15 @@ static void set_cr4_guest_host_mask(struct vcpu_vmx
> > *vmx)
> >  	vmcs_writel(CR4_GUEST_HOST_MASK,
> >  ~vmx->vcpu.arch.cr4_guest_owned_bits); }
> > +static u32 vmx_pin_based_exec_ctrl(struct vcpu_vmx *vmx)
> > +{
> > +	u32 pin_based_exec_ctrl = vmcs_config.pin_based_exec_ctrl;
> > +
> > +	if (!vmx_vm_has_apicv(vmx->vcpu.kvm))
> > +		pin_based_exec_ctrl &= ~PIN_BASED_POSTED_INTR;
> > +	return pin_based_exec_ctrl;
> > +}
> > +
> >  static u32 vmx_exec_control(struct vcpu_vmx *vmx) { 	u32 exec_control =
> >  vmcs_config.cpu_based_exec_ctrl; @@ -3948,11 +4063,6 @@ static u32
> >  vmx_exec_control(struct vcpu_vmx *vmx) 	return exec_control; }
> > -static int vmx_vm_has_apicv(struct kvm *kvm)
> > -{
> > -	return enable_apicv_reg_vid && irqchip_in_kernel(kvm);
> > -}
> > -
> >  static u32 vmx_secondary_exec_control(struct vcpu_vmx *vmx) { 	u32
> >  exec_control = vmcs_config.cpu_based_2nd_exec_ctrl; @@ -4008,8 +4118,7
> >  @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx)
> >  	vmcs_write64(VMCS_LINK_POINTER, -1ull); /* 22.3.1.5 */
> >  
> >  	/* Control */
> > -	vmcs_write32(PIN_BASED_VM_EXEC_CONTROL,
> > -		vmcs_config.pin_based_exec_ctrl);
> > +	vmcs_write32(PIN_BASED_VM_EXEC_CONTROL, vmx_pin_based_exec_ctrl(vmx));
> > 
> >  	vmcs_write32(CPU_BASED_VM_EXEC_CONTROL,
> > vmx_exec_control(vmx));
> > 
> > @@ -4018,13 +4127,16 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx)
> >  				vmx_secondary_exec_control(vmx));
> >  	}
> > -	if (enable_apicv_reg_vid) {
> > +	if (vmx_vm_has_apicv(vmx->vcpu.kvm)) {
> >  		vmcs_write64(EOI_EXIT_BITMAP0, 0);
> >  		vmcs_write64(EOI_EXIT_BITMAP1, 0);
> >  		vmcs_write64(EOI_EXIT_BITMAP2, 0);
> >  		vmcs_write64(EOI_EXIT_BITMAP3, 0);
> >  
> >  		vmcs_write16(GUEST_INTR_STATUS, 0);
> > +
> > +		vmcs_write64(POSTED_INTR_NV, POSTED_INTR_VECTOR);
> > +		vmcs_write64(POSTED_INTR_DESC_ADDR, __pa((&vmx->pi_desc)));
> >  	}
> >  
> >  	if (ple_gap) { @@ -4174,6 +4286,9 @@ static int vmx_vcpu_reset(struct
> >  kvm_vcpu *vcpu) 		vmcs_write64(APIC_ACCESS_ADDR, 			    
> >  page_to_phys(vmx->vcpu.kvm->arch.apic_access_page));
> > +	if (vmx_vm_has_apicv(vcpu->kvm))
> > +		memset(&vmx->pi_desc, 0, sizeof(struct pi_desc));
> > +
> >  	if (vmx->vpid != 0)
> >  		vmcs_write16(VIRTUAL_PROCESSOR_ID, vmx->vpid);
> > @@ -7650,6 +7765,8 @@ static struct kvm_x86_ops vmx_x86_ops = {
> >  	.load_eoi_exitmap = vmx_load_eoi_exitmap,
> >  	.hwapic_irr_update = vmx_hwapic_irr_update,
> >  	.hwapic_isr_update = vmx_hwapic_isr_update,
> > +	.sync_pir_to_irr = vmx_sync_pir_to_irr,
> > +	.deliver_posted_interrupt = vmx_deliver_posted_interrupt,
> > 
> >  	.set_tss_addr = vmx_set_tss_addr, 	.get_tdp_level = get_ept_level, @@
> >  -7753,7 +7870,7 @@ static int __init vmx_init(void)
> >  	memcpy(vmx_msr_bitmap_longmode_x2apic, 			vmx_msr_bitmap_longmode,
> >  PAGE_SIZE);
> > -	if (enable_apicv_reg_vid) {
> > +	if (enable_apicv) {
> >  		for (msr = 0x800; msr <= 0x8ff; msr++)
> >  			vmx_disable_intercept_msr_read_x2apic(msr);
> > diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> > index f1fa37e..62f8c94 100644
> > --- a/arch/x86/kvm/x86.c
> > +++ b/arch/x86/kvm/x86.c
> > @@ -2679,6 +2679,7 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
> >  static int kvm_vcpu_ioctl_get_lapic(struct kvm_vcpu *vcpu, 				   
> >  struct kvm_lapic_state *s) { +	kvm_x86_ops->sync_pir_to_irr(vcpu);
> >  	memcpy(s->regs, vcpu->arch.apic->regs, sizeof *s);
> >  
> >  	return 0;
> > --
> > 1.7.1
> 
> 
> Best regards,
> Yang
> 

--
			Gleb.
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/arch/x86/include/asm/entry_arch.h b/arch/x86/include/asm/entry_arch.h
index 40afa00..9bd4eca 100644
--- a/arch/x86/include/asm/entry_arch.h
+++ b/arch/x86/include/asm/entry_arch.h
@@ -19,6 +19,10 @@  BUILD_INTERRUPT(reboot_interrupt,REBOOT_VECTOR)
 
 BUILD_INTERRUPT(x86_platform_ipi, X86_PLATFORM_IPI_VECTOR)
 
+#ifdef CONFIG_HAVE_KVM
+BUILD_INTERRUPT(kvm_posted_intr_ipi, POSTED_INTR_VECTOR)
+#endif
+
 /*
  * every pentium local APIC has two 'local interrupts', with a
  * soft-definable vector attached to both interrupts, one of
diff --git a/arch/x86/include/asm/hw_irq.h b/arch/x86/include/asm/hw_irq.h
index eb92a6e..cebef02 100644
--- a/arch/x86/include/asm/hw_irq.h
+++ b/arch/x86/include/asm/hw_irq.h
@@ -28,6 +28,7 @@ 
 /* Interrupt handlers registered during init_IRQ */
 extern void apic_timer_interrupt(void);
 extern void x86_platform_ipi(void);
+extern void kvm_posted_intr_ipi(void);
 extern void error_interrupt(void);
 extern void irq_work_interrupt(void);
 
diff --git a/arch/x86/include/asm/irq_vectors.h b/arch/x86/include/asm/irq_vectors.h
index 1508e51..774dc9f 100644
--- a/arch/x86/include/asm/irq_vectors.h
+++ b/arch/x86/include/asm/irq_vectors.h
@@ -102,6 +102,11 @@ 
  */
 #define X86_PLATFORM_IPI_VECTOR		0xf7
 
+/* Vector for KVM to deliver posted interrupt IPI */
+#ifdef CONFIG_HAVE_KVM
+#define POSTED_INTR_VECTOR		0xf2
+#endif
+
 /*
  * IRQ work vector:
  */
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index b8388e9..79da55e 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -704,6 +704,9 @@  struct kvm_x86_ops {
 	void (*hwapic_isr_update)(struct kvm *kvm, int isr);
 	void (*load_eoi_exitmap)(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap);
 	void (*set_virtual_x2apic_mode)(struct kvm_vcpu *vcpu, bool set);
+	bool (*deliver_posted_interrupt)(struct kvm_vcpu *vcpu, int vector,
+					int *result, bool *delivered);
+	void (*sync_pir_to_irr)(struct kvm_vcpu *vcpu);
 	int (*set_tss_addr)(struct kvm *kvm, unsigned int addr);
 	int (*get_tdp_level)(void);
 	u64 (*get_mt_mask)(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio);
diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h
index 5c9dbad..ce8ac80 100644
--- a/arch/x86/include/asm/vmx.h
+++ b/arch/x86/include/asm/vmx.h
@@ -158,6 +158,7 @@ 
 #define PIN_BASED_EXT_INTR_MASK                 0x00000001
 #define PIN_BASED_NMI_EXITING                   0x00000008
 #define PIN_BASED_VIRTUAL_NMIS                  0x00000020
+#define PIN_BASED_POSTED_INTR                   0x00000080
 
 #define VM_EXIT_SAVE_DEBUG_CONTROLS             0x00000002
 #define VM_EXIT_HOST_ADDR_SPACE_SIZE            0x00000200
@@ -180,6 +181,7 @@ 
 /* VMCS Encodings */
 enum vmcs_field {
 	VIRTUAL_PROCESSOR_ID            = 0x00000000,
+	POSTED_INTR_NV                  = 0x00000002,
 	GUEST_ES_SELECTOR               = 0x00000800,
 	GUEST_CS_SELECTOR               = 0x00000802,
 	GUEST_SS_SELECTOR               = 0x00000804,
@@ -214,6 +216,8 @@  enum vmcs_field {
 	VIRTUAL_APIC_PAGE_ADDR_HIGH     = 0x00002013,
 	APIC_ACCESS_ADDR		= 0x00002014,
 	APIC_ACCESS_ADDR_HIGH		= 0x00002015,
+	POSTED_INTR_DESC_ADDR           = 0x00002016,
+	POSTED_INTR_DESC_ADDR_HIGH      = 0x00002017,
 	EPT_POINTER                     = 0x0000201a,
 	EPT_POINTER_HIGH                = 0x0000201b,
 	EOI_EXIT_BITMAP0                = 0x0000201c,
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index 70641af..b409846 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -1177,6 +1177,11 @@  apicinterrupt LOCAL_TIMER_VECTOR \
 apicinterrupt X86_PLATFORM_IPI_VECTOR \
 	x86_platform_ipi smp_x86_platform_ipi
 
+#ifdef CONFIG_HAVE_KVM
+apicinterrupt POSTED_INTR_VECTOR \
+	kvm_posted_intr_ipi smp_posted_intr_ipi
+#endif
+
 apicinterrupt THRESHOLD_APIC_VECTOR \
 	threshold_interrupt smp_threshold_interrupt
 apicinterrupt THERMAL_APIC_VECTOR \
diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c
index e4595f1..da74d65 100644
--- a/arch/x86/kernel/irq.c
+++ b/arch/x86/kernel/irq.c
@@ -228,6 +228,26 @@  void smp_x86_platform_ipi(struct pt_regs *regs)
 	set_irq_regs(old_regs);
 }
 
+#ifdef CONFIG_HAVE_KVM
+/*
+ * Handler for POSTED_INTR_VECTOR.
+ */
+void smp_posted_intr_ipi(struct pt_regs *regs)
+{
+	struct pt_regs *old_regs = set_irq_regs(regs);
+
+	ack_APIC_irq();
+
+	irq_enter();
+
+	exit_idle();
+
+	irq_exit();
+
+	set_irq_regs(old_regs);
+}
+#endif
+
 EXPORT_SYMBOL_GPL(vector_used_by_percpu_irq);
 
 #ifdef CONFIG_HOTPLUG_CPU
diff --git a/arch/x86/kernel/irqinit.c b/arch/x86/kernel/irqinit.c
index 6e03b0d..2329a54 100644
--- a/arch/x86/kernel/irqinit.c
+++ b/arch/x86/kernel/irqinit.c
@@ -205,6 +205,10 @@  static void __init apic_intr_init(void)
 
 	/* IPI for X86 platform specific use */
 	alloc_intr_gate(X86_PLATFORM_IPI_VECTOR, x86_platform_ipi);
+#ifdef CONFIG_HAVE_KVM
+	/* IPI for KVM to deliver posted interrupt */
+	alloc_intr_gate(POSTED_INTR_VECTOR, kvm_posted_intr_ipi);
+#endif
 
 	/* IPI vectors for APIC spurious and error interrupts */
 	alloc_intr_gate(SPURIOUS_APIC_VECTOR, spurious_interrupt);
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 02b51dd..ebc32bb 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -357,6 +357,12 @@  static u8 count_vectors(void *bitmap)
 	return count;
 }
 
+int kvm_apic_test_irr(int vec, struct kvm_lapic *apic)
+{
+	return apic_test_vector(vec, apic->regs + APIC_IRR);
+}
+EXPORT_SYMBOL_GPL(kvm_apic_test_irr);
+
 static inline int apic_test_and_set_irr(int vec, struct kvm_lapic *apic)
 {
 	apic->irr_pending = true;
@@ -379,6 +385,7 @@  static inline int apic_find_highest_irr(struct kvm_lapic *apic)
 	if (!apic->irr_pending)
 		return -1;
 
+	kvm_x86_ops->sync_pir_to_irr(apic->vcpu);
 	result = apic_search_irr(apic);
 	ASSERT(result == -1 || result >= 16);
 
@@ -685,6 +692,7 @@  static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode,
 {
 	int result = 0;
 	struct kvm_vcpu *vcpu = apic->vcpu;
+	bool delivered = false;
 
 	switch (delivery_mode) {
 	case APIC_DM_LOWEST:
@@ -700,7 +708,10 @@  static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode,
 		} else
 			apic_clear_vector(vector, apic->regs + APIC_TMR);
 
-		result = !apic_test_and_set_irr(vector, apic);
+		if (!kvm_x86_ops->deliver_posted_interrupt(vcpu, vector,
+						&result, &delivered))
+			result = !apic_test_and_set_irr(vector, apic);
+
 		trace_kvm_apic_accept_irq(vcpu->vcpu_id, delivery_mode,
 					  trig_mode, vector, !result);
 		if (!result) {
@@ -710,8 +721,10 @@  static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode,
 			break;
 		}
 
-		kvm_make_request(KVM_REQ_EVENT, vcpu);
-		kvm_vcpu_kick(vcpu);
+		if (!delivered) {
+			kvm_make_request(KVM_REQ_EVENT, vcpu);
+			kvm_vcpu_kick(vcpu);
+		}
 		break;
 
 	case APIC_DM_REMRD:
diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h
index 1676d34..1a7016c 100644
--- a/arch/x86/kvm/lapic.h
+++ b/arch/x86/kvm/lapic.h
@@ -157,5 +157,6 @@  static inline u16 apic_logical_id(struct kvm_apic_map *map, u32 ldr)
 void kvm_calculate_eoi_exitmap(struct kvm_vcpu *vcpu,
 				struct kvm_lapic_irq *irq,
 				u64 *eoi_bitmap);
+int kvm_apic_test_irr(int vec, struct kvm_lapic *apic);
 
 #endif
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index a7d60d7..9e705e3 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -3591,6 +3591,17 @@  static void svm_hwapic_isr_update(struct kvm *kvm, int isr)
 	return;
 }
 
+static void svm_sync_pir_to_irr(struct kvm_vcpu *vcpu)
+{
+	return;
+}
+
+static bool svm_deliver_posted_interrupt(struct kvm_vcpu *vcpu,
+		int vector, int *result, bool *delivered)
+{
+	return false;
+}
+
 static int svm_nmi_allowed(struct kvm_vcpu *vcpu)
 {
 	struct vcpu_svm *svm = to_svm(vcpu);
@@ -4319,6 +4330,8 @@  static struct kvm_x86_ops svm_x86_ops = {
 	.vm_has_apicv = svm_vm_has_apicv,
 	.load_eoi_exitmap = svm_load_eoi_exitmap,
 	.hwapic_isr_update = svm_hwapic_isr_update,
+	.sync_pir_to_irr = svm_sync_pir_to_irr,
+	.deliver_posted_interrupt = svm_deliver_posted_interrupt,
 
 	.set_tss_addr = svm_set_tss_addr,
 	.get_tdp_level = get_npt_level,
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 436b134..2fdf537 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -84,7 +84,8 @@  module_param(vmm_exclusive, bool, S_IRUGO);
 static bool __read_mostly fasteoi = 1;
 module_param(fasteoi, bool, S_IRUGO);
 
-static bool __read_mostly enable_apicv_reg_vid;
+static bool __read_mostly enable_apicv = 1;
+module_param(enable_apicv, bool, S_IRUGO);
 
 /*
  * If nested=1, nested virtualization is supported, i.e., guests may use
@@ -365,6 +366,36 @@  struct nested_vmx {
 	struct page *apic_access_page;
 };
 
+#define POSTED_INTR_ON  0
+/* Posted-Interrupt Descriptor */
+struct pi_desc {
+	u32 pir[8];     /* Posted interrupt requested */
+	union {
+		struct {
+			u8  on:1,
+			    rsvd:7;
+		} control;
+		u32 rsvd[8];
+	} u;
+} __aligned(64);
+
+static bool pi_test_and_set_on(struct pi_desc *pi_desc)
+{
+	return test_and_set_bit(POSTED_INTR_ON,
+			(unsigned long *)&pi_desc->u.control);
+}
+
+static bool pi_test_and_clear_on(struct pi_desc *pi_desc)
+{
+	return test_and_clear_bit(POSTED_INTR_ON,
+			(unsigned long *)&pi_desc->u.control);
+}
+
+static int pi_test_and_set_pir(int vector, struct pi_desc *pi_desc)
+{
+	return test_and_set_bit(vector, (unsigned long *)pi_desc->pir);
+}
+
 struct vcpu_vmx {
 	struct kvm_vcpu       vcpu;
 	unsigned long         host_rsp;
@@ -429,6 +460,9 @@  struct vcpu_vmx {
 
 	bool rdtscp_enabled;
 
+	/* Posted interrupt descriptor */
+	struct pi_desc pi_desc;
+
 	/* Support for a guest hypervisor (nested VMX) */
 	struct nested_vmx nested;
 };
@@ -783,6 +817,18 @@  static inline bool cpu_has_vmx_virtual_intr_delivery(void)
 		SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY;
 }
 
+static inline bool cpu_has_vmx_posted_intr(void)
+{
+	return vmcs_config.pin_based_exec_ctrl & PIN_BASED_POSTED_INTR;
+}
+
+static inline bool cpu_has_vmx_apicv(void)
+{
+	return cpu_has_vmx_apic_register_virt() &&
+		cpu_has_vmx_virtual_intr_delivery() &&
+		cpu_has_vmx_posted_intr();
+}
+
 static inline bool cpu_has_vmx_flexpriority(void)
 {
 	return cpu_has_vmx_tpr_shadow() &&
@@ -2530,12 +2576,6 @@  static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf)
 	u32 _vmexit_control = 0;
 	u32 _vmentry_control = 0;
 
-	min = PIN_BASED_EXT_INTR_MASK | PIN_BASED_NMI_EXITING;
-	opt = PIN_BASED_VIRTUAL_NMIS;
-	if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_PINBASED_CTLS,
-				&_pin_based_exec_control) < 0)
-		return -EIO;
-
 	min = CPU_BASED_HLT_EXITING |
 #ifdef CONFIG_X86_64
 	      CPU_BASED_CR8_LOAD_EXITING |
@@ -2612,6 +2652,17 @@  static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf)
 				&_vmexit_control) < 0)
 		return -EIO;
 
+	min = PIN_BASED_EXT_INTR_MASK | PIN_BASED_NMI_EXITING;
+	opt = PIN_BASED_VIRTUAL_NMIS | PIN_BASED_POSTED_INTR;
+	if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_PINBASED_CTLS,
+				&_pin_based_exec_control) < 0)
+		return -EIO;
+
+	if (!(_cpu_based_2nd_exec_control &
+		SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY) ||
+		!(_vmexit_control & VM_EXIT_ACK_INTR_ON_EXIT))
+		_pin_based_exec_control &= ~PIN_BASED_POSTED_INTR;
+
 	min = 0;
 	opt = VM_ENTRY_LOAD_IA32_PAT;
 	if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_ENTRY_CTLS,
@@ -2790,11 +2841,10 @@  static __init int hardware_setup(void)
 	if (!cpu_has_vmx_ple())
 		ple_gap = 0;
 
-	if (!cpu_has_vmx_apic_register_virt() ||
-				!cpu_has_vmx_virtual_intr_delivery())
-		enable_apicv_reg_vid = 0;
+	if (!cpu_has_vmx_apicv())
+		enable_apicv = 0;
 
-	if (enable_apicv_reg_vid)
+	if (enable_apicv)
 		kvm_x86_ops->update_cr8_intercept = NULL;
 	else
 		kvm_x86_ops->hwapic_irr_update = NULL;
@@ -3871,6 +3921,62 @@  static void vmx_disable_intercept_msr_write_x2apic(u32 msr)
 			msr, MSR_TYPE_W);
 }
 
+static int vmx_vm_has_apicv(struct kvm *kvm)
+{
+	return enable_apicv && irqchip_in_kernel(kvm);
+}
+
+static bool vmx_deliver_posted_interrupt(struct kvm_vcpu *vcpu,
+		int vector, int *result, bool *delivered)
+{
+	struct vcpu_vmx *vmx = to_vmx(vcpu);
+
+	if (!vmx_vm_has_apicv(vcpu->kvm))
+		return false;
+
+	if (kvm_apic_test_irr(vector, vcpu->arch.apic))
+		goto out;
+	else {
+		*result = !pi_test_and_set_pir(vector, &vmx->pi_desc);
+		if (!*result)
+			goto out;
+	}
+
+	if (!pi_test_and_set_on(&vmx->pi_desc) &&
+			(vcpu->mode == IN_GUEST_MODE)) {
+		kvm_make_request(KVM_REQ_EVENT, vcpu);
+		apic->send_IPI_mask(get_cpu_mask(vcpu->cpu),
+				    POSTED_INTR_VECTOR);
+		*delivered = true;
+	}
+out:
+	return true;
+}
+
+static void vmx_sync_pir_to_irr(struct kvm_vcpu *vcpu)
+{
+	struct vcpu_vmx *vmx = to_vmx(vcpu);
+	struct kvm_lapic *apic = vcpu->arch.apic;
+	unsigned int i, old, new, ret_val, irr_offset, pir_val;
+
+	if (!vmx_vm_has_apicv(vcpu->kvm) ||
+			!pi_test_and_clear_on(&vmx->pi_desc))
+		return;
+
+	for (i = 0; i <= 7; i++) {
+		pir_val = xchg(&vmx->pi_desc.pir[i], 0);
+		if (pir_val) {
+			irr_offset = APIC_IRR + i * 0x10;
+			do {
+				old = kvm_apic_get_reg(apic, irr_offset);
+				new = old | pir_val;
+				ret_val = cmpxchg((u32 *)(apic->regs +
+						irr_offset), old, new);
+			} while (unlikely(ret_val != old));
+		}
+	}
+}
+
 /*
  * Set up the vmcs's constant host-state fields, i.e., host-state fields that
  * will not change in the lifetime of the guest.
@@ -3931,6 +4037,15 @@  static void set_cr4_guest_host_mask(struct vcpu_vmx *vmx)
 	vmcs_writel(CR4_GUEST_HOST_MASK, ~vmx->vcpu.arch.cr4_guest_owned_bits);
 }
 
+static u32 vmx_pin_based_exec_ctrl(struct vcpu_vmx *vmx)
+{
+	u32 pin_based_exec_ctrl = vmcs_config.pin_based_exec_ctrl;
+
+	if (!vmx_vm_has_apicv(vmx->vcpu.kvm))
+		pin_based_exec_ctrl &= ~PIN_BASED_POSTED_INTR;
+	return pin_based_exec_ctrl;
+}
+
 static u32 vmx_exec_control(struct vcpu_vmx *vmx)
 {
 	u32 exec_control = vmcs_config.cpu_based_exec_ctrl;
@@ -3948,11 +4063,6 @@  static u32 vmx_exec_control(struct vcpu_vmx *vmx)
 	return exec_control;
 }
 
-static int vmx_vm_has_apicv(struct kvm *kvm)
-{
-	return enable_apicv_reg_vid && irqchip_in_kernel(kvm);
-}
-
 static u32 vmx_secondary_exec_control(struct vcpu_vmx *vmx)
 {
 	u32 exec_control = vmcs_config.cpu_based_2nd_exec_ctrl;
@@ -4008,8 +4118,7 @@  static int vmx_vcpu_setup(struct vcpu_vmx *vmx)
 	vmcs_write64(VMCS_LINK_POINTER, -1ull); /* 22.3.1.5 */
 
 	/* Control */
-	vmcs_write32(PIN_BASED_VM_EXEC_CONTROL,
-		vmcs_config.pin_based_exec_ctrl);
+	vmcs_write32(PIN_BASED_VM_EXEC_CONTROL, vmx_pin_based_exec_ctrl(vmx));
 
 	vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, vmx_exec_control(vmx));
 
@@ -4018,13 +4127,16 @@  static int vmx_vcpu_setup(struct vcpu_vmx *vmx)
 				vmx_secondary_exec_control(vmx));
 	}
 
-	if (enable_apicv_reg_vid) {
+	if (vmx_vm_has_apicv(vmx->vcpu.kvm)) {
 		vmcs_write64(EOI_EXIT_BITMAP0, 0);
 		vmcs_write64(EOI_EXIT_BITMAP1, 0);
 		vmcs_write64(EOI_EXIT_BITMAP2, 0);
 		vmcs_write64(EOI_EXIT_BITMAP3, 0);
 
 		vmcs_write16(GUEST_INTR_STATUS, 0);
+
+		vmcs_write64(POSTED_INTR_NV, POSTED_INTR_VECTOR);
+		vmcs_write64(POSTED_INTR_DESC_ADDR, __pa((&vmx->pi_desc)));
 	}
 
 	if (ple_gap) {
@@ -4174,6 +4286,9 @@  static int vmx_vcpu_reset(struct kvm_vcpu *vcpu)
 		vmcs_write64(APIC_ACCESS_ADDR,
 			     page_to_phys(vmx->vcpu.kvm->arch.apic_access_page));
 
+	if (vmx_vm_has_apicv(vcpu->kvm))
+		memset(&vmx->pi_desc, 0, sizeof(struct pi_desc));
+
 	if (vmx->vpid != 0)
 		vmcs_write16(VIRTUAL_PROCESSOR_ID, vmx->vpid);
 
@@ -7650,6 +7765,8 @@  static struct kvm_x86_ops vmx_x86_ops = {
 	.load_eoi_exitmap = vmx_load_eoi_exitmap,
 	.hwapic_irr_update = vmx_hwapic_irr_update,
 	.hwapic_isr_update = vmx_hwapic_isr_update,
+	.sync_pir_to_irr = vmx_sync_pir_to_irr,
+	.deliver_posted_interrupt = vmx_deliver_posted_interrupt,
 
 	.set_tss_addr = vmx_set_tss_addr,
 	.get_tdp_level = get_ept_level,
@@ -7753,7 +7870,7 @@  static int __init vmx_init(void)
 	memcpy(vmx_msr_bitmap_longmode_x2apic,
 			vmx_msr_bitmap_longmode, PAGE_SIZE);
 
-	if (enable_apicv_reg_vid) {
+	if (enable_apicv) {
 		for (msr = 0x800; msr <= 0x8ff; msr++)
 			vmx_disable_intercept_msr_read_x2apic(msr);
 
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index f1fa37e..62f8c94 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -2679,6 +2679,7 @@  void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
 static int kvm_vcpu_ioctl_get_lapic(struct kvm_vcpu *vcpu,
 				    struct kvm_lapic_state *s)
 {
+	kvm_x86_ops->sync_pir_to_irr(vcpu);
 	memcpy(s->regs, vcpu->arch.apic->regs, sizeof *s);
 
 	return 0;