
[v5,4/7] kvm, mem-hotplug: Reload L1's apic access page on migration in vcpu_enter_guest().

Message ID 1410413886-32213-5-git-send-email-tangchen@cn.fujitsu.com (mailing list archive)
State New, archived

Commit Message

tangchen Sept. 11, 2014, 5:38 a.m. UTC
The apic access page is pinned in memory, and as a result it cannot be migrated or hot-removed.
It does not actually need to be pinned.

The hpa of the apic access page is stored in the VMCS APIC_ACCESS_ADDR field. When
the page is migrated, kvm_mmu_notifier_invalidate_page() will invalidate the
corresponding ept entry. This patch introduces a new vcpu request named
KVM_REQ_APIC_PAGE_RELOAD, makes this request to all the vcpus at that point,
and forces them to exit the guest. Before re-entering the guest, each vcpu updates
the VMCS APIC_ACCESS_ADDR field to the new apic access page address and updates
kvm->arch.apic_access_page to the new page.

Signed-off-by: Tang Chen <tangchen@cn.fujitsu.com>
---
 arch/x86/include/asm/kvm_host.h |  1 +
 arch/x86/kvm/svm.c              |  6 ++++++
 arch/x86/kvm/vmx.c              |  6 ++++++
 arch/x86/kvm/x86.c              | 15 +++++++++++++++
 include/linux/kvm_host.h        |  2 ++
 virt/kvm/kvm_main.c             | 12 ++++++++++++
 6 files changed, 42 insertions(+)

Comments

Paolo Bonzini Sept. 11, 2014, 9:21 a.m. UTC | #1
On 11/09/2014 07:38, Tang Chen wrote:
> apic access page is pinned in memory. As a result, it cannot be migrated/hot-removed.
> Actually, it is not necessary to be pinned.
> 
> The hpa of apic access page is stored in VMCS APIC_ACCESS_ADDR pointer. When
> the page is migrated, kvm_mmu_notifier_invalidate_page() will invalidate the
> corresponding ept entry. This patch introduces a new vcpu request named
> KVM_REQ_APIC_PAGE_RELOAD, and makes this request to all the vcpus at this time,
> and force all the vcpus exit guest, and re-enter guest till they updates the VMCS
> APIC_ACCESS_ADDR pointer to the new apic access page address, and updates
> kvm->arch.apic_access_page to the new page.
> 
> Signed-off-by: Tang Chen <tangchen@cn.fujitsu.com>
> ---
>  arch/x86/include/asm/kvm_host.h |  1 +
>  arch/x86/kvm/svm.c              |  6 ++++++
>  arch/x86/kvm/vmx.c              |  6 ++++++
>  arch/x86/kvm/x86.c              | 15 +++++++++++++++
>  include/linux/kvm_host.h        |  2 ++
>  virt/kvm/kvm_main.c             | 12 ++++++++++++
>  6 files changed, 42 insertions(+)
> 
> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> index 35171c7..514183e 100644
> --- a/arch/x86/include/asm/kvm_host.h
> +++ b/arch/x86/include/asm/kvm_host.h
> @@ -739,6 +739,7 @@ struct kvm_x86_ops {
>  	void (*hwapic_isr_update)(struct kvm *kvm, int isr);
>  	void (*load_eoi_exitmap)(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap);
>  	void (*set_virtual_x2apic_mode)(struct kvm_vcpu *vcpu, bool set);
> +	void (*set_apic_access_page_addr)(struct kvm *kvm, hpa_t hpa);
>  	void (*deliver_posted_interrupt)(struct kvm_vcpu *vcpu, int vector);
>  	void (*sync_pir_to_irr)(struct kvm_vcpu *vcpu);
>  	int (*set_tss_addr)(struct kvm *kvm, unsigned int addr);
> diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
> index 1d941ad..f2eacc4 100644
> --- a/arch/x86/kvm/svm.c
> +++ b/arch/x86/kvm/svm.c
> @@ -3619,6 +3619,11 @@ static void svm_set_virtual_x2apic_mode(struct kvm_vcpu *vcpu, bool set)
>  	return;
>  }
>  
> +static void svm_set_apic_access_page_addr(struct kvm *kvm, hpa_t hpa)
> +{
> +	return;
> +}
> +
>  static int svm_vm_has_apicv(struct kvm *kvm)
>  {
>  	return 0;
> @@ -4373,6 +4378,7 @@ static struct kvm_x86_ops svm_x86_ops = {
>  	.enable_irq_window = enable_irq_window,
>  	.update_cr8_intercept = update_cr8_intercept,
>  	.set_virtual_x2apic_mode = svm_set_virtual_x2apic_mode,
> +	.set_apic_access_page_addr = svm_set_apic_access_page_addr,
>  	.vm_has_apicv = svm_vm_has_apicv,
>  	.load_eoi_exitmap = svm_load_eoi_exitmap,
>  	.hwapic_isr_update = svm_hwapic_isr_update,
> diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
> index 63c4c3e..da6d55d 100644
> --- a/arch/x86/kvm/vmx.c
> +++ b/arch/x86/kvm/vmx.c
> @@ -7093,6 +7093,11 @@ static void vmx_set_virtual_x2apic_mode(struct kvm_vcpu *vcpu, bool set)
>  	vmx_set_msr_bitmap(vcpu);
>  }
>  
> +static void vmx_set_apic_access_page_addr(struct kvm *kvm, hpa_t hpa)
> +{
> +	vmcs_write64(APIC_ACCESS_ADDR, hpa);

This has to be guarded by "if (!is_guest_mode(vcpu))".

> +}
> +
>  static void vmx_hwapic_isr_update(struct kvm *kvm, int isr)
>  {
>  	u16 status;
> @@ -8910,6 +8915,7 @@ static struct kvm_x86_ops vmx_x86_ops = {
>  	.enable_irq_window = enable_irq_window,
>  	.update_cr8_intercept = update_cr8_intercept,
>  	.set_virtual_x2apic_mode = vmx_set_virtual_x2apic_mode,
> +	.set_apic_access_page_addr = vmx_set_apic_access_page_addr,
>  	.vm_has_apicv = vmx_vm_has_apicv,
>  	.load_eoi_exitmap = vmx_load_eoi_exitmap,
>  	.hwapic_irr_update = vmx_hwapic_irr_update,
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index e05bd58..96f4188 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -5989,6 +5989,19 @@ static void vcpu_scan_ioapic(struct kvm_vcpu *vcpu)
>  	kvm_apic_update_tmr(vcpu, tmr);
>  }
>  
> +static void vcpu_reload_apic_access_page(struct kvm_vcpu *vcpu)
> +{
> +	/*
> +	 * apic access page could be migrated. When the page is being migrated,
> +	 * GUP will wait till the migrate entry is replaced with the new pte
> +	 * entry pointing to the new page.
> +	 */
> +	vcpu->kvm->arch.apic_access_page = gfn_to_page(vcpu->kvm,
> +				APIC_DEFAULT_PHYS_BASE >> PAGE_SHIFT);
> +	kvm_x86_ops->set_apic_access_page_addr(vcpu->kvm,
> +				page_to_phys(vcpu->kvm->arch.apic_access_page));
> +}
> +
>  /*
>   * Returns 1 to let __vcpu_run() continue the guest execution loop without
>   * exiting to the userspace.  Otherwise, the value will be returned to the
> @@ -6049,6 +6062,8 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
>  			kvm_deliver_pmi(vcpu);
>  		if (kvm_check_request(KVM_REQ_SCAN_IOAPIC, vcpu))
>  			vcpu_scan_ioapic(vcpu);
> +		if (kvm_check_request(KVM_REQ_APIC_PAGE_RELOAD, vcpu))
> +			vcpu_reload_apic_access_page(vcpu);
>  	}
>  
>  	if (kvm_check_request(KVM_REQ_EVENT, vcpu) || req_int_win) {
> diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
> index a4c33b3..8be076a 100644
> --- a/include/linux/kvm_host.h
> +++ b/include/linux/kvm_host.h
> @@ -136,6 +136,7 @@ static inline bool is_error_page(struct page *page)
>  #define KVM_REQ_GLOBAL_CLOCK_UPDATE 22
>  #define KVM_REQ_ENABLE_IBS        23
>  #define KVM_REQ_DISABLE_IBS       24
> +#define KVM_REQ_APIC_PAGE_RELOAD  25
>  
>  #define KVM_USERSPACE_IRQ_SOURCE_ID		0
>  #define KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID	1
> @@ -579,6 +580,7 @@ void kvm_flush_remote_tlbs(struct kvm *kvm);
>  void kvm_reload_remote_mmus(struct kvm *kvm);
>  void kvm_make_mclock_inprogress_request(struct kvm *kvm);
>  void kvm_make_scan_ioapic_request(struct kvm *kvm);
> +void kvm_reload_apic_access_page(struct kvm *kvm);
>  
>  long kvm_arch_dev_ioctl(struct file *filp,
>  			unsigned int ioctl, unsigned long arg);
> diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
> index 33712fb..d8280de 100644
> --- a/virt/kvm/kvm_main.c
> +++ b/virt/kvm/kvm_main.c
> @@ -210,6 +210,11 @@ void kvm_make_scan_ioapic_request(struct kvm *kvm)
>  	make_all_cpus_request(kvm, KVM_REQ_SCAN_IOAPIC);
>  }
>  
> +void kvm_reload_apic_access_page(struct kvm *kvm)
> +{
> +	make_all_cpus_request(kvm, KVM_REQ_APIC_PAGE_RELOAD);
> +}
> +
>  int kvm_vcpu_init(struct kvm_vcpu *vcpu, struct kvm *kvm, unsigned id)
>  {
>  	struct page *page;
> @@ -294,6 +299,13 @@ static void kvm_mmu_notifier_invalidate_page(struct mmu_notifier *mn,
>  	if (need_tlb_flush)
>  		kvm_flush_remote_tlbs(kvm);
>  
> +	/*
> +	 * The physical address of apic access page is stroed in VMCS.

Typo: stored, not stroed.

> +	 * So need to update it when it becomes invalid.

Please remove "so need to".

Paolo

> +	 */
> +	if (address == gfn_to_hva(kvm, APIC_DEFAULT_PHYS_BASE >> PAGE_SHIFT))
> +		kvm_reload_apic_access_page(kvm);
> +
>  	spin_unlock(&kvm->mmu_lock);
>  	srcu_read_unlock(&kvm->srcu, idx);
>  }
> 

Gleb Natapov Sept. 11, 2014, 10:12 a.m. UTC | #2
On Thu, Sep 11, 2014 at 11:21:49AM +0200, Paolo Bonzini wrote:
> On 11/09/2014 07:38, Tang Chen wrote:
> > diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
> > index 63c4c3e..da6d55d 100644
> > --- a/arch/x86/kvm/vmx.c
> > +++ b/arch/x86/kvm/vmx.c
> > @@ -7093,6 +7093,11 @@ static void vmx_set_virtual_x2apic_mode(struct kvm_vcpu *vcpu, bool set)
> >  	vmx_set_msr_bitmap(vcpu);
> >  }
> >  
> > +static void vmx_set_apic_access_page_addr(struct kvm *kvm, hpa_t hpa)
> > +{
> > +	vmcs_write64(APIC_ACCESS_ADDR, hpa);
> 
> This has to be guarded by "if (!is_guest_mode(vcpu))".
> 
We do need to write it if L1 and L2 share APIC_ACCESS_ADDR and skip
it otherwise, no?

> > +}
> > +

--
			Gleb.
tangchen Sept. 11, 2014, 10:20 a.m. UTC | #3
On 09/11/2014 05:21 PM, Paolo Bonzini wrote:
> On 11/09/2014 07:38, Tang Chen wrote:
>> apic access page is pinned in memory. As a result, it cannot be migrated/hot-removed.
>> Actually, it is not necessary to be pinned.
>>
>> The hpa of apic access page is stored in VMCS APIC_ACCESS_ADDR pointer. When
>> the page is migrated, kvm_mmu_notifier_invalidate_page() will invalidate the
>> corresponding ept entry. This patch introduces a new vcpu request named
>> KVM_REQ_APIC_PAGE_RELOAD, and makes this request to all the vcpus at this time,
>> and force all the vcpus exit guest, and re-enter guest till they updates the VMCS
>> APIC_ACCESS_ADDR pointer to the new apic access page address, and updates
>> kvm->arch.apic_access_page to the new page.
>>
>> Signed-off-by: Tang Chen <tangchen@cn.fujitsu.com>
>> ---
>>   arch/x86/include/asm/kvm_host.h |  1 +
>>   arch/x86/kvm/svm.c              |  6 ++++++
>>   arch/x86/kvm/vmx.c              |  6 ++++++
>>   arch/x86/kvm/x86.c              | 15 +++++++++++++++
>>   include/linux/kvm_host.h        |  2 ++
>>   virt/kvm/kvm_main.c             | 12 ++++++++++++
>>   6 files changed, 42 insertions(+)
>>
>> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
>> index 35171c7..514183e 100644
>> --- a/arch/x86/include/asm/kvm_host.h
>> +++ b/arch/x86/include/asm/kvm_host.h
>> @@ -739,6 +739,7 @@ struct kvm_x86_ops {
>>   	void (*hwapic_isr_update)(struct kvm *kvm, int isr);
>>   	void (*load_eoi_exitmap)(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap);
>>   	void (*set_virtual_x2apic_mode)(struct kvm_vcpu *vcpu, bool set);
>> +	void (*set_apic_access_page_addr)(struct kvm *kvm, hpa_t hpa);
>>   	void (*deliver_posted_interrupt)(struct kvm_vcpu *vcpu, int vector);
>>   	void (*sync_pir_to_irr)(struct kvm_vcpu *vcpu);
>>   	int (*set_tss_addr)(struct kvm *kvm, unsigned int addr);
>> diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
>> index 1d941ad..f2eacc4 100644
>> --- a/arch/x86/kvm/svm.c
>> +++ b/arch/x86/kvm/svm.c
>> @@ -3619,6 +3619,11 @@ static void svm_set_virtual_x2apic_mode(struct kvm_vcpu *vcpu, bool set)
>>   	return;
>>   }
>>   
>> +static void svm_set_apic_access_page_addr(struct kvm *kvm, hpa_t hpa)
>> +{
>> +	return;
>> +}
>> +
>>   static int svm_vm_has_apicv(struct kvm *kvm)
>>   {
>>   	return 0;
>> @@ -4373,6 +4378,7 @@ static struct kvm_x86_ops svm_x86_ops = {
>>   	.enable_irq_window = enable_irq_window,
>>   	.update_cr8_intercept = update_cr8_intercept,
>>   	.set_virtual_x2apic_mode = svm_set_virtual_x2apic_mode,
>> +	.set_apic_access_page_addr = svm_set_apic_access_page_addr,
>>   	.vm_has_apicv = svm_vm_has_apicv,
>>   	.load_eoi_exitmap = svm_load_eoi_exitmap,
>>   	.hwapic_isr_update = svm_hwapic_isr_update,
>> diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
>> index 63c4c3e..da6d55d 100644
>> --- a/arch/x86/kvm/vmx.c
>> +++ b/arch/x86/kvm/vmx.c
>> @@ -7093,6 +7093,11 @@ static void vmx_set_virtual_x2apic_mode(struct kvm_vcpu *vcpu, bool set)
>>   	vmx_set_msr_bitmap(vcpu);
>>   }
>>   
>> +static void vmx_set_apic_access_page_addr(struct kvm *kvm, hpa_t hpa)
>> +{
>> +	vmcs_write64(APIC_ACCESS_ADDR, hpa);
> This has to be guarded by "if (!is_guest_mode(vcpu))".

Since we cannot get the vcpu through kvm, I'd like to move this check into
vcpu_reload_apic_access_page(), around the call to
kvm_x86_ops->set_apic_access_page_addr().
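
Just to illustrate (a rough sketch only, assuming the helper keeps its
current shape in x86.c; this is not the actual code):

static void vcpu_reload_apic_access_page(struct kvm_vcpu *vcpu)
{
	vcpu->kvm->arch.apic_access_page = gfn_to_page(vcpu->kvm,
				APIC_DEFAULT_PHYS_BASE >> PAGE_SHIFT);

	/*
	 * In guest mode the vcpu is running on vmcs02, so leave the VMCS
	 * write to the nested entry/exit paths and only touch vmcs01 here.
	 */
	if (!is_guest_mode(vcpu))
		kvm_x86_ops->set_apic_access_page_addr(vcpu->kvm,
				page_to_phys(vcpu->kvm->arch.apic_access_page));
}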

Thanks.
>
>> +}
>> +
>>   static void vmx_hwapic_isr_update(struct kvm *kvm, int isr)
>>   {
>>   	u16 status;
>> @@ -8910,6 +8915,7 @@ static struct kvm_x86_ops vmx_x86_ops = {
>>   	.enable_irq_window = enable_irq_window,
>>   	.update_cr8_intercept = update_cr8_intercept,
>>   	.set_virtual_x2apic_mode = vmx_set_virtual_x2apic_mode,
>> +	.set_apic_access_page_addr = vmx_set_apic_access_page_addr,
>>   	.vm_has_apicv = vmx_vm_has_apicv,
>>   	.load_eoi_exitmap = vmx_load_eoi_exitmap,
>>   	.hwapic_irr_update = vmx_hwapic_irr_update,
>> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
>> index e05bd58..96f4188 100644
>> --- a/arch/x86/kvm/x86.c
>> +++ b/arch/x86/kvm/x86.c
>> @@ -5989,6 +5989,19 @@ static void vcpu_scan_ioapic(struct kvm_vcpu *vcpu)
>>   	kvm_apic_update_tmr(vcpu, tmr);
>>   }
>>   
>> +static void vcpu_reload_apic_access_page(struct kvm_vcpu *vcpu)
>> +{
>> +	/*
>> +	 * apic access page could be migrated. When the page is being migrated,
>> +	 * GUP will wait till the migrate entry is replaced with the new pte
>> +	 * entry pointing to the new page.
>> +	 */
>> +	vcpu->kvm->arch.apic_access_page = gfn_to_page(vcpu->kvm,
>> +				APIC_DEFAULT_PHYS_BASE >> PAGE_SHIFT);
>> +	kvm_x86_ops->set_apic_access_page_addr(vcpu->kvm,
>> +				page_to_phys(vcpu->kvm->arch.apic_access_page));
>> +}
>> +
>>   /*
>>    * Returns 1 to let __vcpu_run() continue the guest execution loop without
>>    * exiting to the userspace.  Otherwise, the value will be returned to the
>> @@ -6049,6 +6062,8 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
>>   			kvm_deliver_pmi(vcpu);
>>   		if (kvm_check_request(KVM_REQ_SCAN_IOAPIC, vcpu))
>>   			vcpu_scan_ioapic(vcpu);
>> +		if (kvm_check_request(KVM_REQ_APIC_PAGE_RELOAD, vcpu))
>> +			vcpu_reload_apic_access_page(vcpu);
>>   	}
>>   
>>   	if (kvm_check_request(KVM_REQ_EVENT, vcpu) || req_int_win) {
>> diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
>> index a4c33b3..8be076a 100644
>> --- a/include/linux/kvm_host.h
>> +++ b/include/linux/kvm_host.h
>> @@ -136,6 +136,7 @@ static inline bool is_error_page(struct page *page)
>>   #define KVM_REQ_GLOBAL_CLOCK_UPDATE 22
>>   #define KVM_REQ_ENABLE_IBS        23
>>   #define KVM_REQ_DISABLE_IBS       24
>> +#define KVM_REQ_APIC_PAGE_RELOAD  25
>>   
>>   #define KVM_USERSPACE_IRQ_SOURCE_ID		0
>>   #define KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID	1
>> @@ -579,6 +580,7 @@ void kvm_flush_remote_tlbs(struct kvm *kvm);
>>   void kvm_reload_remote_mmus(struct kvm *kvm);
>>   void kvm_make_mclock_inprogress_request(struct kvm *kvm);
>>   void kvm_make_scan_ioapic_request(struct kvm *kvm);
>> +void kvm_reload_apic_access_page(struct kvm *kvm);
>>   
>>   long kvm_arch_dev_ioctl(struct file *filp,
>>   			unsigned int ioctl, unsigned long arg);
>> diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
>> index 33712fb..d8280de 100644
>> --- a/virt/kvm/kvm_main.c
>> +++ b/virt/kvm/kvm_main.c
>> @@ -210,6 +210,11 @@ void kvm_make_scan_ioapic_request(struct kvm *kvm)
>>   	make_all_cpus_request(kvm, KVM_REQ_SCAN_IOAPIC);
>>   }
>>   
>> +void kvm_reload_apic_access_page(struct kvm *kvm)
>> +{
>> +	make_all_cpus_request(kvm, KVM_REQ_APIC_PAGE_RELOAD);
>> +}
>> +
>>   int kvm_vcpu_init(struct kvm_vcpu *vcpu, struct kvm *kvm, unsigned id)
>>   {
>>   	struct page *page;
>> @@ -294,6 +299,13 @@ static void kvm_mmu_notifier_invalidate_page(struct mmu_notifier *mn,
>>   	if (need_tlb_flush)
>>   		kvm_flush_remote_tlbs(kvm);
>>   
>> +	/*
>> +	 * The physical address of apic access page is stroed in VMCS.
> Typo: stored, not stroed.
>
>> +	 * So need to update it when it becomes invalid.
> Please remove "so need to".
>
> Paolo
>
>> +	 */
>> +	if (address == gfn_to_hva(kvm, APIC_DEFAULT_PHYS_BASE >> PAGE_SHIFT))
>> +		kvm_reload_apic_access_page(kvm);
>> +
>>   	spin_unlock(&kvm->mmu_lock);
>>   	srcu_read_unlock(&kvm->srcu, idx);
>>   }
>>
> .
>

Paolo Bonzini Sept. 11, 2014, 10:39 a.m. UTC | #4
On 11/09/2014 12:20, tangchen wrote:
>>>
>>> +    vmcs_write64(APIC_ACCESS_ADDR, hpa);
>> This has to be guarded by "if (!is_guest_mode(vcpu))".
> 
> Since we cannot get vcpu through kvm, I'd like to move this check to
> vcpu_reload_apic_access_page() when
> kvm_x86_ops->set_apic_access_page_addr()
> is called.

Good idea!  Though passing the vcpu to vmx_set_apic_access_page_addr
would also work.

Paolo
Paolo Bonzini Sept. 11, 2014, 10:47 a.m. UTC | #5
On 11/09/2014 12:12, Gleb Natapov wrote:
> On Thu, Sep 11, 2014 at 11:21:49AM +0200, Paolo Bonzini wrote:
>> On 11/09/2014 07:38, Tang Chen wrote:
>>> diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
>>> index 63c4c3e..da6d55d 100644
>>> --- a/arch/x86/kvm/vmx.c
>>> +++ b/arch/x86/kvm/vmx.c
>>> @@ -7093,6 +7093,11 @@ static void vmx_set_virtual_x2apic_mode(struct kvm_vcpu *vcpu, bool set)
>>>  	vmx_set_msr_bitmap(vcpu);
>>>  }
>>>  
>>> +static void vmx_set_apic_access_page_addr(struct kvm *kvm, hpa_t hpa)
>>> +{
>>> +	vmcs_write64(APIC_ACCESS_ADDR, hpa);
>>
>> This has to be guarded by "if (!is_guest_mode(vcpu))".
>>
> We do need to write it if L1 and L2 share APIC_ACCESS_ADDR and skip
> it otherwise, no?

Yes, but this would be handled by patch 6:

 		} else if (vm_need_virtualize_apic_accesses(vmx->vcpu.kvm)) {
+			struct page *page = gfn_to_page(vmx->vcpu.kvm,
+				APIC_DEFAULT_PHYS_BASE >> PAGE_SHIFT);
 			exec_control |=
 				SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES;
-			vmcs_write64(APIC_ACCESS_ADDR,
-				page_to_phys(vcpu->kvm->arch.apic_access_page));
+			vmcs_write64(APIC_ACCESS_ADDR, page_to_phys(page));
+			/*
+			 * Do not pin apic access page in memory so that memory
+			 * hotplug process is able to migrate it.
+			 */
+			put_page(page);
 		}

However, this is also useless code duplication because the above snippet could
reuse vcpu_reload_apic_access_page too.

So I think you cannot do the is_guest_mode check in
kvm_vcpu_reload_apic_access_page, and not in
vmx_reload_apic_access_page either.  But you could do something like

kvm_vcpu_reload_apic_access_page(...)
{
	...
	kvm_x86_ops->reload_apic_access_page(...);
}
EXPORT_SYMBOL_GPL(kvm_vcpu_reload_apic_access_page);

/* used in vcpu_enter_guest only */
vcpu_reload_apic_access_page(...)
{
	if (!is_guest_mode(vcpu))
		kvm_vcpu_reload_apic_access_page(...)
}

Paolo
Gleb Natapov Sept. 11, 2014, 11:30 a.m. UTC | #6
On Thu, Sep 11, 2014 at 12:47:16PM +0200, Paolo Bonzini wrote:
> On 11/09/2014 12:12, Gleb Natapov wrote:
> > On Thu, Sep 11, 2014 at 11:21:49AM +0200, Paolo Bonzini wrote:
> >> On 11/09/2014 07:38, Tang Chen wrote:
> >>> diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
> >>> index 63c4c3e..da6d55d 100644
> >>> --- a/arch/x86/kvm/vmx.c
> >>> +++ b/arch/x86/kvm/vmx.c
> >>> @@ -7093,6 +7093,11 @@ static void vmx_set_virtual_x2apic_mode(struct kvm_vcpu *vcpu, bool set)
> >>>  	vmx_set_msr_bitmap(vcpu);
> >>>  }
> >>>  
> >>> +static void vmx_set_apic_access_page_addr(struct kvm *kvm, hpa_t hpa)
> >>> +{
> >>> +	vmcs_write64(APIC_ACCESS_ADDR, hpa);
> >>
> >> This has to be guarded by "if (!is_guest_mode(vcpu))".
> >>
> > We do need to write it if L1 and L2 share APIC_ACCESS_ADDR and skip
> > it otherwise, no?
> 
> Yes, but this would be handled by patch 6:
> 
>  		} else if (vm_need_virtualize_apic_accesses(vmx->vcpu.kvm)) {
> +			struct page *page = gfn_to_page(vmx->vcpu.kvm,
> +				APIC_DEFAULT_PHYS_BASE >> PAGE_SHIFT);
>  			exec_control |=
>  				SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES;
> -			vmcs_write64(APIC_ACCESS_ADDR,
> -				page_to_phys(vcpu->kvm->arch.apic_access_page));
> +			vmcs_write64(APIC_ACCESS_ADDR, page_to_phys(page));
> +			/*
> +			 * Do not pin apic access page in memory so that memory
> +			 * hotplug process is able to migrate it.
> +			 */
> +			put_page(page);
>  		}
This code is in prepare_vmcs02() and is executed during L1->L2 vmentry. What happens
when the apic access page is migrated while L2 is running? It needs to be updated somewhere.

> 
> However, this is also useless code duplication because the above snippet could
> reuse vcpu_reload_apic_access_page too.
> 
> So I think you cannot do the is_guest_mode check in
> kvm_vcpu_reload_apic_access_page and also not in
> vmx_reload_apic_access_page.  But you could do something like
> 
> kvm_vcpu_reload_apic_access_page(...)
> {
> 	...
> 	kvm_x86_ops->reload_apic_access_page(...);
> }
> EXPORT_SYMBOL_GPL(kvm_vcpu_reload_apic_access_page);
> 
> /* used in vcpu_enter_guest only */
> vcpu_reload_apic_access_page(...)
> {
> 	if (!is_guest_mode(vcpu))
> 		kvm_vcpu_reload_apic_access_page(...)
> }
> 
> Paolo

--
			Gleb.
Paolo Bonzini Sept. 11, 2014, 1:05 p.m. UTC | #7
On 11/09/2014 13:30, Gleb Natapov wrote:
>> > +			vmcs_write64(APIC_ACCESS_ADDR, page_to_phys(page));
>> > +			/*
>> > +			 * Do not pin apic access page in memory so that memory
>> > +			 * hotplug process is able to migrate it.
>> > +			 */
>> > +			put_page(page);
>> >  		}
> This code is in prepare_vmcs02() and is executed during L1->L2 vmentry. What happens
> when apic access page is migrated while L2 is running? It needs to be update somewhere.

Before it is migrated, the MMU notifier is called and will force a
vmexit on all CPUs.  The reload code will call GUP on the page again
and swap it in.

Paolo

Gleb Natapov Sept. 11, 2014, 1:59 p.m. UTC | #8
On Thu, Sep 11, 2014 at 03:05:05PM +0200, Paolo Bonzini wrote:
> On 11/09/2014 13:30, Gleb Natapov wrote:
> >> > +			vmcs_write64(APIC_ACCESS_ADDR, page_to_phys(page));
> >> > +			/*
> >> > +			 * Do not pin apic access page in memory so that memory
> >> > +			 * hotplug process is able to migrate it.
> >> > +			 */
> >> > +			put_page(page);
> >> >  		}
> > This code is in prepare_vmcs02() and is executed during L1->L2 vmentry. What happens
> > when apic access page is migrated while L2 is running? It needs to be update somewhere.
> 
> Before it is migrated, the MMU notifier is called and will force a
> vmexit on all CPUs.  The reload code will call GUP again on the page
> again and swap it in.
> 
This is how it will work without "if (!is_guest_mode(vcpu))". But,
unless I am missing something, with this check it will not work while
the vcpu is in L2.

Suppose vmcs01->APIC_ACCESS_ADDR = 0xf000. During L2 entry
vmcs02->APIC_ACCESS_ADDR is set to 0xf000 too (by prepare_vmcs02). Now
0xf000 is migrated to 0x8000, the mmu notifier is called, and it forces a
vmexit, but the vcpu is in guest mode, so vmcs02->APIC_ACCESS_ADDR is never
updated to 0x8000 because of the "if (!is_guest_mode(vcpu))" check. So what
am I missing here?

--
			Gleb.
Paolo Bonzini Sept. 11, 2014, 2:06 p.m. UTC | #9
On 11/09/2014 15:59, Gleb Natapov wrote:
> 
> Suppose vmcs01->APIC_ACCESS_ADDR = 0xf000. During L2 entry
> vmcs02->APIC_ACCESS_ADDR is set to 0xf000 too (by prepare_vmcs02). Now
> 0xf000 is migrated to 0x8000, mmu notifier is called, it forces vmexit,
> but vcpu is in a guest mode so vmcs02->APIC_ACCESS_ADDR is never updated
> to 0x8000 because of "if (!is_guest_mode(vcpu))" check. So what am I
> missing here?

Right, guest mode isn't left, since you don't execute nested_vmx_vmexit:
this isn't an L2->L1 exit.  So we need an "else" for that "if
(!is_guest_mode(vcpu))", in which case the hpa is ignored and
vmcs12->apic_access_addr is used instead?

Paolo
Gleb Natapov Sept. 11, 2014, 2:21 p.m. UTC | #10
On Thu, Sep 11, 2014 at 04:06:58PM +0200, Paolo Bonzini wrote:
> On 11/09/2014 15:59, Gleb Natapov wrote:
> > 
> > Suppose vmcs01->APIC_ACCESS_ADDR = 0xf000. During L2 entry
> > vmcs02->APIC_ACCESS_ADDR is set to 0xf000 too (by prepare_vmcs02). Now
> > 0xf000 is migrated to 0x8000, mmu notifier is called, it forces vmexit,
> > but vcpu is in a guest mode so vmcs02->APIC_ACCESS_ADDR is never updated
> > to 0x8000 because of "if (!is_guest_mode(vcpu))" check. So what am I
> > missing here?
> 
> Right, guest mode isn't left as soon as you execute nested_vmx_vmexit,
> because this isn't an L2->L1 exit.  So we need an "else" for that "if
> (!is_guest_mode(vcpu))", in which case the hpa is ignored and
> vmcs12->apic_access_addr is used instead?
> 
As far as I can tell the check that is needed there is:

if (!is_guest_mode() || !(vmcs12->secondary_vm_exec_control & SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES))
    write(APIC_ACCESS_ADDR)

In other words, if L2 shares L1's apic access page then reload; otherwise do nothing.
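
Spelled out a bit more (a rough sketch only, assuming the callback were
changed to take a vcpu as discussed above; this is not code from the
series):

static void vmx_set_apic_access_page_addr(struct kvm_vcpu *vcpu, hpa_t hpa)
{
	/*
	 * Write the new hpa only when the running VMCS actually uses L1's
	 * apic access page: either we are in L1, or we are in L2 and L2
	 * shares L1's page (vmcs12 does not enable its own APIC access).
	 */
	if (!is_guest_mode(vcpu) ||
	    !nested_cpu_has2(get_vmcs12(vcpu),
			     SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES))
		vmcs_write64(APIC_ACCESS_ADDR, hpa);
}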

--
			Gleb.
Paolo Bonzini Sept. 11, 2014, 2:24 p.m. UTC | #11
On 11/09/2014 16:21, Gleb Natapov wrote:
> As far as I can tell the if that is needed there is:
> 
> if (!is_guest_mode() || !(vmcs12->secondary_vm_exec_control & ECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES))
>     write(PIC_ACCESS_ADDR)
> 
> In other words if L2 shares L1 apic access page then reload, otherwise do nothing.

What if the page being swapped out is L1's APIC access page?  We don't
run prepare_vmcs12 in that case because it's an L2->L0->L2 entry, so we
need to "do something".

Paolo
Gleb Natapov Sept. 11, 2014, 2:31 p.m. UTC | #12
On Thu, Sep 11, 2014 at 04:24:04PM +0200, Paolo Bonzini wrote:
> On 11/09/2014 16:21, Gleb Natapov wrote:
> > As far as I can tell the if that is needed there is:
> > 
> > if (!is_guest_mode() || !(vmcs12->secondary_vm_exec_control & ECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES))
> >     write(PIC_ACCESS_ADDR)
> > 
> > In other words if L2 shares L1 apic access page then reload, otherwise do nothing.
> 
> What if the page being swapped out is L1's APIC access page?  We don't
> run prepare_vmcs12 in that case because it's an L2->L0->L2 entry, so we
> need to "do something".
We will do something on L2->L1 exit. We will call kvm_reload_apic_access_page().
That is what patch 5 of this series is doing.
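
Roughly speaking (only the shape of the idea; the real hunk is in patch 5/7
and is not reproduced here):

	/* in nested_vmx_vmexit(), on the L2->L1 switch */
	kvm_reload_apic_access_page(vcpu->kvm);

so the request is raised again and vmcs01 picks up the (possibly migrated)
page on the next vcpu_enter_guest().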

--
			Gleb.
Paolo Bonzini Sept. 11, 2014, 2:37 p.m. UTC | #13
On 11/09/2014 16:31, Gleb Natapov wrote:
>> > What if the page being swapped out is L1's APIC access page?  We don't
>> > run prepare_vmcs12 in that case because it's an L2->L0->L2 entry, so we
>> > need to "do something".
> We will do something on L2->L1 exit. We will call kvm_reload_apic_access_page().
> That is what patch 5 of this series is doing.

Sorry, I meant "the APIC access page prepared by L1" for L2's execution.

You wrote:

> if (!is_guest_mode() || !(vmcs12->secondary_vm_exec_control & ECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES))
>     write(PIC_ACCESS_ADDR)
> 
> In other words if L2 shares L1 apic access page then reload, otherwise do nothing.

but in that case you have to redo nested_get_page, so "do nothing"
doesn't work.

Paolo
Gleb Natapov Sept. 11, 2014, 2:47 p.m. UTC | #14
On Thu, Sep 11, 2014 at 04:37:39PM +0200, Paolo Bonzini wrote:
> On 11/09/2014 16:31, Gleb Natapov wrote:
> >> > What if the page being swapped out is L1's APIC access page?  We don't
> >> > run prepare_vmcs12 in that case because it's an L2->L0->L2 entry, so we
> >> > need to "do something".
> > We will do something on L2->L1 exit. We will call kvm_reload_apic_access_page().
> > That is what patch 5 of this series is doing.
> 
> Sorry, I meant "the APIC access page prepared by L1" for L2's execution.
> 
> You wrote:
> 
> > if (!is_guest_mode() || !(vmcs12->secondary_vm_exec_control & ECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES))
> >     write(PIC_ACCESS_ADDR)
> > 
> > In other words if L2 shares L1 apic access page then reload, otherwise do nothing.
> 
> but in that case you have to redo nested_get_page, so "do nothing"
> doesn't work.
> 
Ah, 7/7 is new in this submission. Before that this page was still
pinned.  Looking at 7/7 now I do not see how it can work, since it has no
code for the mmu notifier to detect that it is dealing with such a page and
call kvm_reload_apic_access_page().  I said to Tang previously that nested
kvm has a bunch of pinned pages that are hard to deal with, and suggested
ironing out the non-nested case first :(

--
			Gleb.
tangchen Sept. 12, 2014, 3:32 a.m. UTC | #15
Hi Gleb, Paolo,

On 09/11/2014 10:47 PM, Gleb Natapov wrote:
> On Thu, Sep 11, 2014 at 04:37:39PM +0200, Paolo Bonzini wrote:
>> On 11/09/2014 16:31, Gleb Natapov wrote:
>>>>> What if the page being swapped out is L1's APIC access page?  We don't
>>>>> run prepare_vmcs12 in that case because it's an L2->L0->L2 entry, so we
>>>>> need to "do something".
>>> We will do something on L2->L1 exit. We will call kvm_reload_apic_access_page().
>>> That is what patch 5 of this series is doing.
>> Sorry, I meant "the APIC access page prepared by L1" for L2's execution.
>>
>> You wrote:
>>
>>> if (!is_guest_mode() || !(vmcs12->secondary_vm_exec_control & ECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES))
>>>      write(PIC_ACCESS_ADDR)
>>>
>>> In other words if L2 shares L1 apic access page then reload, otherwise do nothing.
>> but in that case you have to redo nested_get_page, so "do nothing"
>> doesn't work.
>>
> Ah, 7/7 is new in this submission. Before that this page was still
> pinned.  Looking at 7/7 now I do not see how it can work since it has no
> code for mmu notifier to detect that it deals with such page and call
> kvm_reload_apic_access_page().

Since L1 and L2 share one apic page, if the page is unmapped, mmu_notifier
will be called, and:

  - if the vcpu is in L1, an L1->L0 exit is raised. The apic page's pa will be
    updated in the next L0->L1 entry by making a vcpu request.

  - if the vcpu is in L2 (is_guest_mode, right?), an L2->L0 exit is raised.
    nested_vmx_vmexit() will not be called, since it is only called on L2->L1
    exit. It returns from vmx_vcpu_run() directly, right? So we should update
    the apic page in the L0->L2 entry. This is also done by making a vcpu
    request, right?

    prepare_vmcs02() is called on L1->L2 entry, and nested_vmx_vmexit() is
    called on L2->L1 exit. So we also need to update L1's vmcs in
    nested_vmx_vmexit(), in patch 5/7.

IIUC, I think patches 1~6 have done such things.

And yes, the is_guest_mode() check is not needed.

> I said to Tang previously that nested
> kvm has a bunch of pinned page that are hard to deal with and suggested
> to iron out non nested case first :(

Yes, and maybe adding patch 7 is not a good idea for now.

Thanks.
tangchen Sept. 12, 2014, 3:36 a.m. UTC | #16
Hi Paolo,

On 09/11/2014 10:24 PM, Paolo Bonzini wrote:
> On 11/09/2014 16:21, Gleb Natapov wrote:
>> As far as I can tell the if that is needed there is:
>>
>> if (!is_guest_mode() || !(vmcs12->secondary_vm_exec_control & ECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES))
>>      write(PIC_ACCESS_ADDR)
>>
>> In other words if L2 shares L1 apic access page then reload, otherwise do nothing.
> What if the page being swapped out is L1's APIC access page?  We don't
> run prepare_vmcs12 in that case because it's an L2->L0->L2 entry, so we
> need to "do something".

Are you talking about the case where L1 and L2 have different apic pages?
I think I didn't deal with that situation in this patch set.

Sorry I didn't say it clearly. Here, I assume L1 and L2 share the same apic
page. If we are in L2 and the page is migrated, we update L2's vmcs by making
a vcpu request. And of course, we should also update L1's vmcs. This is done
by patch 5: we make the vcpu request again in nested_vmx_vmexit().

Thanks.

Patch

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 35171c7..514183e 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -739,6 +739,7 @@  struct kvm_x86_ops {
 	void (*hwapic_isr_update)(struct kvm *kvm, int isr);
 	void (*load_eoi_exitmap)(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap);
 	void (*set_virtual_x2apic_mode)(struct kvm_vcpu *vcpu, bool set);
+	void (*set_apic_access_page_addr)(struct kvm *kvm, hpa_t hpa);
 	void (*deliver_posted_interrupt)(struct kvm_vcpu *vcpu, int vector);
 	void (*sync_pir_to_irr)(struct kvm_vcpu *vcpu);
 	int (*set_tss_addr)(struct kvm *kvm, unsigned int addr);
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 1d941ad..f2eacc4 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -3619,6 +3619,11 @@  static void svm_set_virtual_x2apic_mode(struct kvm_vcpu *vcpu, bool set)
 	return;
 }
 
+static void svm_set_apic_access_page_addr(struct kvm *kvm, hpa_t hpa)
+{
+	return;
+}
+
 static int svm_vm_has_apicv(struct kvm *kvm)
 {
 	return 0;
@@ -4373,6 +4378,7 @@  static struct kvm_x86_ops svm_x86_ops = {
 	.enable_irq_window = enable_irq_window,
 	.update_cr8_intercept = update_cr8_intercept,
 	.set_virtual_x2apic_mode = svm_set_virtual_x2apic_mode,
+	.set_apic_access_page_addr = svm_set_apic_access_page_addr,
 	.vm_has_apicv = svm_vm_has_apicv,
 	.load_eoi_exitmap = svm_load_eoi_exitmap,
 	.hwapic_isr_update = svm_hwapic_isr_update,
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 63c4c3e..da6d55d 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -7093,6 +7093,11 @@  static void vmx_set_virtual_x2apic_mode(struct kvm_vcpu *vcpu, bool set)
 	vmx_set_msr_bitmap(vcpu);
 }
 
+static void vmx_set_apic_access_page_addr(struct kvm *kvm, hpa_t hpa)
+{
+	vmcs_write64(APIC_ACCESS_ADDR, hpa);
+}
+
 static void vmx_hwapic_isr_update(struct kvm *kvm, int isr)
 {
 	u16 status;
@@ -8910,6 +8915,7 @@  static struct kvm_x86_ops vmx_x86_ops = {
 	.enable_irq_window = enable_irq_window,
 	.update_cr8_intercept = update_cr8_intercept,
 	.set_virtual_x2apic_mode = vmx_set_virtual_x2apic_mode,
+	.set_apic_access_page_addr = vmx_set_apic_access_page_addr,
 	.vm_has_apicv = vmx_vm_has_apicv,
 	.load_eoi_exitmap = vmx_load_eoi_exitmap,
 	.hwapic_irr_update = vmx_hwapic_irr_update,
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index e05bd58..96f4188 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -5989,6 +5989,19 @@  static void vcpu_scan_ioapic(struct kvm_vcpu *vcpu)
 	kvm_apic_update_tmr(vcpu, tmr);
 }
 
+static void vcpu_reload_apic_access_page(struct kvm_vcpu *vcpu)
+{
+	/*
+	 * apic access page could be migrated. When the page is being migrated,
+	 * GUP will wait till the migrate entry is replaced with the new pte
+	 * entry pointing to the new page.
+	 */
+	vcpu->kvm->arch.apic_access_page = gfn_to_page(vcpu->kvm,
+				APIC_DEFAULT_PHYS_BASE >> PAGE_SHIFT);
+	kvm_x86_ops->set_apic_access_page_addr(vcpu->kvm,
+				page_to_phys(vcpu->kvm->arch.apic_access_page));
+}
+
 /*
  * Returns 1 to let __vcpu_run() continue the guest execution loop without
  * exiting to the userspace.  Otherwise, the value will be returned to the
@@ -6049,6 +6062,8 @@  static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
 			kvm_deliver_pmi(vcpu);
 		if (kvm_check_request(KVM_REQ_SCAN_IOAPIC, vcpu))
 			vcpu_scan_ioapic(vcpu);
+		if (kvm_check_request(KVM_REQ_APIC_PAGE_RELOAD, vcpu))
+			vcpu_reload_apic_access_page(vcpu);
 	}
 
 	if (kvm_check_request(KVM_REQ_EVENT, vcpu) || req_int_win) {
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index a4c33b3..8be076a 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -136,6 +136,7 @@  static inline bool is_error_page(struct page *page)
 #define KVM_REQ_GLOBAL_CLOCK_UPDATE 22
 #define KVM_REQ_ENABLE_IBS        23
 #define KVM_REQ_DISABLE_IBS       24
+#define KVM_REQ_APIC_PAGE_RELOAD  25
 
 #define KVM_USERSPACE_IRQ_SOURCE_ID		0
 #define KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID	1
@@ -579,6 +580,7 @@  void kvm_flush_remote_tlbs(struct kvm *kvm);
 void kvm_reload_remote_mmus(struct kvm *kvm);
 void kvm_make_mclock_inprogress_request(struct kvm *kvm);
 void kvm_make_scan_ioapic_request(struct kvm *kvm);
+void kvm_reload_apic_access_page(struct kvm *kvm);
 
 long kvm_arch_dev_ioctl(struct file *filp,
 			unsigned int ioctl, unsigned long arg);
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 33712fb..d8280de 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -210,6 +210,11 @@  void kvm_make_scan_ioapic_request(struct kvm *kvm)
 	make_all_cpus_request(kvm, KVM_REQ_SCAN_IOAPIC);
 }
 
+void kvm_reload_apic_access_page(struct kvm *kvm)
+{
+	make_all_cpus_request(kvm, KVM_REQ_APIC_PAGE_RELOAD);
+}
+
 int kvm_vcpu_init(struct kvm_vcpu *vcpu, struct kvm *kvm, unsigned id)
 {
 	struct page *page;
@@ -294,6 +299,13 @@  static void kvm_mmu_notifier_invalidate_page(struct mmu_notifier *mn,
 	if (need_tlb_flush)
 		kvm_flush_remote_tlbs(kvm);
 
+	/*
+	 * The physical address of apic access page is stroed in VMCS.
+	 * So need to update it when it becomes invalid.
+	 */
+	if (address == gfn_to_hva(kvm, APIC_DEFAULT_PHYS_BASE >> PAGE_SHIFT))
+		kvm_reload_apic_access_page(kvm);
+
 	spin_unlock(&kvm->mmu_lock);
 	srcu_read_unlock(&kvm->srcu, idx);
 }