Message ID | 1410413886-32213-5-git-send-email-tangchen@cn.fujitsu.com (mailing list archive)
---|---
State | New, archived
Il 11/09/2014 07:38, Tang Chen ha scritto: > apic access page is pinned in memory. As a result, it cannot be migrated/hot-removed. > Actually, it is not necessary to be pinned. > > The hpa of apic access page is stored in VMCS APIC_ACCESS_ADDR pointer. When > the page is migrated, kvm_mmu_notifier_invalidate_page() will invalidate the > corresponding ept entry. This patch introduces a new vcpu request named > KVM_REQ_APIC_PAGE_RELOAD, and makes this request to all the vcpus at this time, > and force all the vcpus exit guest, and re-enter guest till they updates the VMCS > APIC_ACCESS_ADDR pointer to the new apic access page address, and updates > kvm->arch.apic_access_page to the new page. > > Signed-off-by: Tang Chen <tangchen@cn.fujitsu.com> > --- > arch/x86/include/asm/kvm_host.h | 1 + > arch/x86/kvm/svm.c | 6 ++++++ > arch/x86/kvm/vmx.c | 6 ++++++ > arch/x86/kvm/x86.c | 15 +++++++++++++++ > include/linux/kvm_host.h | 2 ++ > virt/kvm/kvm_main.c | 12 ++++++++++++ > 6 files changed, 42 insertions(+) > > diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h > index 35171c7..514183e 100644 > --- a/arch/x86/include/asm/kvm_host.h > +++ b/arch/x86/include/asm/kvm_host.h > @@ -739,6 +739,7 @@ struct kvm_x86_ops { > void (*hwapic_isr_update)(struct kvm *kvm, int isr); > void (*load_eoi_exitmap)(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap); > void (*set_virtual_x2apic_mode)(struct kvm_vcpu *vcpu, bool set); > + void (*set_apic_access_page_addr)(struct kvm *kvm, hpa_t hpa); > void (*deliver_posted_interrupt)(struct kvm_vcpu *vcpu, int vector); > void (*sync_pir_to_irr)(struct kvm_vcpu *vcpu); > int (*set_tss_addr)(struct kvm *kvm, unsigned int addr); > diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c > index 1d941ad..f2eacc4 100644 > --- a/arch/x86/kvm/svm.c > +++ b/arch/x86/kvm/svm.c > @@ -3619,6 +3619,11 @@ static void svm_set_virtual_x2apic_mode(struct kvm_vcpu *vcpu, bool set) > return; > } > > +static void svm_set_apic_access_page_addr(struct kvm *kvm, hpa_t hpa) > +{ > + return; > +} > + > static int svm_vm_has_apicv(struct kvm *kvm) > { > return 0; > @@ -4373,6 +4378,7 @@ static struct kvm_x86_ops svm_x86_ops = { > .enable_irq_window = enable_irq_window, > .update_cr8_intercept = update_cr8_intercept, > .set_virtual_x2apic_mode = svm_set_virtual_x2apic_mode, > + .set_apic_access_page_addr = svm_set_apic_access_page_addr, > .vm_has_apicv = svm_vm_has_apicv, > .load_eoi_exitmap = svm_load_eoi_exitmap, > .hwapic_isr_update = svm_hwapic_isr_update, > diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c > index 63c4c3e..da6d55d 100644 > --- a/arch/x86/kvm/vmx.c > +++ b/arch/x86/kvm/vmx.c > @@ -7093,6 +7093,11 @@ static void vmx_set_virtual_x2apic_mode(struct kvm_vcpu *vcpu, bool set) > vmx_set_msr_bitmap(vcpu); > } > > +static void vmx_set_apic_access_page_addr(struct kvm *kvm, hpa_t hpa) > +{ > + vmcs_write64(APIC_ACCESS_ADDR, hpa); This has to be guarded by "if (!is_guest_mode(vcpu))". 
> +} > + > static void vmx_hwapic_isr_update(struct kvm *kvm, int isr) > { > u16 status; > @@ -8910,6 +8915,7 @@ static struct kvm_x86_ops vmx_x86_ops = { > .enable_irq_window = enable_irq_window, > .update_cr8_intercept = update_cr8_intercept, > .set_virtual_x2apic_mode = vmx_set_virtual_x2apic_mode, > + .set_apic_access_page_addr = vmx_set_apic_access_page_addr, > .vm_has_apicv = vmx_vm_has_apicv, > .load_eoi_exitmap = vmx_load_eoi_exitmap, > .hwapic_irr_update = vmx_hwapic_irr_update, > diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c > index e05bd58..96f4188 100644 > --- a/arch/x86/kvm/x86.c > +++ b/arch/x86/kvm/x86.c > @@ -5989,6 +5989,19 @@ static void vcpu_scan_ioapic(struct kvm_vcpu *vcpu) > kvm_apic_update_tmr(vcpu, tmr); > } > > +static void vcpu_reload_apic_access_page(struct kvm_vcpu *vcpu) > +{ > + /* > + * apic access page could be migrated. When the page is being migrated, > + * GUP will wait till the migrate entry is replaced with the new pte > + * entry pointing to the new page. > + */ > + vcpu->kvm->arch.apic_access_page = gfn_to_page(vcpu->kvm, > + APIC_DEFAULT_PHYS_BASE >> PAGE_SHIFT); > + kvm_x86_ops->set_apic_access_page_addr(vcpu->kvm, > + page_to_phys(vcpu->kvm->arch.apic_access_page)); > +} > + > /* > * Returns 1 to let __vcpu_run() continue the guest execution loop without > * exiting to the userspace. Otherwise, the value will be returned to the > @@ -6049,6 +6062,8 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) > kvm_deliver_pmi(vcpu); > if (kvm_check_request(KVM_REQ_SCAN_IOAPIC, vcpu)) > vcpu_scan_ioapic(vcpu); > + if (kvm_check_request(KVM_REQ_APIC_PAGE_RELOAD, vcpu)) > + vcpu_reload_apic_access_page(vcpu); > } > > if (kvm_check_request(KVM_REQ_EVENT, vcpu) || req_int_win) { > diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h > index a4c33b3..8be076a 100644 > --- a/include/linux/kvm_host.h > +++ b/include/linux/kvm_host.h > @@ -136,6 +136,7 @@ static inline bool is_error_page(struct page *page) > #define KVM_REQ_GLOBAL_CLOCK_UPDATE 22 > #define KVM_REQ_ENABLE_IBS 23 > #define KVM_REQ_DISABLE_IBS 24 > +#define KVM_REQ_APIC_PAGE_RELOAD 25 > > #define KVM_USERSPACE_IRQ_SOURCE_ID 0 > #define KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID 1 > @@ -579,6 +580,7 @@ void kvm_flush_remote_tlbs(struct kvm *kvm); > void kvm_reload_remote_mmus(struct kvm *kvm); > void kvm_make_mclock_inprogress_request(struct kvm *kvm); > void kvm_make_scan_ioapic_request(struct kvm *kvm); > +void kvm_reload_apic_access_page(struct kvm *kvm); > > long kvm_arch_dev_ioctl(struct file *filp, > unsigned int ioctl, unsigned long arg); > diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c > index 33712fb..d8280de 100644 > --- a/virt/kvm/kvm_main.c > +++ b/virt/kvm/kvm_main.c > @@ -210,6 +210,11 @@ void kvm_make_scan_ioapic_request(struct kvm *kvm) > make_all_cpus_request(kvm, KVM_REQ_SCAN_IOAPIC); > } > > +void kvm_reload_apic_access_page(struct kvm *kvm) > +{ > + make_all_cpus_request(kvm, KVM_REQ_APIC_PAGE_RELOAD); > +} > + > int kvm_vcpu_init(struct kvm_vcpu *vcpu, struct kvm *kvm, unsigned id) > { > struct page *page; > @@ -294,6 +299,13 @@ static void kvm_mmu_notifier_invalidate_page(struct mmu_notifier *mn, > if (need_tlb_flush) > kvm_flush_remote_tlbs(kvm); > > + /* > + * The physical address of apic access page is stroed in VMCS. Typo: stored, not stroed. > + * So need to update it when it becomes invalid. Please remove "so need to". 
Paolo

> +	 */
> +	if (address == gfn_to_hva(kvm, APIC_DEFAULT_PHYS_BASE >> PAGE_SHIFT))
> +		kvm_reload_apic_access_page(kvm);
> +
>  	spin_unlock(&kvm->mmu_lock);
>  	srcu_read_unlock(&kvm->srcu, idx);
>  }
On Thu, Sep 11, 2014 at 11:21:49AM +0200, Paolo Bonzini wrote:
> Il 11/09/2014 07:38, Tang Chen ha scritto:
> > diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
> > index 63c4c3e..da6d55d 100644
> > --- a/arch/x86/kvm/vmx.c
> > +++ b/arch/x86/kvm/vmx.c
> > @@ -7093,6 +7093,11 @@ static void vmx_set_virtual_x2apic_mode(struct kvm_vcpu *vcpu, bool set)
> >  	vmx_set_msr_bitmap(vcpu);
> >  }
> >
> > +static void vmx_set_apic_access_page_addr(struct kvm *kvm, hpa_t hpa)
> > +{
> > +	vmcs_write64(APIC_ACCESS_ADDR, hpa);
>
> This has to be guarded by "if (!is_guest_mode(vcpu))".
>
We do need to write it if L1 and L2 share APIC_ACCESS_ADDR and skip it
otherwise, no?

> > +}
> > +

--
	Gleb.
On 09/11/2014 05:21 PM, Paolo Bonzini wrote: > Il 11/09/2014 07:38, Tang Chen ha scritto: >> apic access page is pinned in memory. As a result, it cannot be migrated/hot-removed. >> Actually, it is not necessary to be pinned. >> >> The hpa of apic access page is stored in VMCS APIC_ACCESS_ADDR pointer. When >> the page is migrated, kvm_mmu_notifier_invalidate_page() will invalidate the >> corresponding ept entry. This patch introduces a new vcpu request named >> KVM_REQ_APIC_PAGE_RELOAD, and makes this request to all the vcpus at this time, >> and force all the vcpus exit guest, and re-enter guest till they updates the VMCS >> APIC_ACCESS_ADDR pointer to the new apic access page address, and updates >> kvm->arch.apic_access_page to the new page. >> >> Signed-off-by: Tang Chen <tangchen@cn.fujitsu.com> >> --- >> arch/x86/include/asm/kvm_host.h | 1 + >> arch/x86/kvm/svm.c | 6 ++++++ >> arch/x86/kvm/vmx.c | 6 ++++++ >> arch/x86/kvm/x86.c | 15 +++++++++++++++ >> include/linux/kvm_host.h | 2 ++ >> virt/kvm/kvm_main.c | 12 ++++++++++++ >> 6 files changed, 42 insertions(+) >> >> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h >> index 35171c7..514183e 100644 >> --- a/arch/x86/include/asm/kvm_host.h >> +++ b/arch/x86/include/asm/kvm_host.h >> @@ -739,6 +739,7 @@ struct kvm_x86_ops { >> void (*hwapic_isr_update)(struct kvm *kvm, int isr); >> void (*load_eoi_exitmap)(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap); >> void (*set_virtual_x2apic_mode)(struct kvm_vcpu *vcpu, bool set); >> + void (*set_apic_access_page_addr)(struct kvm *kvm, hpa_t hpa); >> void (*deliver_posted_interrupt)(struct kvm_vcpu *vcpu, int vector); >> void (*sync_pir_to_irr)(struct kvm_vcpu *vcpu); >> int (*set_tss_addr)(struct kvm *kvm, unsigned int addr); >> diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c >> index 1d941ad..f2eacc4 100644 >> --- a/arch/x86/kvm/svm.c >> +++ b/arch/x86/kvm/svm.c >> @@ -3619,6 +3619,11 @@ static void svm_set_virtual_x2apic_mode(struct kvm_vcpu *vcpu, bool set) >> return; >> } >> >> +static void svm_set_apic_access_page_addr(struct kvm *kvm, hpa_t hpa) >> +{ >> + return; >> +} >> + >> static int svm_vm_has_apicv(struct kvm *kvm) >> { >> return 0; >> @@ -4373,6 +4378,7 @@ static struct kvm_x86_ops svm_x86_ops = { >> .enable_irq_window = enable_irq_window, >> .update_cr8_intercept = update_cr8_intercept, >> .set_virtual_x2apic_mode = svm_set_virtual_x2apic_mode, >> + .set_apic_access_page_addr = svm_set_apic_access_page_addr, >> .vm_has_apicv = svm_vm_has_apicv, >> .load_eoi_exitmap = svm_load_eoi_exitmap, >> .hwapic_isr_update = svm_hwapic_isr_update, >> diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c >> index 63c4c3e..da6d55d 100644 >> --- a/arch/x86/kvm/vmx.c >> +++ b/arch/x86/kvm/vmx.c >> @@ -7093,6 +7093,11 @@ static void vmx_set_virtual_x2apic_mode(struct kvm_vcpu *vcpu, bool set) >> vmx_set_msr_bitmap(vcpu); >> } >> >> +static void vmx_set_apic_access_page_addr(struct kvm *kvm, hpa_t hpa) >> +{ >> + vmcs_write64(APIC_ACCESS_ADDR, hpa); > This has to be guarded by "if (!is_guest_mode(vcpu))". Since we cannot get vcpu through kvm, I'd like to move this check to vcpu_reload_apic_access_page() when kvm_x86_ops->set_apic_access_page_addr() is called. Thanks. 
> >> +} >> + >> static void vmx_hwapic_isr_update(struct kvm *kvm, int isr) >> { >> u16 status; >> @@ -8910,6 +8915,7 @@ static struct kvm_x86_ops vmx_x86_ops = { >> .enable_irq_window = enable_irq_window, >> .update_cr8_intercept = update_cr8_intercept, >> .set_virtual_x2apic_mode = vmx_set_virtual_x2apic_mode, >> + .set_apic_access_page_addr = vmx_set_apic_access_page_addr, >> .vm_has_apicv = vmx_vm_has_apicv, >> .load_eoi_exitmap = vmx_load_eoi_exitmap, >> .hwapic_irr_update = vmx_hwapic_irr_update, >> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c >> index e05bd58..96f4188 100644 >> --- a/arch/x86/kvm/x86.c >> +++ b/arch/x86/kvm/x86.c >> @@ -5989,6 +5989,19 @@ static void vcpu_scan_ioapic(struct kvm_vcpu *vcpu) >> kvm_apic_update_tmr(vcpu, tmr); >> } >> >> +static void vcpu_reload_apic_access_page(struct kvm_vcpu *vcpu) >> +{ >> + /* >> + * apic access page could be migrated. When the page is being migrated, >> + * GUP will wait till the migrate entry is replaced with the new pte >> + * entry pointing to the new page. >> + */ >> + vcpu->kvm->arch.apic_access_page = gfn_to_page(vcpu->kvm, >> + APIC_DEFAULT_PHYS_BASE >> PAGE_SHIFT); >> + kvm_x86_ops->set_apic_access_page_addr(vcpu->kvm, >> + page_to_phys(vcpu->kvm->arch.apic_access_page)); >> +} >> + >> /* >> * Returns 1 to let __vcpu_run() continue the guest execution loop without >> * exiting to the userspace. Otherwise, the value will be returned to the >> @@ -6049,6 +6062,8 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) >> kvm_deliver_pmi(vcpu); >> if (kvm_check_request(KVM_REQ_SCAN_IOAPIC, vcpu)) >> vcpu_scan_ioapic(vcpu); >> + if (kvm_check_request(KVM_REQ_APIC_PAGE_RELOAD, vcpu)) >> + vcpu_reload_apic_access_page(vcpu); >> } >> >> if (kvm_check_request(KVM_REQ_EVENT, vcpu) || req_int_win) { >> diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h >> index a4c33b3..8be076a 100644 >> --- a/include/linux/kvm_host.h >> +++ b/include/linux/kvm_host.h >> @@ -136,6 +136,7 @@ static inline bool is_error_page(struct page *page) >> #define KVM_REQ_GLOBAL_CLOCK_UPDATE 22 >> #define KVM_REQ_ENABLE_IBS 23 >> #define KVM_REQ_DISABLE_IBS 24 >> +#define KVM_REQ_APIC_PAGE_RELOAD 25 >> >> #define KVM_USERSPACE_IRQ_SOURCE_ID 0 >> #define KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID 1 >> @@ -579,6 +580,7 @@ void kvm_flush_remote_tlbs(struct kvm *kvm); >> void kvm_reload_remote_mmus(struct kvm *kvm); >> void kvm_make_mclock_inprogress_request(struct kvm *kvm); >> void kvm_make_scan_ioapic_request(struct kvm *kvm); >> +void kvm_reload_apic_access_page(struct kvm *kvm); >> >> long kvm_arch_dev_ioctl(struct file *filp, >> unsigned int ioctl, unsigned long arg); >> diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c >> index 33712fb..d8280de 100644 >> --- a/virt/kvm/kvm_main.c >> +++ b/virt/kvm/kvm_main.c >> @@ -210,6 +210,11 @@ void kvm_make_scan_ioapic_request(struct kvm *kvm) >> make_all_cpus_request(kvm, KVM_REQ_SCAN_IOAPIC); >> } >> >> +void kvm_reload_apic_access_page(struct kvm *kvm) >> +{ >> + make_all_cpus_request(kvm, KVM_REQ_APIC_PAGE_RELOAD); >> +} >> + >> int kvm_vcpu_init(struct kvm_vcpu *vcpu, struct kvm *kvm, unsigned id) >> { >> struct page *page; >> @@ -294,6 +299,13 @@ static void kvm_mmu_notifier_invalidate_page(struct mmu_notifier *mn, >> if (need_tlb_flush) >> kvm_flush_remote_tlbs(kvm); >> >> + /* >> + * The physical address of apic access page is stroed in VMCS. > Typo: stored, not stroed. > >> + * So need to update it when it becomes invalid. > Please remove "so need to". 
>
> Paolo
>
>> +	 */
>> +	if (address == gfn_to_hva(kvm, APIC_DEFAULT_PHYS_BASE >> PAGE_SHIFT))
>> +		kvm_reload_apic_access_page(kvm);
>> +
>>  	spin_unlock(&kvm->mmu_lock);
>>  	srcu_read_unlock(&kvm->srcu, idx);
>>  }
>>
> .
>
Il 11/09/2014 12:20, tangchen ha scritto:
>>>
>>> +	vmcs_write64(APIC_ACCESS_ADDR, hpa);
>> This has to be guarded by "if (!is_guest_mode(vcpu))".
>
> Since we cannot get vcpu through kvm, I'd like to move this check to
> vcpu_reload_apic_access_page() when
> kvm_x86_ops->set_apic_access_page_addr()
> is called.

Good idea!  Though passing the vcpu to vmx_set_apic_access_page_addr
would also work.

Paolo
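For readers following the thread, here is a minimal sketch of Paolo's alternative: change the hook to take a vcpu so the VMX code can do the guest-mode check itself. This is purely illustrative, not the code that was merged, and it assumes the corresponding `kvm_x86_ops` prototype is changed to match.

```c
/*
 * Hypothetical variant of the callback from this patch, taking a vcpu
 * instead of a struct kvm (the kvm_x86_ops member would have to change
 * its prototype accordingly).
 */
static void vmx_set_apic_access_page_addr(struct kvm_vcpu *vcpu, hpa_t hpa)
{
        /*
         * While the vcpu runs L2, vmcs02 is the loaded VMCS, so the write
         * would land in vmcs02 rather than vmcs01; the nested case is
         * discussed further down the thread.
         */
        if (!is_guest_mode(vcpu))
                vmcs_write64(APIC_ACCESS_ADDR, hpa);
}
```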
Il 11/09/2014 12:12, Gleb Natapov ha scritto:
> On Thu, Sep 11, 2014 at 11:21:49AM +0200, Paolo Bonzini wrote:
>> Il 11/09/2014 07:38, Tang Chen ha scritto:
>>> diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
>>> index 63c4c3e..da6d55d 100644
>>> --- a/arch/x86/kvm/vmx.c
>>> +++ b/arch/x86/kvm/vmx.c
>>> @@ -7093,6 +7093,11 @@ static void vmx_set_virtual_x2apic_mode(struct kvm_vcpu *vcpu, bool set)
>>>  	vmx_set_msr_bitmap(vcpu);
>>>  }
>>>
>>> +static void vmx_set_apic_access_page_addr(struct kvm *kvm, hpa_t hpa)
>>> +{
>>> +	vmcs_write64(APIC_ACCESS_ADDR, hpa);
>>
>> This has to be guarded by "if (!is_guest_mode(vcpu))".
>>
> We do need to write it if L1 and L2 share APIC_ACCESS_ADDR and skip
> it otherwise, no?

Yes, but this would be handled by patch 6:

 	} else if (vm_need_virtualize_apic_accesses(vmx->vcpu.kvm)) {
+		struct page *page = gfn_to_page(vmx->vcpu.kvm,
+				APIC_DEFAULT_PHYS_BASE >> PAGE_SHIFT);
 		exec_control |=
 			SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES;
-		vmcs_write64(APIC_ACCESS_ADDR,
-			page_to_phys(vcpu->kvm->arch.apic_access_page));
+		vmcs_write64(APIC_ACCESS_ADDR, page_to_phys(page));
+		/*
+		 * Do not pin apic access page in memory so that memory
+		 * hotplug process is able to migrate it.
+		 */
+		put_page(page);
 	}

However, this is also useless code duplication because the above snippet
could reuse vcpu_reload_apic_access_page too.

So I think you cannot do the is_guest_mode check in
kvm_vcpu_reload_apic_access_page and also not in
vmx_reload_apic_access_page.  But you could do something like

	kvm_vcpu_reload_apic_access_page(...)
	{
		...
		kvm_x86_ops->reload_apic_access_page(...);
	}
	EXPORT_SYMBOL_GPL(kvm_vcpu_reload_apic_access_page);

	/* used in vcpu_enter_guest only */
	vcpu_reload_apic_access_page(...)
	{
		if (!is_guest_mode(vcpu))
			kvm_vcpu_reload_apic_access_page(...)
	}

Paolo
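A concrete reading of that suggestion might look like the sketch below. The function bodies are illustrative only: the worker simply reuses the reload logic from this patch, and the callback keeps the `set_apic_access_page_addr` name from this series rather than Paolo's renamed `reload_apic_access_page`.

```c
/* Exported worker: re-resolves the page and tells vendor code about it. */
void kvm_vcpu_reload_apic_access_page(struct kvm_vcpu *vcpu)
{
        struct page *page;

        /* gfn_to_page() blocks until any in-flight migration has finished. */
        page = gfn_to_page(vcpu->kvm, APIC_DEFAULT_PHYS_BASE >> PAGE_SHIFT);
        vcpu->kvm->arch.apic_access_page = page;
        kvm_x86_ops->set_apic_access_page_addr(vcpu->kvm, page_to_phys(page));
}
EXPORT_SYMBOL_GPL(kvm_vcpu_reload_apic_access_page);

/* Used from vcpu_enter_guest() only: skip the VMCS write while in L2. */
static void vcpu_reload_apic_access_page(struct kvm_vcpu *vcpu)
{
        if (!is_guest_mode(vcpu))
                kvm_vcpu_reload_apic_access_page(vcpu);
}
```

With this split, prepare_vmcs02() could call the exported worker directly instead of duplicating the gfn_to_page()/vmcs_write64() pair, which is the deduplication Paolo is pointing at.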
On Thu, Sep 11, 2014 at 12:47:16PM +0200, Paolo Bonzini wrote:
> Il 11/09/2014 12:12, Gleb Natapov ha scritto:
> > On Thu, Sep 11, 2014 at 11:21:49AM +0200, Paolo Bonzini wrote:
> >> Il 11/09/2014 07:38, Tang Chen ha scritto:
> >>> diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
> >>> index 63c4c3e..da6d55d 100644
> >>> --- a/arch/x86/kvm/vmx.c
> >>> +++ b/arch/x86/kvm/vmx.c
> >>> @@ -7093,6 +7093,11 @@ static void vmx_set_virtual_x2apic_mode(struct kvm_vcpu *vcpu, bool set)
> >>>  	vmx_set_msr_bitmap(vcpu);
> >>>  }
> >>>
> >>> +static void vmx_set_apic_access_page_addr(struct kvm *kvm, hpa_t hpa)
> >>> +{
> >>> +	vmcs_write64(APIC_ACCESS_ADDR, hpa);
> >>
> >> This has to be guarded by "if (!is_guest_mode(vcpu))".
> >>
> > We do need to write it if L1 and L2 share APIC_ACCESS_ADDR and skip
> > it otherwise, no?
>
> Yes, but this would be handled by patch 6:
>
>  	} else if (vm_need_virtualize_apic_accesses(vmx->vcpu.kvm)) {
> +		struct page *page = gfn_to_page(vmx->vcpu.kvm,
> +				APIC_DEFAULT_PHYS_BASE >> PAGE_SHIFT);
>  		exec_control |=
>  			SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES;
> -		vmcs_write64(APIC_ACCESS_ADDR,
> -			page_to_phys(vcpu->kvm->arch.apic_access_page));
> +		vmcs_write64(APIC_ACCESS_ADDR, page_to_phys(page));
> +		/*
> +		 * Do not pin apic access page in memory so that memory
> +		 * hotplug process is able to migrate it.
> +		 */
> +		put_page(page);
>  	}
This code is in prepare_vmcs02() and is executed during L1->L2 vmentry.
What happens when the apic access page is migrated while L2 is running?
It needs to be updated somewhere.

>
> However, this is also useless code duplication because the above snippet could
> reuse vcpu_reload_apic_access_page too.
>
> So I think you cannot do the is_guest_mode check in
> kvm_vcpu_reload_apic_access_page and also not in
> vmx_reload_apic_access_page.  But you could do something like
>
> 	kvm_vcpu_reload_apic_access_page(...)
> 	{
> 		...
> 		kvm_x86_ops->reload_apic_access_page(...);
> 	}
> 	EXPORT_SYMBOL_GPL(kvm_vcpu_reload_apic_access_page);
>
> 	/* used in vcpu_enter_guest only */
> 	vcpu_reload_apic_access_page(...)
> 	{
> 		if (!is_guest_mode(vcpu))
> 			kvm_vcpu_reload_apic_access_page(...)
> 	}
>
> Paolo

--
	Gleb.
Il 11/09/2014 13:30, Gleb Natapov ha scritto:
>> > +		vmcs_write64(APIC_ACCESS_ADDR, page_to_phys(page));
>> > +		/*
>> > +		 * Do not pin apic access page in memory so that memory
>> > +		 * hotplug process is able to migrate it.
>> > +		 */
>> > +		put_page(page);
>> > 	}
> This code is in prepare_vmcs02() and is executed during L1->L2 vmentry.
> What happens when the apic access page is migrated while L2 is running?
> It needs to be updated somewhere.

Before it is migrated, the MMU notifier is called and will force a
vmexit on all CPUs.  The reload code will call GUP on the page again
and swap it in.

Paolo
On Thu, Sep 11, 2014 at 03:05:05PM +0200, Paolo Bonzini wrote:
> Il 11/09/2014 13:30, Gleb Natapov ha scritto:
> >> > +		vmcs_write64(APIC_ACCESS_ADDR, page_to_phys(page));
> >> > +		/*
> >> > +		 * Do not pin apic access page in memory so that memory
> >> > +		 * hotplug process is able to migrate it.
> >> > +		 */
> >> > +		put_page(page);
> >> > 	}
> > This code is in prepare_vmcs02() and is executed during L1->L2 vmentry.
> > What happens when the apic access page is migrated while L2 is running?
> > It needs to be updated somewhere.
>
> Before it is migrated, the MMU notifier is called and will force a
> vmexit on all CPUs.  The reload code will call GUP on the page again
> and swap it in.
>
This is how it will work without "if (!is_guest_mode(vcpu))".  But,
unless I am missing something, with this check it will not work while
the vcpu is in L2.

Suppose vmcs01->APIC_ACCESS_ADDR = 0xf000.  During L2 entry
vmcs02->APIC_ACCESS_ADDR is set to 0xf000 too (by prepare_vmcs02).  Now
0xf000 is migrated to 0x8000, the mmu notifier is called, it forces a
vmexit, but the vcpu is in guest mode, so vmcs02->APIC_ACCESS_ADDR is
never updated to 0x8000 because of the "if (!is_guest_mode(vcpu))"
check.  So what am I missing here?

--
	Gleb.
Il 11/09/2014 15:59, Gleb Natapov ha scritto:
>
> Suppose vmcs01->APIC_ACCESS_ADDR = 0xf000.  During L2 entry
> vmcs02->APIC_ACCESS_ADDR is set to 0xf000 too (by prepare_vmcs02).  Now
> 0xf000 is migrated to 0x8000, the mmu notifier is called, it forces a
> vmexit, but the vcpu is in guest mode, so vmcs02->APIC_ACCESS_ADDR is
> never updated to 0x8000 because of the "if (!is_guest_mode(vcpu))"
> check.  So what am I missing here?

Right, guest mode isn't left as soon as you execute nested_vmx_vmexit,
because this isn't an L2->L1 exit.  So we need an "else" for that
"if (!is_guest_mode(vcpu))", in which case the hpa is ignored and
vmcs12->apic_access_addr is used instead?

Paolo
On Thu, Sep 11, 2014 at 04:06:58PM +0200, Paolo Bonzini wrote:
> Il 11/09/2014 15:59, Gleb Natapov ha scritto:
> >
> > Suppose vmcs01->APIC_ACCESS_ADDR = 0xf000.  During L2 entry
> > vmcs02->APIC_ACCESS_ADDR is set to 0xf000 too (by prepare_vmcs02).  Now
> > 0xf000 is migrated to 0x8000, the mmu notifier is called, it forces a
> > vmexit, but the vcpu is in guest mode, so vmcs02->APIC_ACCESS_ADDR is
> > never updated to 0x8000 because of the "if (!is_guest_mode(vcpu))"
> > check.  So what am I missing here?
>
> Right, guest mode isn't left as soon as you execute nested_vmx_vmexit,
> because this isn't an L2->L1 exit.  So we need an "else" for that
> "if (!is_guest_mode(vcpu))", in which case the hpa is ignored and
> vmcs12->apic_access_addr is used instead?
>
As far as I can tell the if that is needed there is:

	if (!is_guest_mode() ||
	    !(vmcs12->secondary_vm_exec_control & SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES))
		write(APIC_ACCESS_ADDR)

In other words, if L2 shares L1's apic access page then reload,
otherwise do nothing.

--
	Gleb.
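Translated into the vmx.c callback (assuming the vcpu-based prototype discussed earlier in the thread), Gleb's check might look roughly like the following. `get_vmcs12()` and the field names come from the existing nested VMX code, but the snippet is only a sketch of the proposed condition, not the final patch.

```c
static void vmx_set_apic_access_page_addr(struct kvm_vcpu *vcpu, hpa_t hpa)
{
        struct vmcs12 *vmcs12 = get_vmcs12(vcpu);

        /*
         * Write the new address when running L1, or when L2 shares L1's
         * APIC access page (i.e. vmcs12 does not enable APIC-access
         * virtualization itself); otherwise leave the loaded VMCS alone.
         */
        if (!is_guest_mode(vcpu) ||
            !(vmcs12->secondary_vm_exec_control &
              SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES))
                vmcs_write64(APIC_ACCESS_ADDR, hpa);
}
```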
Il 11/09/2014 16:21, Gleb Natapov ha scritto:
> As far as I can tell the if that is needed there is:
>
> 	if (!is_guest_mode() ||
> 	    !(vmcs12->secondary_vm_exec_control & SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES))
> 		write(APIC_ACCESS_ADDR)
>
> In other words, if L2 shares L1's apic access page then reload,
> otherwise do nothing.

What if the page being swapped out is L1's APIC access page?  We don't
run prepare_vmcs12 in that case because it's an L2->L0->L2 entry, so we
need to "do something".

Paolo
On Thu, Sep 11, 2014 at 04:24:04PM +0200, Paolo Bonzini wrote:
> Il 11/09/2014 16:21, Gleb Natapov ha scritto:
> > As far as I can tell the if that is needed there is:
> >
> > 	if (!is_guest_mode() ||
> > 	    !(vmcs12->secondary_vm_exec_control & SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES))
> > 		write(APIC_ACCESS_ADDR)
> >
> > In other words, if L2 shares L1's apic access page then reload,
> > otherwise do nothing.
>
> What if the page being swapped out is L1's APIC access page?  We don't
> run prepare_vmcs12 in that case because it's an L2->L0->L2 entry, so we
> need to "do something".

We will do something on L2->L1 exit.  We will call
kvm_reload_apic_access_page().  That is what patch 5 of this series is
doing.

--
	Gleb.
Il 11/09/2014 16:31, Gleb Natapov ha scritto:
>> > What if the page being swapped out is L1's APIC access page?  We don't
>> > run prepare_vmcs12 in that case because it's an L2->L0->L2 entry, so we
>> > need to "do something".
> We will do something on L2->L1 exit.  We will call
> kvm_reload_apic_access_page().  That is what patch 5 of this series is
> doing.

Sorry, I meant "the APIC access page prepared by L1" for L2's execution.

You wrote:

> 	if (!is_guest_mode() ||
> 	    !(vmcs12->secondary_vm_exec_control & SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES))
> 		write(APIC_ACCESS_ADDR)
>
> In other words, if L2 shares L1's apic access page then reload,
> otherwise do nothing.

but in that case you have to redo nested_get_page, so "do nothing"
doesn't work.

Paolo
On Thu, Sep 11, 2014 at 04:37:39PM +0200, Paolo Bonzini wrote:
> Il 11/09/2014 16:31, Gleb Natapov ha scritto:
> >> > What if the page being swapped out is L1's APIC access page?  We don't
> >> > run prepare_vmcs12 in that case because it's an L2->L0->L2 entry, so we
> >> > need to "do something".
> > We will do something on L2->L1 exit.  We will call
> > kvm_reload_apic_access_page().  That is what patch 5 of this series is
> > doing.
>
> Sorry, I meant "the APIC access page prepared by L1" for L2's execution.
>
> You wrote:
>
> > 	if (!is_guest_mode() ||
> > 	    !(vmcs12->secondary_vm_exec_control & SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES))
> > 		write(APIC_ACCESS_ADDR)
> >
> > In other words, if L2 shares L1's apic access page then reload,
> > otherwise do nothing.
>
> but in that case you have to redo nested_get_page, so "do nothing"
> doesn't work.
>
Ah, 7/7 is new in this submission.  Before that this page was still
pinned.  Looking at 7/7 now I do not see how it can work, since it has
no code for the mmu notifier to detect that it deals with such a page
and call kvm_reload_apic_access_page().  I said to Tang previously that
nested kvm has a bunch of pinned pages that are hard to deal with, and
suggested ironing out the non-nested case first :(

--
	Gleb.
Hi Gleb, Paolo,

On 09/11/2014 10:47 PM, Gleb Natapov wrote:
> On Thu, Sep 11, 2014 at 04:37:39PM +0200, Paolo Bonzini wrote:
>> Il 11/09/2014 16:31, Gleb Natapov ha scritto:
>>>>> What if the page being swapped out is L1's APIC access page?  We don't
>>>>> run prepare_vmcs12 in that case because it's an L2->L0->L2 entry, so we
>>>>> need to "do something".
>>> We will do something on L2->L1 exit.  We will call
>>> kvm_reload_apic_access_page().  That is what patch 5 of this series is
>>> doing.
>> Sorry, I meant "the APIC access page prepared by L1" for L2's execution.
>>
>> You wrote:
>>
>>> 	if (!is_guest_mode() ||
>>> 	    !(vmcs12->secondary_vm_exec_control & SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES))
>>> 		write(APIC_ACCESS_ADDR)
>>>
>>> In other words, if L2 shares L1's apic access page then reload,
>>> otherwise do nothing.
>> but in that case you have to redo nested_get_page, so "do nothing"
>> doesn't work.
>>
> Ah, 7/7 is new in this submission.  Before that this page was still
> pinned.  Looking at 7/7 now I do not see how it can work, since it has
> no code for the mmu notifier to detect that it deals with such a page
> and call kvm_reload_apic_access_page().

Since L1 and L2 share one apic page, if the page is unmapped,
mmu_notifier will be called, and:

- If the vcpu is in L1, an L1->L0 exit is raised.  The apic page's pa
  will be updated in the next L0->L1 entry by making a vcpu request.

- If the vcpu is in L2 (is_guest_mode, right?), an L2->L0 exit is
  raised.  nested_vmx_vmexit() will not be called, since it is only
  called on an L2->L1 exit; we return from vmx_vcpu_run() directly,
  right?  So we should update the apic page on the L0->L2 entry.  This
  is also done by making a vcpu request, right?

prepare_vmcs02() is called on L1->L2 entry, and nested_vmx_vmexit() is
called on L2->L1 exit.  So we also need to update L1's vmcs in
nested_vmx_vmexit() in patch 5/7.

IIUC, patches 1~6 have done such things.  And yes, the is_guest_mode()
check is not needed.

> I said to Tang previously that nested
> kvm has a bunch of pinned pages that are hard to deal with, and
> suggested ironing out the non-nested case first :(

Yes, and maybe adding patch 7 is not a good idea for now.

Thanks.
Hi Paolo,

On 09/11/2014 10:24 PM, Paolo Bonzini wrote:
> Il 11/09/2014 16:21, Gleb Natapov ha scritto:
>> As far as I can tell the if that is needed there is:
>>
>> 	if (!is_guest_mode() ||
>> 	    !(vmcs12->secondary_vm_exec_control & SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES))
>> 		write(APIC_ACCESS_ADDR)
>>
>> In other words, if L2 shares L1's apic access page then reload,
>> otherwise do nothing.
> What if the page being swapped out is L1's APIC access page?  We don't
> run prepare_vmcs12 in that case because it's an L2->L0->L2 entry, so we
> need to "do something".

Are you talking about the case where L1 and L2 have different apic
pages?  I think I didn't deal with this situation in this patch set.
Sorry I didn't say it clearly.

Here, I assume L1 and L2 share the same apic page.  If we are in L2 and
the page is migrated, we update L2's vmcs by making a vcpu request.
And of course, we should also update L1's vmcs.  This is done by patch
5: we make the vcpu request again in nested_vmx_vmexit(), as sketched
below.

Thanks.
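A rough sketch of that L2->L1 side, as described for patch 5 of the series; the placement inside nested_vmx_vmexit() and the argument list are assumptions here, not the actual patch.

```c
static void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason,
                              u32 exit_intr_info,
                              unsigned long exit_qualification)
{
        /* ... emulate the L2->L1 exit and switch back to vmcs01 ... */

        /*
         * The apic access page may have been migrated while L2 was
         * running, and vmcs01 was not touched at that point.  Request a
         * reload so vmcs01's APIC_ACCESS_ADDR is refreshed before the
         * next entry into L1.
         */
        kvm_reload_apic_access_page(vcpu->kvm);
}
```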
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 35171c7..514183e 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -739,6 +739,7 @@ struct kvm_x86_ops {
 	void (*hwapic_isr_update)(struct kvm *kvm, int isr);
 	void (*load_eoi_exitmap)(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap);
 	void (*set_virtual_x2apic_mode)(struct kvm_vcpu *vcpu, bool set);
+	void (*set_apic_access_page_addr)(struct kvm *kvm, hpa_t hpa);
 	void (*deliver_posted_interrupt)(struct kvm_vcpu *vcpu, int vector);
 	void (*sync_pir_to_irr)(struct kvm_vcpu *vcpu);
 	int (*set_tss_addr)(struct kvm *kvm, unsigned int addr);
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 1d941ad..f2eacc4 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -3619,6 +3619,11 @@ static void svm_set_virtual_x2apic_mode(struct kvm_vcpu *vcpu, bool set)
 	return;
 }
 
+static void svm_set_apic_access_page_addr(struct kvm *kvm, hpa_t hpa)
+{
+	return;
+}
+
 static int svm_vm_has_apicv(struct kvm *kvm)
 {
 	return 0;
@@ -4373,6 +4378,7 @@ static struct kvm_x86_ops svm_x86_ops = {
 	.enable_irq_window = enable_irq_window,
 	.update_cr8_intercept = update_cr8_intercept,
 	.set_virtual_x2apic_mode = svm_set_virtual_x2apic_mode,
+	.set_apic_access_page_addr = svm_set_apic_access_page_addr,
 	.vm_has_apicv = svm_vm_has_apicv,
 	.load_eoi_exitmap = svm_load_eoi_exitmap,
 	.hwapic_isr_update = svm_hwapic_isr_update,
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 63c4c3e..da6d55d 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -7093,6 +7093,11 @@ static void vmx_set_virtual_x2apic_mode(struct kvm_vcpu *vcpu, bool set)
 	vmx_set_msr_bitmap(vcpu);
 }
 
+static void vmx_set_apic_access_page_addr(struct kvm *kvm, hpa_t hpa)
+{
+	vmcs_write64(APIC_ACCESS_ADDR, hpa);
+}
+
 static void vmx_hwapic_isr_update(struct kvm *kvm, int isr)
 {
 	u16 status;
@@ -8910,6 +8915,7 @@ static struct kvm_x86_ops vmx_x86_ops = {
 	.enable_irq_window = enable_irq_window,
 	.update_cr8_intercept = update_cr8_intercept,
 	.set_virtual_x2apic_mode = vmx_set_virtual_x2apic_mode,
+	.set_apic_access_page_addr = vmx_set_apic_access_page_addr,
 	.vm_has_apicv = vmx_vm_has_apicv,
 	.load_eoi_exitmap = vmx_load_eoi_exitmap,
 	.hwapic_irr_update = vmx_hwapic_irr_update,
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index e05bd58..96f4188 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -5989,6 +5989,19 @@ static void vcpu_scan_ioapic(struct kvm_vcpu *vcpu)
 	kvm_apic_update_tmr(vcpu, tmr);
 }
 
+static void vcpu_reload_apic_access_page(struct kvm_vcpu *vcpu)
+{
+	/*
+	 * apic access page could be migrated. When the page is being migrated,
+	 * GUP will wait till the migrate entry is replaced with the new pte
+	 * entry pointing to the new page.
+	 */
+	vcpu->kvm->arch.apic_access_page = gfn_to_page(vcpu->kvm,
+			APIC_DEFAULT_PHYS_BASE >> PAGE_SHIFT);
+	kvm_x86_ops->set_apic_access_page_addr(vcpu->kvm,
+			page_to_phys(vcpu->kvm->arch.apic_access_page));
+}
+
 /*
  * Returns 1 to let __vcpu_run() continue the guest execution loop without
  * exiting to the userspace. Otherwise, the value will be returned to the
@@ -6049,6 +6062,8 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
 			kvm_deliver_pmi(vcpu);
 		if (kvm_check_request(KVM_REQ_SCAN_IOAPIC, vcpu))
 			vcpu_scan_ioapic(vcpu);
+		if (kvm_check_request(KVM_REQ_APIC_PAGE_RELOAD, vcpu))
+			vcpu_reload_apic_access_page(vcpu);
 	}
 
 	if (kvm_check_request(KVM_REQ_EVENT, vcpu) || req_int_win) {
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index a4c33b3..8be076a 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -136,6 +136,7 @@ static inline bool is_error_page(struct page *page)
 #define KVM_REQ_GLOBAL_CLOCK_UPDATE 22
 #define KVM_REQ_ENABLE_IBS        23
 #define KVM_REQ_DISABLE_IBS       24
+#define KVM_REQ_APIC_PAGE_RELOAD  25
 
 #define KVM_USERSPACE_IRQ_SOURCE_ID		0
 #define KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID	1
@@ -579,6 +580,7 @@ void kvm_flush_remote_tlbs(struct kvm *kvm);
 void kvm_reload_remote_mmus(struct kvm *kvm);
 void kvm_make_mclock_inprogress_request(struct kvm *kvm);
 void kvm_make_scan_ioapic_request(struct kvm *kvm);
+void kvm_reload_apic_access_page(struct kvm *kvm);
 
 long kvm_arch_dev_ioctl(struct file *filp,
 			unsigned int ioctl, unsigned long arg);
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 33712fb..d8280de 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -210,6 +210,11 @@ void kvm_make_scan_ioapic_request(struct kvm *kvm)
 	make_all_cpus_request(kvm, KVM_REQ_SCAN_IOAPIC);
 }
 
+void kvm_reload_apic_access_page(struct kvm *kvm)
+{
+	make_all_cpus_request(kvm, KVM_REQ_APIC_PAGE_RELOAD);
+}
+
 int kvm_vcpu_init(struct kvm_vcpu *vcpu, struct kvm *kvm, unsigned id)
 {
 	struct page *page;
@@ -294,6 +299,13 @@ static void kvm_mmu_notifier_invalidate_page(struct mmu_notifier *mn,
 	if (need_tlb_flush)
 		kvm_flush_remote_tlbs(kvm);
 
+	/*
+	 * The physical address of apic access page is stroed in VMCS.
+	 * So need to update it when it becomes invalid.
+	 */
+	if (address == gfn_to_hva(kvm, APIC_DEFAULT_PHYS_BASE >> PAGE_SHIFT))
+		kvm_reload_apic_access_page(kvm);
+
 	spin_unlock(&kvm->mmu_lock);
 	srcu_read_unlock(&kvm->srcu, idx);
 }
The apic access page is currently pinned in memory, so it cannot be
migrated or hot-removed.  Pinning it is not actually necessary.

The hpa of the apic access page is stored in the VMCS APIC_ACCESS_ADDR
pointer.  When the page is migrated, kvm_mmu_notifier_invalidate_page()
will invalidate the corresponding ept entry.  This patch introduces a
new vcpu request named KVM_REQ_APIC_PAGE_RELOAD, makes this request to
all the vcpus at that time, and forces all the vcpus to exit the guest;
before re-entering, they update the VMCS APIC_ACCESS_ADDR pointer to
the new apic access page address and update kvm->arch.apic_access_page
to the new page.

Signed-off-by: Tang Chen <tangchen@cn.fujitsu.com>
---
 arch/x86/include/asm/kvm_host.h |  1 +
 arch/x86/kvm/svm.c              |  6 ++++++
 arch/x86/kvm/vmx.c              |  6 ++++++
 arch/x86/kvm/x86.c              | 15 +++++++++++++++
 include/linux/kvm_host.h        |  2 ++
 virt/kvm/kvm_main.c             | 12 ++++++++++++
 6 files changed, 42 insertions(+)