Message ID: 5409fdb558e5cc0b5967bbdb831ea28a28107cc5.1467922428.git.pfeiner@google.com (mailing list archive)
State: New, archived
Hi Peter,

Peter Feiner <pfeiner@google.com> writes:

> Between loading the new VMCS and enabling PML, the CPU was unpinned.
> If the vCPU thread were migrated to another CPU in the interim (e.g.,
> due to preemption or sleeping alloc_page), then the VMWRITEs to enable
> PML would target the wrong VMCS -- or no VMCS at all:
>
> [ 2087.266950] vmwrite error: reg 200e value 3fe1d52000 (err -506126336)
> [ 2087.267062] vmwrite error: reg 812 value 1ff (err 511)
> [ 2087.267125] vmwrite error: reg 401e value 12229c00 (err 304258048)
>
> This patch ensures that the VMCS remains current while enabling PML by
> doing the VMWRITEs while the CPU is pinned. Allocation of the PML buffer
> is hoisted out of the critical section.
>
> Signed-off-by: Peter Feiner <pfeiner@google.com>
> ---
>  arch/x86/kvm/vmx.c | 50 ++++++++++++++++++++++----------------------------
>  1 file changed, 22 insertions(+), 28 deletions(-)
>
> diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
> index 64a79f2..6d3a8d7 100644
> --- a/arch/x86/kvm/vmx.c
> +++ b/arch/x86/kvm/vmx.c
> @@ -4876,6 +4876,7 @@ static void ept_set_mmio_spte_mask(void)
>  }
>
>  #define VMX_XSS_EXIT_BITMAP 0
> +

Stray space

> /*
>  * Sets up the vmcs for emulated real mode.
>  */
> @@ -4979,6 +4980,12 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx)
>  	if (vmx_xsaves_supported())
>  		vmcs_write64(XSS_EXIT_BITMAP, VMX_XSS_EXIT_BITMAP);
>
> +	if (enable_pml) {
> +		ASSERT(vmx->pml_pg);
> +		vmcs_write64(PML_ADDRESS, page_to_phys(vmx->pml_pg));
> +		vmcs_write16(GUEST_PML_INDEX, PML_ENTITY_NUM - 1);
> +	}
> +
>  	return 0;
>  }
>
> @@ -7937,22 +7944,6 @@ static void vmx_get_exit_info(struct kvm_vcpu *vcpu, u64 *info1, u64 *info2)
>  	*info2 = vmcs_read32(VM_EXIT_INTR_INFO);
>  }
>
> -static int vmx_create_pml_buffer(struct vcpu_vmx *vmx)
> -{
> -	struct page *pml_pg;
> -
> -	pml_pg = alloc_page(GFP_KERNEL | __GFP_ZERO);
> -	if (!pml_pg)
> -		return -ENOMEM;
> -
> -	vmx->pml_pg = pml_pg;
> -
> -	vmcs_write64(PML_ADDRESS, page_to_phys(vmx->pml_pg));
> -	vmcs_write16(GUEST_PML_INDEX, PML_ENTITY_NUM - 1);
> -
> -	return 0;
> -}
> -
>  static void vmx_destroy_pml_buffer(struct vcpu_vmx *vmx)
>  {
>  	if (vmx->pml_pg) {
> @@ -8881,6 +8872,19 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id)
>
>  	vmx->vpid = allocate_vpid();
>
> +	/*
> +	 * If PML is turned on, failure on enabling PML just results in failure
> +	 * of creating the vcpu, therefore we can simplify PML logic (by
> +	 * avoiding dealing with cases, such as enabling PML partially on vcpus
> +	 * for the guest, etc.
> +	 */
> +	if (enable_pml) {
> +		vmx->pml_pg = alloc_page(GFP_KERNEL | __GFP_ZERO);
> +		err = -ENOMEM;

Since err is being set to -ENOMEM after the call to BUILD_BUG_ON a few
lines below, you could just move this before the vmx->msrs check.

> +		if (!vmx->pml_pg)
> +			goto free_vcpu;
> +	}
> +
>  	err = kvm_vcpu_init(&vmx->vcpu, kvm, id);
>  	if (err)
>  		goto free_vcpu;
> @@ -8936,18 +8940,6 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id)
>  	vmx->nested.current_vmptr = -1ull;
>  	vmx->nested.current_vmcs12 = NULL;
>
> -	/*
> -	 * If PML is turned on, failure on enabling PML just results in failure
> -	 * of creating the vcpu, therefore we can simplify PML logic (by
> -	 * avoiding dealing with cases, such as enabling PML partially on vcpus
> -	 * for the guest, etc.
> -	 */
> -	if (enable_pml) {
> -		err = vmx_create_pml_buffer(vmx);
> -		if (err)
> -			goto free_vmcs;
> -	}
> -
>  	return &vmx->vcpu;
>
>  free_vmcs:
> @@ -8958,6 +8950,8 @@ free_msrs:
>  uninit_vcpu:
>  	kvm_vcpu_uninit(&vmx->vcpu);
>  free_vcpu:
> +	if (vmx->pml_pg)
> +		__free_page(vmx->pml_pg);

If we move __free_page to uninit_vcpu(), we can
remove the if() check here.

Bandan

>  	free_vpid(vmx->vpid);
>  	kmem_cache_free(kvm_vcpu_cache, vmx);
>  	return ERR_PTR(err);
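In concrete terms, Bandan's first suggestion is to reuse the err = -ENOMEM
assignment that already precedes the vmx->guest_msrs allocation, rather than
setting err twice. A rough sketch of that ordering (abbreviated, with the
PML block moved below kvm_vcpu_init(); the goto target shown is illustrative,
not taken from an actual patch):

	err = kvm_vcpu_init(&vmx->vcpu, kvm, id);
	if (err)
		goto free_vcpu;

	err = -ENOMEM;		/* the assignment that already exists */

	if (enable_pml) {	/* moved: shares err = -ENOMEM above */
		vmx->pml_pg = alloc_page(GFP_KERNEL | __GFP_ZERO);
		if (!vmx->pml_pg)
			goto uninit_vcpu;	/* must now unwind kvm_vcpu_init() */
	}

	vmx->guest_msrs = kmalloc(PAGE_SIZE, GFP_KERNEL);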
On Thu, Jul 7, 2016 at 2:13 PM, Bandan Das <bsd@redhat.com> wrote:
>
> Hi Peter,
>
> Peter Feiner <pfeiner@google.com> writes:
>
>> Between loading the new VMCS and enabling PML, the CPU was unpinned.
>> If the vCPU thread were migrated to another CPU in the interim (e.g.,
>> due to preemption or sleeping alloc_page), then the VMWRITEs to enable
>> PML would target the wrong VMCS -- or no VMCS at all:
>>
>> [ 2087.266950] vmwrite error: reg 200e value 3fe1d52000 (err -506126336)
>> [ 2087.267062] vmwrite error: reg 812 value 1ff (err 511)
>> [ 2087.267125] vmwrite error: reg 401e value 12229c00 (err 304258048)
>>
>> This patch ensures that the VMCS remains current while enabling PML by
>> doing the VMWRITEs while the CPU is pinned. Allocation of the PML buffer
>> is hoisted out of the critical section.
>>
>> Signed-off-by: Peter Feiner <pfeiner@google.com>
>> ---
>>  arch/x86/kvm/vmx.c | 50 ++++++++++++++++++++++----------------------------
>>  1 file changed, 22 insertions(+), 28 deletions(-)
>>
>> diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
>> index 64a79f2..6d3a8d7 100644
>> --- a/arch/x86/kvm/vmx.c
>> +++ b/arch/x86/kvm/vmx.c
>> @@ -4876,6 +4876,7 @@ static void ept_set_mmio_spte_mask(void)
>>  }
>>
>>  #define VMX_XSS_EXIT_BITMAP 0
>> +
>
> Stray space

Fixed for v2.

>> /*
>>  * Sets up the vmcs for emulated real mode.
>>  */
>> @@ -4979,6 +4980,12 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx)
>>  	if (vmx_xsaves_supported())
>>  		vmcs_write64(XSS_EXIT_BITMAP, VMX_XSS_EXIT_BITMAP);
>>
>> +	if (enable_pml) {
>> +		ASSERT(vmx->pml_pg);
>> +		vmcs_write64(PML_ADDRESS, page_to_phys(vmx->pml_pg));
>> +		vmcs_write16(GUEST_PML_INDEX, PML_ENTITY_NUM - 1);
>> +	}
>> +
>>  	return 0;
>>  }
>>
>> @@ -7937,22 +7944,6 @@ static void vmx_get_exit_info(struct kvm_vcpu *vcpu, u64 *info1, u64 *info2)
>>  	*info2 = vmcs_read32(VM_EXIT_INTR_INFO);
>>  }
>>
>> -static int vmx_create_pml_buffer(struct vcpu_vmx *vmx)
>> -{
>> -	struct page *pml_pg;
>> -
>> -	pml_pg = alloc_page(GFP_KERNEL | __GFP_ZERO);
>> -	if (!pml_pg)
>> -		return -ENOMEM;
>> -
>> -	vmx->pml_pg = pml_pg;
>> -
>> -	vmcs_write64(PML_ADDRESS, page_to_phys(vmx->pml_pg));
>> -	vmcs_write16(GUEST_PML_INDEX, PML_ENTITY_NUM - 1);
>> -
>> -	return 0;
>> -}
>> -
>>  static void vmx_destroy_pml_buffer(struct vcpu_vmx *vmx)
>>  {
>>  	if (vmx->pml_pg) {
>> @@ -8881,6 +8872,19 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id)
>>
>>  	vmx->vpid = allocate_vpid();
>>
>> +	/*
>> +	 * If PML is turned on, failure on enabling PML just results in failure
>> +	 * of creating the vcpu, therefore we can simplify PML logic (by
>> +	 * avoiding dealing with cases, such as enabling PML partially on vcpus
>> +	 * for the guest, etc.
>> +	 */
>> +	if (enable_pml) {
>> +		vmx->pml_pg = alloc_page(GFP_KERNEL | __GFP_ZERO);
>> +		err = -ENOMEM;
>
> Since err is being set to -ENOMEM after the call to BUILD_BUG_ON a few
> lines below, you could just move this before the vmx->msrs check.

Interspersing the various allocations and error checking paths looked
funny, so I moved the err = -ENOMEM statement to the top.

>> +		if (!vmx->pml_pg)
>> +			goto free_vcpu;
>> +	}
>> +
>>  	err = kvm_vcpu_init(&vmx->vcpu, kvm, id);
>>  	if (err)
>>  		goto free_vcpu;
>> @@ -8936,18 +8940,6 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id)
>>  	vmx->nested.current_vmptr = -1ull;
>>  	vmx->nested.current_vmcs12 = NULL;
>>
>> -	/*
>> -	 * If PML is turned on, failure on enabling PML just results in failure
>> -	 * of creating the vcpu, therefore we can simplify PML logic (by
>> -	 * avoiding dealing with cases, such as enabling PML partially on vcpus
>> -	 * for the guest, etc.
>> -	 */
>> -	if (enable_pml) {
>> -		err = vmx_create_pml_buffer(vmx);
>> -		if (err)
>> -			goto free_vmcs;
>> -	}
>> -
>>  	return &vmx->vcpu;
>>
>>  free_vmcs:
>> @@ -8958,6 +8950,8 @@ free_msrs:
>>  uninit_vcpu:
>>  	kvm_vcpu_uninit(&vmx->vcpu);
>>  free_vcpu:
>> +	if (vmx->pml_pg)
>> +		__free_page(vmx->pml_pg);
>
> If we move __free_page to uninit_vcpu(), we can
> remove the if() check here.

There's no vmx-specific hook in kvm_arch_uninit_vcpu. I threw in a call
to vmx_destroy_pml_buffer, which has the conditional logic, in the
teardown path instead. I also added an explicit free_pml teardown step --
not necessary but it looks cleaner IMO.

> Bandan

Thanks for the review! I'll send v2 in a moment.
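Putting Peter's description together, the v2 error paths presumably end up
shaped something like the following sketch (reconstructed from his reply
above, not copied from v2 itself). Because vmx_destroy_pml_buffer() already
checks vmx->pml_pg before freeing, the free_pml step is safe to run
unconditionally on every unwind path:

	vmx->guest_msrs = kmalloc(PAGE_SIZE, GFP_KERNEL);
	if (!vmx->guest_msrs)
		goto free_pml;
	...
free_msrs:
	kfree(vmx->guest_msrs);
free_pml:
	vmx_destroy_pml_buffer(vmx);	/* no-op if pml_pg was never allocated */
uninit_vcpu:
	kvm_vcpu_uninit(&vmx->vcpu);
free_vcpu:
	free_vpid(vmx->vpid);
	kmem_cache_free(kvm_vcpu_cache, vmx);
	return ERR_PTR(err);

This keeps the if() check in one place (the destructor) instead of
duplicating it in the error path, which is what Bandan's comment was after.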
Between loading the new VMCS and enabling PML, the CPU was unpinned.
If the vCPU thread were migrated to another CPU in the interim (e.g.,
due to preemption or sleeping alloc_page), then the VMWRITEs to enable
PML would target the wrong VMCS -- or no VMCS at all:

[ 2087.266950] vmwrite error: reg 200e value 3fe1d52000 (err -506126336)
[ 2087.267062] vmwrite error: reg 812 value 1ff (err 511)
[ 2087.267125] vmwrite error: reg 401e value 12229c00 (err 304258048)

This patch ensures that the VMCS remains current while enabling PML by
doing the VMWRITEs while the CPU is pinned. Allocation of the PML buffer
is hoisted out of the critical section.

Signed-off-by: Peter Feiner <pfeiner@google.com>
---
 arch/x86/kvm/vmx.c | 50 ++++++++++++++++++++++----------------------------
 1 file changed, 22 insertions(+), 28 deletions(-)

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 64a79f2..6d3a8d7 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -4876,6 +4876,7 @@ static void ept_set_mmio_spte_mask(void)
 }
 
 #define VMX_XSS_EXIT_BITMAP 0
+
 /*
  * Sets up the vmcs for emulated real mode.
  */
@@ -4979,6 +4980,12 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx)
 	if (vmx_xsaves_supported())
 		vmcs_write64(XSS_EXIT_BITMAP, VMX_XSS_EXIT_BITMAP);
 
+	if (enable_pml) {
+		ASSERT(vmx->pml_pg);
+		vmcs_write64(PML_ADDRESS, page_to_phys(vmx->pml_pg));
+		vmcs_write16(GUEST_PML_INDEX, PML_ENTITY_NUM - 1);
+	}
+
 	return 0;
 }
 
@@ -7937,22 +7944,6 @@ static void vmx_get_exit_info(struct kvm_vcpu *vcpu, u64 *info1, u64 *info2)
 	*info2 = vmcs_read32(VM_EXIT_INTR_INFO);
 }
 
-static int vmx_create_pml_buffer(struct vcpu_vmx *vmx)
-{
-	struct page *pml_pg;
-
-	pml_pg = alloc_page(GFP_KERNEL | __GFP_ZERO);
-	if (!pml_pg)
-		return -ENOMEM;
-
-	vmx->pml_pg = pml_pg;
-
-	vmcs_write64(PML_ADDRESS, page_to_phys(vmx->pml_pg));
-	vmcs_write16(GUEST_PML_INDEX, PML_ENTITY_NUM - 1);
-
-	return 0;
-}
-
 static void vmx_destroy_pml_buffer(struct vcpu_vmx *vmx)
 {
 	if (vmx->pml_pg) {
@@ -8881,6 +8872,19 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id)
 
 	vmx->vpid = allocate_vpid();
 
+	/*
+	 * If PML is turned on, failure on enabling PML just results in failure
+	 * of creating the vcpu, therefore we can simplify PML logic (by
+	 * avoiding dealing with cases, such as enabling PML partially on vcpus
+	 * for the guest, etc.
+	 */
+	if (enable_pml) {
+		vmx->pml_pg = alloc_page(GFP_KERNEL | __GFP_ZERO);
+		err = -ENOMEM;
+		if (!vmx->pml_pg)
+			goto free_vcpu;
+	}
+
 	err = kvm_vcpu_init(&vmx->vcpu, kvm, id);
 	if (err)
 		goto free_vcpu;
@@ -8936,18 +8940,6 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id)
 	vmx->nested.current_vmptr = -1ull;
 	vmx->nested.current_vmcs12 = NULL;
 
-	/*
-	 * If PML is turned on, failure on enabling PML just results in failure
-	 * of creating the vcpu, therefore we can simplify PML logic (by
-	 * avoiding dealing with cases, such as enabling PML partially on vcpus
-	 * for the guest, etc.
-	 */
-	if (enable_pml) {
-		err = vmx_create_pml_buffer(vmx);
-		if (err)
-			goto free_vmcs;
-	}
-
 	return &vmx->vcpu;
 
 free_vmcs:
@@ -8958,6 +8950,8 @@ free_msrs:
 uninit_vcpu:
 	kvm_vcpu_uninit(&vmx->vcpu);
 free_vcpu:
+	if (vmx->pml_pg)
+		__free_page(vmx->pml_pg);
 	free_vpid(vmx->vpid);
 	kmem_cache_free(kvm_vcpu_cache, vmx);
 	return ERR_PTR(err);
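Why moving the VMWRITEs into vmx_vcpu_setup() fixes the race:
vmx_vcpu_setup() is called from vmx_create_vcpu() inside a window where
preemption is disabled and the new VMCS has just been loaded on the current
CPU. A simplified paraphrase of that call site in the vmx.c of this era
(error handling omitted):

	cpu = get_cpu();		/* disable preemption: no migration */
	vmx_vcpu_load(&vmx->vcpu, cpu);	/* make this vCPU's VMCS current */
	vmx->vcpu.cpu = cpu;
	err = vmx_vcpu_setup(vmx);	/* VMWRITEs, incl. PML setup, hit that VMCS */
	vmx_vcpu_put(&vmx->vcpu);
	put_cpu();			/* re-enable preemption */

Since alloc_page(GFP_KERNEL | __GFP_ZERO) may sleep, it cannot sit inside
this preemption-disabled window; that is why the patch allocates the PML
buffer earlier in vmx_create_vcpu() and moves only the two VMWRITEs into
vmx_vcpu_setup().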