kvm: ensure VMCS is current while enabling PML

Message ID	5409fdb558e5cc0b5967bbdb831ea28a28107cc5.1467922428.git.pfeiner@google.com (mailing list archive)
State	New, archived
Headers	show Return-Path: <kvm-owner@kernel.org> From: Peter Feiner <pfeiner@google.com> To: kai.huang@linux.intel.com, guangrong.xiao@linux.intel.com, pbonzini@redhat.com Cc: dmatlack@google.com, jmattson@google.com, kvm@vger.kernel.org, Peter Feiner <pfeiner@google.com> Subject: [PATCH] kvm: ensure VMCS is current while enabling PML Date: Thu, 7 Jul 2016 13:17:14 -0700 Message-Id: <5409fdb558e5cc0b5967bbdb831ea28a28107cc5.1467922428.git.pfeiner@google.com> Sender: kvm-owner@vger.kernel.org Precedence: bulk

Message ID

5409fdb558e5cc0b5967bbdb831ea28a28107cc5.1467922428.git.pfeiner@google.com (mailing list archive)

State

New, archived

Headers

From: Peter Feiner <pfeiner@google.com>
To: kai.huang@linux.intel.com, guangrong.xiao@linux.intel.com,
	pbonzini@redhat.com
Cc: dmatlack@google.com, jmattson@google.com, kvm@vger.kernel.org,
	Peter Feiner <pfeiner@google.com>
Subject: [PATCH] kvm: ensure VMCS is current while enabling PML
Date: Thu,  7 Jul 2016 13:17:14 -0700
Message-Id: <5409fdb558e5cc0b5967bbdb831ea28a28107cc5.1467922428.git.pfeiner@google.com>
Sender: kvm-owner@vger.kernel.org
Precedence: bulk

Commit Message

Peter Feiner July 7, 2016, 8:17 p.m. UTC

Between loading the new VMCS and enabling PML, the CPU was unpinned.
If the vCPU thread were migrated to another CPU in the interim (e.g.,
due to preemption or sleeping alloc_page), then the VMWRITEs to enable
PML would target the wrong VMCS -- or no VMCS at all:

  [ 2087.266950] vmwrite error: reg 200e value 3fe1d52000 (err -506126336)
  [ 2087.267062] vmwrite error: reg 812 value 1ff (err 511)
  [ 2087.267125] vmwrite error: reg 401e value 12229c00 (err 304258048)

This patch ensures that the VMCS remains current while enabling PML by
doing the VMWRITEs while the CPU is pinned. Allocation of the PML buffer
is hoisted out of the critical section.

Signed-off-by: Peter Feiner <pfeiner@google.com>
---
 arch/x86/kvm/vmx.c | 50 ++++++++++++++++++++++----------------------------
 1 file changed, 22 insertions(+), 28 deletions(-)

Comments

Bandan Das July 7, 2016, 9:13 p.m. UTC | #1

Hi Peter,

Peter Feiner <pfeiner@google.com> writes:

> Between loading the new VMCS and enabling PML, the CPU was unpinned.
> If the vCPU thread were migrated to another CPU in the interim (e.g.,
> due to preemption or sleeping alloc_page), then the VMWRITEs to enable
> PML would target the wrong VMCS -- or no VMCS at all:
>
>   [ 2087.266950] vmwrite error: reg 200e value 3fe1d52000 (err -506126336)
>   [ 2087.267062] vmwrite error: reg 812 value 1ff (err 511)
>   [ 2087.267125] vmwrite error: reg 401e value 12229c00 (err 304258048)
>
> This patch ensures that the VMCS remains current while enabling PML by
> doing the VMWRITEs while the CPU is pinned. Allocation of the PML buffer
> is hoisted out of the critical section.
>
> Signed-off-by: Peter Feiner <pfeiner@google.com>
> ---
>  arch/x86/kvm/vmx.c | 50 ++++++++++++++++++++++----------------------------
>  1 file changed, 22 insertions(+), 28 deletions(-)
>
> diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
> index 64a79f2..6d3a8d7 100644
> --- a/arch/x86/kvm/vmx.c
> +++ b/arch/x86/kvm/vmx.c
> @@ -4876,6 +4876,7 @@ static void ept_set_mmio_spte_mask(void)
>  }
>  
>  #define VMX_XSS_EXIT_BITMAP 0
> +
Stray space

>  /*
>   * Sets up the vmcs for emulated real mode.
>   */
> @@ -4979,6 +4980,12 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx)
>  	if (vmx_xsaves_supported())
>  		vmcs_write64(XSS_EXIT_BITMAP, VMX_XSS_EXIT_BITMAP);
>  
> +	if (enable_pml) {
> +		ASSERT(vmx->pml_pg);
> +		vmcs_write64(PML_ADDRESS, page_to_phys(vmx->pml_pg));
> +		vmcs_write16(GUEST_PML_INDEX, PML_ENTITY_NUM - 1);
> +	}
> +
>  	return 0;
>  }
>  
> @@ -7937,22 +7944,6 @@ static void vmx_get_exit_info(struct kvm_vcpu *vcpu, u64 *info1, u64 *info2)
>  	*info2 = vmcs_read32(VM_EXIT_INTR_INFO);
>  }
>  
> -static int vmx_create_pml_buffer(struct vcpu_vmx *vmx)
> -{
> -	struct page *pml_pg;
> -
> -	pml_pg = alloc_page(GFP_KERNEL | __GFP_ZERO);
> -	if (!pml_pg)
> -		return -ENOMEM;
> -
> -	vmx->pml_pg = pml_pg;
> -
> -	vmcs_write64(PML_ADDRESS, page_to_phys(vmx->pml_pg));
> -	vmcs_write16(GUEST_PML_INDEX, PML_ENTITY_NUM - 1);
> -
> -	return 0;
> -}
> -
>  static void vmx_destroy_pml_buffer(struct vcpu_vmx *vmx)
>  {
>  	if (vmx->pml_pg) {
> @@ -8881,6 +8872,19 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id)
>  
>  	vmx->vpid = allocate_vpid();
>  
> +	/*
> +	 * If PML is turned on, failure on enabling PML just results in failure
> +	 * of creating the vcpu, therefore we can simplify PML logic (by
> +	 * avoiding dealing with cases, such as enabling PML partially on vcpus
> +	 * for the guest, etc.
> +	 */
> +	if (enable_pml) {
> +		vmx->pml_pg = alloc_page(GFP_KERNEL | __GFP_ZERO);
> +		err = -ENOMEM;

Since err is being set to -ENOMEM after the call to BUILD_BUG_ON a few lines below,
you could just move this before the vmx->msrs check.

> +		if (!vmx->pml_pg)
> +			goto free_vcpu;
> +	}
> +
>  	err = kvm_vcpu_init(&vmx->vcpu, kvm, id);
>  	if (err)
>  		goto free_vcpu;
> @@ -8936,18 +8940,6 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id)
>  	vmx->nested.current_vmptr = -1ull;
>  	vmx->nested.current_vmcs12 = NULL;
>  
> -	/*
> -	 * If PML is turned on, failure on enabling PML just results in failure
> -	 * of creating the vcpu, therefore we can simplify PML logic (by
> -	 * avoiding dealing with cases, such as enabling PML partially on vcpus
> -	 * for the guest, etc.
> -	 */
> -	if (enable_pml) {
> -		err = vmx_create_pml_buffer(vmx);
> -		if (err)
> -			goto free_vmcs;
> -	}
> -
>  	return &vmx->vcpu;
>  
>  free_vmcs:
> @@ -8958,6 +8950,8 @@ free_msrs:
>  uninit_vcpu:
>  	kvm_vcpu_uninit(&vmx->vcpu);
>  free_vcpu:
> +	if (vmx->pml_pg)
> +		__free_page(vmx->pml_pg);

If we move  __free_page to uninit_vcpu(), we can
remove the if() check here.

Bandan

>  	free_vpid(vmx->vpid);
>  	kmem_cache_free(kvm_vcpu_cache, vmx);
>  	return ERR_PTR(err);
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Peter Feiner July 7, 2016, 9:47 p.m. UTC | #2

On Thu, Jul 7, 2016 at 2:13 PM, Bandan Das <bsd@redhat.com> wrote:
>
> Hi Peter,
>
> Peter Feiner <pfeiner@google.com> writes:
>
>> Between loading the new VMCS and enabling PML, the CPU was unpinned.
>> If the vCPU thread were migrated to another CPU in the interim (e.g.,
>> due to preemption or sleeping alloc_page), then the VMWRITEs to enable
>> PML would target the wrong VMCS -- or no VMCS at all:
>>
>>   [ 2087.266950] vmwrite error: reg 200e value 3fe1d52000 (err -506126336)
>>   [ 2087.267062] vmwrite error: reg 812 value 1ff (err 511)
>>   [ 2087.267125] vmwrite error: reg 401e value 12229c00 (err 304258048)
>>
>> This patch ensures that the VMCS remains current while enabling PML by
>> doing the VMWRITEs while the CPU is pinned. Allocation of the PML buffer
>> is hoisted out of the critical section.
>>
>> Signed-off-by: Peter Feiner <pfeiner@google.com>
>> ---
>>  arch/x86/kvm/vmx.c | 50 ++++++++++++++++++++++----------------------------
>>  1 file changed, 22 insertions(+), 28 deletions(-)
>>
>> diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
>> index 64a79f2..6d3a8d7 100644
>> --- a/arch/x86/kvm/vmx.c
>> +++ b/arch/x86/kvm/vmx.c
>> @@ -4876,6 +4876,7 @@ static void ept_set_mmio_spte_mask(void)
>>  }
>>
>>  #define VMX_XSS_EXIT_BITMAP 0
>> +
> Stray space

Fixed for v2.

>>  /*
>>   * Sets up the vmcs for emulated real mode.
>>   */
>> @@ -4979,6 +4980,12 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx)
>>       if (vmx_xsaves_supported())
>>               vmcs_write64(XSS_EXIT_BITMAP, VMX_XSS_EXIT_BITMAP);
>>
>> +     if (enable_pml) {
>> +             ASSERT(vmx->pml_pg);
>> +             vmcs_write64(PML_ADDRESS, page_to_phys(vmx->pml_pg));
>> +             vmcs_write16(GUEST_PML_INDEX, PML_ENTITY_NUM - 1);
>> +     }
>> +
>>       return 0;
>>  }
>>
>> @@ -7937,22 +7944,6 @@ static void vmx_get_exit_info(struct kvm_vcpu *vcpu, u64 *info1, u64 *info2)
>>       *info2 = vmcs_read32(VM_EXIT_INTR_INFO);
>>  }
>>
>> -static int vmx_create_pml_buffer(struct vcpu_vmx *vmx)
>> -{
>> -     struct page *pml_pg;
>> -
>> -     pml_pg = alloc_page(GFP_KERNEL | __GFP_ZERO);
>> -     if (!pml_pg)
>> -             return -ENOMEM;
>> -
>> -     vmx->pml_pg = pml_pg;
>> -
>> -     vmcs_write64(PML_ADDRESS, page_to_phys(vmx->pml_pg));
>> -     vmcs_write16(GUEST_PML_INDEX, PML_ENTITY_NUM - 1);
>> -
>> -     return 0;
>> -}
>> -
>>  static void vmx_destroy_pml_buffer(struct vcpu_vmx *vmx)
>>  {
>>       if (vmx->pml_pg) {
>> @@ -8881,6 +8872,19 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id)
>>
>>       vmx->vpid = allocate_vpid();
>>
>> +     /*
>> +      * If PML is turned on, failure on enabling PML just results in failure
>> +      * of creating the vcpu, therefore we can simplify PML logic (by
>> +      * avoiding dealing with cases, such as enabling PML partially on vcpus
>> +      * for the guest, etc.
>> +      */
>> +     if (enable_pml) {
>> +             vmx->pml_pg = alloc_page(GFP_KERNEL | __GFP_ZERO);
>> +             err = -ENOMEM;
>
> Since err is being set to -ENOMEM after the call to BUILD_BUG_ON a few lines below,
> you could just move this before the vmx->msrs check.

Interspersing the various allocations and error checking paths looked
funny, so I moved the err = -ENOMEM statement to the top.

>> +             if (!vmx->pml_pg)
>> +                     goto free_vcpu;
>> +     }
>> +
>>       err = kvm_vcpu_init(&vmx->vcpu, kvm, id);
>>       if (err)
>>               goto free_vcpu;
>> @@ -8936,18 +8940,6 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id)
>>       vmx->nested.current_vmptr = -1ull;
>>       vmx->nested.current_vmcs12 = NULL;
>>
>> -     /*
>> -      * If PML is turned on, failure on enabling PML just results in failure
>> -      * of creating the vcpu, therefore we can simplify PML logic (by
>> -      * avoiding dealing with cases, such as enabling PML partially on vcpus
>> -      * for the guest, etc.
>> -      */
>> -     if (enable_pml) {
>> -             err = vmx_create_pml_buffer(vmx);
>> -             if (err)
>> -                     goto free_vmcs;
>> -     }
>> -
>>       return &vmx->vcpu;
>>
>>  free_vmcs:
>> @@ -8958,6 +8950,8 @@ free_msrs:
>>  uninit_vcpu:
>>       kvm_vcpu_uninit(&vmx->vcpu);
>>  free_vcpu:
>> +     if (vmx->pml_pg)
>> +             __free_page(vmx->pml_pg);
>
> If we move  __free_page to uninit_vcpu(), we can
> remove the if() check here.

There's no vmx-specific hook in kvm_arch_uninit_vcpu. I threw in a
call to vmx_destroy_pml_buffer, which has the conditional logic, in
the teardown path instead. I also added an explicit free_pml teardown
step -- not necessary but it looks cleaner IMO.

>
> Bandan

Thanks for the review! I'll send v2 in a moment.
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 64a79f2..6d3a8d7 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -4876,6 +4876,7 @@  static void ept_set_mmio_spte_mask(void)
 }
 
 #define VMX_XSS_EXIT_BITMAP 0
+
 /*
  * Sets up the vmcs for emulated real mode.
  */
@@ -4979,6 +4980,12 @@  static int vmx_vcpu_setup(struct vcpu_vmx *vmx)
 	if (vmx_xsaves_supported())
 		vmcs_write64(XSS_EXIT_BITMAP, VMX_XSS_EXIT_BITMAP);
 
+	if (enable_pml) {
+		ASSERT(vmx->pml_pg);
+		vmcs_write64(PML_ADDRESS, page_to_phys(vmx->pml_pg));
+		vmcs_write16(GUEST_PML_INDEX, PML_ENTITY_NUM - 1);
+	}
+
 	return 0;
 }
 
@@ -7937,22 +7944,6 @@  static void vmx_get_exit_info(struct kvm_vcpu *vcpu, u64 *info1, u64 *info2)
 	*info2 = vmcs_read32(VM_EXIT_INTR_INFO);
 }
 
-static int vmx_create_pml_buffer(struct vcpu_vmx *vmx)
-{
-	struct page *pml_pg;
-
-	pml_pg = alloc_page(GFP_KERNEL | __GFP_ZERO);
-	if (!pml_pg)
-		return -ENOMEM;
-
-	vmx->pml_pg = pml_pg;
-
-	vmcs_write64(PML_ADDRESS, page_to_phys(vmx->pml_pg));
-	vmcs_write16(GUEST_PML_INDEX, PML_ENTITY_NUM - 1);
-
-	return 0;
-}
-
 static void vmx_destroy_pml_buffer(struct vcpu_vmx *vmx)
 {
 	if (vmx->pml_pg) {
@@ -8881,6 +8872,19 @@  static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id)
 
 	vmx->vpid = allocate_vpid();
 
+	/*
+	 * If PML is turned on, failure on enabling PML just results in failure
+	 * of creating the vcpu, therefore we can simplify PML logic (by
+	 * avoiding dealing with cases, such as enabling PML partially on vcpus
+	 * for the guest, etc.
+	 */
+	if (enable_pml) {
+		vmx->pml_pg = alloc_page(GFP_KERNEL | __GFP_ZERO);
+		err = -ENOMEM;
+		if (!vmx->pml_pg)
+			goto free_vcpu;
+	}
+
 	err = kvm_vcpu_init(&vmx->vcpu, kvm, id);
 	if (err)
 		goto free_vcpu;
@@ -8936,18 +8940,6 @@  static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id)
 	vmx->nested.current_vmptr = -1ull;
 	vmx->nested.current_vmcs12 = NULL;
 
-	/*
-	 * If PML is turned on, failure on enabling PML just results in failure
-	 * of creating the vcpu, therefore we can simplify PML logic (by
-	 * avoiding dealing with cases, such as enabling PML partially on vcpus
-	 * for the guest, etc.
-	 */
-	if (enable_pml) {
-		err = vmx_create_pml_buffer(vmx);
-		if (err)
-			goto free_vmcs;
-	}
-
 	return &vmx->vcpu;
 
 free_vmcs:
@@ -8958,6 +8950,8 @@  free_msrs:
 uninit_vcpu:
 	kvm_vcpu_uninit(&vmx->vcpu);
 free_vcpu:
+	if (vmx->pml_pg)
+		__free_page(vmx->pml_pg);
 	free_vpid(vmx->vpid);
 	kmem_cache_free(kvm_vcpu_cache, vmx);
 	return ERR_PTR(err);

kvm: ensure VMCS is current while enabling PML

Commit Message

Comments

Patch