diff mbox series

[6/7] KVM: nVMX: don't load PDPTRS right after nested state set

Message ID 20210217145718.1217358-7-mlevitsk@redhat.com (mailing list archive)
State New, archived
Headers show
Series KVM: random nested fixes | expand

Commit Message

Maxim Levitsky Feb. 17, 2021, 2:57 p.m. UTC
Just like all other nested memory accesses, after a migration loading
PDPTRs should be delayed to first VM entry to ensure
that guest memory is fully initialized.

Just move the call to nested_vmx_load_cr3 to nested_get_vmcs12_pages
to implement this.

Signed-off-by: Maxim Levitsky <mlevitsk@redhat.com>
---
 arch/x86/kvm/vmx/nested.c | 14 +++++++++-----
 1 file changed, 9 insertions(+), 5 deletions(-)

Comments

Sean Christopherson Feb. 17, 2021, 5:52 p.m. UTC | #1
On Wed, Feb 17, 2021, Maxim Levitsky wrote:
> Just like all other nested memory accesses, after a migration loading
> PDPTRs should be delayed to first VM entry to ensure
> that guest memory is fully initialized.
> 
> Just move the call to nested_vmx_load_cr3 to nested_get_vmcs12_pages
> to implement this.

I don't love this approach.  KVM_SET_NESTED_STATE will now succeed with a bad
vmcs12.GUEST_CR3.  At a minimum, GUEST_CR3 should be checked in
nested_vmx_check_guest_state().  It also feels like vcpu->arch.cr3 should be set
immediately, e.g. KVM_SET_NESTED_STATE -> KVM_GET_SREGS should reflect L2's CR3
even if KVM_RUN hasn't been invoked.

> Signed-off-by: Maxim Levitsky <mlevitsk@redhat.com>
> ---
>  arch/x86/kvm/vmx/nested.c | 14 +++++++++-----
>  1 file changed, 9 insertions(+), 5 deletions(-)
> 
> diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c
> index f9de729dbea6..26084f8eee82 100644
> --- a/arch/x86/kvm/vmx/nested.c
> +++ b/arch/x86/kvm/vmx/nested.c
> @@ -2596,11 +2596,6 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
>  		return -EINVAL;
>  	}
>  
> -	/* Shadow page tables on either EPT or shadow page tables. */
> -	if (nested_vmx_load_cr3(vcpu, vmcs12->guest_cr3, nested_cpu_has_ept(vmcs12),
> -				entry_failure_code))
> -		return -EINVAL;
> -
>  	/*
>  	 * Immediately write vmcs02.GUEST_CR3.  It will be propagated to vmcs12
>  	 * on nested VM-Exit, which can occur without actually running L2 and
> @@ -3138,11 +3133,16 @@ static bool nested_get_evmcs_page(struct kvm_vcpu *vcpu)
>  static bool nested_get_vmcs12_pages(struct kvm_vcpu *vcpu)
>  {
>  	struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
> +	enum vm_entry_failure_code entry_failure_code;
>  	struct vcpu_vmx *vmx = to_vmx(vcpu);
>  	struct kvm_host_map *map;
>  	struct page *page;
>  	u64 hpa;
>  
> +	if (nested_vmx_load_cr3(vcpu, vmcs12->guest_cr3, nested_cpu_has_ept(vmcs12),
> +				&entry_failure_code))
> +		return false;
> +
>  	if (nested_cpu_has2(vmcs12, SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES)) {
>  		/*
>  		 * Translate L1 physical address to host physical
> @@ -3386,6 +3386,10 @@ enum nvmx_vmentry_status nested_vmx_enter_non_root_mode(struct kvm_vcpu *vcpu,
>  	}
>  
>  	if (from_vmentry) {
> +		if (nested_vmx_load_cr3(vcpu, vmcs12->guest_cr3,
> +		    nested_cpu_has_ept(vmcs12), &entry_failure_code))
> +			goto vmentry_fail_vmexit_guest_mode;

This neglects to set both exit_reason.basic and vmcs12->exit_qualification.

> +
>  		failed_index = nested_vmx_load_msr(vcpu,
>  						   vmcs12->vm_entry_msr_load_addr,
>  						   vmcs12->vm_entry_msr_load_count);
> -- 
> 2.26.2
>
Paolo Bonzini Feb. 17, 2021, 6:06 p.m. UTC | #2
On 17/02/21 18:52, Sean Christopherson wrote:
>>
>> Just move the call to nested_vmx_load_cr3 to nested_get_vmcs12_pages
>> to implement this.
>
> I don't love this approach.  KVM_SET_NESTED_STATE will now succeed with a bad
> vmcs12.GUEST_CR3.  At a minimum, GUEST_CR3 should be checked in
> nested_vmx_check_guest_state().  It also feels like vcpu->arch.cr3 should be set
> immediately, e.g. KVM_SET_NESTED_STATE -> KVM_GET_SREGS should reflect L2's CR3
> even if KVM_RUN hasn't been invoked.

Note that KVM_SET_NESTED_STATE does not remove the need to invoke 
KVM_SET_SREGS.  Calling KVM_SET_NESTED_STATE does not necessarily saying 
anything about the value of KVM_GET_SREGS after it.

In particular on SVM it's a "feature" that KVM_SET_NESTED_STATE does not 
include any guest register state; the nested state only includes the 
VMCB12 control state and the L1 save state.  But thinking more about it, 
loading the PDPTRs for the guest CR3 might not be advisable even upon 
KVM_SET_SREGS, and we might want to extend KVM_REQ_GET_NESTED_PAGES to 
cover non-nested PDPTRs as well.

Paolo
diff mbox series

Patch

diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c
index f9de729dbea6..26084f8eee82 100644
--- a/arch/x86/kvm/vmx/nested.c
+++ b/arch/x86/kvm/vmx/nested.c
@@ -2596,11 +2596,6 @@  static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
 		return -EINVAL;
 	}
 
-	/* Shadow page tables on either EPT or shadow page tables. */
-	if (nested_vmx_load_cr3(vcpu, vmcs12->guest_cr3, nested_cpu_has_ept(vmcs12),
-				entry_failure_code))
-		return -EINVAL;
-
 	/*
 	 * Immediately write vmcs02.GUEST_CR3.  It will be propagated to vmcs12
 	 * on nested VM-Exit, which can occur without actually running L2 and
@@ -3138,11 +3133,16 @@  static bool nested_get_evmcs_page(struct kvm_vcpu *vcpu)
 static bool nested_get_vmcs12_pages(struct kvm_vcpu *vcpu)
 {
 	struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
+	enum vm_entry_failure_code entry_failure_code;
 	struct vcpu_vmx *vmx = to_vmx(vcpu);
 	struct kvm_host_map *map;
 	struct page *page;
 	u64 hpa;
 
+	if (nested_vmx_load_cr3(vcpu, vmcs12->guest_cr3, nested_cpu_has_ept(vmcs12),
+				&entry_failure_code))
+		return false;
+
 	if (nested_cpu_has2(vmcs12, SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES)) {
 		/*
 		 * Translate L1 physical address to host physical
@@ -3386,6 +3386,10 @@  enum nvmx_vmentry_status nested_vmx_enter_non_root_mode(struct kvm_vcpu *vcpu,
 	}
 
 	if (from_vmentry) {
+		if (nested_vmx_load_cr3(vcpu, vmcs12->guest_cr3,
+		    nested_cpu_has_ept(vmcs12), &entry_failure_code))
+			goto vmentry_fail_vmexit_guest_mode;
+
 		failed_index = nested_vmx_load_msr(vcpu,
 						   vmcs12->vm_entry_msr_load_addr,
 						   vmcs12->vm_entry_msr_load_count);