diff mbox

[2/2] KVM: nVMX: mark vmcs12 pages dirty on L2 exit

Message ID 20170801210040.10295-2-dmatlack@google.com (mailing list archive)
State New, archived
Headers show

Commit Message

David Matlack Aug. 1, 2017, 9 p.m. UTC
The host physical addresses of L1's virtual APIC page and posted
interrupt descriptor are loaded into the VMCS02. The CPU may write
to these pages via their host physical addresses while L2 is running,
bypassing address-translation-based dirty tracking (e.g. EPT write
protection). Mark both pages dirty on every exit from L2 so that
dirty tracking does not miss those writes.

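Also mark the virtual APIC page and the posted interrupt descriptor
dirty when KVM itself performs posted interrupt processing on behalf
of L1 (vmx_complete_nested_posted_interrupt()), since KVM then writes
to both pages through kernel mappings rather than through the guest's
address translation.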

Signed-off-by: David Matlack <dmatlack@google.com>
---
 arch/x86/kvm/vmx.c | 53 +++++++++++++++++++++++++++++++++++++++++++----------
 1 file changed, 43 insertions(+), 10 deletions(-)

Comments

Paolo Bonzini Aug. 2, 2017, 8:17 a.m. UTC | #1
On 01/08/2017 23:00, David Matlack wrote:
> The host physical addresses of L1's Virtual APIC Page and Posted
> Interrupt descriptor are loaded into the VMCS02. The CPU may write
> to these pages via their host physical address while L2 is running,
> bypassing address-translation-based dirty tracking (e.g. EPT write
> protection). Mark them dirty on every exit from L2 to prevent them
> from getting out of sync with dirty tracking.
> 
> Also mark the virtual APIC page and the posted interrupt descriptor
> dirty when KVM is virtualizing posted interrupt processing.
> 
> Signed-off-by: David Matlack <dmatlack@google.com>

Reviewed-by: Paolo Bonzini <pbonzini@redhat.com>

> ---
>  arch/x86/kvm/vmx.c | 53 +++++++++++++++++++++++++++++++++++++++++++----------
>  1 file changed, 43 insertions(+), 10 deletions(-)
> 
> diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
> index 07d2198db225..b277a0409563 100644
> --- a/arch/x86/kvm/vmx.c
> +++ b/arch/x86/kvm/vmx.c
> @@ -4952,6 +4952,28 @@ static bool vmx_get_enable_apicv(void)
>  	return enable_apicv;
>  }
>  
> +static void nested_mark_vmcs12_pages_dirty(struct kvm_vcpu *vcpu)
> +{
> +	struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
> +	gfn_t gfn;
> +
> +	/*
> +	 * Don't need to mark the APIC access page dirty; it is never
> +	 * written to by the CPU during APIC virtualization.
> +	 */
> +
> +	if (nested_cpu_has(vmcs12, CPU_BASED_TPR_SHADOW)) {
> +		gfn = vmcs12->virtual_apic_page_addr >> PAGE_SHIFT;
> +		kvm_vcpu_mark_page_dirty(vcpu, gfn);
> +	}
> +
> +	if (nested_cpu_has_posted_intr(vmcs12)) {
> +		gfn = vmcs12->posted_intr_desc_addr >> PAGE_SHIFT;
> +		kvm_vcpu_mark_page_dirty(vcpu, gfn);
> +	}
> +}
> +
> +
>  static void vmx_complete_nested_posted_interrupt(struct kvm_vcpu *vcpu)
>  {
>  	struct vcpu_vmx *vmx = to_vmx(vcpu);
> @@ -4959,18 +4981,15 @@ static void vmx_complete_nested_posted_interrupt(struct kvm_vcpu *vcpu)
>  	void *vapic_page;
>  	u16 status;
>  
> -	if (vmx->nested.pi_desc &&
> -	    vmx->nested.pi_pending) {
> -		vmx->nested.pi_pending = false;
> -		if (!pi_test_and_clear_on(vmx->nested.pi_desc))
> -			return;
> -
> -		max_irr = find_last_bit(
> -			(unsigned long *)vmx->nested.pi_desc->pir, 256);
> +	if (!vmx->nested.pi_desc || !vmx->nested.pi_pending)
> +		return;
>  
> -		if (max_irr == 256)
> -			return;
> +	vmx->nested.pi_pending = false;
> +	if (!pi_test_and_clear_on(vmx->nested.pi_desc))
> +		return;
>  
> +	max_irr = find_last_bit((unsigned long *)vmx->nested.pi_desc->pir, 256);
> +	if (max_irr != 256) {
>  		vapic_page = kmap(vmx->nested.virtual_apic_page);
>  		__kvm_apic_update_irr(vmx->nested.pi_desc->pir, vapic_page);
>  		kunmap(vmx->nested.virtual_apic_page);
> @@ -4982,6 +5001,8 @@ static void vmx_complete_nested_posted_interrupt(struct kvm_vcpu *vcpu)
>  			vmcs_write16(GUEST_INTR_STATUS, status);
>  		}
>  	}
> +
> +	nested_mark_vmcs12_pages_dirty(vcpu);
>  }
>  
>  static inline bool kvm_vcpu_trigger_posted_interrupt(struct kvm_vcpu *vcpu,
> @@ -8029,6 +8050,18 @@ static bool nested_vmx_exit_handled(struct kvm_vcpu *vcpu)
>  				vmcs_read32(VM_EXIT_INTR_ERROR_CODE),
>  				KVM_ISA_VMX);
>  
> +	/*
> +	 * The host physical addresses of some pages of guest memory
> +	 * are loaded into VMCS02 (e.g. L1's Virtual APIC Page). The CPU
> +	 * may write to these pages via their host physical address while
> +	 * L2 is running, bypassing any address-translation-based dirty
> +	 * tracking (e.g. EPT write protection).
> +	 *
> +	 * Mark them dirty on every exit from L2 to prevent them from
> +	 * getting out of sync with dirty tracking.
> +	 */
> +	nested_mark_vmcs12_pages_dirty(vcpu);
> +
>  	if (vmx->nested.nested_run_pending)
>  		return false;
>  
>
diff mbox

Patch

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 07d2198db225..b277a0409563 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -4952,6 +4952,28 @@  static bool vmx_get_enable_apicv(void)
 	return enable_apicv;
 }
 
+static void nested_mark_vmcs12_pages_dirty(struct kvm_vcpu *vcpu)
+{
+	struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
+	gfn_t gfn;
+
+	/*
+	 * Don't need to mark the APIC access page dirty; it is never
+	 * written to by the CPU during APIC virtualization.
+	 */
+
+	if (nested_cpu_has(vmcs12, CPU_BASED_TPR_SHADOW)) {
+		gfn = vmcs12->virtual_apic_page_addr >> PAGE_SHIFT;
+		kvm_vcpu_mark_page_dirty(vcpu, gfn);
+	}
+
+	if (nested_cpu_has_posted_intr(vmcs12)) {
+		gfn = vmcs12->posted_intr_desc_addr >> PAGE_SHIFT;
+		kvm_vcpu_mark_page_dirty(vcpu, gfn);
+	}
+}
+
+
 static void vmx_complete_nested_posted_interrupt(struct kvm_vcpu *vcpu)
 {
 	struct vcpu_vmx *vmx = to_vmx(vcpu);
@@ -4959,18 +4981,15 @@  static void vmx_complete_nested_posted_interrupt(struct kvm_vcpu *vcpu)
 	void *vapic_page;
 	u16 status;
 
-	if (vmx->nested.pi_desc &&
-	    vmx->nested.pi_pending) {
-		vmx->nested.pi_pending = false;
-		if (!pi_test_and_clear_on(vmx->nested.pi_desc))
-			return;
-
-		max_irr = find_last_bit(
-			(unsigned long *)vmx->nested.pi_desc->pir, 256);
+	if (!vmx->nested.pi_desc || !vmx->nested.pi_pending)
+		return;
 
-		if (max_irr == 256)
-			return;
+	vmx->nested.pi_pending = false;
+	if (!pi_test_and_clear_on(vmx->nested.pi_desc))
+		return;
 
+	max_irr = find_last_bit((unsigned long *)vmx->nested.pi_desc->pir, 256);
+	if (max_irr != 256) {
 		vapic_page = kmap(vmx->nested.virtual_apic_page);
 		__kvm_apic_update_irr(vmx->nested.pi_desc->pir, vapic_page);
 		kunmap(vmx->nested.virtual_apic_page);
@@ -4982,6 +5001,8 @@  static void vmx_complete_nested_posted_interrupt(struct kvm_vcpu *vcpu)
 			vmcs_write16(GUEST_INTR_STATUS, status);
 		}
 	}
+
+	nested_mark_vmcs12_pages_dirty(vcpu);
 }
 
 static inline bool kvm_vcpu_trigger_posted_interrupt(struct kvm_vcpu *vcpu,
@@ -8029,6 +8050,18 @@  static bool nested_vmx_exit_handled(struct kvm_vcpu *vcpu)
 				vmcs_read32(VM_EXIT_INTR_ERROR_CODE),
 				KVM_ISA_VMX);
 
+	/*
+	 * The host physical addresses of some pages of guest memory
+	 * are loaded into VMCS02 (e.g. L1's Virtual APIC Page). The CPU
+	 * may write to these pages via their host physical address while
+	 * L2 is running, bypassing any address-translation-based dirty
+	 * tracking (e.g. EPT write protection).
+	 *
+	 * Mark them dirty on every exit from L2 to prevent them from
+	 * getting out of sync with dirty tracking.
+	 */
+	nested_mark_vmcs12_pages_dirty(vcpu);
+
 	if (vmx->nested.nested_run_pending)
 		return false;