[05/12] KVM: MMU: add spte into rmap before logging dirty page

Message ID 1375189330-24066-6-git-send-email-xiaoguangrong@linux.vnet.ibm.com (mailing list archive)
State New, archived

Commit Message

Xiao Guangrong July 30, 2013, 1:02 p.m. UTC
kvm_vm_ioctl_get_dirty_log() write-protects sptes based on the dirty
bitmap, so we should ensure that a writable spte can be found in the rmap
before the dirty bitmap is made visible. Otherwise, we may clear the dirty
bitmap but fail to write-protect the page.

A memory barrier is needed to prevent this reordering; it will be added in
a later patch of this series.

Signed-off-by: Xiao Guangrong <xiaoguangrong@linux.vnet.ibm.com>
---
 arch/x86/kvm/mmu.c | 25 ++++++++++---------------
 1 file changed, 10 insertions(+), 15 deletions(-)
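
For context, a condensed sketch of the ordering problem and the fix
(illustrative, not a literal excerpt from mmu.c; rmap_add()'s return value
is ignored here):

	/* Writer side before this patch (set_spte + mmu_set_spte): */
	mark_page_dirty(vcpu->kvm, gfn);  /* dirty bit visible first...   */
	rmap_add(vcpu, sptep, gfn);       /* ...spte published only later */

	/*
	 * If kvm_vm_ioctl_get_dirty_log() runs between these two, it
	 * clears the dirty bit and walks the rmap, but the writable spte
	 * is not there yet: the page stays writable and its dirty bit is
	 * lost.
	 */

	/* Writer side after this patch (plus the smp_wmb() from 11/12): */
	rmap_add(vcpu, sptep, gfn);       /* publish the spte first */
	smp_wmb();
	mark_page_dirty(vcpu->kvm, gfn);  /* then set the dirty bit */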

Comments

Paolo Bonzini July 30, 2013, 1:27 p.m. UTC | #1
On 30/07/2013 15:02, Xiao Guangrong wrote:
> kvm_vm_ioctl_get_dirty_log() write-protects sptes based on the dirty
> bitmap, so we should ensure that a writable spte can be found in the rmap
> before the dirty bitmap is made visible. Otherwise, we may clear the dirty
> bitmap but fail to write-protect the page.
> 
> A memory barrier is needed to prevent this reordering; it will be added in
> a later patch of this series.

Do you mean that the later patch will also introduce a memory barrier?

Paolo

Xiao Guangrong July 31, 2013, 7:33 a.m. UTC | #2
On 07/30/2013 09:27 PM, Paolo Bonzini wrote:
> On 30/07/2013 15:02, Xiao Guangrong wrote:
>> kvm_vm_ioctl_get_dirty_log() write-protects sptes based on the dirty
>> bitmap, so we should ensure that a writable spte can be found in the rmap
>> before the dirty bitmap is made visible. Otherwise, we may clear the dirty
>> bitmap but fail to write-protect the page.
>>
>> A memory barrier is needed to prevent this reordering; it will be added in
>> a later patch of this series.
> 
> Do you mean that the later patch will also introduce a memory barrier?

No, sorry for the confusion. I mean that this patch is missing the memory
barrier; it is added in the later patch that introduces the lockless
write-protection.

The memory barrier is added in
[PATCH 11/12] KVM: MMU: locklessly write-protect the page:

+	/*
+	 * We should put the sptep into the rmap before setting the
+	 * dirty bit, otherwise the lockless spte write-protect path
+	 * may clear the dirty bitmap but fail to find the spte.
+	 *
+	 * See the comments in kvm_vm_ioctl_get_dirty_log().
+	 */
+	smp_wmb();
+
 	if (pte_access & ACC_WRITE_MASK)

and the pairing barrier on the other side is:
+		/*
+		 * xchg acts as a full barrier that ensures the dirty
+		 * bitmap is cleared before the rmap is read.
+		 *
+		 * See the comments in set_spte().
+		 */
 		mask = xchg(&dirty_bitmap[i], 0);
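
Putting the two hunks side by side, the pairing works like this (condensed
and illustrative, not a literal excerpt from either patch):

	/* Producer, set_spte():                                    */
	rmap_add(vcpu, sptep, gfn);       /* A: publish the spte    */
	smp_wmb();                        /* orders A before B      */
	mark_page_dirty(vcpu->kvm, gfn);  /* B: set the dirty bit   */

	/* Consumer, kvm_vm_ioctl_get_dirty_log():                  */
	mask = xchg(&dirty_bitmap[i], 0); /* C: clear the dirty bits;
					   * the implied full barrier
					   * orders C before D      */
	/* D: walk the rmap and write-protect the sptes found.      */

	/*
	 * Whenever C observes the bit set by B, A is guaranteed to be
	 * visible at D, so the write-protect path finds the spte.
	 */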

Patch

diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 0fe56ad..58283bf 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -2425,6 +2425,7 @@  static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
 {
 	u64 spte;
 	int ret = 0;
+	bool remap = is_rmap_spte(*sptep);
 
 	if (set_mmio_spte(vcpu->kvm, sptep, gfn, pfn, pte_access))
 		return 0;
@@ -2490,6 +2491,14 @@  set_pte:
 	if (mmu_spte_update(sptep, spte))
 		kvm_flush_remote_tlbs(vcpu->kvm);
 
+	if (!remap) {
+		if (rmap_add(vcpu, sptep, gfn) > RMAP_RECYCLE_THRESHOLD)
+			rmap_recycle(vcpu, sptep, gfn);
+
+		if (level > PT_PAGE_TABLE_LEVEL)
+			++vcpu->kvm->stat.lpages;
+	}
+
 	if (pte_access & ACC_WRITE_MASK)
 		mark_page_dirty(vcpu->kvm, gfn);
 done:
@@ -2501,9 +2510,6 @@  static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
 			 int level, gfn_t gfn, pfn_t pfn, bool speculative,
 			 bool host_writable)
 {
-	int was_rmapped = 0;
-	int rmap_count;
-
 	pgprintk("%s: spte %llx write_fault %d gfn %llx\n", __func__,
 		 *sptep, write_fault, gfn);
 
@@ -2525,8 +2531,7 @@  static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
 				 spte_to_pfn(*sptep), pfn);
 			drop_spte(vcpu->kvm, sptep);
 			kvm_flush_remote_tlbs(vcpu->kvm);
-		} else
-			was_rmapped = 1;
+		}
 	}
 
 	if (set_spte(vcpu, sptep, pte_access, level, gfn, pfn, speculative,
@@ -2544,16 +2549,6 @@  static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
 		 is_large_pte(*sptep)? "2MB" : "4kB",
 		 *sptep & PT_PRESENT_MASK ?"RW":"R", gfn,
 		 *sptep, sptep);
-	if (!was_rmapped && is_large_pte(*sptep))
-		++vcpu->kvm->stat.lpages;
-
-	if (is_shadow_present_pte(*sptep)) {
-		if (!was_rmapped) {
-			rmap_count = rmap_add(vcpu, sptep, gfn);
-			if (rmap_count > RMAP_RECYCLE_THRESHOLD)
-				rmap_recycle(vcpu, sptep, gfn);
-		}
-	}
 
 	kvm_release_pfn_clean(pfn);
 }