
[03/10] KVM: switch dirty_log to mmu_lock protection

Message ID 20090921234124.205345161@amt.cnet (mailing list archive)
State New, archived

Commit Message

Marcelo Tosatti Sept. 21, 2009, 11:37 p.m. UTC
get_dirty_log vs mark_page_dirty need to be mutually exclusive. Switch
to mmu_lock protection.

Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>




Comments

Avi Kivity Sept. 22, 2009, 6:37 a.m. UTC | #1
On 09/22/2009 02:37 AM, Marcelo Tosatti wrote:
> get_dirty_log vs mark_page_dirty need to be mutually exclusive. Switch
> to mmu_lock protection.
>
>    

I'm not sure all archs use mmu_lock.  Also, I'm unhappy about
introducing more uses of it (especially as it's often unnecessary, for
example if dirty logging is turned off for the slot).

I think you can use RCU for this as well.  When you read the log,
allocate a new empty bitmap, switch the memslots pointer to include it,
and synchronize_srcu().  Once we are certain everyone is using the new
bitmap, we can copy the old one to userspace and delete it.
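
As an illustration of that suggestion, a minimal sketch (not code from
this thread): it simplifies the idea by swapping only the slot's
dirty_bitmap pointer rather than the whole memslots structure, and it
assumes an srcu_struct member in struct kvm; the function name and
error handling are likewise made up for the example.

/*
 * Sketch only: publish a fresh, empty bitmap, wait for all SRCU
 * readers, after which the old bitmap can no longer change and can be
 * copied to userspace and freed.
 */
static int dirty_log_srcu_swap(struct kvm *kvm,
			       struct kvm_memory_slot *memslot,
			       unsigned long __user *user_bitmap)
{
	int n = ALIGN(memslot->npages, BITS_PER_LONG) / 8;
	unsigned long *old, *new;
	int r = 0;

	new = vmalloc(n);
	if (!new)
		return -ENOMEM;
	memset(new, 0, n);

	old = memslot->dirty_bitmap;
	rcu_assign_pointer(memslot->dirty_bitmap, new);	/* publish */
	synchronize_srcu(&kvm->srcu);	/* assumed srcu_struct in struct kvm */

	/* Every mark_page_dirty() now sees the new bitmap. */
	if (copy_to_user(user_bitmap, old, n))
		r = -EFAULT;
	vfree(old);
	return r;
}
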
Marcelo Tosatti Sept. 22, 2009, 12:44 p.m. UTC | #2
On Tue, Sep 22, 2009 at 09:37:51AM +0300, Avi Kivity wrote:
> On 09/22/2009 02:37 AM, Marcelo Tosatti wrote:
>> get_dirty_log vs mark_page_dirty need to be mutually exclusive. Switch
>> to mmu_lock protection.
>>
>>    
>
> I'm not sure all archs use mmu_lock.  Also, I'm unhappy about
> introducing more uses of it (especially as it's often unnecessary, for
> example if dirty logging is turned off for the slot).

True.

> I think you can use RCU for this as well.  When you read the log,
> allocate a new empty bitmap, switch the memslots pointer to include it,
> and synchronize_srcu().  Once we are certain everyone is using the new
> bitmap, we can copy the old one to userspace and delete it.

This will slow down get_dirty_log, which can lead to regressions in
migration and poorer VGA refresh updates.

Moreover, since updates are running concurrently, by the time
synchronize_srcu returns you can have plenty of dirty data in the new
bitmap, which is not the case with mutual exclusion.

These are the reasons for the choice. Would you prefer a new rw lock?

Avi Kivity Sept. 22, 2009, 12:52 p.m. UTC | #3
On 09/22/2009 03:44 PM, Marcelo Tosatti wrote:
>
>> I think you can use RCU for this as well.  When you read the log,
>> allocate a new empty bitmap, switch the memslots pointer to include it,
>> and synchronize_srcu().  Once we are certain everyone is using the new
>> bitmap, we can copy the old one to userspace and delete it.
>>      
> This will slow down get_dirty_log, which can lead to regressions in
> migration and poorer VGA refresh updates.
>    

That's true.  By how much?  SRCU is not so slow that a 30Hz refresh
rate would notice.

> Moreover, since updates are running concurrently, by the time
> synchronize_srcu returns you can have plenty of dirty data in the new
> bitmap, which is not the case with mutual exclusion.
>    

By the time ioctl() returns you can have dirty data in the new bitmap.  
Nothing wrong with that.

> These are the reasons for the choice. Would you prefer a new rw lock?
>    

I'd prefer the SRCU approach unless convinced it is too slow.
Alternatively, we can copy-and-clear the bitmap using cmpxchg (which
won't lose data) into a temporary buffer and then copy that to
userspace.  Locks are unfashionable.
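
For illustration, a rough sketch of that copy-and-clear alternative
(again, not code from this thread): it uses xchg() to swap each bitmap
word with zero, which preserves concurrent updates just like a cmpxchg
loop would, and the function name and buffer handling are assumptions
made for the example.

/*
 * Sketch only: atomically snapshot-and-clear each word of the dirty
 * bitmap, so a concurrent mark_page_dirty() either lands in this
 * snapshot or in the freshly cleared word for the next round.
 */
static int dirty_log_copy_and_clear(struct kvm_memory_slot *memslot,
				    unsigned long __user *user_bitmap)
{
	int n = ALIGN(memslot->npages, BITS_PER_LONG) / 8;
	unsigned long *snapshot;
	int i, r = 0;

	snapshot = vmalloc(n);
	if (!snapshot)
		return -ENOMEM;

	for (i = 0; i < n / sizeof(long); i++)
		snapshot[i] = xchg(&memslot->dirty_bitmap[i], 0);

	if (copy_to_user(user_bitmap, snapshot, n))
		r = -EFAULT;

	vfree(snapshot);
	return r;
}
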

Patch

Index: kvm-slotslock/arch/x86/kvm/paging_tmpl.h
===================================================================
--- kvm-slotslock.orig/arch/x86/kvm/paging_tmpl.h
+++ kvm-slotslock/arch/x86/kvm/paging_tmpl.h
@@ -175,7 +175,9 @@  walk:
 		if (!(pte & PT_ACCESSED_MASK)) {
 			trace_kvm_mmu_set_accessed_bit(table_gfn, index,
 						       sizeof(pte));
+			spin_lock(&vcpu->kvm->mmu_lock);
 			mark_page_dirty(vcpu->kvm, table_gfn);
+			spin_unlock(&vcpu->kvm->mmu_lock);
 			if (FNAME(cmpxchg_gpte)(vcpu->kvm, table_gfn,
 			    index, pte, pte|PT_ACCESSED_MASK))
 				goto walk;
@@ -215,7 +217,9 @@  walk:
 		bool ret;
 
 		trace_kvm_mmu_set_dirty_bit(table_gfn, index, sizeof(pte));
+		spin_lock(&vcpu->kvm->mmu_lock);
 		mark_page_dirty(vcpu->kvm, table_gfn);
+		spin_unlock(&vcpu->kvm->mmu_lock);
 		ret = FNAME(cmpxchg_gpte)(vcpu->kvm, table_gfn, index, pte,
 			    pte|PT_DIRTY_MASK);
 		if (ret)
Index: kvm-slotslock/arch/x86/kvm/x86.c
===================================================================
--- kvm-slotslock.orig/arch/x86/kvm/x86.c
+++ kvm-slotslock/arch/x86/kvm/x86.c
@@ -692,7 +692,9 @@  static void kvm_write_guest_time(struct 
 
 	kunmap_atomic(shared_kaddr, KM_USER0);
 
+	spin_lock(&v->kvm->mmu_lock);
 	mark_page_dirty(v->kvm, vcpu->time >> PAGE_SHIFT);
+	spin_unlock(&v->kvm->mmu_lock);
 }
 
 static int kvm_request_guest_time_update(struct kvm_vcpu *v)
@@ -2147,27 +2149,45 @@  static int kvm_vm_ioctl_reinject(struct 
 int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
 				      struct kvm_dirty_log *log)
 {
-	int r;
-	int n;
+	int r, n, i;
 	struct kvm_memory_slot *memslot;
-	int is_dirty = 0;
+	unsigned long is_dirty = 0;
+	unsigned long *dirty_bitmap;
 
 	down_write(&kvm->slots_lock);
 
-	r = kvm_get_dirty_log(kvm, log, &is_dirty);
-	if (r)
+	r = -EINVAL;
+	if (log->slot >= KVM_MEMORY_SLOTS)
+		goto out;
+
+	memslot = &kvm->memslots->memslots[log->slot];
+	r = -ENOENT;
+	if (!memslot->dirty_bitmap)
+		goto out;
+
+	n = ALIGN(memslot->npages, BITS_PER_LONG) / 8;
+	r = -ENOMEM;
+	dirty_bitmap = vmalloc(n);
+	if (!dirty_bitmap)
 		goto out;
+	memset(dirty_bitmap, 0, n);
+
+	spin_lock(&kvm->mmu_lock);
+	for (i = 0; !is_dirty && i < n/sizeof(long); ++i)
+		is_dirty = memslot->dirty_bitmap[i];
 
 	/* If nothing is dirty, don't bother messing with page tables. */
 	if (is_dirty) {
-		spin_lock(&kvm->mmu_lock);
+		memcpy(dirty_bitmap, memslot->dirty_bitmap, n);
+		memset(memslot->dirty_bitmap, 0, n);
 		kvm_mmu_slot_remove_write_access(kvm, log->slot);
-		spin_unlock(&kvm->mmu_lock);
 		memslot = &kvm->memslots->memslots[log->slot];
-		n = ALIGN(memslot->npages, BITS_PER_LONG) / 8;
-		memset(memslot->dirty_bitmap, 0, n);
 	}
+	spin_unlock(&kvm->mmu_lock);
+
 	r = 0;
+	if (copy_to_user(log->dirty_bitmap, dirty_bitmap, n))
+		r = -EFAULT;
 out:
 	up_write(&kvm->slots_lock);
 	return r;
@@ -3491,7 +3511,9 @@  static void vapic_exit(struct kvm_vcpu *
 
 	down_read(&vcpu->kvm->slots_lock);
 	kvm_release_page_dirty(apic->vapic_page);
+	spin_lock(&vcpu->kvm->mmu_lock);
 	mark_page_dirty(vcpu->kvm, apic->vapic_addr >> PAGE_SHIFT);
+	spin_unlock(&vcpu->kvm->mmu_lock);
 	up_read(&vcpu->kvm->slots_lock);
 }
 
Index: kvm-slotslock/virt/kvm/kvm_main.c
===================================================================
--- kvm-slotslock.orig/virt/kvm/kvm_main.c
+++ kvm-slotslock/virt/kvm/kvm_main.c
@@ -1007,7 +1007,9 @@  int kvm_write_guest_page(struct kvm *kvm
 	r = copy_to_user((void __user *)addr + offset, data, len);
 	if (r)
 		return -EFAULT;
+	spin_lock(&kvm->mmu_lock);
 	mark_page_dirty(kvm, gfn);
+	spin_unlock(&kvm->mmu_lock);
 	return 0;
 }
 EXPORT_SYMBOL_GPL(kvm_write_guest_page);