From patchwork Fri Apr 9 09:38:08 2010 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Takuya Yoshikawa X-Patchwork-Id: 91672 Received: from vger.kernel.org (vger.kernel.org [209.132.180.67]) by demeter.kernel.org (8.14.3/8.14.3) with ESMTP id o399YctU014871 for ; Fri, 9 Apr 2010 09:34:38 GMT Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1755019Ab0DIJeh (ORCPT ); Fri, 9 Apr 2010 05:34:37 -0400 Received: from serv2.oss.ntt.co.jp ([222.151.198.100]:44326 "EHLO serv2.oss.ntt.co.jp" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1754334Ab0DIJeg (ORCPT ); Fri, 9 Apr 2010 05:34:36 -0400 Received: from serv2.oss.ntt.co.jp (localhost [127.0.0.1]) by serv2.oss.ntt.co.jp (Postfix) with ESMTP id 20C0C2482A6; Fri, 9 Apr 2010 18:34:35 +0900 (JST) Received: from serv1.oss.ntt.co.jp (serv1.oss.ntt.co.jp [172.19.0.2]) by serv2.oss.ntt.co.jp (Postfix) with ESMTP id 0EFB62482A4; Fri, 9 Apr 2010 18:34:35 +0900 (JST) Received: from yshtky3.kern.oss.ntt.co.jp (unknown [172.17.1.110]) by serv1.oss.ntt.co.jp (Postfix) with SMTP id E603811C111; Fri, 9 Apr 2010 18:34:34 +0900 (JST) Date: Fri, 9 Apr 2010 18:38:08 +0900 From: Takuya Yoshikawa To: avi@redhat.com, mtosatti@redhat.com Cc: kvm@vger.kernel.org, fernando@oss.ntt.co.jp Subject: [PATCH RFC 5/5] KVM: This is the main part of the "moving dirty bitmaps to user space" Message-Id: <20100409183808.b72fc9a3.yoshikawa.takuya@oss.ntt.co.jp> In-Reply-To: <20100409182732.857de4db.yoshikawa.takuya@oss.ntt.co.jp> References: <20100409182732.857de4db.yoshikawa.takuya@oss.ntt.co.jp> X-Mailer: Sylpheed 2.6.0 (GTK+ 2.16.1; i486-pc-linux-gnu) Mime-Version: 1.0 Sender: kvm-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: kvm@vger.kernel.org X-Greylist: IP, sender and recipient auto-whitelisted, not delayed by milter-greylist-4.2.3 (demeter.kernel.org [140.211.167.41]); Fri, 09 Apr 2010 09:35:04 +0000 (UTC) diff --git 
a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 450ecfe..995b970 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -2642,16 +2642,99 @@ static int kvm_vm_ioctl_reinject(struct kvm *kvm, return 0; } +int kvm_arch_create_dirty_bitmap(struct kvm_memory_slot *memslot) +{ + unsigned long user_addr1; + unsigned long user_addr2; + int dirty_bytes = kvm_dirty_bitmap_bytes(memslot); + + down_write(&current->mm->mmap_sem); + user_addr1 = do_mmap(NULL, 0, dirty_bytes, + PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS, 0); + if (IS_ERR((void *)user_addr1)) { + up_write(&current->mm->mmap_sem); + return PTR_ERR((void *)user_addr1); + } + user_addr2 = do_mmap(NULL, 0, dirty_bytes, + PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS, 0); + if (IS_ERR((void *)user_addr2)) { + do_munmap(current->mm, user_addr1, dirty_bytes); + up_write(&current->mm->mmap_sem); + return PTR_ERR((void *)user_addr2); + } + up_write(&current->mm->mmap_sem); + + memslot->dirty_bitmap = (unsigned long __user *)user_addr1; + memslot->dirty_bitmap_old = (unsigned long __user *)user_addr2; + clear_user(memslot->dirty_bitmap, dirty_bytes); + clear_user(memslot->dirty_bitmap_old, dirty_bytes); + + return 0; +} + +void kvm_arch_destroy_dirty_bitmap(struct kvm_memory_slot *memslot) +{ + int n = kvm_dirty_bitmap_bytes(memslot); + + if (!memslot->dirty_bitmap) + return; + + down_write(&current->mm->mmap_sem); + do_munmap(current->mm, (unsigned long)memslot->dirty_bitmap, n); + do_munmap(current->mm, (unsigned long)memslot->dirty_bitmap_old, n); + up_write(&current->mm->mmap_sem); + + memslot->dirty_bitmap = NULL; + memslot->dirty_bitmap_old = NULL; +} + +static int kvm_copy_dirty_bitmap(unsigned long __user *to, + const unsigned long __user *from, int n) +{ +#ifdef CONFIG_X86_64 + if (copy_in_user(to, from, n) < 0) { + printk(KERN_WARNING "%s: copy_in_user failed\n", __func__); + return -EFAULT; + } + return 0; +#else + int ret = 0; + void *p = vmalloc(n); + + if (!p) { + ret = -ENOMEM; + goto out; + } + if (copy_from_user(p, 
from, n) < 0) { + printk(KERN_WARNING "%s: copy_from_user failed\n", __func__); + ret = -EFAULT; + goto out_free; + } + if (copy_to_user(to, p, n) < 0) { + printk(KERN_WARNING "%s: copy_to_user failed\n", __func__); + ret = -EFAULT; + goto out_free; + } + +out_free: + vfree(p); +out: + return ret; +#endif +} + /* * Get (and clear) the dirty memory log for a memory slot. */ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log) { - int r, n, i; + int r, n; struct kvm_memory_slot *memslot; - unsigned long is_dirty = 0; - unsigned long *dirty_bitmap = NULL; + unsigned long __user *dirty_bitmap; + unsigned long __user *dirty_bitmap_old; mutex_lock(&kvm->slots_lock); @@ -2664,44 +2747,37 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, if (!memslot->dirty_bitmap) goto out; - n = kvm_dirty_bitmap_bytes(memslot); - - r = -ENOMEM; - dirty_bitmap = vmalloc(n); - if (!dirty_bitmap) - goto out; - memset(dirty_bitmap, 0, n); + dirty_bitmap = memslot->dirty_bitmap; + dirty_bitmap_old = memslot->dirty_bitmap_old; - for (i = 0; !is_dirty && i < n/sizeof(long); i++) - is_dirty = memslot->dirty_bitmap[i]; + n = kvm_dirty_bitmap_bytes(memslot); + clear_user(dirty_bitmap_old, n); /* If nothing is dirty, don't bother messing with page tables. 
*/ - if (is_dirty) { + if (memslot->is_dirty) { struct kvm_memslots *slots, *old_slots; spin_lock(&kvm->mmu_lock); kvm_mmu_slot_remove_write_access(kvm, log->slot); spin_unlock(&kvm->mmu_lock); + r = -ENOMEM; slots = kzalloc(sizeof(struct kvm_memslots), GFP_KERNEL); if (!slots) - goto out_free; + goto out; memcpy(slots, kvm->memslots, sizeof(struct kvm_memslots)); - slots->memslots[log->slot].dirty_bitmap = dirty_bitmap; + slots->memslots[log->slot].dirty_bitmap = dirty_bitmap_old; + slots->memslots[log->slot].dirty_bitmap_old = dirty_bitmap; + slots->memslots[log->slot].is_dirty = false; old_slots = kvm->memslots; rcu_assign_pointer(kvm->memslots, slots); synchronize_srcu_expedited(&kvm->srcu); - dirty_bitmap = old_slots->memslots[log->slot].dirty_bitmap; kfree(old_slots); } - r = 0; - if (copy_to_user(log->dirty_bitmap, dirty_bitmap, n)) - r = -EFAULT; -out_free: - vfree(dirty_bitmap); + r = kvm_copy_dirty_bitmap(log->dirty_bitmap, dirty_bitmap, n); out: mutex_unlock(&kvm->slots_lock); return r; diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 07092d6..834812f 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -276,6 +276,10 @@ void kvm_arch_commit_memory_region(struct kvm *kvm, struct kvm_userspace_memory_region *mem, struct kvm_memory_slot old, int user_alloc); +#ifdef __KVM_HAVE_USER_DIRTYBITMAP +int kvm_arch_create_dirty_bitmap(struct kvm_memory_slot *memslot); +void kvm_arch_destroy_dirty_bitmap(struct kvm_memory_slot *memslot); +#endif void kvm_disable_largepages(void); void kvm_arch_flush_shadow(struct kvm *kvm); gfn_t unalias_gfn(struct kvm *kvm, gfn_t gfn); diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index f919bd1..038a677 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -433,8 +433,12 @@ out_err_nodisable: static void kvm_destroy_dirty_bitmap(struct kvm_memory_slot *memslot) { +#ifdef __KVM_HAVE_USER_DIRTYBITMAP + kvm_arch_destroy_dirty_bitmap(memslot); +#else 
vfree(memslot->dirty_bitmap); memslot->dirty_bitmap = NULL; +#endif } /* @@ -463,13 +467,26 @@ static void kvm_free_physmem_slot(struct kvm_memory_slot *free, free->rmap = NULL; } +/* + * We don't munmap dirty bitmaps by ourselves in the case of vm destruction. + */ +static void kvm_pre_destroy_dirty_bitmap(struct kvm_memory_slot *memslot) +{ +#ifdef __KVM_HAVE_USER_DIRTYBITMAP + memslot->dirty_bitmap = NULL; + memslot->dirty_bitmap_old = NULL; +#endif +} + void kvm_free_physmem(struct kvm *kvm) { int i; struct kvm_memslots *slots = kvm->memslots; - for (i = 0; i < slots->nmemslots; ++i) + for (i = 0; i < slots->nmemslots; ++i) { + kvm_pre_destroy_dirty_bitmap(&slots->memslots[i]); kvm_free_physmem_slot(&slots->memslots[i], NULL); + } kfree(kvm->memslots); } @@ -523,6 +540,9 @@ static int kvm_vm_release(struct inode *inode, struct file *filp) static int kvm_create_dirty_bitmap(struct kvm_memory_slot *memslot) { +#ifdef __KVM_HAVE_USER_DIRTYBITMAP + return kvm_arch_create_dirty_bitmap(memslot); +#else int dirty_bytes = kvm_dirty_bitmap_bytes(memslot); memslot->dirty_bitmap = vmalloc(dirty_bytes); @@ -530,6 +550,7 @@ static int kvm_create_dirty_bitmap(struct kvm_memory_slot *memslot) return -ENOMEM; memset(memslot->dirty_bitmap, 0, dirty_bytes); +#endif return 0; } @@ -1197,9 +1218,16 @@ void mark_page_dirty(struct kvm *kvm, gfn_t gfn) if (memslot && memslot->dirty_bitmap) { unsigned long rel_gfn = gfn - memslot->base_gfn; +#ifdef __KVM_HAVE_USER_DIRTYBITMAP + if (set_bit_user(rel_gfn, memslot->dirty_bitmap) < 0) + printk(KERN_WARNING "%s: set_bit_user failed\n", __func__); + + memslot->is_dirty = true; +#else /* avoid RMW */ if (!generic_test_le_bit(rel_gfn, memslot->dirty_bitmap)) generic___set_le_bit(rel_gfn, memslot->dirty_bitmap); +#endif } }