From patchwork Tue Jan 25 05:37:56 2011 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Alex Williamson X-Patchwork-Id: 504191 Received: from vger.kernel.org (vger.kernel.org [209.132.180.67]) by demeter1.kernel.org (8.14.4/8.14.3) with ESMTP id p0P5c78a009447 for ; Tue, 25 Jan 2011 05:38:08 GMT Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1751529Ab1AYFiC (ORCPT ); Tue, 25 Jan 2011 00:38:02 -0500 Received: from mx1.redhat.com ([209.132.183.28]:7485 "EHLO mx1.redhat.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1751470Ab1AYFiB (ORCPT ); Tue, 25 Jan 2011 00:38:01 -0500 Received: from int-mx01.intmail.prod.int.phx2.redhat.com (int-mx01.intmail.prod.int.phx2.redhat.com [10.5.11.11]) by mx1.redhat.com (8.13.8/8.13.8) with ESMTP id p0P5bwdC014239 (version=TLSv1/SSLv3 cipher=DHE-RSA-AES256-SHA bits=256 verify=OK); Tue, 25 Jan 2011 00:37:58 -0500 Received: from [10.3.113.53] (ovpn-113-53.phx2.redhat.com [10.3.113.53]) by int-mx01.intmail.prod.int.phx2.redhat.com (8.13.8/8.13.8) with ESMTP id p0P5bv29031112; Tue, 25 Jan 2011 00:37:57 -0500 Subject: Re: [RFC PATCH 0/2] Expose available KVM free memory slot count to help avoid aborts From: Alex Williamson To: Jan Kiszka Cc: Marcelo Tosatti , "kvm@vger.kernel.org" , "ddutile@redhat.com" , "mst@redhat.com" , "avi@redhat.com" , "chrisw@redhat.com" In-Reply-To: <1295883899.3230.9.camel@x201> References: <20110121233040.22262.68117.stgit@s20.home> <20110124093241.GA28654@amt.cnet> <4D3D89B1.30300@siemens.com> <1295883899.3230.9.camel@x201> Date: Mon, 24 Jan 2011 22:37:56 -0700 Message-ID: <1295933876.3230.46.camel@x201> Mime-Version: 1.0 X-Scanned-By: MIMEDefang 2.67 on 10.5.11.11 Sender: kvm-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: kvm@vger.kernel.org X-Greylist: IP, sender and recipient auto-whitelisted, not delayed by milter-greylist-4.2.6 (demeter1.kernel.org [140.211.167.41]); Tue, 25 Jan 2011 05:38:08 +0000 (UTC) diff --git a/arch/ia64/include/asm/kvm_host.h b/arch/ia64/include/asm/kvm_host.h index 2689ee5..11d0ab2 100644 --- a/arch/ia64/include/asm/kvm_host.h +++ b/arch/ia64/include/asm/kvm_host.h @@ -23,10 +23,6 @@ #ifndef __ASM_KVM_HOST_H #define __ASM_KVM_HOST_H -#define KVM_MEMORY_SLOTS 32 -/* memory slots that does not exposed to userspace */ -#define KVM_PRIVATE_MEM_SLOTS 4 - #define KVM_COALESCED_MMIO_PAGE_OFFSET 1 /* define exit reasons from vmm to kvm*/ diff --git a/arch/ia64/kvm/kvm-ia64.c b/arch/ia64/kvm/kvm-ia64.c index 70d224d..f1adda2 100644 --- a/arch/ia64/kvm/kvm-ia64.c +++ b/arch/ia64/kvm/kvm-ia64.c @@ -1814,7 +1814,7 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, mutex_lock(&kvm->slots_lock); r = -EINVAL; - if (log->slot >= KVM_MEMORY_SLOTS) + if (log->slot >= kvm->memslots->nmemslots) goto out; memslot = &kvm->memslots->memslots[log->slot]; diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index bba3b9b..dc80057 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h @@ -29,9 +29,6 @@ #include #define KVM_MAX_VCPUS 1 -#define KVM_MEMORY_SLOTS 32 -/* memory slots that does not exposed to userspace */ -#define KVM_PRIVATE_MEM_SLOTS 4 #define KVM_COALESCED_MMIO_PAGE_OFFSET 1 diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h index cef7dbf..92a964c 100644 --- a/arch/s390/include/asm/kvm_host.h +++ b/arch/s390/include/asm/kvm_host.h @@ -20,9 +20,6 @@ #include #define KVM_MAX_VCPUS 64 -#define KVM_MEMORY_SLOTS 32 -/* memory slots that does not exposed to userspace */ -#define KVM_PRIVATE_MEM_SLOTS 4 struct sca_entry { atomic_t scn; diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index ffd7f8d..df1382c 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -27,7 +27,6 @@ #include #define KVM_MAX_VCPUS 64 -#define KVM_MEMORY_SLOTS 32 /* memory slots that does not exposed to userspace */ #define KVM_PRIVATE_MEM_SLOTS 4 @@ -207,7 +206,7 @@ struct kvm_mmu_page { * One bit set per slot which has memory * in this shadow page. */ - DECLARE_BITMAP(slot_bitmap, KVM_MEMORY_SLOTS + KVM_PRIVATE_MEM_SLOTS); + unsigned long *slot_bitmap; bool multimapped; /* More than one parent_pte? */ bool unsync; int root_count; /* Currently serving as active root */ diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h index 84471b8..7fd8c89 100644 --- a/arch/x86/include/asm/vmx.h +++ b/arch/x86/include/asm/vmx.h @@ -370,9 +370,9 @@ enum vmcs_field { #define AR_RESERVD_MASK 0xfffe0f00 -#define TSS_PRIVATE_MEMSLOT (KVM_MEMORY_SLOTS + 0) -#define APIC_ACCESS_PAGE_PRIVATE_MEMSLOT (KVM_MEMORY_SLOTS + 1) -#define IDENTITY_PAGETABLE_PRIVATE_MEMSLOT (KVM_MEMORY_SLOTS + 2) +#define TSS_PRIVATE_MEMSLOT 0 +#define APIC_ACCESS_PAGE_PRIVATE_MEMSLOT 1 +#define IDENTITY_PAGETABLE_PRIVATE_MEMSLOT 2 #define VMX_NR_VPIDS (1 << 16) #define VMX_VPID_EXTENT_SINGLE_CONTEXT 1 diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index ccacf0b..8c2533a 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -1032,6 +1032,7 @@ static void kvm_mmu_free_page(struct kvm *kvm, struct kvm_mmu_page *sp) ASSERT(is_empty_shadow_page(sp->spt)); hlist_del(&sp->hash_link); list_del(&sp->link); + kfree(sp->slot_bitmap); __free_page(virt_to_page(sp->spt)); if (!sp->role.direct) __free_page(virt_to_page(sp->gfns)); @@ -1048,6 +1049,7 @@ static struct kvm_mmu_page *kvm_mmu_alloc_page(struct kvm_vcpu *vcpu, u64 *parent_pte, int direct) { struct kvm_mmu_page *sp; + struct kvm_memslots *slots = kvm_memslots(vcpu->kvm); sp = mmu_memory_cache_alloc(&vcpu->arch.mmu_page_header_cache, sizeof *sp); sp->spt = mmu_memory_cache_alloc(&vcpu->arch.mmu_page_cache, PAGE_SIZE); @@ -1056,7 +1058,10 @@ static struct kvm_mmu_page *kvm_mmu_alloc_page(struct kvm_vcpu *vcpu, PAGE_SIZE); set_page_private(virt_to_page(sp->spt), (unsigned long)sp); list_add(&sp->link, &vcpu->kvm->arch.active_mmu_pages); - bitmap_zero(sp->slot_bitmap, KVM_MEMORY_SLOTS + KVM_PRIVATE_MEM_SLOTS); + sp->slot_bitmap = kzalloc(sizeof(long) * + BITS_TO_LONGS(slots->nmemslots), GFP_KERNEL); + if (!sp->slot_bitmap) + return NULL; sp->multimapped = 0; sp->parent_pte = parent_pte; kvm_mod_used_mmu_pages(vcpu->kvm, +1); diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 5eccdba..c002fac 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -1978,7 +1978,7 @@ int kvm_dev_ioctl_check_extension(long ext) r = KVM_MAX_VCPUS; break; case KVM_CAP_NR_MEMSLOTS: - r = KVM_MEMORY_SLOTS; + r = INT_MAX - KVM_PRIVATE_MEM_SLOTS; break; case KVM_CAP_PV_MMU: /* obsolete */ r = 0; @@ -3201,7 +3201,7 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, mutex_lock(&kvm->slots_lock); r = -EINVAL; - if (log->slot >= KVM_MEMORY_SLOTS) + if (log->slot >= kvm->memslots->nmemslots) goto out; memslot = &kvm->memslots->memslots[log->slot]; @@ -6068,7 +6068,7 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm, int map_flags = MAP_PRIVATE | MAP_ANONYMOUS; /* Prevent internal slot pages from being moved by fork()/COW. */ - if (memslot->id >= KVM_MEMORY_SLOTS) + if (memslot->id < KVM_PRIVATE_MEM_SLOTS) map_flags = MAP_SHARED | MAP_ANONYMOUS; /*To keep backward compatibility with older userspace, diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index b5021db..4cb9f94 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -27,6 +27,10 @@ #include +#ifndef KVM_PRIVATE_MEM_SLOTS + #define KVM_PRIVATE_MEM_SLOTS 0 +#endif + /* * vcpu->requests bit members */ @@ -206,8 +210,7 @@ struct kvm_irq_routing_table {}; struct kvm_memslots { int nmemslots; u64 generation; - struct kvm_memory_slot memslots[KVM_MEMORY_SLOTS + - KVM_PRIVATE_MEM_SLOTS]; + struct kvm_memory_slot memslots[]; }; struct kvm { diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index fd67bcd..32f023c 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -623,13 +623,14 @@ int __kvm_set_memory_region(struct kvm *kvm, struct kvm_userspace_memory_region *mem, int user_alloc) { - int r; + int r, nmemslots; gfn_t base_gfn; unsigned long npages; unsigned long i; - struct kvm_memory_slot *memslot; - struct kvm_memory_slot old, new; + struct kvm_memory_slot *memslot = NULL; + struct kvm_memory_slot old = {}, new = {}; struct kvm_memslots *slots, *old_memslots; + bool flush = false; r = -EINVAL; /* General sanity checks */ @@ -639,12 +640,9 @@ int __kvm_set_memory_region(struct kvm *kvm, goto out; if (user_alloc && (mem->userspace_addr & (PAGE_SIZE - 1))) goto out; - if (mem->slot >= KVM_MEMORY_SLOTS + KVM_PRIVATE_MEM_SLOTS) - goto out; if (mem->guest_phys_addr + mem->memory_size < mem->guest_phys_addr) goto out; - memslot = &kvm->memslots->memslots[mem->slot]; base_gfn = mem->guest_phys_addr >> PAGE_SHIFT; npages = mem->memory_size >> PAGE_SHIFT; @@ -655,7 +653,10 @@ int __kvm_set_memory_region(struct kvm *kvm, if (!npages) mem->flags &= ~KVM_MEM_LOG_DIRTY_PAGES; - new = old = *memslot; + if (mem->slot < kvm->memslots->nmemslots) { + memslot = &kvm->memslots->memslots[mem->slot]; + new = old = *memslot; + } new.id = mem->slot; new.base_gfn = base_gfn; @@ -669,7 +670,7 @@ int __kvm_set_memory_region(struct kvm *kvm, /* Check for overlaps */ r = -EEXIST; - for (i = 0; i < KVM_MEMORY_SLOTS; ++i) { + for (i = KVM_PRIVATE_MEM_SLOTS; i < kvm->memslots->nmemslots; ++i) { struct kvm_memory_slot *s = &kvm->memslots->memslots[i]; if (s == memslot || !s->npages) @@ -752,12 +753,19 @@ skip_lpage: if (!npages) { r = -ENOMEM; - slots = kzalloc(sizeof(struct kvm_memslots), GFP_KERNEL); + + nmemslots = (mem->slot >= kvm->memslots->nmemslots) ? + mem->slot + 1 : kvm->memslots->nmemslots; + + slots = kzalloc(sizeof(struct kvm_memslots) + + nmemslots * sizeof(struct kvm_memory_slot), + GFP_KERNEL); if (!slots) goto out_free; - memcpy(slots, kvm->memslots, sizeof(struct kvm_memslots)); - if (mem->slot >= slots->nmemslots) - slots->nmemslots = mem->slot + 1; + memcpy(slots, kvm->memslots, + sizeof(struct kvm_memslots) + kvm->memslots->nmemslots * + sizeof(struct kvm_memory_slot)); + slots->nmemslots = nmemslots; slots->generation++; slots->memslots[mem->slot].flags |= KVM_MEMSLOT_INVALID; @@ -787,12 +795,21 @@ skip_lpage: } r = -ENOMEM; - slots = kzalloc(sizeof(struct kvm_memslots), GFP_KERNEL); + + if (mem->slot >= kvm->memslots->nmemslots) { + nmemslots = mem->slot + 1; + flush = true; + } else + nmemslots = kvm->memslots->nmemslots; + + slots = kzalloc(sizeof(struct kvm_memslots) + + nmemslots * sizeof(struct kvm_memory_slot), + GFP_KERNEL); if (!slots) goto out_free; - memcpy(slots, kvm->memslots, sizeof(struct kvm_memslots)); - if (mem->slot >= slots->nmemslots) - slots->nmemslots = mem->slot + 1; + memcpy(slots, kvm->memslots, sizeof(struct kvm_memslots) + + kvm->memslots->nmemslots * sizeof(struct kvm_memory_slot)); + slots->nmemslots = nmemslots; slots->generation++; /* actual memory is freed via old in kvm_free_physmem_slot below */ @@ -808,6 +825,9 @@ skip_lpage: rcu_assign_pointer(kvm->memslots, slots); synchronize_srcu_expedited(&kvm->srcu); + if (flush) + kvm_arch_flush_shadow(kvm); + kvm_arch_commit_memory_region(kvm, mem, old, user_alloc); kvm_free_physmem_slot(&old, &new); @@ -841,8 +861,6 @@ int kvm_vm_ioctl_set_memory_region(struct kvm *kvm, kvm_userspace_memory_region *mem, int user_alloc) { - if (mem->slot >= KVM_MEMORY_SLOTS) - return -EINVAL; return kvm_set_memory_region(kvm, mem, user_alloc); } @@ -855,7 +873,7 @@ int kvm_get_dirty_log(struct kvm *kvm, unsigned long any = 0; r = -EINVAL; - if (log->slot >= KVM_MEMORY_SLOTS) + if (log->slot >= kvm->memslots->nmemslots) goto out; memslot = &kvm->memslots->memslots[log->slot]; @@ -947,7 +965,7 @@ int kvm_is_visible_gfn(struct kvm *kvm, gfn_t gfn) int i; struct kvm_memslots *slots = kvm_memslots(kvm); - for (i = 0; i < KVM_MEMORY_SLOTS; ++i) { + for (i = KVM_PRIVATE_MEM_SLOTS; i < slots->nmemslots; ++i) { struct kvm_memory_slot *memslot = &slots->memslots[i]; if (memslot->flags & KVM_MEMSLOT_INVALID) @@ -1832,6 +1850,8 @@ static long kvm_vm_ioctl(struct file *filp, sizeof kvm_userspace_mem)) goto out; + kvm_userspace_mem.slot += KVM_PRIVATE_MEM_SLOTS; + r = kvm_vm_ioctl_set_memory_region(kvm, &kvm_userspace_mem, 1); if (r) goto out; @@ -1843,6 +1863,9 @@ static long kvm_vm_ioctl(struct file *filp, r = -EFAULT; if (copy_from_user(&log, argp, sizeof log)) goto out; + + log.slot += KVM_PRIVATE_MEM_SLOTS; + r = kvm_vm_ioctl_get_dirty_log(kvm, &log); if (r) goto out; @@ -1937,7 +1960,7 @@ static long kvm_vm_compat_ioctl(struct file *filp, if (copy_from_user(&compat_log, (void __user *)arg, sizeof(compat_log))) goto out; - log.slot = compat_log.slot; + log.slot = compat_log.slot + KVM_PRIVATE_MEM_SLOTS; log.padding1 = compat_log.padding1; log.padding2 = compat_log.padding2; log.dirty_bitmap = compat_ptr(compat_log.dirty_bitmap);