From patchwork Thu Feb 3 12:57:32 2011 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Nadav Har'El X-Patchwork-Id: 529251 Received: from vger.kernel.org (vger.kernel.org [209.132.180.67]) by demeter1.kernel.org (8.14.4/8.14.3) with ESMTP id p13CvgKh003536 for ; Thu, 3 Feb 2011 12:57:42 GMT Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1756340Ab1BCM5j (ORCPT ); Thu, 3 Feb 2011 07:57:39 -0500 Received: from mailgw12.technion.ac.il ([132.68.225.12]:12913 "EHLO mailgw12.technion.ac.il" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1752505Ab1BCM5i (ORCPT ); Thu, 3 Feb 2011 07:57:38 -0500 X-IronPort-Anti-Spam-Filtered: true X-IronPort-Anti-Spam-Result: AqEEACY1Sk2ERHMGgWdsb2JhbAClKxUBARYiJLxZAoVWBItm X-IronPort-AV: E=Sophos;i="4.60,418,1291586400"; d="scan'208";a="29152750" Received: from fermat.math.technion.ac.il ([132.68.115.6]) by mailgw12.technion.ac.il with ESMTP; 03 Feb 2011 14:57:34 +0200 Received: from fermat.math.technion.ac.il (localhost [127.0.0.1]) by fermat.math.technion.ac.il (8.12.10/8.12.10) with ESMTP id p13CvXd9019684; Thu, 3 Feb 2011 14:57:33 +0200 (IST) Received: (from nyh@localhost) by fermat.math.technion.ac.il (8.12.10/8.12.10/Submit) id p13CvWXM019683; Thu, 3 Feb 2011 14:57:32 +0200 (IST) X-Authentication-Warning: fermat.math.technion.ac.il: nyh set sender to nyh@math.technion.ac.il using -f Date: Thu, 3 Feb 2011 14:57:32 +0200 From: "Nadav Har'El" To: Avi Kivity Cc: kvm@vger.kernel.org, gleb@redhat.com Subject: Re: [PATCH 07/29] nVMX: Hold a vmcs02 for each vmcs12 Message-ID: <20110203125732.GA19503@fermat.math.technion.ac.il> References: <1296116987-nyh@il.ibm.com> <201101270833.p0R8XQ4w002480@rice.haifa.ibm.com> <4D45372E.2050605@redhat.com> Mime-Version: 1.0 Content-Disposition: inline In-Reply-To: <4D45372E.2050605@redhat.com> User-Agent: Mutt/1.4.2.2i Hebrew-Date: 29 Shevat 5771 Sender: kvm-owner@vger.kernel.org Precedence: bulk 
List-ID: X-Mailing-List: kvm@vger.kernel.org X-Greylist: IP, sender and recipient auto-whitelisted, not delayed by milter-greylist-4.2.6 (demeter1.kernel.org [140.211.167.41]); Thu, 03 Feb 2011 12:57:42 +0000 (UTC) --- .before/arch/x86/kvm/vmx.c 2011-02-03 14:46:53.000000000 +0200 +++ .after/arch/x86/kvm/vmx.c 2011-02-03 14:46:53.000000000 +0200 @@ -117,6 +117,7 @@ static int ple_window = KVM_VMX_DEFAULT_ module_param(ple_window, int, S_IRUGO); #define NR_AUTOLOAD_MSRS 1 +#define VMCS02_POOL_SIZE 1 struct vmcs { u32 revision_id; @@ -159,6 +160,31 @@ struct __packed vmcs12 { #define VMCS12_REVISION 0x11e57ed0 /* + * When we temporarily switch a vcpu's VMCS (e.g., stop using an L1's VMCS + * while we use L2's VMCS), and wish to save the previous VMCS, we must also + * remember on which CPU it was last loaded (vcpu->cpu), so when we return to + * using this VMCS we'll know if we're now running on a different CPU and need + * to clear the VMCS on the old CPU, and load it on the new one. Additionally, + * we need to remember whether this VMCS was launched (vmx->launched), so when + * we return to it we know whether to VMLAUNCH or to VMRESUME it (we cannot deduce + * this from other state, because it's possible that this VMCS had once been + * launched, but has since been cleared after a CPU switch, and now + * vmx->launched is 0). + */ +struct saved_vmcs { + struct vmcs *vmcs; + int cpu; + int launched; +}; + +/* Used to remember the last vmcs02 used for some recently used vmcs12s */ +struct vmcs02_list { + struct list_head list; + gpa_t vmcs12_addr; + struct saved_vmcs vmcs02; +}; + +/* * The nested_vmx structure is part of vcpu_vmx, and holds information we need * for correct emulation of VMX (i.e., nested VMX) on this vcpu. 
For example, * the current VMCS set by L1, a list of the VMCSs used to run the active @@ -173,6 +199,10 @@ struct nested_vmx { /* The host-usable pointer to the above */ struct page *current_vmcs12_page; struct vmcs12 *current_vmcs12; + + /* vmcs02_list cache of VMCSs recently used to run L2 guests */ + struct list_head vmcs02_pool; + int vmcs02_num; }; struct vcpu_vmx { @@ -3965,6 +3995,106 @@ static int handle_invalid_op(struct kvm_ } /* + * To run an L2 guest, we need a vmcs02 based on the L1-specified vmcs12. + * We could reuse a single VMCS for all the L2 guests, but we also want the + * option to allocate a separate vmcs02 for each separate loaded vmcs12 - this + * allows keeping them loaded on the processor, and in the future will allow + * optimizations where prepare_vmcs02 doesn't need to set all the fields on + * every entry if they never change. + * So we keep, in vmx->nested.vmcs02_pool, a cache of size VMCS02_POOL_SIZE + * (>=0) with a vmcs02 for each of the recently loaded vmcs12s, most recent first. + * + * The following functions allocate and free a vmcs02 in this pool. + */ + +static void __nested_free_saved_vmcs(void *arg) +{ + struct saved_vmcs *saved_vmcs = arg; + + vmcs_clear(saved_vmcs->vmcs); + if (per_cpu(current_vmcs, saved_vmcs->cpu) == saved_vmcs->vmcs) + per_cpu(current_vmcs, saved_vmcs->cpu) = NULL; +} + +/* + * Free a VMCS, but before that VMCLEAR it on the CPU where it was last loaded + * (the necessary information is in the saved_vmcs structure). 
+ * See also vcpu_clear() (with different parameters and side-effects) + */ +static void nested_free_saved_vmcs(struct vcpu_vmx *vmx, + struct saved_vmcs *saved_vmcs) +{ + if (saved_vmcs->cpu != -1) + smp_call_function_single(saved_vmcs->cpu, + __nested_free_saved_vmcs, saved_vmcs, 1); + + free_vmcs(saved_vmcs->vmcs); +} + +/* Free and remove from pool a vmcs02 saved for a vmcs12 (if there is one) */ +static void nested_free_vmcs02(struct vcpu_vmx *vmx, gpa_t vmptr) +{ + struct vmcs02_list *item; + list_for_each_entry(item, &vmx->nested.vmcs02_pool, list) + if (item->vmcs12_addr == vmptr) { + nested_free_saved_vmcs(vmx, &item->vmcs02); + list_del(&item->list); + kfree(item); + vmx->nested.vmcs02_num--; + return; + } +} + +/* Free all vmcs02 saved for this vcpu */ +static void nested_free_all_vmcs02(struct vcpu_vmx *vmx) +{ + struct vmcs02_list *item, *n; + list_for_each_entry_safe(item, n, &vmx->nested.vmcs02_pool, list) { + nested_free_saved_vmcs(vmx, &item->vmcs02); + list_del(&item->list); + kfree(item); + } + vmx->nested.vmcs02_num = 0; +} + +/* Get a vmcs02 for the current vmcs12. */ +static struct saved_vmcs *nested_get_current_vmcs02(struct vcpu_vmx *vmx) +{ + struct vmcs02_list *item; + list_for_each_entry(item, &vmx->nested.vmcs02_pool, list) + if (item->vmcs12_addr == vmx->nested.current_vmptr){ + list_move(&item->list, &vmx->nested.vmcs02_pool); + return &item->vmcs02; + } + + if (vmx->nested.vmcs02_num >= max(VMCS02_POOL_SIZE,1)) { + /* Recycle the least recently used VMCS. 
*/ + item = list_entry(vmx->nested.vmcs02_pool.prev, + struct vmcs02_list, list); + item->vmcs12_addr = vmx->nested.current_vmptr; + list_move(&item->list, &vmx->nested.vmcs02_pool); + return &item->vmcs02; + } + + /* Create a new vmcs02 */ + item = (struct vmcs02_list *) + kmalloc(sizeof(struct vmcs02_list), GFP_KERNEL); + if (!item) + return NULL; + item->vmcs02.vmcs = alloc_vmcs(); + if (!item->vmcs02.vmcs) { + kfree(item); + return NULL; + } + item->vmcs12_addr = vmx->nested.current_vmptr; + item->vmcs02.cpu = -1; + item->vmcs02.launched = 0; + list_add(&(item->list), &(vmx->nested.vmcs02_pool)); + vmx->nested.vmcs02_num++; + return &item->vmcs02; +} + +/* * Emulate the VMXON instruction. * Currently, we just remember that VMX is active, and do not save or even * inspect the argument to VMXON (the so-called "VMXON pointer") because we @@ -4000,6 +4130,9 @@ static int handle_vmon(struct kvm_vcpu * return 1; } + INIT_LIST_HEAD(&(vmx->nested.vmcs02_pool)); + vmx->nested.vmcs02_num = 0; + vmx->nested.vmxon = true; skip_emulated_instruction(vcpu); @@ -4050,6 +4183,8 @@ static void free_nested(struct vcpu_vmx nested_release_page(vmx->nested.current_vmcs12_page); vmx->nested.current_vmptr = -1ull; } + + nested_free_all_vmcs02(vmx); } /* Emulate the VMXOFF instruction */