From patchwork Thu Feb  3 12:57:32 2011
Content-Type: text/plain; charset="utf-8"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit
X-Patchwork-Submitter: Nadav Har'El <nyh@math.technion.ac.il>
X-Patchwork-Id: 529251
Received: from vger.kernel.org (vger.kernel.org [209.132.180.67])
	by demeter1.kernel.org (8.14.4/8.14.3) with ESMTP id p13CvgKh003536
	for <patchwork-kvm@patchwork.kernel.org>; Thu, 3 Feb 2011 12:57:42 GMT
Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand
	id S1756340Ab1BCM5j (ORCPT
	<rfc822;patchwork-kvm@patchwork.kernel.org>);
	Thu, 3 Feb 2011 07:57:39 -0500
Received: from mailgw12.technion.ac.il ([132.68.225.12]:12913 "EHLO
	mailgw12.technion.ac.il" rhost-flags-OK-OK-OK-OK) by vger.kernel.org
	with ESMTP id S1752505Ab1BCM5i (ORCPT <rfc822; kvm@vger.kernel.org>);
	Thu, 3 Feb 2011 07:57:38 -0500
X-IronPort-Anti-Spam-Filtered: true
X-IronPort-Anti-Spam-Result: AqEEACY1Sk2ERHMGgWdsb2JhbAClKxUBARYiJLxZAoVWBItm
X-IronPort-AV: E=Sophos;i="4.60,418,1291586400"; d="scan'208";a="29152750"
Received: from fermat.math.technion.ac.il ([132.68.115.6])
	by mailgw12.technion.ac.il with ESMTP; 03 Feb 2011 14:57:34 +0200
Received: from fermat.math.technion.ac.il (localhost [127.0.0.1])
	by fermat.math.technion.ac.il (8.12.10/8.12.10) with ESMTP id
	p13CvXd9019684; Thu, 3 Feb 2011 14:57:33 +0200 (IST)
Received: (from nyh@localhost)
	by fermat.math.technion.ac.il (8.12.10/8.12.10/Submit) id
	p13CvWXM019683; Thu, 3 Feb 2011 14:57:32 +0200 (IST)
X-Authentication-Warning: fermat.math.technion.ac.il: nyh set sender to
	nyh@math.technion.ac.il using -f
Date: Thu, 3 Feb 2011 14:57:32 +0200
From: "Nadav Har'El" <nyh@math.technion.ac.il>
To: Avi Kivity <avi@redhat.com>
Cc: kvm@vger.kernel.org, gleb@redhat.com
Subject: Re: [PATCH 07/29] nVMX: Hold a vmcs02 for each vmcs12
Message-ID: <20110203125732.GA19503@fermat.math.technion.ac.il>
References: <1296116987-nyh@il.ibm.com>
	<201101270833.p0R8XQ4w002480@rice.haifa.ibm.com>
	<4D45372E.2050605@redhat.com>
Mime-Version: 1.0
Content-Disposition: inline
In-Reply-To: <4D45372E.2050605@redhat.com>
User-Agent: Mutt/1.4.2.2i
Hebrew-Date: 29 Shevat 5771
Sender: kvm-owner@vger.kernel.org
Precedence: bulk
List-ID: <kvm.vger.kernel.org>
X-Mailing-List: kvm@vger.kernel.org
X-Greylist: IP, sender and recipient auto-whitelisted, not delayed by
	milter-greylist-4.2.6 (demeter1.kernel.org [140.211.167.41]);
	Thu, 03 Feb 2011 12:57:42 +0000 (UTC)


--- .before/arch/x86/kvm/vmx.c	2011-02-03 14:46:53.000000000 +0200
+++ .after/arch/x86/kvm/vmx.c	2011-02-03 14:46:53.000000000 +0200
@@ -117,6 +117,7 @@ static int ple_window = KVM_VMX_DEFAULT_
 module_param(ple_window, int, S_IRUGO);
 
 #define NR_AUTOLOAD_MSRS 1
+#define VMCS02_POOL_SIZE 1
 
 struct vmcs {
 	u32 revision_id;
@@ -159,6 +160,31 @@ struct __packed vmcs12 {
 #define VMCS12_REVISION 0x11e57ed0
 
 /*
+ * When we temporarily switch a vcpu's VMCS (e.g., stop using an L1's VMCS
+ * while we use L2's VMCS), and wish to save the previous VMCS, we must also
+ * remember on which CPU it was last loaded (vcpu->cpu), so when we return to
+ * using this VMCS we'll know if we're now running on a different CPU and need
+ * to clear the VMCS on the old CPU, and load it on the new one. Additionally,
+ * we need to remember whether this VMCS was launched (vmx->launched), so when
+ * we return to it we know if to VMLAUNCH or to VMRESUME it (we cannot deduce
+ * this from other state, because it's possible that this VMCS had once been
+ * launched, but has since been cleared after a CPU switch, and now
+ * vmx->launch is 0).
+ */
+struct saved_vmcs {
+	struct vmcs *vmcs;
+	int cpu;
+	int launched;
+};
+
+/* Used to remember the last vmcs02 used for some recently used vmcs12s */
+struct vmcs02_list {
+	struct list_head list;
+	gpa_t vmcs12_addr;
+	struct saved_vmcs vmcs02;
+};
+
+/*
  * The nested_vmx structure is part of vcpu_vmx, and holds information we need
  * for correct emulation of VMX (i.e., nested VMX) on this vcpu. For example,
  * the current VMCS set by L1, a list of the VMCSs used to run the active
@@ -173,6 +199,10 @@ struct nested_vmx {
 	/* The host-usable pointer to the above */
 	struct page *current_vmcs12_page;
 	struct vmcs12 *current_vmcs12;
+
+	/* vmcs02_list cache of VMCSs recently used to run L2 guests */
+	struct list_head vmcs02_pool;
+	int vmcs02_num;
 };
 
 struct vcpu_vmx {
@@ -3965,6 +3995,106 @@ static int handle_invalid_op(struct kvm_
 }
 
 /*
+ * To run an L2 guest, we need a vmcs02 based the L1-specified vmcs12.
+ * We could reuse a single VMCS for all the L2 guests, but we also want the
+ * option to allocate a separate vmcs02 for each separate loaded vmcs12 - this
+ * allows keeping them loaded on the processor, and in the future will allow
+ * optimizations where prepare_vmcs02 doesn't need to set all the fields on
+ * every entry if they never change.
+ * So we keep, in vmx->nested.vmcs02_pool, an cache of size VMCS02_POOL_SIZE
+ * (>=0) with a vmcs02 for each recently loaded vmcs12s, most recent first.
+ *
+ * The following functions allocate and free a vmcs02 in this pool.
+ */
+
+static void __nested_free_saved_vmcs(void *arg)
+{
+	struct saved_vmcs *saved_vmcs = arg;
+
+	vmcs_clear(saved_vmcs->vmcs);
+	if (per_cpu(current_vmcs, saved_vmcs->cpu) == saved_vmcs->vmcs)
+		per_cpu(current_vmcs, saved_vmcs->cpu) = NULL;
+}
+
+/*
+ * Free a VMCS, but before that VMCLEAR it on the CPU where it was last loaded
+ * (the necessary information is in the saved_vmcs structure).
+ * See also vcpu_clear() (with different parameters and side-effects)
+ */
+static void nested_free_saved_vmcs(struct vcpu_vmx *vmx,
+		struct saved_vmcs *saved_vmcs)
+{
+	if (saved_vmcs->cpu != -1)
+		smp_call_function_single(saved_vmcs->cpu,
+				__nested_free_saved_vmcs, saved_vmcs, 1);
+
+	free_vmcs(saved_vmcs->vmcs);
+}
+
+/* Free and remove from pool a vmcs02 saved for a vmcs12 (if there is one) */
+static void nested_free_vmcs02(struct vcpu_vmx *vmx, gpa_t vmptr)
+{
+	struct vmcs02_list *item;
+	list_for_each_entry(item, &vmx->nested.vmcs02_pool, list)
+		if (item->vmcs12_addr == vmptr) {
+			nested_free_saved_vmcs(vmx, &item->vmcs02);
+			list_del(&item->list);
+			kfree(item);
+			vmx->nested.vmcs02_num--;
+			return;
+		}
+}
+
+/* Free all vmcs02 saved for this vcpu */
+static void nested_free_all_vmcs02(struct vcpu_vmx *vmx)
+{
+	struct vmcs02_list *item, *n;
+	list_for_each_entry_safe(item, n, &vmx->nested.vmcs02_pool, list) {
+		nested_free_saved_vmcs(vmx, &item->vmcs02);
+		list_del(&item->list);
+		kfree(item);
+	}
+	vmx->nested.vmcs02_num = 0;
+}
+
+/* Get a vmcs02 for the current vmcs12. */
+static struct saved_vmcs *nested_get_current_vmcs02(struct vcpu_vmx *vmx)
+{
+	struct vmcs02_list *item;
+	list_for_each_entry(item, &vmx->nested.vmcs02_pool, list)
+		if (item->vmcs12_addr == vmx->nested.current_vmptr){
+			list_move(&item->list, &vmx->nested.vmcs02_pool);
+			return &item->vmcs02;
+		}
+
+	if (vmx->nested.vmcs02_num >= max(VMCS02_POOL_SIZE,1)) {
+		/* Recycle the least recently used VMCS. */
+		item = list_entry(vmx->nested.vmcs02_pool.prev,
+			struct vmcs02_list, list);
+		item->vmcs12_addr = vmx->nested.current_vmptr;
+		list_move(&item->list, &vmx->nested.vmcs02_pool);
+		return &item->vmcs02;
+	}
+
+	/* Create a new vmcs02 */
+	item = (struct vmcs02_list *)
+		kmalloc(sizeof(struct vmcs02_list), GFP_KERNEL);
+	if (!item)
+		return NULL;
+	item->vmcs02.vmcs = alloc_vmcs();
+	if (!item->vmcs02.vmcs) {
+		kfree(item);
+		return NULL;
+	}
+	item->vmcs12_addr = vmx->nested.current_vmptr;
+	item->vmcs02.cpu = -1;
+	item->vmcs02.launched = 0;
+	list_add(&(item->list), &(vmx->nested.vmcs02_pool));
+	vmx->nested.vmcs02_num++;
+	return &item->vmcs02;
+}
+
+/*
  * Emulate the VMXON instruction.
  * Currently, we just remember that VMX is active, and do not save or even
  * inspect the argument to VMXON (the so-called "VMXON pointer") because we
@@ -4000,6 +4130,9 @@ static int handle_vmon(struct kvm_vcpu *
 		return 1;
 	}
 
+	INIT_LIST_HEAD(&(vmx->nested.vmcs02_pool));
+	vmx->nested.vmcs02_num = 0;
+
 	vmx->nested.vmxon = true;
 
 	skip_emulated_instruction(vcpu);
@@ -4050,6 +4183,8 @@ static void free_nested(struct vcpu_vmx 
 		nested_release_page(vmx->nested.current_vmcs12_page);
 		vmx->nested.current_vmptr = -1ull;
 	}
+
+	nested_free_all_vmcs02(vmx);
 }
 
 /* Emulate the VMXOFF instruction */