From patchwork Wed Dec  8 17:04:03 2010
Content-Type: text/plain; charset="utf-8"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit
X-Patchwork-Submitter: Nadav Har'El <nyh@il.ibm.com>
X-Patchwork-Id: 391112
Received: from vger.kernel.org (vger.kernel.org [209.132.180.67])
	by demeter1.kernel.org (8.14.4/8.14.3) with ESMTP id oB8H4REj019538
	for <patchwork-kvm@patchwork.kernel.org>; Wed, 8 Dec 2010 17:04:28 GMT
Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand
	id S1753872Ab0LHREK (ORCPT
	<rfc822;patchwork-kvm@patchwork.kernel.org>);
	Wed, 8 Dec 2010 12:04:10 -0500
Received: from mtagate4.uk.ibm.com ([194.196.100.164]:34065 "EHLO
	mtagate4.uk.ibm.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org
	with ESMTP id S1753741Ab0LHREH (ORCPT <rfc822; kvm@vger.kernel.org>);
	Wed, 8 Dec 2010 12:04:07 -0500
Received: from d06nrmr1507.portsmouth.uk.ibm.com
	(d06nrmr1507.portsmouth.uk.ibm.com [9.149.38.233])
	by mtagate4.uk.ibm.com (8.13.1/8.13.1) with ESMTP id oB8H45iM017539
	for <kvm@vger.kernel.org>; Wed, 8 Dec 2010 17:04:05 GMT
Received: from d06av06.portsmouth.uk.ibm.com (d06av06.portsmouth.uk.ibm.com
	[9.149.37.217])
	by d06nrmr1507.portsmouth.uk.ibm.com (8.13.8/8.13.8/NCO v10.0) with
	ESMTP id oB8H47ij2748478
	for <kvm@vger.kernel.org>; Wed, 8 Dec 2010 17:04:07 GMT
Received: from d06av06.portsmouth.uk.ibm.com (loopback [127.0.0.1])
	by d06av06.portsmouth.uk.ibm.com (8.14.4/8.13.1/NCO v10.0 AVout) with
	ESMTP id oB8H453I018644
	for <kvm@vger.kernel.org>; Wed, 8 Dec 2010 10:04:05 -0700
Received: from rice.haifa.ibm.com (rice.haifa.ibm.com [9.148.8.217])
	by d06av06.portsmouth.uk.ibm.com (8.14.4/8.13.1/NCO v10.0 AVin) with
	ESMTP id oB8H44x4018639
	(version=TLSv1/SSLv3 cipher=DHE-RSA-AES256-SHA bits=256 verify=NO);
	Wed, 8 Dec 2010 10:04:05 -0700
Received: from rice.haifa.ibm.com (lnx-nyh.haifa.ibm.com [127.0.0.1])
	by rice.haifa.ibm.com (8.14.4/8.14.4) with ESMTP id oB8H44qW008631;
	Wed, 8 Dec 2010 19:04:04 +0200
Received: (from nyh@localhost)
	by rice.haifa.ibm.com (8.14.4/8.14.4/Submit) id oB8H43i2008629;
	Wed, 8 Dec 2010 19:04:03 +0200
Date: Wed, 8 Dec 2010 19:04:03 +0200
Message-Id: <201012081704.oB8H43i2008629@rice.haifa.ibm.com>
X-Authentication-Warning: rice.haifa.ibm.com: nyh set sender to "Nadav
	Har'El" <nyh@il.ibm.com> using -f
Cc: gleb@redhat.com, avi@redhat.com
To: kvm@vger.kernel.org
From: "Nadav Har'El" <nyh@il.ibm.com>
References: <1291827596-nyh@il.ibm.com>
Subject: [PATCH 08/28] nVMX: Hold a vmcs02 for each vmcs12
Sender: kvm-owner@vger.kernel.org
Precedence: bulk
List-ID: <kvm.vger.kernel.org>
X-Mailing-List: kvm@vger.kernel.org
X-Greylist: IP, sender and recipient auto-whitelisted, not delayed by
	milter-greylist-4.2.3 (demeter1.kernel.org [140.211.167.41]);
	Wed, 08 Dec 2010 17:04:28 +0000 (UTC)


--- .before/arch/x86/kvm/vmx.c	2010-12-08 18:56:49.000000000 +0200
+++ .after/arch/x86/kvm/vmx.c	2010-12-08 18:56:49.000000000 +0200
@@ -155,6 +155,12 @@ struct __packed vmcs12 {
  */
 #define VMCS12_REVISION 0x11e57ed0
 
+struct vmcs_list {	/* entry mapping one vmcs12 to its vmcs02 */
+	struct list_head list;	/* link in nested.vmcs02_list */
+	gpa_t vmcs12_addr;	/* matched against nested.current_vmptr */
+	struct vmcs *vmcs02;	/* from alloc_vmcs(), released by free_vmcs() */
+};
+
 /*
  * The nested_vmx structure is part of vcpu_vmx, and holds information we need
  * for correct emulation of VMX (i.e., nested VMX) on this vcpu. For example,
@@ -170,6 +176,10 @@ struct nested_vmx {
 	/* The host-usable pointer to the above */
 	struct page *current_vmcs12_page;
 	struct vmcs12 *current_vmcs12;
+
+	/* list of real (hardware) VMCS, one for each L2 guest of L1 */
+	struct list_head vmcs02_list; /* a vmcs_list */
+	int vmcs02_num;
 };
 
 struct vcpu_vmx {
@@ -1736,6 +1746,85 @@ static void free_vmcs(struct vmcs *vmcs)
 	free_pages((unsigned long)vmcs, vmcs_config.order);
 }
 
+/* Return the vmcs02 cached for nested.current_vmptr, or NULL if none */
+static struct vmcs *nested_get_current_vmcs(struct kvm_vcpu *vcpu)
+{
+	struct vcpu_vmx *vmx = to_vmx(vcpu);
+	struct vmcs_list *list_item;
+
+	list_for_each_entry(list_item, &vmx->nested.vmcs02_list, list)
+		if (list_item->vmcs12_addr == vmx->nested.current_vmptr)
+			return list_item->vmcs02;
+	return NULL;
+}
+
+/*
+ * Allocate an L0 VMCS (vmcs02) for the current L1 VMCS (vmcs12), if one
+ * does not already exist. The allocation is done in L0 memory, so to avoid
+ * denial-of-service by guests, at most NESTED_MAX_VMCS are held per vcpu;
+ * a well-behaving L1 will VMCLEAR unused vmcs12s and not hit this limit.
+ * Returns 0 on success (or if one already exists), -ENOMEM otherwise.
+ */
+static const int NESTED_MAX_VMCS = 256;
+static int nested_create_current_vmcs(struct kvm_vcpu *vcpu)
+{
+	struct vcpu_vmx *vmx = to_vmx(vcpu);
+	struct vmcs_list *new_l2_guest;
+	struct vmcs *vmcs02;
+
+	if (nested_get_current_vmcs(vcpu))
+		return 0; /* nothing to do - we already have a VMCS */
+
+	if (vmx->nested.vmcs02_num >= NESTED_MAX_VMCS)
+		return -ENOMEM;
+
+	new_l2_guest = kmalloc(sizeof(*new_l2_guest), GFP_KERNEL);
+	if (!new_l2_guest)
+		return -ENOMEM;
+
+	vmcs02 = alloc_vmcs();
+	if (!vmcs02) {
+		kfree(new_l2_guest);
+		return -ENOMEM;
+	}
+
+	new_l2_guest->vmcs12_addr = vmx->nested.current_vmptr;
+	new_l2_guest->vmcs02 = vmcs02;
+	list_add(&new_l2_guest->list, &vmx->nested.vmcs02_list);
+	vmx->nested.vmcs02_num++;
+	return 0;
+}
+
+/* Release the vmcs02 backing the vmcs12 at vmptr and unlink its list entry */
+static void nested_free_vmcs(struct kvm_vcpu *vcpu, gpa_t vmptr)
+{
+	struct nested_vmx *nested = &to_vmx(vcpu)->nested;
+	struct vmcs_list *entry, *next;
+
+	list_for_each_entry_safe(entry, next, &nested->vmcs02_list, list)
+		if (entry->vmcs12_addr == vmptr) {
+			list_del(&entry->list);
+			free_vmcs(entry->vmcs02);
+			kfree(entry);
+			nested->vmcs02_num--;
+			return;
+		}
+}
+
+/* Free every vmcs02 held for this vcpu and empty vmcs02_list */
+static void free_l1_state(struct kvm_vcpu *vcpu)
+{
+	struct nested_vmx *nested = &to_vmx(vcpu)->nested;
+	struct vmcs_list *entry, *next;
+
+	list_for_each_entry_safe(entry, next, &nested->vmcs02_list, list) {
+		list_del(&entry->list);
+		free_vmcs(entry->vmcs02);
+		kfree(entry);
+	}
+	nested->vmcs02_num = 0;
+}
+
 static void free_kvm_area(void)
 {
 	int cpu;
@@ -3884,6 +3973,9 @@ static int handle_vmon(struct kvm_vcpu *
 		return 1;
 	}
 
+	INIT_LIST_HEAD(&(vmx->nested.vmcs02_list));
+	vmx->nested.vmcs02_num = 0;
+
 	vmx->nested.vmxon = true;
 
 	skip_emulated_instruction(vcpu);
@@ -3931,6 +4023,8 @@ static int handle_vmoff(struct kvm_vcpu 
 	if (to_vmx(vcpu)->nested.current_vmptr != -1ull)
 		nested_release_page(to_vmx(vcpu)->nested.current_vmcs12_page);
 
+	free_l1_state(vcpu);
+
 	skip_emulated_instruction(vcpu);
 	return 1;
 }
@@ -4420,6 +4514,8 @@ static void vmx_free_vcpu(struct kvm_vcp
 	free_vpid(vmx);
 	if (vmx->nested.vmxon && to_vmx(vcpu)->nested.current_vmptr != -1ull)
 		nested_release_page(to_vmx(vcpu)->nested.current_vmcs12_page);
+	if (vmx->nested.vmxon)
+		free_l1_state(vcpu);
 	vmx_free_vmcs(vcpu);
 	kfree(vmx->guest_msrs);
 	kvm_vcpu_uninit(vcpu);