From patchwork Thu Jan 27 08:32:24 2011
X-Patchwork-Submitter: Nadav Har'El
X-Patchwork-Id: 510501
Date: Thu, 27 Jan 2011 10:32:24 +0200
Message-Id: <201101270832.p0R8WOxe002432@rice.haifa.ibm.com>
From: "Nadav Har'El"
To: kvm@vger.kernel.org
Cc: gleb@redhat.com, avi@redhat.com
References: <1296116987-nyh@il.ibm.com>
Subject: [PATCH 05/29] nVMX: Implement reading and writing of VMX MSRs

--- .before/arch/x86/kvm/vmx.c	2011-01-26 18:06:03.000000000 +0200
+++ .after/arch/x86/kvm/vmx.c	2011-01-26 18:06:03.000000000 +0200
@@ -1258,6 +1258,128 @@ static inline bool nested_vmx_allowed(st
 }
 
 /*
+ * If we allow our guest to use VMX instructions (i.e., nested VMX), we should
+ * also let it use VMX-specific MSRs.
+ * vmx_get_vmx_msr() and vmx_set_vmx_msr() return 1 when we handled a
+ * VMX-specific MSR, or 0 when we haven't (and the caller should handle it
+ * like all other MSRs).
+ */
+static int vmx_get_vmx_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata)
+{
+	u64 vmx_msr = 0;
+	u32 vmx_msr_high, vmx_msr_low;
+
+	if (!nested_vmx_allowed(vcpu) && msr_index >= MSR_IA32_VMX_BASIC &&
+	    msr_index <= MSR_IA32_VMX_TRUE_ENTRY_CTLS) {
+		/*
+		 * According to the spec, processors which do not support VMX
+		 * should throw a #GP(0) when VMX capability MSRs are read.
+		 */
+		kvm_queue_exception_e(vcpu, GP_VECTOR, 0);
+		return 1;
+	}
+
+	switch (msr_index) {
+	case MSR_IA32_FEATURE_CONTROL:
+		*pdata = 0;
+		break;
+	case MSR_IA32_VMX_BASIC:
+		/*
+		 * This MSR reports some information about VMX support of the
+		 * processor. We should return information about the VMX we
+		 * emulate for the guest, and the VMCS structure we give it -
+		 * not about the VMX support of the underlying hardware.
+		 * However, some capabilities of the underlying hardware are
+		 * used directly by our emulation (e.g., the physical address
+		 * width), so these are copied from what the hardware reports.
+		 */
+		*pdata = VMCS12_REVISION | (((u64)sizeof(struct vmcs12)) << 32);
+		rdmsrl(MSR_IA32_VMX_BASIC, vmx_msr);
+		*pdata |= vmx_msr &
+			(VMX_BASIC_64 | VMX_BASIC_MEM_TYPE | VMX_BASIC_INOUT);
+		break;
+#define CORE2_PINBASED_CTLS_MUST_BE_ONE 0x00000016
+	case MSR_IA32_VMX_TRUE_PINBASED_CTLS:
+	case MSR_IA32_VMX_PINBASED_CTLS:
+		vmx_msr_low = CORE2_PINBASED_CTLS_MUST_BE_ONE;
+		vmx_msr_high = CORE2_PINBASED_CTLS_MUST_BE_ONE |
+			PIN_BASED_EXT_INTR_MASK |
+			PIN_BASED_NMI_EXITING |
+			PIN_BASED_VIRTUAL_NMIS;
+		*pdata = vmx_msr_low | ((u64)vmx_msr_high << 32);
+		break;
+	case MSR_IA32_VMX_TRUE_PROCBASED_CTLS:
+	case MSR_IA32_VMX_PROCBASED_CTLS:
+		/* This MSR determines which vm-execution controls the L1
+		 * hypervisor may ask, or may not ask, to enable. Normally we
+		 * can only allow enabling features which the hardware can
+		 * support, but we limit ourselves to allowing only known
+		 * features that were tested nested. We allow disabling any
+		 * feature (even if the hardware can't disable it).
+		 */
+		rdmsr(MSR_IA32_VMX_PROCBASED_CTLS, vmx_msr_low, vmx_msr_high);
+
+		vmx_msr_low = 0; /* allow disabling any feature */
+		vmx_msr_high &= /* do not expose new untested features */
+			CPU_BASED_HLT_EXITING | CPU_BASED_CR3_LOAD_EXITING |
+			CPU_BASED_CR3_STORE_EXITING | CPU_BASED_USE_IO_BITMAPS |
+			CPU_BASED_MOV_DR_EXITING | CPU_BASED_USE_TSC_OFFSETING |
+			CPU_BASED_MWAIT_EXITING | CPU_BASED_MONITOR_EXITING |
+			CPU_BASED_INVLPG_EXITING | CPU_BASED_TPR_SHADOW |
+#ifdef CONFIG_X86_64
+			CPU_BASED_CR8_LOAD_EXITING |
+			CPU_BASED_CR8_STORE_EXITING |
+#endif
+			CPU_BASED_ACTIVATE_SECONDARY_CONTROLS;
+		*pdata = vmx_msr_low | ((u64)vmx_msr_high << 32);
+		break;
+	case MSR_IA32_VMX_TRUE_EXIT_CTLS:
+	case MSR_IA32_VMX_EXIT_CTLS:
+		*pdata = 0;
+#ifdef CONFIG_X86_64
+		*pdata |= VM_EXIT_HOST_ADDR_SPACE_SIZE;
+#endif
+		break;
+	case MSR_IA32_VMX_TRUE_ENTRY_CTLS:
+	case MSR_IA32_VMX_ENTRY_CTLS:
+		*pdata = 0;
+		break;
+	case MSR_IA32_VMX_PROCBASED_CTLS2:
+		*pdata = 0;
+		if (vm_need_virtualize_apic_accesses(vcpu->kvm))
+			*pdata |= SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES;
+		break;
+	case MSR_IA32_VMX_EPT_VPID_CAP:
+		/* Currently, no nested ept or nested vpid */
+		*pdata = 0;
+		break;
+	default:
+		return 0;
+	}
+
+	return 1;
+}
+
+static int vmx_set_vmx_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data)
+{
+	if (!nested_vmx_allowed(vcpu))
+		return 0;
+
+	/*
+	 * According to the spec, "VMX capability MSRs are read-only; an
+	 * attempt to write them (with WRMSR) produces a #GP(0)."
+	 */
+	if (msr_index >= MSR_IA32_VMX_BASIC &&
+	    msr_index <= MSR_IA32_VMX_TRUE_ENTRY_CTLS) {
+		kvm_queue_exception_e(vcpu, GP_VECTOR, 0);
+		return 1;
+	} else if (msr_index == MSR_IA32_FEATURE_CONTROL)
+		/* TODO: the right thing. */
+		return 1;
+	else
+		return 0;
+}
+/*
  * Reads an msr value (of 'msr_index') into 'pdata'.
  * Returns 0 on success, non-0 otherwise.
  * Assumes vcpu_load() was already called.
@@ -1305,6 +1427,8 @@ static int vmx_get_msr(struct kvm_vcpu *
 		/* Otherwise falls through */
 	default:
 		vmx_load_host_state(to_vmx(vcpu));
+		if (vmx_get_vmx_msr(vcpu, msr_index, pdata))
+			return 0;
 		msr = find_msr_entry(to_vmx(vcpu), msr_index);
 		if (msr) {
 			vmx_load_host_state(to_vmx(vcpu));
@@ -1374,6 +1498,8 @@ static int vmx_set_msr(struct kvm_vcpu *
 		return 1;
 		/* Otherwise falls through */
 	default:
+		if (vmx_set_vmx_msr(vcpu, msr_index, data))
+			break;
 		msr = find_msr_entry(vmx, msr_index);
 		if (msr) {
 			vmx_load_host_state(vmx);
--- .before/arch/x86/include/asm/msr-index.h	2011-01-26 18:06:03.000000000 +0200
+++ .after/arch/x86/include/asm/msr-index.h	2011-01-26 18:06:03.000000000 +0200
@@ -424,6 +424,15 @@
 #define MSR_IA32_VMX_VMCS_ENUM          0x0000048a
 #define MSR_IA32_VMX_PROCBASED_CTLS2    0x0000048b
 #define MSR_IA32_VMX_EPT_VPID_CAP       0x0000048c
+#define MSR_IA32_VMX_TRUE_PINBASED_CTLS  0x0000048d
+#define MSR_IA32_VMX_TRUE_PROCBASED_CTLS 0x0000048e
+#define MSR_IA32_VMX_TRUE_EXIT_CTLS      0x0000048f
+#define MSR_IA32_VMX_TRUE_ENTRY_CTLS     0x00000490
+
+/* VMX_BASIC bits and bitmasks */
+#define VMX_BASIC_64		0x0001000000000000LLU
+#define VMX_BASIC_MEM_TYPE	0x003c000000000000LLU
+#define VMX_BASIC_INOUT		0x0040000000000000LLU
 
 /* AMD-V MSRs */
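
For readers who want to see what an L1 hypervisor actually observes through this
emulation, here is a minimal illustrative sketch (not part of the patch) that
decodes an IA32_VMX_BASIC value such as the one vmx_get_vmx_msr() returns. The
field layout follows the Intel SDM: bits 30:0 revision identifier, bits 44:32
VMCS region size, bit 48 32-bit address limitation (VMX_BASIC_64), bits 53:50
VMCS memory type (VMX_BASIC_MEM_TYPE), bit 54 INS/OUTS exit-information
reporting (VMX_BASIC_INOUT). The decode_vmx_basic() helper and the sample value
are made up for the example.

/* Illustrative only -- not from the patch. Decodes IA32_VMX_BASIC as an L1
 * guest would see it; field layout per the Intel SDM.
 */
#include <stdint.h>
#include <stdio.h>

static void decode_vmx_basic(uint64_t v)
{
	uint32_t revision  = v & 0x7fffffff;		/* bits 30:0 */
	uint32_t vmcs_size = (v >> 32) & 0x1fff;	/* bits 44:32 */
	int addr32_only    = (v >> 48) & 1;		/* bit 48: VMX_BASIC_64 */
	int mem_type       = (v >> 50) & 0xf;		/* bits 53:50: VMX_BASIC_MEM_TYPE */
	int ins_outs_info  = (v >> 54) & 1;		/* bit 54: VMX_BASIC_INOUT */

	printf("VMCS revision id:             %u\n", revision);
	printf("VMCS region size:             %u bytes\n", vmcs_size);
	printf("addresses limited to 32 bits: %s\n", addr32_only ? "yes" : "no");
	printf("VMCS memory type:             %d (6 = write-back)\n", mem_type);
	printf("INS/OUTS info on exits:       %s\n", ins_outs_info ? "yes" : "no");
}

int main(void)
{
	/* Hypothetical value: revision 1, 4096-byte VMCS, write-back memory
	 * type (6), INS/OUTS exit information reported, no 32-bit limit.
	 */
	decode_vmx_basic(0x0058100000000001ULL);
	return 0;
}

With this patch, such a guest would read VMCS12_REVISION as the revision
identifier and sizeof(struct vmcs12) as the region size, while the
physical-address-width, memory-type and INS/OUTS bits are passed through from
the host's MSR_IA32_VMX_BASIC, as the comment in vmx_get_vmx_msr() explains.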