From patchwork Thu Jan 27 08:32:24 2011
X-Patchwork-Submitter: Nadav Har'El
X-Patchwork-Id: 510501
Date: Thu, 27 Jan 2011 10:32:24 +0200
Message-Id: <201101270832.p0R8WOxe002432@rice.haifa.ibm.com>
From: "Nadav Har'El"
To: kvm@vger.kernel.org
Cc: gleb@redhat.com, avi@redhat.com
References: <1296116987-nyh@il.ibm.com>
Subject: [PATCH 05/29] nVMX: Implement reading and writing of VMX MSRs

--- .before/arch/x86/kvm/vmx.c	2011-01-26 18:06:03.000000000 +0200
+++ .after/arch/x86/kvm/vmx.c	2011-01-26 18:06:03.000000000 +0200
@@ -1258,6 +1258,128 @@ static inline bool nested_vmx_allowed(st
 }
 
 /*
+ * If we allow our guest to use VMX instructions (i.e., nested VMX), we should
+ * also let it use VMX-specific MSRs.
+ * vmx_get_vmx_msr() and vmx_set_vmx_msr() return 1 when we handled a
+ * VMX-specific MSR, or 0 when we haven't (and the caller should handle it
+ * like all other MSRs).
+ */
+static int vmx_get_vmx_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata)
+{
+	u64 vmx_msr = 0;
+	u32 vmx_msr_high, vmx_msr_low;
+
+	if (!nested_vmx_allowed(vcpu) && msr_index >= MSR_IA32_VMX_BASIC &&
+	    msr_index <= MSR_IA32_VMX_TRUE_ENTRY_CTLS) {
+		/*
+		 * According to the spec, processors which do not support VMX
+		 * should throw a #GP(0) when VMX capability MSRs are read.
+		 */
+		kvm_queue_exception_e(vcpu, GP_VECTOR, 0);
+		return 1;
+	}
+
+	switch (msr_index) {
+	case MSR_IA32_FEATURE_CONTROL:
+		*pdata = 0;
+		break;
+	case MSR_IA32_VMX_BASIC:
+		/*
+		 * This MSR reports some information about VMX support of the
+		 * processor. We should return information about the VMX we
+		 * emulate for the guest, and the VMCS structure we give it -
+		 * not about the VMX support of the underlying hardware.
+		 * However, some capabilities of the underlying hardware are
+		 * used directly by our emulation (e.g., the physical address
+		 * width), so these are copied from what the hardware reports.
+		 */
+		*pdata = VMCS12_REVISION | (((u64)sizeof(struct vmcs12)) << 32);
+		rdmsrl(MSR_IA32_VMX_BASIC, vmx_msr);
+		*pdata |= vmx_msr &
+			(VMX_BASIC_64 | VMX_BASIC_MEM_TYPE | VMX_BASIC_INOUT);
+		break;
+#define CORE2_PINBASED_CTLS_MUST_BE_ONE 0x00000016
+	case MSR_IA32_VMX_TRUE_PINBASED_CTLS:
+	case MSR_IA32_VMX_PINBASED_CTLS:
+		vmx_msr_low = CORE2_PINBASED_CTLS_MUST_BE_ONE;
+		vmx_msr_high = CORE2_PINBASED_CTLS_MUST_BE_ONE |
+			PIN_BASED_EXT_INTR_MASK |
+			PIN_BASED_NMI_EXITING |
+			PIN_BASED_VIRTUAL_NMIS;
+		*pdata = vmx_msr_low | ((u64)vmx_msr_high << 32);
+		break;
+	case MSR_IA32_VMX_TRUE_PROCBASED_CTLS:
+	case MSR_IA32_VMX_PROCBASED_CTLS:
+		/* This MSR determines which vm-execution controls the L1
+		 * hypervisor may ask, or may not ask, to enable. Normally we
+		 * can only allow enabling features which the hardware can
+		 * support, but we limit ourselves to allowing only known
+		 * features that were tested nested. We allow disabling any
+		 * feature (even if the hardware can't disable it).
+		 */
+		rdmsr(MSR_IA32_VMX_PROCBASED_CTLS, vmx_msr_low, vmx_msr_high);
+
+		vmx_msr_low = 0; /* allow disabling any feature */
+		vmx_msr_high &= /* do not expose new untested features */
+			CPU_BASED_HLT_EXITING | CPU_BASED_CR3_LOAD_EXITING |
+			CPU_BASED_CR3_STORE_EXITING | CPU_BASED_USE_IO_BITMAPS |
+			CPU_BASED_MOV_DR_EXITING | CPU_BASED_USE_TSC_OFFSETING |
+			CPU_BASED_MWAIT_EXITING | CPU_BASED_MONITOR_EXITING |
+			CPU_BASED_INVLPG_EXITING | CPU_BASED_TPR_SHADOW |
+#ifdef CONFIG_X86_64
+			CPU_BASED_CR8_LOAD_EXITING |
+			CPU_BASED_CR8_STORE_EXITING |
+#endif
+			CPU_BASED_ACTIVATE_SECONDARY_CONTROLS;
+		*pdata = vmx_msr_low | ((u64)vmx_msr_high << 32);
+		break;
+	case MSR_IA32_VMX_TRUE_EXIT_CTLS:
+	case MSR_IA32_VMX_EXIT_CTLS:
+		*pdata = 0;
+#ifdef CONFIG_X86_64
+		*pdata |= VM_EXIT_HOST_ADDR_SPACE_SIZE;
+#endif
+		break;
+	case MSR_IA32_VMX_TRUE_ENTRY_CTLS:
+	case MSR_IA32_VMX_ENTRY_CTLS:
+		*pdata = 0;
+		break;
+	case MSR_IA32_VMX_PROCBASED_CTLS2:
+		*pdata = 0;
+		if (vm_need_virtualize_apic_accesses(vcpu->kvm))
+			*pdata |= SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES;
+		break;
+	case MSR_IA32_VMX_EPT_VPID_CAP:
+		/* Currently, no nested ept or nested vpid */
+		*pdata = 0;
+		break;
+	default:
+		return 0;
+	}
+
+	return 1;
+}
+
+static int vmx_set_vmx_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data)
+{
+	if (!nested_vmx_allowed(vcpu))
+		return 0;
+
+	/*
+	 * According to the spec, "VMX capability MSRs are read-only; an
+	 * attempt to write them (with WRMSR) produces a #GP(0)."
+	 */
+	if (msr_index >= MSR_IA32_VMX_BASIC &&
+	    msr_index <= MSR_IA32_VMX_TRUE_ENTRY_CTLS) {
+		kvm_queue_exception_e(vcpu, GP_VECTOR, 0);
+		return 1;
+	} else if (msr_index == MSR_IA32_FEATURE_CONTROL)
+		/* TODO: the right thing. */
+		return 1;
+	else
+		return 0;
+}
+/*
  * Reads an msr value (of 'msr_index') into 'pdata'.
  * Returns 0 on success, non-0 otherwise.
  * Assumes vcpu_load() was already called.
@@ -1305,6 +1427,8 @@ static int vmx_get_msr(struct kvm_vcpu *
 		/* Otherwise falls through */
 	default:
 		vmx_load_host_state(to_vmx(vcpu));
+		if (vmx_get_vmx_msr(vcpu, msr_index, pdata))
+			return 0;
 		msr = find_msr_entry(to_vmx(vcpu), msr_index);
 		if (msr) {
 			vmx_load_host_state(to_vmx(vcpu));
@@ -1374,6 +1498,8 @@ static int vmx_set_msr(struct kvm_vcpu *
 		return 1;
 		/* Otherwise falls through */
 	default:
+		if (vmx_set_vmx_msr(vcpu, msr_index, data))
+			break;
 		msr = find_msr_entry(vmx, msr_index);
 		if (msr) {
 			vmx_load_host_state(vmx);
--- .before/arch/x86/include/asm/msr-index.h	2011-01-26 18:06:03.000000000 +0200
+++ .after/arch/x86/include/asm/msr-index.h	2011-01-26 18:06:03.000000000 +0200
@@ -424,6 +424,15 @@
 #define MSR_IA32_VMX_VMCS_ENUM          0x0000048a
 #define MSR_IA32_VMX_PROCBASED_CTLS2    0x0000048b
 #define MSR_IA32_VMX_EPT_VPID_CAP       0x0000048c
+#define MSR_IA32_VMX_TRUE_PINBASED_CTLS  0x0000048d
+#define MSR_IA32_VMX_TRUE_PROCBASED_CTLS 0x0000048e
+#define MSR_IA32_VMX_TRUE_EXIT_CTLS      0x0000048f
+#define MSR_IA32_VMX_TRUE_ENTRY_CTLS     0x00000490
+
+/* VMX_BASIC bits and bitmasks */
+#define VMX_BASIC_64		0x0001000000000000LLU
+#define VMX_BASIC_MEM_TYPE	0x003c000000000000LLU
+#define VMX_BASIC_INOUT		0x0040000000000000LLU
 
 /* AMD-V MSRs */
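
For readers who want to see what an L1 hypervisor actually observes through this
emulation, here is a minimal illustrative sketch (not part of the patch) that
decodes an IA32_VMX_BASIC value such as the one vmx_get_vmx_msr() returns. The
field layout follows the Intel SDM: bits 30:0 revision identifier, bits 44:32
VMCS region size, bit 48 32-bit address limitation (VMX_BASIC_64), bits 53:50
VMCS memory type (VMX_BASIC_MEM_TYPE), bit 54 INS/OUTS exit-information
reporting (VMX_BASIC_INOUT). The decode_vmx_basic() helper and the sample value
are made up for the example.

/* Illustrative only -- not from the patch. Decodes IA32_VMX_BASIC as an L1
 * guest would see it; field layout per the Intel SDM.
 */
#include <stdint.h>
#include <stdio.h>

static void decode_vmx_basic(uint64_t v)
{
	uint32_t revision  = v & 0x7fffffff;		/* bits 30:0 */
	uint32_t vmcs_size = (v >> 32) & 0x1fff;	/* bits 44:32 */
	int addr32_only    = (v >> 48) & 1;		/* bit 48: VMX_BASIC_64 */
	int mem_type       = (v >> 50) & 0xf;		/* bits 53:50: VMX_BASIC_MEM_TYPE */
	int ins_outs_info  = (v >> 54) & 1;		/* bit 54: VMX_BASIC_INOUT */

	printf("VMCS revision id:             %u\n", revision);
	printf("VMCS region size:             %u bytes\n", vmcs_size);
	printf("addresses limited to 32 bits: %s\n", addr32_only ? "yes" : "no");
	printf("VMCS memory type:             %d (6 = write-back)\n", mem_type);
	printf("INS/OUTS info on exits:       %s\n", ins_outs_info ? "yes" : "no");
}

int main(void)
{
	/* Hypothetical value: revision 1, 4096-byte VMCS, write-back memory
	 * type (6), INS/OUTS exit information reported, no 32-bit limit.
	 */
	decode_vmx_basic(0x0058100000000001ULL);
	return 0;
}

With this patch, such a guest would read VMCS12_REVISION as the revision
identifier and sizeof(struct vmcs12) as the region size, while the
physical-address-width, memory-type and INS/OUTS bits are passed through from
the host's MSR_IA32_VMX_BASIC, as the comment in vmx_get_vmx_msr() explains.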