From patchwork Sun May 8 08:24:27 2011
X-Patchwork-Submitter: Nadav Har'El
X-Patchwork-Id: 765242
Date: Sun, 8 May 2011 11:24:27 +0300
Message-Id: <201105080824.p488ORoF018251@rice.haifa.ibm.com>
From: "Nadav Har'El"
To: kvm@vger.kernel.org
Cc: gleb@redhat.com, avi@redhat.com
References: <1304842511-nyh@il.ibm.com>
Subject: [PATCH 18/30] nVMX: Implement VMLAUNCH and VMRESUME

Implement the VMLAUNCH and VMRESUME instructions, allowing a guest
hypervisor to run its own guests.
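
[Editor's note -- not part of the patch: for context, the sketch below shows
roughly what an L1 guest hypervisor executes to reach the handlers added
below. On a nested setup both instructions trap to L0 (KVM) rather than
entering L2 directly. The function name and the BUG_ON() error handling are
illustrative assumptions, not code from this series.]

#include <linux/bug.h>
#include <linux/types.h>

/* Illustrative sketch only: how an L1 hypervisor enters its own (L2) guest. */
static void l1_enter_l2(bool already_launched)
{
	u8 fail = 0;

	/*
	 * VMRESUME is used once this VMCS has already been launched,
	 * VMLAUNCH otherwise -- the same launch_state distinction that
	 * nested_vmx_run() below checks when emulating the instructions.
	 */
	if (already_launched)
		asm volatile("vmresume; setbe %0"
			     : "=qm"(fail) : : "cc", "memory");
	else
		asm volatile("vmlaunch; setbe %0"
			     : "=qm"(fail) : : "cc", "memory");

	/*
	 * Reached only on VMfailInvalid/VMfailValid (CF or ZF set, folded
	 * into one byte by setbe).  On success the CPU is now running L2,
	 * and later VM exits resume at the HOST_RIP programmed by L1.
	 */
	BUG_ON(fail);
}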
Signed-off-by: Nadav Har'El
---
 arch/x86/kvm/vmx.c |  139 ++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 137 insertions(+), 2 deletions(-)

--- .before/arch/x86/kvm/vmx.c	2011-05-08 10:43:20.000000000 +0300
+++ .after/arch/x86/kvm/vmx.c	2011-05-08 10:43:20.000000000 +0300
@@ -346,6 +346,9 @@ struct nested_vmx {
 	/* vmcs02_list cache of VMCSs recently used to run L2 guests */
 	struct list_head vmcs02_pool;
 	int vmcs02_num;
+
+	/* Saving the VMCS that we used for running L1 */
+	struct saved_vmcs saved_vmcs01;
 	u64 vmcs01_tsc_offset;
 	/*
 	 * Guest pages referred to in vmcs02 with host-physical pointers, so
@@ -4880,6 +4883,21 @@ static int handle_vmclear(struct kvm_vcp
 	return 1;
 }
 
+static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch);
+
+/* Emulate the VMLAUNCH instruction */
+static int handle_vmlaunch(struct kvm_vcpu *vcpu)
+{
+	return nested_vmx_run(vcpu, true);
+}
+
+/* Emulate the VMRESUME instruction */
+static int handle_vmresume(struct kvm_vcpu *vcpu)
+{
+
+	return nested_vmx_run(vcpu, false);
+}
+
 enum vmcs_field_type {
 	VMCS_FIELD_TYPE_U16 = 0,
 	VMCS_FIELD_TYPE_U64 = 1,
@@ -5160,11 +5178,11 @@ static int (*kvm_vmx_exit_handlers[])(st
 	[EXIT_REASON_INVLPG]                  = handle_invlpg,
 	[EXIT_REASON_VMCALL]                  = handle_vmcall,
 	[EXIT_REASON_VMCLEAR]                 = handle_vmclear,
-	[EXIT_REASON_VMLAUNCH]                = handle_vmx_insn,
+	[EXIT_REASON_VMLAUNCH]                = handle_vmlaunch,
 	[EXIT_REASON_VMPTRLD]                 = handle_vmptrld,
 	[EXIT_REASON_VMPTRST]                 = handle_vmptrst,
 	[EXIT_REASON_VMREAD]                  = handle_vmread,
-	[EXIT_REASON_VMRESUME]                = handle_vmx_insn,
+	[EXIT_REASON_VMRESUME]                = handle_vmresume,
 	[EXIT_REASON_VMWRITE]                 = handle_vmwrite,
 	[EXIT_REASON_VMOFF]                   = handle_vmoff,
 	[EXIT_REASON_VMON]                    = handle_vmon,
@@ -6021,6 +6039,123 @@ static int prepare_vmcs02(struct kvm_vcp
 	return 0;
 }
 
+/*
+ * nested_vmx_run() handles a nested entry, i.e., a VMLAUNCH or VMRESUME on L1
+ * for running an L2 nested guest.
+ */
+static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch)
+{
+	struct vmcs12 *vmcs12;
+	struct vcpu_vmx *vmx = to_vmx(vcpu);
+	int cpu;
+	struct saved_vmcs *saved_vmcs02;
+	u32 low, high;
+
+	if (!nested_vmx_check_permission(vcpu))
+		return 1;
+	skip_emulated_instruction(vcpu);
+
+	/*
+	 * The nested entry process starts with enforcing various prerequisites
+	 * on vmcs12 as required by the Intel SDM, and act appropriately when
+	 * they fail: As the SDM explains, some conditions should cause the
+	 * instruction to fail, while others will cause the instruction to seem
+	 * to succeed, but return an EXIT_REASON_INVALID_STATE.
+	 * To speed up the normal (success) code path, we should avoid checking
+	 * for misconfigurations which will anyway be caught by the processor
+	 * when using the merged vmcs02.
+	 */
+
+	vmcs12 = get_vmcs12(vcpu);
+	if (vmcs12->launch_state == launch) {
+		nested_vmx_failValid(vcpu,
+			launch ? VMXERR_VMLAUNCH_NONCLEAR_VMCS
+			       : VMXERR_VMRESUME_NONLAUNCHED_VMCS);
+		return 1;
+	}
+
+	if (vmcs12->guest_interruptibility_info & GUEST_INTR_STATE_MOV_SS) {
+		nested_vmx_failValid(vcpu,
+				     VMXERR_ENTRY_EVENTS_BLOCKED_BY_MOV_SS);
+		return 1;
+	}
+
+	if ((vmcs12->cpu_based_vm_exec_control & CPU_BASED_USE_MSR_BITMAPS) &&
+			!IS_ALIGNED(vmcs12->msr_bitmap, PAGE_SIZE)) {
+		/*TODO: Also verify bits beyond physical address width are 0*/
+		nested_vmx_failValid(vcpu, VMXERR_ENTRY_INVALID_CONTROL_FIELD);
+		return 1;
+	}
+
+	if (vmcs12->vm_entry_msr_load_count > 0 ||
+	    vmcs12->vm_exit_msr_load_count > 0 ||
+	    vmcs12->vm_exit_msr_store_count > 0) {
+		if (printk_ratelimit())
+			printk(KERN_WARNING
+			  "%s: VMCS MSR_{LOAD,STORE} unsupported\n", __func__);
+		nested_vmx_failValid(vcpu, VMXERR_ENTRY_INVALID_CONTROL_FIELD);
+		return 1;
+	}
+
+	nested_vmx_pinbased_ctls(&low, &high);
+	if (!vmx_control_verify(vmcs12->pin_based_vm_exec_control, low, high)) {
+		nested_vmx_failValid(vcpu, VMXERR_ENTRY_INVALID_CONTROL_FIELD);
+		return 1;
+	}
+
+	if (((vmcs12->host_cr0 & VMXON_CR0_ALWAYSON) != VMXON_CR0_ALWAYSON) ||
+	    ((vmcs12->host_cr4 & VMXON_CR4_ALWAYSON) != VMXON_CR4_ALWAYSON)) {
+		nested_vmx_failValid(vcpu,
+				     VMXERR_ENTRY_INVALID_HOST_STATE_FIELD);
+		return 1;
+	}
+
+	/*
+	 * We're finally done with prerequisite checking, and can start with
+	 * the nested entry.
+	 */
+
+	enter_guest_mode(vcpu);
+
+	vmx->nested.vmcs01_tsc_offset = vmcs_read64(TSC_OFFSET);
+
+	/*
+	 * Switch from L1's VMCS (vmcs01), to L2's VMCS (vmcs02). Remember
+	 * vmcs01, on which CPU it was last loaded, and whether it was launched
+	 * (we need all these values next time we will use L1). Then recall
+	 * these values from the last time vmcs02 was used.
+	 */
+	saved_vmcs02 = nested_get_current_vmcs02(vmx);
+	if (!saved_vmcs02)
+		return -ENOMEM;
+
+	cpu = get_cpu();
+	vmx->nested.saved_vmcs01.vmcs = vmx->vmcs;
+	vmx->nested.saved_vmcs01.cpu = vcpu->cpu;
+	vmx->nested.saved_vmcs01.launched = vmx->launched;
+
+	vmx->vmcs = saved_vmcs02->vmcs;
+	vcpu->cpu = saved_vmcs02->cpu;
+	vmx->launched = saved_vmcs02->launched;
+
+	vmx_vcpu_put(vcpu);
+	vmx_vcpu_load(vcpu, cpu);
+	vcpu->cpu = cpu;
+	put_cpu();
+
+	vmcs12->launch_state = 1;
+
+	prepare_vmcs02(vcpu, vmcs12);
+
+	/*
+	 * Note no nested_vmx_succeed or nested_vmx_fail here. At this point
+	 * we are no longer running L1, and VMLAUNCH/VMRESUME has not yet
+	 * returned as far as L1 is concerned. It will only return (and set
+	 * the success flag) when L2 exits (see nested_vmx_vmexit()).
+	 */
+	return 1;
+}
+
 static int vmx_check_intercept(struct kvm_vcpu *vcpu,
 			       struct x86_instruction_info *info,
 			       enum x86_intercept_stage stage)
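
[Editor's note -- not part of the patch: the vmcs01/vmcs02 switch in
nested_vmx_run() above saves three pieces of state for vmcs01 (VMCS pointer,
last CPU, launched flag) and recalls the same three for vmcs02. A rough sketch
of the symmetric restore that the nested VM exit path must perform is shown
below; the function name, its parameters, and the exact sequence are
assumptions for illustration only -- the real code is added later in this
series as nested_vmx_vmexit().]

/*
 * Illustrative sketch only, not code from this patch: the mirror image of
 * the switch done in nested_vmx_run(), as the nested VM exit path would
 * have to perform it.  Names other than the vmx/vcpu fields used above
 * are assumptions.
 */
static void sketch_switch_back_to_vmcs01(struct kvm_vcpu *vcpu,
					 struct saved_vmcs *saved_vmcs02)
{
	struct vcpu_vmx *vmx = to_vmx(vcpu);
	int cpu;

	leave_guest_mode(vcpu);

	cpu = get_cpu();

	/* remember vmcs02's state for the next nested entry */
	saved_vmcs02->vmcs = vmx->vmcs;
	saved_vmcs02->cpu = vcpu->cpu;
	saved_vmcs02->launched = vmx->launched;

	/* recall the vmcs01 state saved at nested entry time */
	vmx->vmcs = vmx->nested.saved_vmcs01.vmcs;
	vcpu->cpu = vmx->nested.saved_vmcs01.cpu;
	vmx->launched = vmx->nested.saved_vmcs01.launched;

	/* make the recalled vmcs01 current and loaded on this CPU */
	vmx_vcpu_put(vcpu);
	vmx_vcpu_load(vcpu, cpu);
	vcpu->cpu = cpu;
	put_cpu();

	/* restore L1's TSC offset, saved in nested_vmx_run() above */
	vmcs_write64(TSC_OFFSET, vmx->nested.vmcs01_tsc_offset);
}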