From patchwork Wed Dec 8 17:04:03 2010 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Nadav Har'El X-Patchwork-Id: 391112 Received: from vger.kernel.org (vger.kernel.org [209.132.180.67]) by demeter1.kernel.org (8.14.4/8.14.3) with ESMTP id oB8H4REj019538 for ; Wed, 8 Dec 2010 17:04:28 GMT Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1753872Ab0LHREK (ORCPT ); Wed, 8 Dec 2010 12:04:10 -0500 Received: from mtagate4.uk.ibm.com ([194.196.100.164]:34065 "EHLO mtagate4.uk.ibm.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1753741Ab0LHREH (ORCPT ); Wed, 8 Dec 2010 12:04:07 -0500 Received: from d06nrmr1507.portsmouth.uk.ibm.com (d06nrmr1507.portsmouth.uk.ibm.com [9.149.38.233]) by mtagate4.uk.ibm.com (8.13.1/8.13.1) with ESMTP id oB8H45iM017539 for ; Wed, 8 Dec 2010 17:04:05 GMT Received: from d06av06.portsmouth.uk.ibm.com (d06av06.portsmouth.uk.ibm.com [9.149.37.217]) by d06nrmr1507.portsmouth.uk.ibm.com (8.13.8/8.13.8/NCO v10.0) with ESMTP id oB8H47ij2748478 for ; Wed, 8 Dec 2010 17:04:07 GMT Received: from d06av06.portsmouth.uk.ibm.com (loopback [127.0.0.1]) by d06av06.portsmouth.uk.ibm.com (8.14.4/8.13.1/NCO v10.0 AVout) with ESMTP id oB8H453I018644 for ; Wed, 8 Dec 2010 10:04:05 -0700 Received: from rice.haifa.ibm.com (rice.haifa.ibm.com [9.148.8.217]) by d06av06.portsmouth.uk.ibm.com (8.14.4/8.13.1/NCO v10.0 AVin) with ESMTP id oB8H44x4018639 (version=TLSv1/SSLv3 cipher=DHE-RSA-AES256-SHA bits=256 verify=NO); Wed, 8 Dec 2010 10:04:05 -0700 Received: from rice.haifa.ibm.com (lnx-nyh.haifa.ibm.com [127.0.0.1]) by rice.haifa.ibm.com (8.14.4/8.14.4) with ESMTP id oB8H44qW008631; Wed, 8 Dec 2010 19:04:04 +0200 Received: (from nyh@localhost) by rice.haifa.ibm.com (8.14.4/8.14.4/Submit) id oB8H43i2008629; Wed, 8 Dec 2010 19:04:03 +0200 Date: Wed, 8 Dec 2010 19:04:03 +0200 Message-Id: <201012081704.oB8H43i2008629@rice.haifa.ibm.com> X-Authentication-Warning: 
rice.haifa.ibm.com: nyh set sender to "Nadav Har'El" using -f
Cc: gleb@redhat.com, avi@redhat.com
To: kvm@vger.kernel.org
From: "Nadav Har'El"
References: <1291827596-nyh@il.ibm.com>
Subject: [PATCH 08/28] nVMX: Hold a vmcs02 for each vmcs12
Sender: kvm-owner@vger.kernel.org
Precedence: bulk
List-ID:
X-Mailing-List: kvm@vger.kernel.org
X-Greylist: IP, sender and recipient auto-whitelisted, not delayed by milter-greylist-4.2.3 (demeter1.kernel.org [140.211.167.41]); Wed, 08 Dec 2010 17:04:28 +0000 (UTC)

--- .before/arch/x86/kvm/vmx.c	2010-12-08 18:56:49.000000000 +0200
+++ .after/arch/x86/kvm/vmx.c	2010-12-08 18:56:49.000000000 +0200
@@ -155,6 +155,13 @@ struct __packed vmcs12 {
  */
 #define VMCS12_REVISION 0x11e57ed0
 
+/* Entry of nested.vmcs02_list: pairs a vmcs12 address with its vmcs02 */
+struct vmcs_list {
+	struct list_head list;
+	gpa_t vmcs12_addr;
+	struct vmcs *vmcs02;
+};
+
 /*
  * The nested_vmx structure is part of vcpu_vmx, and holds information we need
  * for correct emulation of VMX (i.e., nested VMX) on this vcpu. For example,
@@ -170,6 +177,10 @@ struct nested_vmx {
 	/* The host-usable pointer to the above */
 	struct page *current_vmcs12_page;
 	struct vmcs12 *current_vmcs12;
+
+	/* list of real (hardware) VMCS, one for each L2 guest of L1 */
+	struct list_head vmcs02_list; /* a vmcs_list */
+	int vmcs02_num;
 };
 
 struct vcpu_vmx {
@@ -1736,6 +1747,95 @@ static void free_vmcs(struct vmcs *vmcs)
 	free_pages((unsigned long)vmcs, vmcs_config.order);
 }
 
+/*
+ * Return the vmcs02 held for the current vmcs12 (nested.current_vmptr),
+ * or NULL if vmcs02_list has no entry for it.
+ */
+static struct vmcs *nested_get_current_vmcs(struct kvm_vcpu *vcpu)
+{
+	struct vcpu_vmx *vmx = to_vmx(vcpu);
+	struct vmcs_list *list_item;
+
+	list_for_each_entry(list_item, &vmx->nested.vmcs02_list, list)
+		if (list_item->vmcs12_addr == vmx->nested.current_vmptr)
+			return list_item->vmcs02;
+
+	return NULL;
+}
+
+/* Upper bound on the number of vmcs02s held per vcpu */
+#define NESTED_MAX_VMCS 256
+
+/*
+ * Allocate an L0 VMCS (vmcs02) for the current L1 VMCS (vmcs12), if one
+ * does not already exist. The allocation is done in L0 memory, so to avoid
+ * denial-of-service attack by guests, we limit the number of concurrently-
+ * allocated vmcs02s. A well-behaving L1 will VMCLEAR unused vmcs12s and not
+ * trigger this limit.
+ *
+ * Returns 0 on success (including when a vmcs02 already exists), or -ENOMEM
+ * if the limit was reached or an allocation failed.
+ */
+static int nested_create_current_vmcs(struct kvm_vcpu *vcpu)
+{
+	struct vcpu_vmx *vmx = to_vmx(vcpu);
+	struct vmcs_list *new_l2_guest;
+	struct vmcs *vmcs02;
+
+	if (nested_get_current_vmcs(vcpu))
+		return 0; /* nothing to do - we already have a VMCS */
+
+	if (vmx->nested.vmcs02_num >= NESTED_MAX_VMCS)
+		return -ENOMEM;
+
+	new_l2_guest = kmalloc(sizeof(*new_l2_guest), GFP_KERNEL);
+	if (!new_l2_guest)
+		return -ENOMEM;
+
+	vmcs02 = alloc_vmcs();
+	if (!vmcs02) {
+		kfree(new_l2_guest);
+		return -ENOMEM;
+	}
+
+	new_l2_guest->vmcs12_addr = vmx->nested.current_vmptr;
+	new_l2_guest->vmcs02 = vmcs02;
+	list_add(&new_l2_guest->list, &vmx->nested.vmcs02_list);
+	vmx->nested.vmcs02_num++;
+	return 0;
+}
+
+/* Free a vmcs12's associated vmcs02, and remove it from vmcs02_list */
+static void nested_free_vmcs(struct kvm_vcpu *vcpu, gpa_t vmptr)
+{
+	struct vcpu_vmx *vmx = to_vmx(vcpu);
+	struct vmcs_list *list_item, *n;
+
+	list_for_each_entry_safe(list_item, n, &vmx->nested.vmcs02_list, list)
+		if (list_item->vmcs12_addr == vmptr) {
+			free_vmcs(list_item->vmcs02);
+			list_del(&list_item->list);
+			kfree(list_item);
+			vmx->nested.vmcs02_num--;
+			return;
+		}
+}
+
+/* Free every vmcs02 on vmcs02_list and reset vmcs02_num to zero */
+static void free_l1_state(struct kvm_vcpu *vcpu)
+{
+	struct vcpu_vmx *vmx = to_vmx(vcpu);
+	struct vmcs_list *list_item, *n;
+
+	list_for_each_entry_safe(list_item, n,
+			&vmx->nested.vmcs02_list, list) {
+		free_vmcs(list_item->vmcs02);
+		list_del(&list_item->list);
+		kfree(list_item);
+	}
+	vmx->nested.vmcs02_num = 0;
+}
+
 static void free_kvm_area(void)
 {
 	int cpu;
@@ -3884,6 +3984,9 @@ static int handle_vmon(struct kvm_vcpu *
 		return 1;
 	}
 
+	INIT_LIST_HEAD(&(vmx->nested.vmcs02_list));
+	vmx->nested.vmcs02_num = 0;
+
 	vmx->nested.vmxon = true;
 
 	skip_emulated_instruction(vcpu);
@@ -3931,6 +4034,8 @@ static int handle_vmoff(struct kvm_vcpu
 	if (to_vmx(vcpu)->nested.current_vmptr != -1ull)
 		nested_release_page(to_vmx(vcpu)->nested.current_vmcs12_page);
 
+	free_l1_state(vcpu);
+
 	skip_emulated_instruction(vcpu);
 	return 1;
 }
@@ -4420,6 +4525,8 @@ static void vmx_free_vcpu(struct kvm_vcp
 	free_vpid(vmx);
 	if (vmx->nested.vmxon && to_vmx(vcpu)->nested.current_vmptr != -1ull)
 		nested_release_page(to_vmx(vcpu)->nested.current_vmcs12_page);
+	if (vmx->nested.vmxon)
+		free_l1_state(vcpu);
 	vmx_free_vmcs(vcpu);
 	kfree(vmx->guest_msrs);
 	kvm_vcpu_uninit(vcpu);