From patchwork Sun Jun 13 12:29:13 2010
X-Patchwork-Submitter: Nadav Har'El
X-Patchwork-Id: 105791
Date: Sun, 13 Jun 2010 15:29:13 +0300
Message-Id: <201006131229.o5DCTDhk013030@rice.haifa.ibm.com>
From: "Nadav Har'El"
To: avi@redhat.com
Cc: kvm@vger.kernel.org
References: <1276431753-nyh@il.ibm.com>
Subject: [PATCH 13/24] Implement VMREAD and VMWRITE

--- .before/arch/x86/kvm/vmx.c	2010-06-13 15:01:29.000000000 +0300
+++ .after/arch/x86/kvm/vmx.c	2010-06-13 15:01:29.000000000 +0300
@@ -299,6 +299,42 @@ struct nested_vmx {
 	int l2_vmcs_num;
 };
 
+enum vmcs_field_type {
+	VMCS_FIELD_TYPE_U16 = 0,
+	VMCS_FIELD_TYPE_U64 = 1,
+	VMCS_FIELD_TYPE_U32 = 2,
+	VMCS_FIELD_TYPE_ULONG = 3
+};
+
+#define VMCS_FIELD_LENGTH_OFFSET 13
+#define VMCS_FIELD_LENGTH_MASK 0x6000
+
+static inline int vmcs_field_type(unsigned long field)
+{
+	if (0x1 & field)	/* one of the *_HIGH fields, all are 32 bit */
+		return VMCS_FIELD_TYPE_U32;
+	return (VMCS_FIELD_LENGTH_MASK & field) >> VMCS_FIELD_LENGTH_OFFSET;
+}
+
+static inline int vmcs_field_size(int field_type, struct kvm_vcpu *vcpu)
+{
+	switch (field_type) {
+	case VMCS_FIELD_TYPE_U16:
+		return 2;
+	case VMCS_FIELD_TYPE_U32:
+		return 4;
+	case VMCS_FIELD_TYPE_U64:
+		return 8;
+	case VMCS_FIELD_TYPE_ULONG:
+#ifdef CONFIG_X86_64
+		if (is_long_mode(vcpu))
+			return 8;
+#endif
+		return 4;
+	}
+	return 0;	/* should never happen */
+}
+
 struct vcpu_vmx {
 	struct kvm_vcpu vcpu;
 	struct list_head local_vcpus_link;
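
The decoding above follows the architectural VMCS field encoding: bit 0
set marks a *_HIGH access to the upper half of a 64-bit field (always a
32-bit access), and bits 14:13 give the field width. A minimal
standalone sketch (not part of the patch; encodings taken from the
SDM's VMCS field tables) of how vmcs_field_type() classifies a few
well-known fields:

	#include <stdio.h>

	enum vmcs_field_type {
		VMCS_FIELD_TYPE_U16 = 0,
		VMCS_FIELD_TYPE_U64 = 1,
		VMCS_FIELD_TYPE_U32 = 2,
		VMCS_FIELD_TYPE_ULONG = 3
	};

	#define VMCS_FIELD_LENGTH_OFFSET 13
	#define VMCS_FIELD_LENGTH_MASK 0x6000

	static int vmcs_field_type(unsigned long field)
	{
		if (0x1 & field)	/* a *_HIGH field */
			return VMCS_FIELD_TYPE_U32;
		return (VMCS_FIELD_LENGTH_MASK & field) >> VMCS_FIELD_LENGTH_OFFSET;
	}

	int main(void)
	{
		printf("%d\n", vmcs_field_type(0x0800)); /* GUEST_ES_SELECTOR -> 0 (u16) */
		printf("%d\n", vmcs_field_type(0x2000)); /* IO_BITMAP_A -> 1 (u64) */
		printf("%d\n", vmcs_field_type(0x2001)); /* IO_BITMAP_A_HIGH -> 2 (u32) */
		printf("%d\n", vmcs_field_type(0x4000)); /* PIN_BASED_VM_EXEC_CONTROL -> 2 (u32) */
		printf("%d\n", vmcs_field_type(0x6800)); /* GUEST_CR0 -> 3 (natural width) */
		return 0;
	}
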
@@ -4184,6 +4220,189 @@ static int handle_vmclear(struct kvm_vcp
 	return 1;
 }
 
+static inline bool nested_vmcs_read_any(struct kvm_vcpu *vcpu,
+					unsigned long field, u64 *ret)
+{
+	short offset = vmcs_field_to_offset(field);
+	char *p;
+
+	if (offset < 0)
+		return 0;
+	if (!to_vmx(vcpu)->nested.current_l2_page)
+		return 0;
+
+	p = ((char *)(get_shadow_vmcs(vcpu))) + offset;
+
+	switch (vmcs_field_type(field)) {
+	case VMCS_FIELD_TYPE_ULONG:
+		*ret = *((unsigned long *)p);
+		return 1;
+	case VMCS_FIELD_TYPE_U16:
+		*ret = (u16) *((unsigned long *)p);
+		return 1;
+	case VMCS_FIELD_TYPE_U32:
+		*ret = (u32) *((unsigned long *)p);
+		return 1;
+	case VMCS_FIELD_TYPE_U64:
+		*ret = *((u64 *)p);
+		return 1;
+	default:
+		return 0;	/* can never happen. */
+	}
+}
+
+static int handle_vmread_reg(struct kvm_vcpu *vcpu, int reg,
+			     unsigned long field)
+{
+	u64 field_value;
+
+	if (!nested_vmcs_read_any(vcpu, field, &field_value))
+		return 0;
+
+#ifdef CONFIG_X86_64
+	switch (vmcs_field_type(field)) {
+	case VMCS_FIELD_TYPE_U64:
+	case VMCS_FIELD_TYPE_ULONG:
+		if (!is_long_mode(vcpu)) {
+			kvm_register_write(vcpu, reg + 1, field_value >> 32);
+			field_value = (u32)field_value;
+		}
+	}
+#endif
+	kvm_register_write(vcpu, reg, field_value);
+	return 1;
+}
+
+static int handle_vmread_mem(struct kvm_vcpu *vcpu, gva_t gva,
+			     unsigned long field)
+{
+	u64 field_value;
+
+	if (!nested_vmcs_read_any(vcpu, field, &field_value))
+		return 0;
+
+	/* It's ok to use *_system, because handle_vmread verifies cpl=0 */
+	kvm_write_guest_virt_system(gva, &field_value,
+			vmcs_field_size(vmcs_field_type(field), vcpu),
+			vcpu, NULL);
+	return 1;
+}
+
+static int handle_vmread(struct kvm_vcpu *vcpu)
+{
+	unsigned long field;
+	unsigned long exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
+	u32 vmx_instruction_info = vmcs_read32(VMX_INSTRUCTION_INFO);
+	gva_t gva = 0;
+	int read_succeed;
+
+	if (!nested_vmx_check_permission(vcpu))
+		return 1;
+
+	if (!nested_map_current(vcpu)) {
+		printk(KERN_INFO "%s invalid shadow vmcs\n", __func__);
+		set_rflags_to_vmx_fail_invalid(vcpu);
+		return 1;
+	}
+
+	/*
+	 * Decode instruction info to get the field to read and where to
+	 * store its value.
+	 */
+	field = kvm_register_read(vcpu,
+			VMX_OPERAND_REG2(vmx_instruction_info));
+	if (VMX_OPERAND_IS_REG(vmx_instruction_info)) {
+		read_succeed = handle_vmread_reg(vcpu,
+			VMX_OPERAND_REG(vmx_instruction_info), field);
+	} else {
+		gva = get_vmx_mem_address(vcpu, exit_qualification,
+					  vmx_instruction_info);
+		if (gva == 0) {
+			/* Don't leak the shadow vmcs mapping on failure */
+			nested_unmap_current(vcpu);
+			return 1;
+		}
+		read_succeed = handle_vmread_mem(vcpu, gva, field);
+	}
+
+	if (read_succeed) {
+		clear_rflags_cf_zf(vcpu);
+		skip_emulated_instruction(vcpu);
+	} else {
+		set_rflags_to_vmx_fail_valid(vcpu);
+		vmcs_write32(VM_INSTRUCTION_ERROR, 12);
+	}
+
+	nested_unmap_current(vcpu);
+	return 1;
+}
+
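
The VMX_OPERAND_* helpers used above are defined in an earlier patch of
this series, not here. Assuming they follow the SDM's VM-exit
instruction-information layout for VMREAD/VMWRITE, they would decode
roughly as follows (a sketch of the assumed semantics, not the series'
actual definitions):

	/* Per the SDM's instruction-information layout for VMREAD/VMWRITE:
	 *   bit 10     - 0: memory operand, 1: register operand
	 *   bits 6:3   - reg1 (the non-field-encoding register operand)
	 *   bits 31:28 - reg2 (the register holding the VMCS field encoding)
	 */
	#define VMX_OPERAND_IS_REG(info)	((info) & (1u << 10))
	#define VMX_OPERAND_REG(info)		(((info) >> 3) & 0xf)
	#define VMX_OPERAND_REG2(info)		(((info) >> 28) & 0xf)
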
+static int handle_vmwrite(struct kvm_vcpu *vcpu)
+{
+	unsigned long field;
+	u64 field_value = 0;
+	gva_t gva;
+	int field_type;
+	unsigned long exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
+	u32 vmx_instruction_info = vmcs_read32(VMX_INSTRUCTION_INFO);
+	char *p;
+	short offset;
+
+	if (!nested_vmx_check_permission(vcpu))
+		return 1;
+
+	if (!nested_map_current(vcpu)) {
+		printk(KERN_INFO "%s invalid shadow vmcs\n", __func__);
+		set_rflags_to_vmx_fail_invalid(vcpu);
+		return 1;
+	}
+
+	field = kvm_register_read(vcpu,
+			VMX_OPERAND_REG2(vmx_instruction_info));
+	field_type = vmcs_field_type(field);
+
+	offset = vmcs_field_to_offset(field);
+	if (offset < 0) {
+		/*
+		 * Unsupported field: fail as handle_vmread does, and don't
+		 * leak the shadow vmcs mapping.
+		 */
+		set_rflags_to_vmx_fail_valid(vcpu);
+		vmcs_write32(VM_INSTRUCTION_ERROR, 12);
+		nested_unmap_current(vcpu);
+		return 1;
+	}
+	p = ((char *)get_shadow_vmcs(vcpu)) + offset;
+
+	if (VMX_OPERAND_IS_REG(vmx_instruction_info)) {
+		field_value = kvm_register_read(vcpu,
+			VMX_OPERAND_REG(vmx_instruction_info));
+	} else {
+		gva = get_vmx_mem_address(vcpu, exit_qualification,
+					  vmx_instruction_info);
+		if (gva == 0) {
+			nested_unmap_current(vcpu);
+			return 1;
+		}
+		kvm_read_guest_virt(gva, &field_value,
+				vmcs_field_size(field_type, vcpu), vcpu, NULL);
+	}
+
+	switch (field_type) {
+	case VMCS_FIELD_TYPE_U16:
+		*(u16 *)p = field_value;
+		break;
+	case VMCS_FIELD_TYPE_U32:
+		*(u32 *)p = field_value;
+		break;
+	case VMCS_FIELD_TYPE_U64:
+#ifdef CONFIG_X86_64
+		*(unsigned long *)p = field_value;
+#else
+		*(unsigned long *)p = field_value;
+		*(((unsigned long *)p) + 1) = field_value >> 32;
+#endif
+		break;
+	case VMCS_FIELD_TYPE_ULONG:
+		*(unsigned long *)p = field_value;
+		break;
+	default:
+		printk(KERN_INFO "%s invalid field\n", __func__);
+		set_rflags_to_vmx_fail_valid(vcpu);
+		vmcs_write32(VM_INSTRUCTION_ERROR, 12);
+		nested_unmap_current(vcpu);
+		return 1;
+	}
+
+	clear_rflags_cf_zf(vcpu);
+	skip_emulated_instruction(vcpu);
+	nested_unmap_current(vcpu);
+	return 1;
+}
+
 static bool verify_vmcs12_revision(struct kvm_vcpu *vcpu, gpa_t guest_vmcs_addr)
 {
 	bool ret;
@@ -4548,9 +4767,9 @@ static int (*kvm_vmx_exit_handlers[])(st
 	[EXIT_REASON_VMLAUNCH]                = handle_vmx_insn,
 	[EXIT_REASON_VMPTRLD]                 = handle_vmptrld,
 	[EXIT_REASON_VMPTRST]                 = handle_vmptrst,
-	[EXIT_REASON_VMREAD]                  = handle_vmx_insn,
+	[EXIT_REASON_VMREAD]                  = handle_vmread,
 	[EXIT_REASON_VMRESUME]                = handle_vmx_insn,
-	[EXIT_REASON_VMWRITE]                 = handle_vmx_insn,
+	[EXIT_REASON_VMWRITE]                 = handle_vmwrite,
 	[EXIT_REASON_VMOFF]                   = handle_vmoff,
 	[EXIT_REASON_VMON]                    = handle_vmon,
 	[EXIT_REASON_TPR_BELOW_THRESHOLD]     = handle_tpr_below_threshold,
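
With the two handlers wired into kvm_vmx_exit_handlers, VMREAD and
VMWRITE executed by L1 now exit to L0 and are emulated against the
shadow VMCS instead of being handled as generic VMX instructions. For
illustration only, roughly what an L1 hypervisor would execute to land
in handle_vmread() (a sketch, not code from this series; 0x6800 is
GUEST_CR0's encoding per the SDM):

	/* L1-side sketch: VMREAD of the natural-width GUEST_CR0 field.
	 * VMfail is reported through RFLAGS.CF/ZF, hence the "cc" clobber.
	 */
	static inline unsigned long l1_read_guest_cr0(void)
	{
		unsigned long value, field = 0x6800;	/* GUEST_CR0 */

		/* AT&T operand order: the source register holds the
		 * field encoding, the destination receives the value. */
		asm volatile("vmread %1, %0"
			     : "=r" (value) : "r" (field) : "cc");
		return value;
	}
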