@@ -820,6 +820,28 @@ static inline bool report_flexpriority(v
return flexpriority_enabled;
}
+/*
+ * Report whether TPR shadowing applies to the nested guest: the CPU must
+ * support it (cpu_has_vmx_tpr_shadow()) and CPU_BASED_TPR_SHADOW must be set
+ * in the cpu_based_vm_exec_control of the fields returned by
+ * get_vmcs12_fields(). The vmcs12 fields are not consulted at all when the
+ * CPU lacks the feature.
+ */
+static inline bool nested_cpu_has_vmx_tpr_shadow(struct kvm_vcpu *vcpu)
+{
+	if (!cpu_has_vmx_tpr_shadow())
+		return false;
+
+	return (get_vmcs12_fields(vcpu)->cpu_based_vm_exec_control &
+		CPU_BASED_TPR_SHADOW) != 0;
+}
+
+/*
+ * Report whether secondary execution controls apply to the nested guest: the
+ * CPU must support them (cpu_has_secondary_exec_ctrls()) and
+ * CPU_BASED_ACTIVATE_SECONDARY_CONTROLS must be set in the
+ * cpu_based_vm_exec_control of the fields returned by get_vmcs12_fields().
+ * The vmcs12 fields are not consulted at all when the CPU lacks the feature.
+ */
+static inline bool nested_cpu_has_secondary_exec_ctrls(struct kvm_vcpu *vcpu)
+{
+	if (!cpu_has_secondary_exec_ctrls())
+		return false;
+
+	return (get_vmcs12_fields(vcpu)->cpu_based_vm_exec_control &
+		CPU_BASED_ACTIVATE_SECONDARY_CONTROLS) != 0;
+}
+
+/*
+ * Report whether APIC-access virtualization was requested for the nested
+ * guest: secondary execution controls must be in effect (see
+ * nested_cpu_has_secondary_exec_ctrls()) and
+ * SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES must be set in the
+ * secondary_vm_exec_control of the fields returned by get_vmcs12_fields().
+ */
+static inline bool
+nested_vm_need_virtualize_apic_accesses(struct kvm_vcpu *vcpu)
+{
+	if (!nested_cpu_has_secondary_exec_ctrls(vcpu))
+		return false;
+
+	return (get_vmcs12_fields(vcpu)->secondary_vm_exec_control &
+		SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES) != 0;
+}
+
static int __find_msr_index(struct vcpu_vmx *vmx, u32 msr)
{
int i;
@@ -5529,6 +5551,394 @@ static void vmx_set_supported_cpuid(u32
{
}
+/*
+ * Make a copy of the current VMCS to ordinary memory. This is needed because
+ * in VMX you cannot read and write to two VMCS at the same time, so when we
+ * want to do this (in prepare_vmcs02, which needs to read from vmcs01 while
+ * preparing vmcs02), we need to first save a copy of one VMCS's fields in
+ * memory, and then use that copy.
+ *
+ * @dst: structure receiving the values. Each field is fetched with the
+ * vmcs_read16/32/64/l accessor and stored in the member of the same name.
+ *
+ * Most fields are read unconditionally. The following are read only when
+ * the stated condition holds and are otherwise left untouched in @dst:
+ * - guest_ia32_pat: vmcs_config.vmentry_ctrl has VM_ENTRY_LOAD_IA32_PAT
+ * - host_ia32_pat: vmcs_config.vmexit_ctrl has VM_EXIT_LOAD_IA32_PAT
+ * - ept_pointer, guest_pdptr0..3: enable_ept
+ * - virtual_processor_id: enable_vpid, and SECONDARY_EXEC_ENABLE_VPID is
+ * set in the secondary_vm_exec_control value that was just read
+ *
+ * NOTE(review): SECONDARY_VM_EXEC_CONTROL, VIRTUAL_APIC_PAGE_ADDR,
+ * APIC_ACCESS_ADDR and GUEST_PHYSICAL_ADDRESS are read without first checking
+ * that the CPU supports the corresponding feature -- confirm that reading
+ * them is safe (and the stored value harmless) on hardware lacking it.
+ */
+void save_vmcs(struct vmcs_fields *dst)
+{
+ dst->guest_es_selector = vmcs_read16(GUEST_ES_SELECTOR);
+ dst->guest_cs_selector = vmcs_read16(GUEST_CS_SELECTOR);
+ dst->guest_ss_selector = vmcs_read16(GUEST_SS_SELECTOR);
+ dst->guest_ds_selector = vmcs_read16(GUEST_DS_SELECTOR);
+ dst->guest_fs_selector = vmcs_read16(GUEST_FS_SELECTOR);
+ dst->guest_gs_selector = vmcs_read16(GUEST_GS_SELECTOR);
+ dst->guest_ldtr_selector = vmcs_read16(GUEST_LDTR_SELECTOR);
+ dst->guest_tr_selector = vmcs_read16(GUEST_TR_SELECTOR);
+ dst->host_es_selector = vmcs_read16(HOST_ES_SELECTOR);
+ dst->host_cs_selector = vmcs_read16(HOST_CS_SELECTOR);
+ dst->host_ss_selector = vmcs_read16(HOST_SS_SELECTOR);
+ dst->host_ds_selector = vmcs_read16(HOST_DS_SELECTOR);
+ dst->host_fs_selector = vmcs_read16(HOST_FS_SELECTOR);
+ dst->host_gs_selector = vmcs_read16(HOST_GS_SELECTOR);
+ dst->host_tr_selector = vmcs_read16(HOST_TR_SELECTOR);
+ dst->tsc_offset = vmcs_read64(TSC_OFFSET);
+ dst->virtual_apic_page_addr = vmcs_read64(VIRTUAL_APIC_PAGE_ADDR);
+ dst->apic_access_addr = vmcs_read64(APIC_ACCESS_ADDR);
+ dst->guest_physical_address = vmcs_read64(GUEST_PHYSICAL_ADDRESS);
+ dst->vmcs_link_pointer = vmcs_read64(VMCS_LINK_POINTER);
+ dst->guest_ia32_debugctl = vmcs_read64(GUEST_IA32_DEBUGCTL);
+ if (vmcs_config.vmentry_ctrl & VM_ENTRY_LOAD_IA32_PAT)
+ dst->guest_ia32_pat = vmcs_read64(GUEST_IA32_PAT);
+ if (enable_ept) {
+ /* shadow page tables on EPT */
+ dst->ept_pointer = vmcs_read64(EPT_POINTER);
+ dst->guest_pdptr0 = vmcs_read64(GUEST_PDPTR0);
+ dst->guest_pdptr1 = vmcs_read64(GUEST_PDPTR1);
+ dst->guest_pdptr2 = vmcs_read64(GUEST_PDPTR2);
+ dst->guest_pdptr3 = vmcs_read64(GUEST_PDPTR3);
+ }
+ dst->pin_based_vm_exec_control = vmcs_read32(PIN_BASED_VM_EXEC_CONTROL);
+ dst->cpu_based_vm_exec_control = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL);
+ dst->exception_bitmap = vmcs_read32(EXCEPTION_BITMAP);
+ dst->page_fault_error_code_mask =
+ vmcs_read32(PAGE_FAULT_ERROR_CODE_MASK);
+ dst->page_fault_error_code_match =
+ vmcs_read32(PAGE_FAULT_ERROR_CODE_MATCH);
+ dst->cr3_target_count = vmcs_read32(CR3_TARGET_COUNT);
+ dst->vm_exit_controls = vmcs_read32(VM_EXIT_CONTROLS);
+ dst->vm_entry_controls = vmcs_read32(VM_ENTRY_CONTROLS);
+ dst->vm_entry_intr_info_field = vmcs_read32(VM_ENTRY_INTR_INFO_FIELD);
+ dst->vm_entry_exception_error_code =
+ vmcs_read32(VM_ENTRY_EXCEPTION_ERROR_CODE);
+ dst->vm_entry_instruction_len = vmcs_read32(VM_ENTRY_INSTRUCTION_LEN);
+ dst->tpr_threshold = vmcs_read32(TPR_THRESHOLD);
+ dst->secondary_vm_exec_control = vmcs_read32(SECONDARY_VM_EXEC_CONTROL);
+ if (enable_vpid && dst->secondary_vm_exec_control &
+ SECONDARY_EXEC_ENABLE_VPID)
+ dst->virtual_processor_id = vmcs_read16(VIRTUAL_PROCESSOR_ID);
+ dst->vm_instruction_error = vmcs_read32(VM_INSTRUCTION_ERROR);
+ dst->vm_exit_reason = vmcs_read32(VM_EXIT_REASON);
+ dst->vm_exit_intr_info = vmcs_read32(VM_EXIT_INTR_INFO);
+ dst->vm_exit_intr_error_code = vmcs_read32(VM_EXIT_INTR_ERROR_CODE);
+ dst->idt_vectoring_info_field = vmcs_read32(IDT_VECTORING_INFO_FIELD);
+ dst->idt_vectoring_error_code = vmcs_read32(IDT_VECTORING_ERROR_CODE);
+ dst->vm_exit_instruction_len = vmcs_read32(VM_EXIT_INSTRUCTION_LEN);
+ dst->vmx_instruction_info = vmcs_read32(VMX_INSTRUCTION_INFO);
+ dst->guest_es_limit = vmcs_read32(GUEST_ES_LIMIT);
+ dst->guest_cs_limit = vmcs_read32(GUEST_CS_LIMIT);
+ dst->guest_ss_limit = vmcs_read32(GUEST_SS_LIMIT);
+ dst->guest_ds_limit = vmcs_read32(GUEST_DS_LIMIT);
+ dst->guest_fs_limit = vmcs_read32(GUEST_FS_LIMIT);
+ dst->guest_gs_limit = vmcs_read32(GUEST_GS_LIMIT);
+ dst->guest_ldtr_limit = vmcs_read32(GUEST_LDTR_LIMIT);
+ dst->guest_tr_limit = vmcs_read32(GUEST_TR_LIMIT);
+ dst->guest_gdtr_limit = vmcs_read32(GUEST_GDTR_LIMIT);
+ dst->guest_idtr_limit = vmcs_read32(GUEST_IDTR_LIMIT);
+ dst->guest_es_ar_bytes = vmcs_read32(GUEST_ES_AR_BYTES);
+ dst->guest_cs_ar_bytes = vmcs_read32(GUEST_CS_AR_BYTES);
+ dst->guest_ss_ar_bytes = vmcs_read32(GUEST_SS_AR_BYTES);
+ dst->guest_ds_ar_bytes = vmcs_read32(GUEST_DS_AR_BYTES);
+ dst->guest_fs_ar_bytes = vmcs_read32(GUEST_FS_AR_BYTES);
+ dst->guest_gs_ar_bytes = vmcs_read32(GUEST_GS_AR_BYTES);
+ dst->guest_ldtr_ar_bytes = vmcs_read32(GUEST_LDTR_AR_BYTES);
+ dst->guest_tr_ar_bytes = vmcs_read32(GUEST_TR_AR_BYTES);
+ dst->guest_interruptibility_info =
+ vmcs_read32(GUEST_INTERRUPTIBILITY_INFO);
+ dst->guest_activity_state = vmcs_read32(GUEST_ACTIVITY_STATE);
+ dst->guest_sysenter_cs = vmcs_read32(GUEST_SYSENTER_CS);
+ dst->host_ia32_sysenter_cs = vmcs_read32(HOST_IA32_SYSENTER_CS);
+ dst->cr0_guest_host_mask = vmcs_readl(CR0_GUEST_HOST_MASK);
+ dst->cr4_guest_host_mask = vmcs_readl(CR4_GUEST_HOST_MASK);
+ dst->cr0_read_shadow = vmcs_readl(CR0_READ_SHADOW);
+ dst->cr4_read_shadow = vmcs_readl(CR4_READ_SHADOW);
+ dst->cr3_target_value0 = vmcs_readl(CR3_TARGET_VALUE0);
+ dst->cr3_target_value1 = vmcs_readl(CR3_TARGET_VALUE1);
+ dst->cr3_target_value2 = vmcs_readl(CR3_TARGET_VALUE2);
+ dst->cr3_target_value3 = vmcs_readl(CR3_TARGET_VALUE3);
+ dst->exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
+ dst->guest_linear_address = vmcs_readl(GUEST_LINEAR_ADDRESS);
+ dst->guest_cr0 = vmcs_readl(GUEST_CR0);
+ dst->guest_cr3 = vmcs_readl(GUEST_CR3);
+ dst->guest_cr4 = vmcs_readl(GUEST_CR4);
+ dst->guest_es_base = vmcs_readl(GUEST_ES_BASE);
+ dst->guest_cs_base = vmcs_readl(GUEST_CS_BASE);
+ dst->guest_ss_base = vmcs_readl(GUEST_SS_BASE);
+ dst->guest_ds_base = vmcs_readl(GUEST_DS_BASE);
+ dst->guest_fs_base = vmcs_readl(GUEST_FS_BASE);
+ dst->guest_gs_base = vmcs_readl(GUEST_GS_BASE);
+ dst->guest_ldtr_base = vmcs_readl(GUEST_LDTR_BASE);
+ dst->guest_tr_base = vmcs_readl(GUEST_TR_BASE);
+ dst->guest_gdtr_base = vmcs_readl(GUEST_GDTR_BASE);
+ dst->guest_idtr_base = vmcs_readl(GUEST_IDTR_BASE);
+ dst->guest_dr7 = vmcs_readl(GUEST_DR7);
+ dst->guest_rsp = vmcs_readl(GUEST_RSP);
+ dst->guest_rip = vmcs_readl(GUEST_RIP);
+ dst->guest_rflags = vmcs_readl(GUEST_RFLAGS);
+ dst->guest_pending_dbg_exceptions =
+ vmcs_readl(GUEST_PENDING_DBG_EXCEPTIONS);
+ dst->guest_sysenter_esp = vmcs_readl(GUEST_SYSENTER_ESP);
+ dst->guest_sysenter_eip = vmcs_readl(GUEST_SYSENTER_EIP);
+ dst->host_cr0 = vmcs_readl(HOST_CR0);
+ dst->host_cr3 = vmcs_readl(HOST_CR3);
+ dst->host_cr4 = vmcs_readl(HOST_CR4);
+ dst->host_fs_base = vmcs_readl(HOST_FS_BASE);
+ dst->host_gs_base = vmcs_readl(HOST_GS_BASE);
+ dst->host_tr_base = vmcs_readl(HOST_TR_BASE);
+ dst->host_gdtr_base = vmcs_readl(HOST_GDTR_BASE);
+ dst->host_idtr_base = vmcs_readl(HOST_IDTR_BASE);
+ dst->host_ia32_sysenter_esp = vmcs_readl(HOST_IA32_SYSENTER_ESP);
+ dst->host_ia32_sysenter_eip = vmcs_readl(HOST_IA32_SYSENTER_EIP);
+ dst->host_rsp = vmcs_readl(HOST_RSP);
+ dst->host_rip = vmcs_readl(HOST_RIP);
+ if (vmcs_config.vmexit_ctrl & VM_EXIT_LOAD_IA32_PAT)
+ dst->host_ia32_pat = vmcs_read64(HOST_IA32_PAT);
+}
+
+/*
+ * prepare_vmcs02 is called when the L1 guest hypervisor runs its nested
+ * L2 guest. L1 has a vmcs for L2 (vmcs12), and this function "merges" it
+ * with L0's wishes for its guest (vmcs01), so we can run the L2 guest in a
+ * way that will both be appropriate to L1's requests, and our needs.
+ *
+ * @vcpu: the vcpu being switched to L2. Besides being consulted for
+ * fpu_active, its kvm and its vpid, its arch.cr0_guest_owned_bits and
+ * arch.cr4_guest_owned_bits are updated here, and its TLB is flushed
+ * when enable_vpid is set.
+ * @vmcs12: in-memory fields L1 prepared for L2; source of the guest state,
+ * the VM-entry event fields and L1's control bits.
+ * @vmcs01: in-memory copy of L0's VMCS for L1 (see save_vmcs()); source of
+ * the host state and of L0's own control bits.
+ *
+ * All results are written with vmcs_write*(), i.e. to whichever VMCS is
+ * current -- presumably the caller has already made vmcs02 current (confirm).
+ *
+ * Returns 0 on success, or 1 if nested_get_page() fails for the virtual-APIC
+ * page or the APIC-access page named in vmcs12. On failure the fields that
+ * were written before the failing lookup stay written.
+ *
+ * NOTE(review), to be confirmed by someone who knows the callers:
+ * - The pages returned by nested_get_page() are released with
+ * kvm_release_page_clean() right after their physical address is written
+ * to the VMCS, so nothing visible here keeps them pinned while L2 runs.
+ * - CR0_GUEST_HOST_MASK is built from vmcs12's mask and the TS bit only;
+ * vmcs01->cr0_guest_host_mask is not or-ed in, unlike CR4_GUEST_HOST_MASK
+ * and unlike what the comment above that code describes.
+ * - The "u32 exec_control" declared inside the secondary-controls block
+ * shadows the function-scope variable of the same name.
+ * - CPU_BASED_TPR_SHADOW is cleared from vmcs01's controls and may then be
+ * cleared a second time after vmcs12's controls are or-ed in.
+ * - The nested_*() helpers look at get_vmcs12_fields(vcpu) rather than at
+ * the @vmcs12 argument; presumably both refer to the same fields.
+ */
+int prepare_vmcs02(struct kvm_vcpu *vcpu,
+ struct vmcs_fields *vmcs12, struct vmcs_fields *vmcs01)
+{
+ u32 exec_control;
+
+ vmcs_write16(GUEST_ES_SELECTOR, vmcs12->guest_es_selector);
+ vmcs_write16(GUEST_CS_SELECTOR, vmcs12->guest_cs_selector);
+ vmcs_write16(GUEST_SS_SELECTOR, vmcs12->guest_ss_selector);
+ vmcs_write16(GUEST_DS_SELECTOR, vmcs12->guest_ds_selector);
+ vmcs_write16(GUEST_FS_SELECTOR, vmcs12->guest_fs_selector);
+ vmcs_write16(GUEST_GS_SELECTOR, vmcs12->guest_gs_selector);
+ vmcs_write16(GUEST_LDTR_SELECTOR, vmcs12->guest_ldtr_selector);
+ vmcs_write16(GUEST_TR_SELECTOR, vmcs12->guest_tr_selector);
+
+ vmcs_write64(GUEST_IA32_DEBUGCTL, vmcs12->guest_ia32_debugctl);
+
+ if (vmcs_config.vmentry_ctrl & VM_ENTRY_LOAD_IA32_PAT)
+ vmcs_write64(GUEST_IA32_PAT, vmcs12->guest_ia32_pat);
+
+ vmcs_write32(VM_ENTRY_INTR_INFO_FIELD,
+ vmcs12->vm_entry_intr_info_field);
+ vmcs_write32(VM_ENTRY_EXCEPTION_ERROR_CODE,
+ vmcs12->vm_entry_exception_error_code);
+ vmcs_write32(VM_ENTRY_INSTRUCTION_LEN,
+ vmcs12->vm_entry_instruction_len);
+
+ vmcs_write32(GUEST_ES_LIMIT, vmcs12->guest_es_limit);
+ vmcs_write32(GUEST_CS_LIMIT, vmcs12->guest_cs_limit);
+ vmcs_write32(GUEST_SS_LIMIT, vmcs12->guest_ss_limit);
+ vmcs_write32(GUEST_DS_LIMIT, vmcs12->guest_ds_limit);
+ vmcs_write32(GUEST_FS_LIMIT, vmcs12->guest_fs_limit);
+ vmcs_write32(GUEST_GS_LIMIT, vmcs12->guest_gs_limit);
+ vmcs_write32(GUEST_LDTR_LIMIT, vmcs12->guest_ldtr_limit);
+ vmcs_write32(GUEST_TR_LIMIT, vmcs12->guest_tr_limit);
+ vmcs_write32(GUEST_GDTR_LIMIT, vmcs12->guest_gdtr_limit);
+ vmcs_write32(GUEST_IDTR_LIMIT, vmcs12->guest_idtr_limit);
+ vmcs_write32(GUEST_ES_AR_BYTES, vmcs12->guest_es_ar_bytes);
+ vmcs_write32(GUEST_CS_AR_BYTES, vmcs12->guest_cs_ar_bytes);
+ vmcs_write32(GUEST_SS_AR_BYTES, vmcs12->guest_ss_ar_bytes);
+ vmcs_write32(GUEST_DS_AR_BYTES, vmcs12->guest_ds_ar_bytes);
+ vmcs_write32(GUEST_FS_AR_BYTES, vmcs12->guest_fs_ar_bytes);
+ vmcs_write32(GUEST_GS_AR_BYTES, vmcs12->guest_gs_ar_bytes);
+ vmcs_write32(GUEST_LDTR_AR_BYTES, vmcs12->guest_ldtr_ar_bytes);
+ vmcs_write32(GUEST_TR_AR_BYTES, vmcs12->guest_tr_ar_bytes);
+ vmcs_write32(GUEST_INTERRUPTIBILITY_INFO,
+ vmcs12->guest_interruptibility_info);
+ vmcs_write32(GUEST_ACTIVITY_STATE, vmcs12->guest_activity_state);
+ vmcs_write32(GUEST_SYSENTER_CS, vmcs12->guest_sysenter_cs);
+
+ vmcs_writel(GUEST_ES_BASE, vmcs12->guest_es_base);
+ vmcs_writel(GUEST_CS_BASE, vmcs12->guest_cs_base);
+ vmcs_writel(GUEST_SS_BASE, vmcs12->guest_ss_base);
+ vmcs_writel(GUEST_DS_BASE, vmcs12->guest_ds_base);
+ vmcs_writel(GUEST_FS_BASE, vmcs12->guest_fs_base);
+ vmcs_writel(GUEST_GS_BASE, vmcs12->guest_gs_base);
+ vmcs_writel(GUEST_LDTR_BASE, vmcs12->guest_ldtr_base);
+ vmcs_writel(GUEST_TR_BASE, vmcs12->guest_tr_base);
+ vmcs_writel(GUEST_GDTR_BASE, vmcs12->guest_gdtr_base);
+ vmcs_writel(GUEST_IDTR_BASE, vmcs12->guest_idtr_base);
+ vmcs_writel(GUEST_DR7, vmcs12->guest_dr7);
+ vmcs_writel(GUEST_RSP, vmcs12->guest_rsp);
+ vmcs_writel(GUEST_RIP, vmcs12->guest_rip);
+ vmcs_writel(GUEST_RFLAGS, vmcs12->guest_rflags);
+ vmcs_writel(GUEST_PENDING_DBG_EXCEPTIONS,
+ vmcs12->guest_pending_dbg_exceptions);
+ vmcs_writel(GUEST_SYSENTER_ESP, vmcs12->guest_sysenter_esp);
+ vmcs_writel(GUEST_SYSENTER_EIP, vmcs12->guest_sysenter_eip);
+
+ vmcs_write64(VMCS_LINK_POINTER, vmcs12->vmcs_link_pointer);
+
+ if (vmcs12->vm_entry_msr_load_count > 0 ||
+ vmcs12->vm_exit_msr_load_count > 0 ||
+ vmcs12->vm_exit_msr_store_count > 0) {
+ printk(KERN_WARNING
+ "%s: VMCS MSR_{LOAD,STORE} unsupported\n", __func__);
+ }
+
+ if (nested_cpu_has_vmx_tpr_shadow(vcpu)) {
+ struct page *page =
+ nested_get_page(vcpu, vmcs12->virtual_apic_page_addr);
+ if (!page)
+ return 1;
+ vmcs_write64(VIRTUAL_APIC_PAGE_ADDR, page_to_phys(page));
+ kvm_release_page_clean(page);
+ }
+
+ if (nested_vm_need_virtualize_apic_accesses(vcpu)) {
+ struct page *page =
+ nested_get_page(vcpu, vmcs12->apic_access_addr);
+ if (!page)
+ return 1;
+ vmcs_write64(APIC_ACCESS_ADDR, page_to_phys(page));
+ kvm_release_page_clean(page);
+ }
+
+ vmcs_write32(PIN_BASED_VM_EXEC_CONTROL,
+ (vmcs01->pin_based_vm_exec_control |
+ vmcs12->pin_based_vm_exec_control));
+
+ /*
+ * Whether page-faults are trapped is determined by a combination of
+ * 3 settings: PFEC_MASK, PFEC_MATCH and EXCEPTION_BITMAP.PF.
+ * If enable_ept, L0 doesn't care about page faults and we should
+ * set all of these to L1's desires. However, if !enable_ept, L0 does
+ * care about (at least some) page faults, and because it is not easy
+ * (if at all possible?) to merge L0 and L1's desires, we simply ask
+ * to exit on each and every L2 page fault. This is done by setting
+ * MASK=MATCH=0 and (see below) EB.PF=1.
+ * Note that below we don't need special code to set EB.PF beyond the
+ * "or"ing of the EB of vmcs01 and vmcs12, because when enable_ept,
+ * vmcs01's EB.PF is 0 so the "or" will take vmcs12's value, and when
+ * !enable_ept, EB.PF is 1, so the "or" will always be 1.
+ *
+ * A problem with this approach (when !enable_ept) is that L1 may be
+ * injected with more page faults than it asked for. This could have
+ * caused problems, but in practice existing hypervisors don't care.
+ * To fix this, we will need to emulate the PFEC checking (on the L1
+ * page tables), using walk_addr(), when injecting PFs to L1.
+ */
+ vmcs_write32(PAGE_FAULT_ERROR_CODE_MASK,
+ enable_ept ? vmcs12->page_fault_error_code_mask : 0);
+ vmcs_write32(PAGE_FAULT_ERROR_CODE_MATCH,
+ enable_ept ? vmcs12->page_fault_error_code_match : 0);
+
+ if (cpu_has_secondary_exec_ctrls()) {
+ u32 exec_control = vmcs01->secondary_vm_exec_control;
+ if (nested_cpu_has_secondary_exec_ctrls(vcpu)) {
+ exec_control |= vmcs12->secondary_vm_exec_control;
+ if (!vm_need_virtualize_apic_accesses(vcpu->kvm) ||
+ !nested_vm_need_virtualize_apic_accesses(vcpu))
+ exec_control &=
+ ~SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES;
+ }
+ vmcs_write32(SECONDARY_VM_EXEC_CONTROL, exec_control);
+ }
+
+ /*
+ * Copy host-state from vmcs01. Note that some fields are different
+ * per CPU, and are not copied here but rather in vmx_vcpu_load().
+ * Eventually, we shouldn't copy host fields from vmcs01 at all, but
+ * rather just call the KVM code which sets them up.
+ */
+ vmcs_write16(HOST_ES_SELECTOR, vmcs01->host_es_selector);
+ vmcs_write16(HOST_CS_SELECTOR, vmcs01->host_cs_selector);
+ vmcs_write16(HOST_SS_SELECTOR, vmcs01->host_ss_selector);
+ vmcs_write16(HOST_DS_SELECTOR, vmcs01->host_ds_selector);
+ vmcs_write16(HOST_FS_SELECTOR, vmcs01->host_fs_selector);
+ vmcs_write16(HOST_GS_SELECTOR, vmcs01->host_gs_selector);
+ vmcs_write16(HOST_TR_SELECTOR, vmcs01->host_tr_selector);
+ if (vmcs_config.vmexit_ctrl & VM_EXIT_LOAD_IA32_PAT)
+ vmcs_write64(HOST_IA32_PAT, vmcs01->host_ia32_pat);
+ vmcs_write32(HOST_IA32_SYSENTER_CS, vmcs01->host_ia32_sysenter_cs);
+ vmcs_writel(HOST_CR0, vmcs01->host_cr0);
+ vmcs_writel(HOST_CR3, vmcs01->host_cr3);
+ vmcs_writel(HOST_CR4, vmcs01->host_cr4);
+ vmcs_writel(HOST_FS_BASE, vmcs01->host_fs_base);
+ vmcs_writel(HOST_GS_BASE, vmcs01->host_gs_base);
+ vmcs_writel(HOST_IDTR_BASE, vmcs01->host_idtr_base);
+ vmcs_writel(HOST_RSP, vmcs01->host_rsp);
+ vmcs_writel(HOST_RIP, vmcs01->host_rip);
+ vmcs_writel(HOST_IA32_SYSENTER_EIP, vmcs01->host_ia32_sysenter_eip);
+
+ if (vm_need_tpr_shadow(vcpu->kvm) &&
+ nested_cpu_has_vmx_tpr_shadow(vcpu))
+ vmcs_write32(TPR_THRESHOLD, vmcs12->tpr_threshold);
+
+ exec_control = vmcs01->cpu_based_vm_exec_control;
+ exec_control &= ~CPU_BASED_VIRTUAL_INTR_PENDING;
+ exec_control &= ~CPU_BASED_VIRTUAL_NMI_PENDING;
+ exec_control &= ~CPU_BASED_TPR_SHADOW;
+ exec_control |= vmcs12->cpu_based_vm_exec_control;
+ if (!vm_need_tpr_shadow(vcpu->kvm) ||
+ vmcs12->virtual_apic_page_addr == 0) {
+ exec_control &= ~CPU_BASED_TPR_SHADOW;
+#ifdef CONFIG_X86_64
+ exec_control |= CPU_BASED_CR8_STORE_EXITING |
+ CPU_BASED_CR8_LOAD_EXITING;
+#endif
+ } else if (exec_control & CPU_BASED_TPR_SHADOW) {
+#ifdef CONFIG_X86_64
+ exec_control &= ~CPU_BASED_CR8_STORE_EXITING;
+ exec_control &= ~CPU_BASED_CR8_LOAD_EXITING;
+#endif
+ }
+ /*
+ * Merging of IO and MSR bitmaps not currently supported.
+ * Rather, exit every time.
+ */
+ exec_control &= ~CPU_BASED_USE_MSR_BITMAPS;
+ exec_control &= ~CPU_BASED_USE_IO_BITMAPS;
+ exec_control |= CPU_BASED_UNCOND_IO_EXITING;
+
+ vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, exec_control);
+
+ /* EXCEPTION_BITMAP and CR0_GUEST_HOST_MASK should basically be the
+ * bitwise-or of what L1 wants to trap for L2, and what we want to
+ * trap. However, vmx_fpu_activate/deactivate may have happened after
+ * we saved vmcs01, so we shouldn't trust its TS and NM_VECTOR bits
+ * and need to base them again on fpu_active. Note that CR0.TS also
+ * needs updating - we do this after this function returns (in
+ * nested_vmx_run).
+ */
+ vmcs_write32(EXCEPTION_BITMAP,
+ ((vmcs01->exception_bitmap&~(1u<<NM_VECTOR)) |
+ (vcpu->fpu_active ? 0 : (1u<<NM_VECTOR)) |
+ vmcs12->exception_bitmap));
+ vmcs_writel(CR0_GUEST_HOST_MASK, vmcs12->cr0_guest_host_mask |
+ (vcpu->fpu_active ? 0 : X86_CR0_TS));
+ vcpu->arch.cr0_guest_owned_bits = ~(vmcs12->cr0_guest_host_mask |
+ (vcpu->fpu_active ? 0 : X86_CR0_TS));
+
+ vmcs_write32(VM_EXIT_CONTROLS,
+ (vmcs01->vm_exit_controls &
+ (~(VM_EXIT_LOAD_IA32_PAT | VM_EXIT_SAVE_IA32_PAT)))
+ | vmcs12->vm_exit_controls);
+
+ vmcs_write32(VM_ENTRY_CONTROLS,
+ (vmcs01->vm_entry_controls &
+ (~(VM_ENTRY_LOAD_IA32_PAT | VM_ENTRY_IA32E_MODE)))
+ | vmcs12->vm_entry_controls);
+
+ vmcs_writel(CR4_GUEST_HOST_MASK,
+ (vmcs01->cr4_guest_host_mask |
+ vmcs12->cr4_guest_host_mask));
+ vcpu->arch.cr4_guest_owned_bits = ~(vmcs01->cr4_guest_host_mask |
+ vmcs12->cr4_guest_host_mask);
+
+ vmcs_write64(TSC_OFFSET, vmcs01->tsc_offset + vmcs12->tsc_offset);
+
+ if (enable_ept) {
+ /* shadow page tables on EPT */
+ vmcs_write64(EPT_POINTER, vmcs01->ept_pointer);
+ }
+ if (enable_vpid) {
+ /*
+ * Trivially support vpid by letting L2s share their parent
+ * L1's vpid. TODO: move to a more elaborate solution, giving
+ * each L2 its own vpid and exposing the vpid feature to L1.
+ */
+ vmcs_write16(VIRTUAL_PROCESSOR_ID, to_vmx(vcpu)->vpid);
+ vmx_flush_tlb(vcpu);
+ }
+ return 0;
+}
+
static struct kvm_x86_ops vmx_x86_ops = {
.cpu_has_kvm_support = cpu_has_kvm_support,
.disabled_by_bios = vmx_disabled_by_bios,