@@ -376,7 +376,9 @@ enum vmcs_field {
#define VMX_EPTP_WB_BIT (1ull << 14)
#define VMX_EPT_2MB_PAGE_BIT (1ull << 16)
#define VMX_EPT_1GB_PAGE_BIT (1ull << 17)
-#define VMX_EPT_AD_BIT (1ull << 21)
+#define VMX_EPT_INVEPT_BIT (1ull << 20)
+#define VMX_EPT_AD_BIT (1ull << 21)
+#define VMX_EPT_EXTENT_INDIVIDUAL_BIT (1ull << 24)
#define VMX_EPT_EXTENT_CONTEXT_BIT (1ull << 25)
#define VMX_EPT_EXTENT_GLOBAL_BIT (1ull << 26)
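The three bits added above mirror the layout of the IA32_VMX_EPT_VPID_CAP MSR: bit 20 reports that INVEPT is implemented, and bits 24-26 report which invalidation types (individual-address, single-context, all-context) the CPU accepts. Before handle_invept() below will accept a type, the matching bit also has to be advertised to L1 via nested_vmx_ept_caps. A rough sketch of how that mask could be built from the vmx_capability.ept value vmx.c already reads from this MSR; the exact wiring is not shown in this excerpt, so treat it as an assumption rather than the patch's own code:

	/* Illustrative only: advertise INVEPT plus the invalidation types
	 * that both KVM and the host CPU can honour; handle_invept() checks
	 * these same bits before emulating the instruction.
	 */
	nested_vmx_ept_caps = VMX_EPT_INVEPT_BIT |
			      VMX_EPT_EXTENT_GLOBAL_BIT |
			      VMX_EPT_EXTENT_CONTEXT_BIT;
	nested_vmx_ept_caps &= vmx_capability.ept;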
@@ -65,6 +65,7 @@
#define EXIT_REASON_EOI_INDUCED 45
#define EXIT_REASON_EPT_VIOLATION 48
#define EXIT_REASON_EPT_MISCONFIG 49
+#define EXIT_REASON_INVEPT 50
#define EXIT_REASON_WBINVD 54
#define EXIT_REASON_XSETBV 55
#define EXIT_REASON_APIC_WRITE 56
@@ -5879,6 +5879,87 @@ static int handle_vmptrst(struct kvm_vcpu *vcpu)
return 1;
}
+/* Emulate the INVEPT instruction */
+static int handle_invept(struct kvm_vcpu *vcpu)
+{
+	u32 vmx_instruction_info;
+	unsigned long type;
+	gva_t gva;
+	struct x86_exception e;
+	struct {
+		u64 eptp, gpa;
+	} operand;
+
+	if (!(nested_vmx_secondary_ctls_high & SECONDARY_EXEC_ENABLE_EPT) ||
+	    !(nested_vmx_ept_caps & VMX_EPT_INVEPT_BIT)) {
+		kvm_queue_exception(vcpu, UD_VECTOR);
+		return 1;
+	}
+
+	if (!nested_vmx_check_permission(vcpu))
+		return 1;
+
+	if (!kvm_read_cr0_bits(vcpu, X86_CR0_PE)) {
+		kvm_queue_exception(vcpu, UD_VECTOR);
+		return 1;
+	}
+
+	/* According to the Intel VMX instruction reference, the memory
+	 * operand is read even if it isn't needed (e.g., for type==global)
+	 */
+	vmx_instruction_info = vmcs_read32(VMX_INSTRUCTION_INFO);
+	if (get_vmx_mem_address(vcpu, vmcs_readl(EXIT_QUALIFICATION),
+			vmx_instruction_info, &gva))
+		return 1;
+	if (kvm_read_guest_virt(&vcpu->arch.emulate_ctxt, gva, &operand,
+				sizeof(operand), &e)) {
+		kvm_inject_page_fault(vcpu, &e);
+		return 1;
+	}
+
+	type = kvm_register_read(vcpu, (vmx_instruction_info >> 28) & 0xf);
+
+	switch (type) {
+	case VMX_EPT_EXTENT_GLOBAL:
+		if (!(nested_vmx_ept_caps & VMX_EPT_EXTENT_GLOBAL_BIT))
+			nested_vmx_failValid(vcpu,
+				VMXERR_INVALID_OPERAND_TO_INVEPT_INVVPID);
+		else {
+			/*
+			 * Do nothing: when L1 changes EPT12, we already
+			 * update EPT02 (the shadow EPT table) and call INVEPT.
+			 * So when L1 calls INVEPT, there's nothing left to do.
+			 */
+			nested_vmx_succeed(vcpu);
+		}
+		break;
+	case VMX_EPT_EXTENT_CONTEXT:
+		if (!(nested_vmx_ept_caps & VMX_EPT_EXTENT_CONTEXT_BIT))
+			nested_vmx_failValid(vcpu,
+				VMXERR_INVALID_OPERAND_TO_INVEPT_INVVPID);
+		else {
+			/* Do nothing */
+			nested_vmx_succeed(vcpu);
+		}
+		break;
+	case VMX_EPT_EXTENT_INDIVIDUAL_ADDR:
+		if (!(nested_vmx_ept_caps & VMX_EPT_EXTENT_INDIVIDUAL_BIT))
+			nested_vmx_failValid(vcpu,
+				VMXERR_INVALID_OPERAND_TO_INVEPT_INVVPID);
+		else {
+			/* Do nothing */
+			nested_vmx_succeed(vcpu);
+		}
+		break;
+	default:
+		nested_vmx_failValid(vcpu,
+			VMXERR_INVALID_OPERAND_TO_INVEPT_INVVPID);
+	}
+
+	skip_emulated_instruction(vcpu);
+	return 1;
+}
+
/*
* The exit handlers return 1 if the exit was handled fully and guest execution
* may resume. Otherwise they set the kvm_run parameter to indicate what needs
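For reference, this is the guest-side (L1) view of what handle_invept() emulates: INVEPT takes the invalidation type in a general-purpose register, which the handler recovers from bits 31:28 of the VM-exit instruction-information field, and a 128-bit descriptor in memory that the CPU reads even when the type does not need it. A minimal sketch, assuming an assembler that understands the invept mnemonic; the struct and function names are illustrative, not taken from the patch:

struct invept_desc {
	u64 eptp;
	u64 reserved;
};

static inline void l1_invept(unsigned long type, u64 eptp)
{
	struct invept_desc desc = { eptp, 0 };

	/* AT&T operand order: memory descriptor first, type register second */
	asm volatile("invept %0, %1"
		     : : "m" (desc), "r" (type)
		     : "cc", "memory");
}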
@@ -5923,6 +6004,7 @@ static int (*const kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu) = {
[EXIT_REASON_PAUSE_INSTRUCTION] = handle_pause,
[EXIT_REASON_MWAIT_INSTRUCTION] = handle_invalid_op,
[EXIT_REASON_MONITOR_INSTRUCTION] = handle_invalid_op,
+ [EXIT_REASON_INVEPT] = handle_invept,
};
static const int kvm_vmx_max_exit_handlers =
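For context, the new table entry is reached through the generic dispatch in vmx_handle_exit(), which indexes kvm_vmx_exit_handlers[] by the basic exit reason; roughly (paraphrased, not part of this patch):

	if (exit_reason < kvm_vmx_max_exit_handlers &&
	    kvm_vmx_exit_handlers[exit_reason])
		return kvm_vmx_exit_handlers[exit_reason](vcpu);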
@@ -6107,6 +6189,7 @@ static bool nested_vmx_exit_handled(struct kvm_vcpu *vcpu)
case EXIT_REASON_VMPTRST: case EXIT_REASON_VMREAD:
case EXIT_REASON_VMRESUME: case EXIT_REASON_VMWRITE:
case EXIT_REASON_VMOFF: case EXIT_REASON_VMON:
+ case EXIT_REASON_INVEPT:
/*
* VMX instructions trap unconditionally. This allows L1 to
* emulate them for its L2 guest, i.e., allows 3-level nesting!