@@ -262,6 +262,8 @@ struct kvm_mmu {
void (*invlpg)(struct kvm_vcpu *vcpu, gva_t gva);
void (*update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
u64 *spte, const void *pte);
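+ /* check a guest TDP (EPT) entry for reserved bits and other misconfigurations */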
+ bool (*check_tdp_pte)(u64 pte, int level);
+
hpa_t root_hpa;
int root_level;
int shadow_root_level;
@@ -504,6 +506,8 @@ struct kvm_vcpu_arch {
* instruction.
*/
bool write_fault_to_shadow_pgtable;
+
+ unsigned long exit_qualification; /* saved at the latest EPT violation */
};
struct kvm_lpage_info {
@@ -230,11 +230,6 @@ static bool set_mmio_spte(u64 *sptep, gfn_t gfn, pfn_t pfn, unsigned access)
return false;
}
-static inline u64 rsvd_bits(int s, int e)
-{
- return ((1ULL << (e - s + 1)) - 1) << s;
-}
-
void kvm_mmu_set_mask_ptes(u64 user_mask, u64 accessed_mask,
u64 dirty_mask, u64 nx_mask, u64 x_mask)
{
@@ -91,6 +91,11 @@ static inline bool is_write_protection(struct kvm_vcpu *vcpu)
return kvm_read_cr0_bits(vcpu, X86_CR0_WP);
}
+static inline u64 rsvd_bits(int s, int e)
+{
+ return ((1ULL << (e - s + 1)) - 1) << s;
+}
+
/*
* Will a fault with a given page-fault error code (pfec) cause a permission
* fault with the given access (in ACC_* format)?
@@ -126,10 +126,14 @@ static int FNAME(cmpxchg_gpte)(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
static bool FNAME(is_rsvd_bits_set)(struct kvm_mmu *mmu, u64 gpte, int level)
{
+#if PTTYPE == PTTYPE_EPT
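+ /*
+ * For EPT, misconfiguration checks (reserved bits, RWX combinations
+ * and memory type) are delegated to mmu->check_tdp_pte.
+ */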
+ return mmu->check_tdp_pte(gpte, level);
+#else
int bit7;
bit7 = (gpte >> 7) & 1;
return (gpte & mmu->rsvd_bits_mask[bit7][level-1]) != 0;
+#endif
}
static bool FNAME(prefetch_invalid_gpte)(struct kvm_vcpu *vcpu,
@@ -352,6 +356,28 @@ error:
walker->fault.vector = PF_VECTOR;
walker->fault.error_code_valid = true;
walker->fault.error_code = errcode;
+
+#if PTTYPE == PTTYPE_EPT
+ /*
+ * Use PFERR_RSVD_MASK in error_code to tell whether an EPT
+ * misconfiguration needs to be injected. The detection is
+ * done by is_rsvd_bits_set() above.
+ *
+ * We set up the value of exit_qualification to inject:
+ * [2:0] -- Copied from [2:0] of the real exit_qualification at EPT violation
+ * [5:3] -- Calculated by the page walk of the guest EPT page tables
+ * [8:7] -- Cleared to 0.
+ *
+ * The other bits are set to 0.
+ */
+ if (!(errcode & PFERR_RSVD_MASK)) {
+ unsigned long exit_qualification = vcpu->arch.exit_qualification;
+
+ pte_access = pt_access & pte;
+ vcpu->arch.exit_qualification = ((pte_access & 0x7) << 3) |
+ (exit_qualification & 0x7);
+ }
+#endif
walker->fault.address = addr;
walker->fault.nested_page_fault = mmu != vcpu->arch.walk_mmu;
@@ -5074,6 +5074,8 @@ static int handle_ept_violation(struct kvm_vcpu *vcpu)
/* ept page table is present? */
error_code |= (exit_qualification >> 3) & 0x1;
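+ /* save the exit qualification; the nested EPT walker reuses bits 2:0 of it */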
+ vcpu->arch.exit_qualification = exit_qualification;
+
return kvm_mmu_page_fault(vcpu, gpa, error_code, NULL, 0);
}
@@ -6994,7 +6996,7 @@ static unsigned long nested_ept_get_cr3(struct kvm_vcpu *vcpu)
}
static void nested_ept_inject_page_fault(struct kvm_vcpu *vcpu,
- struct x86_exception *fault)
+ struct x86_exception *fault)
{
struct vmcs12 *vmcs12;
nested_vmx_vmexit(vcpu);
@@ -7003,10 +7005,81 @@ static void nested_ept_inject_page_fault(struct kvm_vcpu *vcpu,
* Note no need to set vmcs12->vm_exit_reason as it is already copied
* from vmcs02 in nested_vmx_vmexit() above, i.e., EPT_VIOLATION.
*/
- vmcs12->exit_qualification = fault->error_code;
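+ /* PFERR_RSVD_MASK is set when the guest EPT walk found a misconfigured entry */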
+ if (fault->error_code & PFERR_RSVD_MASK)
+ vmcs12->vm_exit_reason = EXIT_REASON_EPT_MISCONFIG;
+ else
+ vmcs12->vm_exit_reason = EXIT_REASON_EPT_VIOLATION;
+
+ vmcs12->exit_qualification = vcpu->arch.exit_qualification;
vmcs12->guest_physical_address = fault->address;
}
+static bool nested_ept_rsvd_bits_check(u64 pte, int level)
+{
+ const int maxphyaddr = 48; /* assume a 48-bit physical-address width for now */
+ u64 rsvd_mask = rsvd_bits(maxphyaddr, 51);
+
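+ /*
+ * Bits 51:maxphyaddr are reserved at every level; add the
+ * level-specific reserved bits below.
+ */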
+ switch (level) {
+ case 4:
+ rsvd_mask |= rsvd_bits(3, 7);
+ break;
+ case 3:
+ case 2:
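+ /* bit 7 selects a 1GB/2MB mapping vs. a pointer to a lower-level table */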
+ if (pte & (1ULL << 7))
+ rsvd_mask |= rsvd_bits(PAGE_SHIFT, PAGE_SHIFT + 9 * (level - 1) - 1);
+ else
+ rsvd_mask |= rsvd_bits(3, 6);
+ break;
+ case 1:
+ break;
+ default:
+ /* cannot happen: the guest EPT walk has at most 4 levels */
+ BUG();
+ }
+
+ return pte & rsvd_mask;
+}
+
+static bool nested_ept_rwx_bits_check(u64 pte)
+{
+ u8 rwx_bits = pte & 7;
+
+ /* write-only (0x2) and write/execute-only (0x6) are misconfigurations */
+ switch (rwx_bits) {
+ case 0x2:
+ case 0x6:
+ return true;
+ case 0x4:
+ /* execute-only: misconfigured unless the execute-only EPT capability is exposed */
+ if (!(nested_vmx_ept_caps & 0x1))
+ return true;
+ /* fall through */
+ default:
+ return false;
+ }
+}
+
+static bool nested_ept_memtype_check(u64 pte, int level)
+{
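+ /* only leaf entries (4KB PTEs and large-page mappings) have a memory-type field */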
+ if (level == 1 || ((level == 2 || level == 3) && (pte & (1ULL << 7)))) {
+ /* 0x38, namely bits 5:3, stands for EPT memory type */
+ u64 ept_mem_type = (pte & 0x38) >> 3;
+
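+ /* memory types 2, 3 and 7 are reserved (valid: UC=0, WC=1, WT=4, WP=5, WB=6) */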
+ if (ept_mem_type == 0x2 || ept_mem_type == 0x3 ||
+ ept_mem_type == 0x7)
+ return true;
+ }
+ return false;
+}
+
+static bool nested_check_ept_pte(u64 pte, int level)
+{
+ return nested_ept_rsvd_bits_check(pte, level) ||
+ nested_ept_rwx_bits_check(pte) ||
+ nested_ept_memtype_check(pte, level);
+}
+
static int nested_ept_init_mmu_context(struct kvm_vcpu *vcpu)
{
int r = kvm_init_shadow_EPT_mmu(vcpu, &vcpu->arch.mmu);
@@ -7014,7 +7087,7 @@ static int nested_ept_init_mmu_context(struct kvm_vcpu *vcpu)
vcpu->arch.mmu.set_cr3 = vmx_set_cr3;
vcpu->arch.mmu.get_cr3 = nested_ept_get_cr3;
vcpu->arch.mmu.inject_page_fault = nested_ept_inject_page_fault;
-
+ vcpu->arch.mmu.check_tdp_pte = nested_check_ept_pte;
vcpu->arch.walk_mmu = &vcpu->arch.nested_mmu;
return r;