Message ID | 1368939152-11406-13-git-send-email-jun.nakajima@intel.com (mailing list archive) |
---|---|
State | New, archived |
On 19/05/2013 06:52, Jun Nakajima wrote:
> @@ -7441,10 +7443,81 @@ static void nested_ept_inject_page_fault(struct kvm_vcpu *vcpu,
>          * Note no need to set vmcs12->vm_exit_reason as it is already copied
>          * from vmcs02 in nested_vmx_vmexit() above, i.e., EPT_VIOLATION.
>          */

This comment is now wrong.

> -       vmcs12->exit_qualification = fault->error_code;

And this shows that patch 5 ("nEPT: MMU context for nested EPT") was wrong in this respect. Perhaps this patch should be moved earlier in the series, so that the exit qualification is "bisectably" ok:

1) the updating of exit_qualification in walk_addr_generic should be split out and moved before patch 5;

2) the changes to handle_ept_violation and nested_ept_inject_page_fault (plus fixing the above comment) should also be split out, this time to be squashed into patch 5.

These two changes ensure that patch 5 can already use the right exit qualification.

3) if needed to make the series bisectable, squash patch 12 into patch 2 and make is_rsvd_bits_set always return 0 in patch 3; then the rest of the handling of reserved bits (including the introduction of check_tdp_pte) will remain here. Otherwise, just squash what's left of this patch into patch 12 and again change the subject.

In either case the subject will have to change.

Paolo

> +       if (fault->error_code & PFERR_RSVD_MASK)
> +               vmcs12->vm_exit_reason = EXIT_REASON_EPT_MISCONFIG;
> +       else
> +               vmcs12->vm_exit_reason = EXIT_REASON_EPT_VIOLATION;
> +
> +       vmcs12->exit_qualification = vcpu->arch.exit_qualification;
>         vmcs12->guest_physical_address = fault->address;
>  }
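For reference, a sketch of how the function could read once the stale comment is replaced; this is only an illustration of the suggested cleanup (the code is taken from the patch, the comment wording is not), not a posted revision:

static void nested_ept_inject_page_fault(struct kvm_vcpu *vcpu,
                                         struct x86_exception *fault)
{
        struct vmcs12 *vmcs12;

        nested_vmx_vmexit(vcpu);
        vmcs12 = get_vmcs12(vcpu);

        /*
         * The exit reason is chosen here rather than inherited from
         * vmcs02: a reserved-bit fault found while walking the guest's
         * EPT tables is reported to L1 as an EPT misconfiguration,
         * anything else as an EPT violation with the qualification
         * saved earlier in vcpu->arch.exit_qualification.
         */
        if (fault->error_code & PFERR_RSVD_MASK)
                vmcs12->vm_exit_reason = EXIT_REASON_EPT_MISCONFIG;
        else
                vmcs12->vm_exit_reason = EXIT_REASON_EPT_VIOLATION;

        vmcs12->exit_qualification = vcpu->arch.exit_qualification;
        vmcs12->guest_physical_address = fault->address;
}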
On 05/19/2013 12:52 PM, Jun Nakajima wrote:
> Add code to detect EPT misconfiguration and inject it into the L1 VMM.
> Also, inject a more correct exit qualification upon EPT violation to
> the L1 VMM. Now L1 can correctly reach the ept_misconfig handler
> (instead of wrongly going to fast_page_fault); it will try to handle
> the MMIO page fault and, if that fails, it is a real EPT
> misconfiguration.
>
> Signed-off-by: Jun Nakajima <jun.nakajima@intel.com>
> Signed-off-by: Xinhao Xu <xinhao.xu@intel.com>
> ---
>  arch/x86/include/asm/kvm_host.h |  4 +++
>  arch/x86/kvm/mmu.c              |  5 ---
>  arch/x86/kvm/mmu.h              |  5 +++
>  arch/x86/kvm/paging_tmpl.h      | 26 ++++++++++++++
>  arch/x86/kvm/vmx.c              | 79 +++++++++++++++++++++++++++++++++++++++--
>  5 files changed, 111 insertions(+), 8 deletions(-)
>
> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> index 3741c65..1d03202 100644
> --- a/arch/x86/include/asm/kvm_host.h
> +++ b/arch/x86/include/asm/kvm_host.h
> @@ -262,6 +262,8 @@ struct kvm_mmu {
>         void (*invlpg)(struct kvm_vcpu *vcpu, gva_t gva);
>         void (*update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
>                            u64 *spte, const void *pte);
> +       bool (*check_tdp_pte)(u64 pte, int level);
> +
>         hpa_t root_hpa;
>         int root_level;
>         int shadow_root_level;
> @@ -503,6 +505,8 @@ struct kvm_vcpu_arch {
>          * instruction.
>          */
>         bool write_fault_to_shadow_pgtable;
> +
> +       unsigned long exit_qualification; /* set at EPT violation at this point */
>  };
>
>  struct kvm_lpage_info {
> diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
> index 93d6abf..3a3b11f 100644
> --- a/arch/x86/kvm/mmu.c
> +++ b/arch/x86/kvm/mmu.c
> @@ -233,11 +233,6 @@ static bool set_mmio_spte(u64 *sptep, gfn_t gfn, pfn_t pfn, unsigned access)
>         return false;
>  }
>
> -static inline u64 rsvd_bits(int s, int e)
> -{
> -       return ((1ULL << (e - s + 1)) - 1) << s;
> -}
> -
>  void kvm_mmu_set_mask_ptes(u64 user_mask, u64 accessed_mask,
>                 u64 dirty_mask, u64 nx_mask, u64 x_mask)
>  {
> diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h
> index 8fc94dd..559e2e0 100644
> --- a/arch/x86/kvm/mmu.h
> +++ b/arch/x86/kvm/mmu.h
> @@ -88,6 +88,11 @@ static inline bool is_write_protection(struct kvm_vcpu *vcpu)
>         return kvm_read_cr0_bits(vcpu, X86_CR0_WP);
>  }
>
> +static inline u64 rsvd_bits(int s, int e)
> +{
> +       return ((1ULL << (e - s + 1)) - 1) << s;
> +}
> +
>  /*
>   * Will a fault with a given page-fault error code (pfec) cause a permission
>   * fault with the given access (in ACC_* format)?
> diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
> index 2432d49..067b1f8 100644
> --- a/arch/x86/kvm/paging_tmpl.h
> +++ b/arch/x86/kvm/paging_tmpl.h
> @@ -126,10 +126,14 @@ static int FNAME(cmpxchg_gpte)(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
>
>  static bool FNAME(is_rsvd_bits_set)(struct kvm_mmu *mmu, u64 gpte, int level)
>  {
> +#if PTTYPE == PTTYPE_EPT
> +       return (mmu->check_tdp_pte(gpte, level));
> +#else
>         int bit7;
>
>         bit7 = (gpte >> 7) & 1;
>         return (gpte & mmu->rsvd_bits_mask[bit7][level-1]) != 0;
> +#endif
>  }

It would be better to set mmu->check_tdp_pte = is_rsvd_bits_set for the
current modes; then this part can be moved to mmu.c (a sketch of this
idea follows the mail).

>
>  static bool FNAME(prefetch_invalid_gpte)(struct kvm_vcpu *vcpu,
> @@ -352,6 +356,28 @@ error:
>         walker->fault.vector = PF_VECTOR;
>         walker->fault.error_code_valid = true;
>         walker->fault.error_code = errcode;
> +
> +#if PTTYPE == PTTYPE_EPT
> +       /*
> +        * Use PFERR_RSVD_MASK in error_code to tell if an EPT
> +        * misconfiguration needs to be injected. The detection is
> +        * done by is_rsvd_bits_set() above.
> +        *
> +        * We set up the value of exit_qualification to inject:
> +        * [2:0] -- Derived from [2:0] of the real exit_qualification at EPT violation
> +        * [5:3] -- Calculated by the page walk of the guest EPT page tables
> +        * [7:8] -- Cleared to 0.
> +        *
> +        * The other bits are set to 0.
> +        */
> +       if (!(errcode & PFERR_RSVD_MASK)) {
> +               unsigned long exit_qualification = vcpu->arch.exit_qualification;
> +
> +               pte_access = pt_access & pte;
> +               vcpu->arch.exit_qualification = ((pte_access & 0x7) << 3) |
> +                       (exit_qualification & 0x7);
> +       }
> +#endif

Could these operations be moved to nested_ept_inject_page_fault()?
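A minimal sketch of Xiao's first suggestion, assuming the hook is widened to take the struct kvm_mmu pointer (the posted patch declares it as bool (*)(u64, int), so nested_check_ept_pte would need the extra parameter as well); the function names below are hypothetical:

/*
 * Hypothetical default implementation: the existing reserved-bit test,
 * given the same shape as the hook so that paging_tmpl.h can call
 * mmu->check_tdp_pte() unconditionally and drop the
 * #if PTTYPE == PTTYPE_EPT branch.
 */
static bool default_check_tdp_pte(struct kvm_mmu *mmu, u64 pte, int level)
{
        int bit7 = (pte >> 7) & 1;

        return (pte & mmu->rsvd_bits_mask[bit7][level - 1]) != 0;
}

/*
 * Installed wherever each mmu context is initialized; the nested-EPT
 * context would override it with nested_check_ept_pte() instead.
 */
static void mmu_install_default_pte_check(struct kvm_mmu *mmu)
{
        mmu->check_tdp_pte = default_check_tdp_pte;
}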
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 3741c65..1d03202 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -262,6 +262,8 @@ struct kvm_mmu {
        void (*invlpg)(struct kvm_vcpu *vcpu, gva_t gva);
        void (*update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
                           u64 *spte, const void *pte);
+       bool (*check_tdp_pte)(u64 pte, int level);
+
        hpa_t root_hpa;
        int root_level;
        int shadow_root_level;
@@ -503,6 +505,8 @@ struct kvm_vcpu_arch {
         * instruction.
         */
        bool write_fault_to_shadow_pgtable;
+
+       unsigned long exit_qualification; /* set at EPT violation at this point */
 };
 
 struct kvm_lpage_info {
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 93d6abf..3a3b11f 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -233,11 +233,6 @@ static bool set_mmio_spte(u64 *sptep, gfn_t gfn, pfn_t pfn, unsigned access)
        return false;
 }
 
-static inline u64 rsvd_bits(int s, int e)
-{
-       return ((1ULL << (e - s + 1)) - 1) << s;
-}
-
 void kvm_mmu_set_mask_ptes(u64 user_mask, u64 accessed_mask,
                u64 dirty_mask, u64 nx_mask, u64 x_mask)
 {
diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h
index 8fc94dd..559e2e0 100644
--- a/arch/x86/kvm/mmu.h
+++ b/arch/x86/kvm/mmu.h
@@ -88,6 +88,11 @@ static inline bool is_write_protection(struct kvm_vcpu *vcpu)
        return kvm_read_cr0_bits(vcpu, X86_CR0_WP);
 }
 
+static inline u64 rsvd_bits(int s, int e)
+{
+       return ((1ULL << (e - s + 1)) - 1) << s;
+}
+
 /*
  * Will a fault with a given page-fault error code (pfec) cause a permission
  * fault with the given access (in ACC_* format)?
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
index 2432d49..067b1f8 100644
--- a/arch/x86/kvm/paging_tmpl.h
+++ b/arch/x86/kvm/paging_tmpl.h
@@ -126,10 +126,14 @@ static int FNAME(cmpxchg_gpte)(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
 
 static bool FNAME(is_rsvd_bits_set)(struct kvm_mmu *mmu, u64 gpte, int level)
 {
+#if PTTYPE == PTTYPE_EPT
+       return (mmu->check_tdp_pte(gpte, level));
+#else
        int bit7;
 
        bit7 = (gpte >> 7) & 1;
        return (gpte & mmu->rsvd_bits_mask[bit7][level-1]) != 0;
+#endif
 }
 
 static bool FNAME(prefetch_invalid_gpte)(struct kvm_vcpu *vcpu,
@@ -352,6 +356,28 @@ error:
        walker->fault.vector = PF_VECTOR;
        walker->fault.error_code_valid = true;
        walker->fault.error_code = errcode;
+
+#if PTTYPE == PTTYPE_EPT
+       /*
+        * Use PFERR_RSVD_MASK in error_code to tell if an EPT
+        * misconfiguration needs to be injected. The detection is
+        * done by is_rsvd_bits_set() above.
+        *
+        * We set up the value of exit_qualification to inject:
+        * [2:0] -- Derived from [2:0] of the real exit_qualification at EPT violation
+        * [5:3] -- Calculated by the page walk of the guest EPT page tables
+        * [7:8] -- Cleared to 0.
+        *
+        * The other bits are set to 0.
+        */
+       if (!(errcode & PFERR_RSVD_MASK)) {
+               unsigned long exit_qualification = vcpu->arch.exit_qualification;
+
+               pte_access = pt_access & pte;
+               vcpu->arch.exit_qualification = ((pte_access & 0x7) << 3) |
+                       (exit_qualification & 0x7);
+       }
+#endif
 
        walker->fault.address = addr;
        walker->fault.nested_page_fault = mmu != vcpu->arch.walk_mmu;
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index ec4e9b9..667be15 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -5310,6 +5310,8 @@ static int handle_ept_violation(struct kvm_vcpu *vcpu)
        /* ept page table is present? */
        error_code |= (exit_qualification >> 3) & 0x1;
 
+       vcpu->arch.exit_qualification = exit_qualification;
+
        return kvm_mmu_page_fault(vcpu, gpa, error_code, NULL, 0);
 }
 
@@ -7432,7 +7434,7 @@ static unsigned long nested_ept_get_cr3(struct kvm_vcpu *vcpu)
 }
 
 static void nested_ept_inject_page_fault(struct kvm_vcpu *vcpu,
-               struct x86_exception *fault)
+                                        struct x86_exception *fault)
 {
        struct vmcs12 *vmcs12;
        nested_vmx_vmexit(vcpu);
@@ -7441,10 +7443,81 @@ static void nested_ept_inject_page_fault(struct kvm_vcpu *vcpu,
         * Note no need to set vmcs12->vm_exit_reason as it is already copied
         * from vmcs02 in nested_vmx_vmexit() above, i.e., EPT_VIOLATION.
         */
-       vmcs12->exit_qualification = fault->error_code;
+       if (fault->error_code & PFERR_RSVD_MASK)
+               vmcs12->vm_exit_reason = EXIT_REASON_EPT_MISCONFIG;
+       else
+               vmcs12->vm_exit_reason = EXIT_REASON_EPT_VIOLATION;
+
+       vmcs12->exit_qualification = vcpu->arch.exit_qualification;
        vmcs12->guest_physical_address = fault->address;
 }
 
+static bool nested_ept_rsvd_bits_check(u64 pte, int level)
+{
+       const int maxphyaddr = 48; /* set to the max size for now */
+       u64 rsvd_mask = rsvd_bits(maxphyaddr, 51);
+
+       switch (level) {
+       case 4:
+               rsvd_mask |= rsvd_bits(3, 7);
+               break;
+       case 3:
+       case 2:
+               if (pte & (1 << 7))
+                       rsvd_mask |= rsvd_bits(PAGE_SHIFT,
+                                              PAGE_SHIFT + 9 * (level - 1) - 1);
+               else
+                       rsvd_mask |= rsvd_bits(3, 6);
+               break;
+       case 1:
+               break;
+       default:
+               /* cannot get here */
+               BUG();
+       }
+
+       return pte & rsvd_mask;
+}
+
+static bool nested_ept_rwx_bits_check(u64 pte)
+{
+       /* write only or write/execute only */
+       uint8_t rwx_bits = pte & 7;
+
+       switch (rwx_bits) {
+       case 0x2:
+       case 0x6:
+               return true;
+       case 0x4:
+               if (!(nested_vmx_ept_caps & 0x1))
+                       return true;
+               /* fall through */
+       default:
+               return false;
+       }
+}
+
+static bool nested_ept_memtype_check(u64 pte, int level)
+{
+       if (level == 1 || (level == 2 && (pte & (1ULL << 7)))) {
+               /* 0x38, namely bits 5:3, stands for EPT memory type */
+               u64 ept_mem_type = (pte & 0x38) >> 3;
+
+               if (ept_mem_type == 0x2 || ept_mem_type == 0x3 ||
+                   ept_mem_type == 0x7)
+                       return true;
+       }
+       return false;
+}
+
+bool nested_check_ept_pte(u64 pte, int level)
+{
+       bool r;
+       r = nested_ept_rsvd_bits_check(pte, level) ||
+           nested_ept_rwx_bits_check(pte) ||
+           nested_ept_memtype_check(pte, level);
+
+       return r;
+}
+
 static int nested_ept_init_mmu_context(struct kvm_vcpu *vcpu)
 {
        int r = kvm_init_shadow_EPT_mmu(vcpu, &vcpu->arch.mmu);
@@ -7452,7 +7525,7 @@ static int nested_ept_init_mmu_context(struct kvm_vcpu *vcpu)
        vcpu->arch.mmu.set_cr3 = vmx_set_cr3;
        vcpu->arch.mmu.get_cr3 = nested_ept_get_cr3;
        vcpu->arch.mmu.inject_page_fault = nested_ept_inject_page_fault;
-
+       vcpu->arch.mmu.check_tdp_pte = nested_check_ept_pte;
        vcpu->arch.walk_mmu = &vcpu->arch.nested_mmu;
 
        return r;
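To make the exit-qualification encoding in the walker hunk concrete, here is a small standalone illustration (plain userspace C with invented names, not kernel code) of how the value injected into L1 is assembled:

#include <stdint.h>
#include <stdio.h>

/*
 * Mirrors the expression in walk_addr_generic's error path: bits [2:0]
 * (the read/write/fetch access type) come from the hardware exit
 * qualification of the original EPT violation, bits [5:3] are the RWX
 * permissions accumulated while walking the guest's EPT tables, and
 * all remaining bits are cleared.
 */
static unsigned long l1_exit_qualification(unsigned long hw_exit_qual,
                                           uint64_t guest_pte_access)
{
        return ((guest_pte_access & 0x7) << 3) | (hw_exit_qual & 0x7);
}

int main(void)
{
        /* A write (bit 1) faulted; the guest's EPT walk allows read+write. */
        unsigned long qual = l1_exit_qualification(0x2, 0x3);

        printf("exit qualification for L1: %#lx\n", qual); /* prints 0x1a */
        return 0;
}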