diff mbox

[3/5] mmu: don't set the present bit unconditionally

Message ID 1467088360-10186-4-git-send-email-bsd@redhat.com (mailing list archive)
State New, archived
Headers show

Commit Message

Bandan Das June 28, 2016, 4:32 a.m. UTC
To support execute only mappings on behalf of L1
hypervisors, we teach set_spte() to honor L1's valid XWR
bits. This is done only if the host supports EPT
execute-only. Reuse ACC_USER_MASK to signify whether the
L1 hypervisor has the R bit set.

Signed-off-by: Bandan Das <bsd@redhat.com>
---
 arch/x86/kvm/mmu.c         | 9 +++++++--
 arch/x86/kvm/paging_tmpl.h | 2 +-
 arch/x86/kvm/vmx.c         | 2 +-
 3 files changed, 9 insertions(+), 4 deletions(-)

Comments

Paolo Bonzini June 28, 2016, 8:57 a.m. UTC | #1
On 28/06/2016 06:32, Bandan Das wrote:
> +	bool execonly = !(context->guest_rsvd_check.bad_mt_xwr &
> +			  (1ull << VMX_EPT_EXECUTABLE_MASK));
>  
>  	if (set_mmio_spte(vcpu, sptep, gfn, pfn, pte_access))
>  		return 0;
>  
> -	spte = PT_PRESENT_MASK;
> +	if (!execonly)
> +		spte |= PT_PRESENT_MASK;

This needs a comment:

	/*
	 * There are two cases in which execonly is false: 1) for
	 * non-EPT page tables, in which case we need to set the
	 * P bit; 2) for EPT page tables where an X-- page table
	 * entry is invalid, in which case we need to force the R
	 * bit of the page table entry to 1.
	 */
	BUILD_BUG_ON(PT_PRESENT_MASK != VMX_EPT_READABLE_MASK);
	if (!execonly)
		spte |= PT_PRESENT_MASK;
	

>  	if (!speculative)
>  		spte |= shadow_accessed_mask;
> 
>  	if (enable_ept) {
> -		kvm_mmu_set_mask_ptes(0ull,
> +		kvm_mmu_set_mask_ptes(PT_PRESENT_MASK,

This should be VMX_EPT_READABLE_MASK.

> @@ -2531,6 +2535,7 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
>  	else
>  		spte |= shadow_nx_mask;
>  
> +	/* In the EPT case, shadow_user_mask is PT_PRESENT_MASK */

I don't think this comment is necessary, but it's better to add one in
FNAME(gpte_access).

	/*
	 * In the EPT case, a page table can be executable but not
	 * readable (on some processors).  Therefore, set_spte does not
	 * automatically set bit 0 if execute-only is supported.
	 * Instead, since EPT page tables do not have a U bit, we
	 * repurpose ACC_USER_MASK to signify readability.  Likewise,
	 * when EPT is in use shadow_user_mask is set to
	 * VMX_EPT_READABLE_MASK.
	 */
	

Thanks,

Paolo

>  	if (pte_access & ACC_USER_MASK)
>  		spte |= shadow_user_mask;


Paolo

>  			(enable_ept_ad_bits) ? VMX_EPT_ACCESS_BIT : 0ull,
>  			(enable_ept_ad_bits) ? VMX_EPT_DIRTY_BIT : 0ull,
>  			0ull, VMX_EPT_EXECUTABLE_MASK);

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Bandan Das June 28, 2016, 5:30 p.m. UTC | #2
Paolo Bonzini <pbonzini@redhat.com> writes:

> On 28/06/2016 06:32, Bandan Das wrote:
>> +	bool execonly = !(context->guest_rsvd_check.bad_mt_xwr &
>> +			  (1ull << VMX_EPT_EXECUTABLE_MASK));
>>  
>>  	if (set_mmio_spte(vcpu, sptep, gfn, pfn, pte_access))
>>  		return 0;
>>  
>> -	spte = PT_PRESENT_MASK;
>> +	if (!execonly)
>> +		spte |= PT_PRESENT_MASK;
>
> This needs a comment:
>
> 	/*
> 	 * There are two cases in which execonly is false: 1) for
> 	 * non-EPT page tables, in which case we need to set the
> 	 * P bit; 2) for EPT page tables where an X-- page table
> 	 * entry is invalid, in which case we need to force the R
> 	 * bit of the page table entry to 1.
> 	 */

I think this should be: 2) for EPT page tables where an X-- page
table entry is invalid and an EPT misconfig is injected into the guest
before we reach here.

> 	BUILD_BUG_ON(PT_PRESENT_MASK != VMX_EPT_READABLE_MASK);
> 	if (!execonly)
> 		spte |= PT_PRESENT_MASK;
> 	
>
>>  	if (!speculative)
>>  		spte |= shadow_accessed_mask;
>> 
>>  	if (enable_ept) {
>> -		kvm_mmu_set_mask_ptes(0ull,
>> +		kvm_mmu_set_mask_ptes(PT_PRESENT_MASK,
>
> This should be VMX_EPT_READABLE_MASK.
>
>> @@ -2531,6 +2535,7 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
>>  	else
>>  		spte |= shadow_nx_mask;
>>  
>> +	/* In the EPT case, shadow_user_mask is PT_PRESENT_MASK */
>
> I don't think this comment is necessary, but it's better to add one in
> FNAME(gpte_access).
>
> 	/*
> 	 * In the EPT case, a page table can be executable but not
> 	 * readable (on some processors).  Therefore, set_spte does not
> 	 * automatically set bit 0 if execute-only is supported.
> 	 * Instead, since EPT page tables do not have a U bit, we
> 	 * repurpose ACC_USER_MASK to signify readability.  Likewise,
> 	 * when EPT is in use shadow_user_mask is set to
> 	 * VMX_EPT_READABLE_MASK.
> 	 */
> 	
>
> Thanks,
>
> Paolo
>
>>  	if (pte_access & ACC_USER_MASK)
>>  		spte |= shadow_user_mask;
>
>
> Paolo
>
>>  			(enable_ept_ad_bits) ? VMX_EPT_ACCESS_BIT : 0ull,
>>  			(enable_ept_ad_bits) ? VMX_EPT_DIRTY_BIT : 0ull,
>>  			0ull, VMX_EPT_EXECUTABLE_MASK);
>
> --
> To unsubscribe from this list: send the line "unsubscribe kvm" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Paolo Bonzini June 28, 2016, 8:21 p.m. UTC | #3
On 28/06/2016 19:30, Bandan Das wrote:
> Paolo Bonzini <pbonzini@redhat.com> writes:
>> On 28/06/2016 06:32, Bandan Das wrote:
>>> +	bool execonly = !(context->guest_rsvd_check.bad_mt_xwr &
>>> +			  (1ull << VMX_EPT_EXECUTABLE_MASK));
>>>  
>>>  	if (set_mmio_spte(vcpu, sptep, gfn, pfn, pte_access))
>>>  		return 0;
>>>  
>>> -	spte = PT_PRESENT_MASK;
>>> +	if (!execonly)
>>> +		spte |= PT_PRESENT_MASK;
>>
>> This needs a comment:
>>
>> 	/*
>> 	 * There are two cases in which execonly is false: 1) for
>> 	 * non-EPT page tables, in which case we need to set the
>> 	 * P bit; 2) for EPT page tables where an X-- page table
>> 	 * entry is invalid, in which case we need to force the R
>> 	 * bit of the page table entry to 1.
>> 	 */
> 
> I think this should be: 2) for EPT page tables where an X-- page
> table entry is invalid and an EPT misconfig is injected into the guest
> before we reach here.

Right, I messed this one up.  shadow_user_mask and ACC_USER_MASK work
the same way on processors that do not support execonly.

Paolo

>> 	BUILD_BUG_ON(PT_PRESENT_MASK != VMX_EPT_READABLE_MASK);
>> 	if (!execonly)
>> 		spte |= PT_PRESENT_MASK;
>> 	
>>
>>>  	if (!speculative)
>>>  		spte |= shadow_accessed_mask;
>>>
>>>  	if (enable_ept) {
>>> -		kvm_mmu_set_mask_ptes(0ull,
>>> +		kvm_mmu_set_mask_ptes(PT_PRESENT_MASK,
>>
>> This should be VMX_EPT_READABLE_MASK.
>>
>>> @@ -2531,6 +2535,7 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
>>>  	else
>>>  		spte |= shadow_nx_mask;
>>>  
>>> +	/* In the EPT case, shadow_user_mask is PT_PRESENT_MASK */
>>
>> I don't think this comment is necessary, but it's better to add one in
>> FNAME(gpte_access).
>>
>> 	/*
>> 	 * In the EPT case, a page table can be executable but not
>> 	 * readable (on some processors).  Therefore, set_spte does not
>> 	 * automatically set bit 0 if execute-only is supported.
>> 	 * Instead, since EPT page tables do not have a U bit, we
>> 	 * repurpose ACC_USER_MASK to signify readability.  Likewise,
>> 	 * when EPT is in use shadow_user_mask is set to
>> 	 * VMX_EPT_READABLE_MASK.
>> 	 */
>> 	
>>
>> Thanks,
>>
>> Paolo
>>
>>>  	if (pte_access & ACC_USER_MASK)
>>>  		spte |= shadow_user_mask;
>>
>>
>> Paolo
>>
>>>  			(enable_ept_ad_bits) ? VMX_EPT_ACCESS_BIT : 0ull,
>>>  			(enable_ept_ad_bits) ? VMX_EPT_DIRTY_BIT : 0ull,
>>>  			0ull, VMX_EPT_EXECUTABLE_MASK);
>>
>> --
>> To unsubscribe from this list: send the line "unsubscribe kvm" in
>> the body of a message to majordomo@vger.kernel.org
>> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> 
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Xiao Guangrong June 29, 2016, 3:17 a.m. UTC | #4
On 06/28/2016 12:32 PM, Bandan Das wrote:
> To support execute only mappings on behalf of L1
> hypervisors, we teach set_spte() to honor L1's valid XWR
> bits. This is only if host supports EPT execute only. Reuse
> ACC_USER_MASK to signify if the L1 hypervisor has the R bit
> set
>
> Signed-off-by: Bandan Das <bsd@redhat.com>
> ---
>   arch/x86/kvm/mmu.c         | 9 +++++++--
>   arch/x86/kvm/paging_tmpl.h | 2 +-
>   arch/x86/kvm/vmx.c         | 2 +-
>   3 files changed, 9 insertions(+), 4 deletions(-)
>
> diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
> index 875d4f7..ee2fb16 100644
> --- a/arch/x86/kvm/mmu.c
> +++ b/arch/x86/kvm/mmu.c
> @@ -2516,13 +2516,17 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
>   		    gfn_t gfn, kvm_pfn_t pfn, bool speculative,
>   		    bool can_unsync, bool host_writable)
>   {
> -	u64 spte;
> +	u64 spte = 0;
>   	int ret = 0;
> +	struct kvm_mmu *context = &vcpu->arch.mmu;
> +	bool execonly = !(context->guest_rsvd_check.bad_mt_xwr &
> +			  (1ull << VMX_EPT_EXECUTABLE_MASK));

Could we introduce a new field, say execonly, to "struct kvm_mmu"?
That would make the code be more clearer.
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Paolo Bonzini June 29, 2016, 8:18 a.m. UTC | #5
On 29/06/2016 05:17, Xiao Guangrong wrote:
>>
>> +++ b/arch/x86/kvm/mmu.c
>> @@ -2516,13 +2516,17 @@ static int set_spte(struct kvm_vcpu *vcpu, u64
>> *sptep,
>>               gfn_t gfn, kvm_pfn_t pfn, bool speculative,
>>               bool can_unsync, bool host_writable)
>>   {
>> -    u64 spte;
>> +    u64 spte = 0;
>>       int ret = 0;
>> +    struct kvm_mmu *context = &vcpu->arch.mmu;
>> +    bool execonly = !(context->guest_rsvd_check.bad_mt_xwr &
>> +              (1ull << VMX_EPT_EXECUTABLE_MASK));
> 
> Could we introduce a new field, say execonly, to "struct kvm_mmu"?
> That would make the code be more clearer.

Given how execonly is used, let's add shadow_present_mask instead.

Paolo
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Xiao Guangrong June 30, 2016, 7:18 a.m. UTC | #6
On 06/29/2016 04:18 PM, Paolo Bonzini wrote:
>
>
> On 29/06/2016 05:17, Xiao Guangrong wrote:
>>>
>>> +++ b/arch/x86/kvm/mmu.c
>>> @@ -2516,13 +2516,17 @@ static int set_spte(struct kvm_vcpu *vcpu, u64
>>> *sptep,
>>>                gfn_t gfn, kvm_pfn_t pfn, bool speculative,
>>>                bool can_unsync, bool host_writable)
>>>    {
>>> -    u64 spte;
>>> +    u64 spte = 0;
>>>        int ret = 0;
>>> +    struct kvm_mmu *context = &vcpu->arch.mmu;
>>> +    bool execonly = !(context->guest_rsvd_check.bad_mt_xwr &
>>> +              (1ull << VMX_EPT_EXECUTABLE_MASK));
>>
>> Could we introduce a new field, say execonly, to "struct kvm_mmu"?
>> That would make the code be more clearer.
>
> Given how execonly is used, let's add shadow_present_mask instead.

Yup, it is better.

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Wanpeng Li July 5, 2016, 5:50 a.m. UTC | #7
2016-06-28 16:57 GMT+08:00 Paolo Bonzini <pbonzini@redhat.com>:
>
>
> On 28/06/2016 06:32, Bandan Das wrote:
>> +     bool execonly = !(context->guest_rsvd_check.bad_mt_xwr &
>> +                       (1ull << VMX_EPT_EXECUTABLE_MASK));
>>
>>       if (set_mmio_spte(vcpu, sptep, gfn, pfn, pte_access))
>>               return 0;
>>
>> -     spte = PT_PRESENT_MASK;
>> +     if (!execonly)
>> +             spte |= PT_PRESENT_MASK;
>
> This needs a comment:
>
>         /*
>          * There are two cases in which execonly is false: 1) for
>          * non-EPT page tables, in which case we need to set the
>          * P bit; 2) for EPT page tables where an X-- page table

In the scenario of non-EPT shadow page table and non-nested, the
present bit can't be set any more since
context->guest_rsvd_check.bad_mt_xwr is always 0.

Regards,
Wanpeng Li
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Paolo Bonzini July 5, 2016, 10:50 a.m. UTC | #8
On 05/07/2016 07:50, Wanpeng Li wrote:
>> > This needs a comment:
>> >
>> >         /*
>> >          * There are two cases in which execonly is false: 1) for
>> >          * non-EPT page tables, in which case we need to set the
>> >          * P bit; 2) for EPT page tables where an X-- page table
> In the scenario of non-EPT shadow page table and non-nested, the
> present bit can't be set any more since
> context->guest_rsvd_check.bad_mt_xwr is always 0.

This will be fixed with a new shadow_present_mask variable.

Paolo
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 875d4f7..ee2fb16 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -2516,13 +2516,17 @@  static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
 		    gfn_t gfn, kvm_pfn_t pfn, bool speculative,
 		    bool can_unsync, bool host_writable)
 {
-	u64 spte;
+	u64 spte = 0;
 	int ret = 0;
+	struct kvm_mmu *context = &vcpu->arch.mmu;
+	bool execonly = !(context->guest_rsvd_check.bad_mt_xwr &
+			  (1ull << VMX_EPT_EXECUTABLE_MASK));
 
 	if (set_mmio_spte(vcpu, sptep, gfn, pfn, pte_access))
 		return 0;
 
-	spte = PT_PRESENT_MASK;
+	if (!execonly)
+		spte |= PT_PRESENT_MASK;
 	if (!speculative)
 		spte |= shadow_accessed_mask;
 
@@ -2531,6 +2535,7 @@  static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
 	else
 		spte |= shadow_nx_mask;
 
+	/* In the EPT case, shadow_user_mask is PT_PRESENT_MASK */
 	if (pte_access & ACC_USER_MASK)
 		spte |= shadow_user_mask;
 
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
index bc019f7..896118e 100644
--- a/arch/x86/kvm/paging_tmpl.h
+++ b/arch/x86/kvm/paging_tmpl.h
@@ -187,7 +187,7 @@  static inline unsigned FNAME(gpte_access)(struct kvm_vcpu *vcpu, u64 gpte)
 #if PTTYPE == PTTYPE_EPT
 	access = ((gpte & VMX_EPT_WRITABLE_MASK) ? ACC_WRITE_MASK : 0) |
 		((gpte & VMX_EPT_EXECUTABLE_MASK) ? ACC_EXEC_MASK : 0) |
-		ACC_USER_MASK;
+		((gpte & VMX_EPT_READABLE_MASK) ? ACC_USER_MASK : 0);
 #else
 	BUILD_BUG_ON(ACC_EXEC_MASK != PT_PRESENT_MASK);
 	BUILD_BUG_ON(ACC_EXEC_MASK != 1);
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 003618e..417debc 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -6366,7 +6366,7 @@  static __init int hardware_setup(void)
 	vmx_disable_intercept_msr_write_x2apic(0x83f);
 
 	if (enable_ept) {
-		kvm_mmu_set_mask_ptes(0ull,
+		kvm_mmu_set_mask_ptes(PT_PRESENT_MASK,
 			(enable_ept_ad_bits) ? VMX_EPT_ACCESS_BIT : 0ull,
 			(enable_ept_ad_bits) ? VMX_EPT_DIRTY_BIT : 0ull,
 			0ull, VMX_EPT_EXECUTABLE_MASK);