[v2,05/12] KVM: MMU: introduce vcpu_adjust_access

Message ID 50FFB62C.4070808@linux.vnet.ibm.com
State New, archived

Commit Message

Xiao Guangrong Jan. 23, 2013, 10:06 a.m. UTC
Introduce vcpu_adjust_access() to split the pte_access adjustment code
out of the large set_spte() function.

Signed-off-by: Xiao Guangrong <xiaoguangrong@linux.vnet.ibm.com>
---
 arch/x86/kvm/mmu.c |   63 +++++++++++++++++++++++++++++++++-------------------
 1 files changed, 40 insertions(+), 23 deletions(-)

Comments

Gleb Natapov Jan. 24, 2013, 10:36 a.m. UTC | #1
On Wed, Jan 23, 2013 at 06:06:36PM +0800, Xiao Guangrong wrote:
> Introduce vcpu_adjust_access() to split the pte_access adjustment code
> out of the large set_spte() function.
> 
> Signed-off-by: Xiao Guangrong <xiaoguangrong@linux.vnet.ibm.com>
> ---
>  arch/x86/kvm/mmu.c |   63 +++++++++++++++++++++++++++++++++-------------------
>  1 files changed, 40 insertions(+), 23 deletions(-)
> 
> diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
> index af8bcb2..43b7e0c 100644
> --- a/arch/x86/kvm/mmu.c
> +++ b/arch/x86/kvm/mmu.c
> @@ -2324,25 +2324,18 @@ static int mmu_need_write_protect(struct kvm_vcpu *vcpu, gfn_t gfn,
>  	return 0;
>  }
> 
> -static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
> -		    unsigned pte_access, int level,
> -		    gfn_t gfn, pfn_t pfn, bool speculative,
> -		    bool can_unsync, bool host_writable)
> +/*
> + * Return -1 if a race condition is detected, 1 if @gfn needs to be
> + * write-protected, otherwise 0 is returned.
> + */
That's a little bit crafty.

Isn't it better to handle the race condition in set_spte() explicitly?
Something like:

 if (host_writable && (pte_access & ACC_WRITE_MASK) &&
        level > PT_PAGE_TABLE_LEVEL && has_wrprotected_page(vcpu->kvm, gfn, level))
    return 0;

before calling vcpu_adjust_access() in set_spte()?

Or even do:

 if ((pte_access & ACC_WRITE_MASK) && level > PT_PAGE_TABLE_LEVEL &&
           has_wrprotected_page(vcpu->kvm, gfn, level))
    return 0;

After calling vcpu_adjust_access().

The latter will create a read-only large page mapping where one is not
created now, but it shouldn't be a problem as far as I can see.
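
For illustration, set_spte() with the second option would then look
roughly like this (untested sketch; vcpu_adjust_access() could drop
the -1 return value entirely):

 ret = vcpu_adjust_access(vcpu, sptep, &pte_access, level, gfn,
			  can_unsync, host_writable);

 /*
  * The race with another vcpu creating a new sp between
  * mapping_level() and acquiring mmu-lock is handled here, after
  * pte_access has been adjusted; if write access was already
  * stripped, a read-only large page mapping gets installed
  * instead of bailing out.
  */
 if ((pte_access & ACC_WRITE_MASK) && level > PT_PAGE_TABLE_LEVEL &&
        has_wrprotected_page(vcpu->kvm, gfn, level))
    return 0;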

> [... remainder of the quoted patch snipped; the full patch appears below ...]

--
			Gleb.
Xiao Guangrong Jan. 24, 2013, 11:33 a.m. UTC | #2
On 01/24/2013 06:36 PM, Gleb Natapov wrote:
> On Wed, Jan 23, 2013 at 06:06:36PM +0800, Xiao Guangrong wrote:
>> Introduce vcpu_adjust_access() to split the pte_access adjustment code
>> out of the large set_spte() function.
>>
>> Signed-off-by: Xiao Guangrong <xiaoguangrong@linux.vnet.ibm.com>
>> ---
>>  arch/x86/kvm/mmu.c |   63 +++++++++++++++++++++++++++++++++-------------------
>>  1 files changed, 40 insertions(+), 23 deletions(-)
>>
>> diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
>> index af8bcb2..43b7e0c 100644
>> --- a/arch/x86/kvm/mmu.c
>> +++ b/arch/x86/kvm/mmu.c
>> @@ -2324,25 +2324,18 @@ static int mmu_need_write_protect(struct kvm_vcpu *vcpu, gfn_t gfn,
>>  	return 0;
>>  }
>>
>> -static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
>> -		    unsigned pte_access, int level,
>> -		    gfn_t gfn, pfn_t pfn, bool speculative,
>> -		    bool can_unsync, bool host_writable)
>> +/*
>> + * Return -1 if a race condition is detected, 1 if @gfn needs to be
>> + * write-protected, otherwise 0 is returned.
>> + */
> That's a little bit crafty.
> 
> Isn't it better to handle the race condition in set_spte() explicitly?
> Something like:
> 
>  if (host_writable && (pte_access & ACC_WRITE_MASK) &&
>         level > PT_PAGE_TABLE_LEVEL && has_wrprotected_page(vcpu->kvm, gfn, level))
>     return 0;
> 
> before calling vcpu_adjust_access() in set_spte()?
> 
> Or even do:
> 
>  if ((pte_access & ACC_WRITE_MASK) && level > PT_PAGE_TABLE_LEVEL &&
>            has_wrprotected_page(vcpu->kvm, gfn, level))
>     return 0;
> 
> After calling vcpu_adjust_access().
> 
> The latter will create a read-only large page mapping where one is not
> created now, but it shouldn't be a problem as far as I can see.

Yes. I like the latter way. Will update it. Thanks for your suggestion, Gleb!
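
Presumably the updated helper would then lose the -1 return (the level
parameter also becomes unused once the race check moves out to the
caller) and shrink to something like this sketch — not the actual
follow-up patch:

 /* Return true if @gfn needs to be write-protected, false otherwise. */
 static bool vcpu_adjust_access(struct kvm_vcpu *vcpu, u64 *sptep,
			       unsigned *pte_access, gfn_t gfn,
			       bool can_unsync, bool host_writable)
 {
	if (!host_writable)
		*pte_access &= ~ACC_WRITE_MASK;

	if (*pte_access & ACC_WRITE_MASK) {
		/* the large-page race check moves out to set_spte() */
		if (!can_unsync && is_writable_pte(*sptep))
			return false;

		if (mmu_need_write_protect(vcpu, gfn, can_unsync)) {
			pgprintk("%s: found shadow page for %llx, marking ro\n",
				 __func__, gfn);
			*pte_access &= ~ACC_WRITE_MASK;
			return true;
		}
	}

	return false;
 }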


Patch

diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index af8bcb2..43b7e0c 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -2324,25 +2324,18 @@  static int mmu_need_write_protect(struct kvm_vcpu *vcpu, gfn_t gfn,
 	return 0;
 }

-static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
-		    unsigned pte_access, int level,
-		    gfn_t gfn, pfn_t pfn, bool speculative,
-		    bool can_unsync, bool host_writable)
+/*
+ * Return -1 if a race condition is detected, 1 if @gfn needs to be
+ * write-protected, otherwise 0 is returned.
+ */
+static int vcpu_adjust_access(struct kvm_vcpu *vcpu, u64 *sptep,
+			      unsigned *pte_access, int level, gfn_t gfn,
+			      bool can_unsync, bool host_writable)
 {
-	u64 spte;
-	int ret = 0;
-
-	if (set_mmio_spte(sptep, gfn, pfn, pte_access))
-		return 0;
+	if (!host_writable)
+		*pte_access &= ~ACC_WRITE_MASK;

-	spte = PT_PRESENT_MASK;
-
-	if (host_writable)
-		spte |= SPTE_HOST_WRITEABLE;
-	else
-		pte_access &= ~ACC_WRITE_MASK;
-
-	if (pte_access & ACC_WRITE_MASK) {
+	if (*pte_access & ACC_WRITE_MASK) {
 		/*
 		 * Other vcpu creates new sp in the window between
 		 * mapping_level() and acquiring mmu-lock. We can
@@ -2351,7 +2344,7 @@  static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
 		 */
 		if (level > PT_PAGE_TABLE_LEVEL &&
 		      has_wrprotected_page(vcpu->kvm, gfn, level))
-			goto done;
+			return -1;

 		/*
 		 * Optimization: for pte sync, if spte was writable the hash
@@ -2360,17 +2353,41 @@  static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
 		 * Same reasoning can be applied to dirty page accounting.
 		 */
 		if (!can_unsync && is_writable_pte(*sptep))
-			goto out_access_adjust;
+			return 0;

 		if (mmu_need_write_protect(vcpu, gfn, can_unsync)) {
 			pgprintk("%s: found shadow page for %llx, marking ro\n",
 				 __func__, gfn);
-			ret = 1;
-			pte_access &= ~ACC_WRITE_MASK;
+
+			*pte_access &= ~ACC_WRITE_MASK;
+			return 1;
 		}
 	}

-out_access_adjust:
+	return 0;
+}
+
+static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
+		    unsigned pte_access, int level,
+		    gfn_t gfn, pfn_t pfn, bool speculative,
+		    bool can_unsync, bool host_writable)
+{
+	u64 spte;
+	int ret;
+
+	if (set_mmio_spte(sptep, gfn, pfn, pte_access))
+		return 0;
+
+	ret = vcpu_adjust_access(vcpu, sptep, &pte_access, level, gfn,
+				 can_unsync, host_writable);
+	if (ret < 0)
+		return 0;
+
+	spte = PT_PRESENT_MASK;
+
+	if (host_writable)
+		spte |= SPTE_HOST_WRITEABLE;
+
 	if (!speculative)
 		spte |= shadow_accessed_mask;

@@ -2399,7 +2416,7 @@  out_access_adjust:

 	if (mmu_spte_update(sptep, spte))
 		kvm_flush_remote_tlbs(vcpu->kvm);
-done:
+
 	return ret;
 }