Message ID | 50FFB62C.4070808@linux.vnet.ibm.com
---|---
State | New, archived
On Wed, Jan 23, 2013 at 06:06:36PM +0800, Xiao Guangrong wrote:
> Introduce it to split the code of adjusting pte_access from the large
> function of set_spte
>
> Signed-off-by: Xiao Guangrong <xiaoguangrong@linux.vnet.ibm.com>
> ---
>  arch/x86/kvm/mmu.c |   63 +++++++++++++++++++++++++++++++++-------------------
>  1 files changed, 40 insertions(+), 23 deletions(-)
>
> diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
> index af8bcb2..43b7e0c 100644
> --- a/arch/x86/kvm/mmu.c
> +++ b/arch/x86/kvm/mmu.c
> @@ -2324,25 +2324,18 @@ static int mmu_need_write_protect(struct kvm_vcpu *vcpu, gfn_t gfn,
>  	return 0;
>  }
>
> -static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
> -		    unsigned pte_access, int level,
> -		    gfn_t gfn, pfn_t pfn, bool speculative,
> -		    bool can_unsync, bool host_writable)
> +/*
> + * Return -1 if a race condition is detected, 1 if @gfn needs to be
> + * write-protected, otherwise 0 is returned.
> + */

That's a little bit crafty.

Isn't it better to handle the race condition in set_spte() explicitly?
Something like:

	if (host_writable && (pte_access & ACC_WRITE_MASK) &&
	    level > PT_PAGE_TABLE_LEVEL &&
	    has_wrprotected_page(vcpu->kvm, gfn, level))
		return 0;

before calling vcpu_adjust_access() in set_spte()?

Or even:

	if ((pte_access & ACC_WRITE_MASK) && level > PT_PAGE_TABLE_LEVEL &&
	    has_wrprotected_page(vcpu->kvm, gfn, level))
		return 0;

after calling vcpu_adjust_access().

The latter will create a read-only large page mapping where one is not
created now, but it shouldn't be a problem as far as I can see.

> +static int vcpu_adjust_access(struct kvm_vcpu *vcpu, u64 *sptep,
> +			      unsigned *pte_access, int level, gfn_t gfn,
> +			      bool can_unsync, bool host_writable)
>  {
> -	u64 spte;
> -	int ret = 0;
> -
> -	if (set_mmio_spte(sptep, gfn, pfn, pte_access))
> -		return 0;
> +	if (!host_writable)
> +		*pte_access &= ~ACC_WRITE_MASK;
>
> -	spte = PT_PRESENT_MASK;
> -
> -	if (host_writable)
> -		spte |= SPTE_HOST_WRITEABLE;
> -	else
> -		pte_access &= ~ACC_WRITE_MASK;
> -
> -	if (pte_access & ACC_WRITE_MASK) {
> +	if (*pte_access & ACC_WRITE_MASK) {
>  		/*
>  		 * Other vcpu creates new sp in the window between
>  		 * mapping_level() and acquiring mmu-lock. We can
> @@ -2351,7 +2344,7 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
>  		 */
>  		if (level > PT_PAGE_TABLE_LEVEL &&
>  		    has_wrprotected_page(vcpu->kvm, gfn, level))
> -			goto done;
> +			return -1;
>
>  		/*
>  		 * Optimization: for pte sync, if spte was writable the hash
> @@ -2360,17 +2353,41 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
>  		 * Same reasoning can be applied to dirty page accounting.
>  		 */
>  		if (!can_unsync && is_writable_pte(*sptep))
> -			goto out_access_adjust;
> +			return 0;
>
>  		if (mmu_need_write_protect(vcpu, gfn, can_unsync)) {
>  			pgprintk("%s: found shadow page for %llx, marking ro\n",
>  				 __func__, gfn);
> -			ret = 1;
> -			pte_access &= ~ACC_WRITE_MASK;
> +
> +			*pte_access &= ~ACC_WRITE_MASK;
> +			return 1;
>  		}
>  	}
>
> -out_access_adjust:
> +	return 0;
> +}
> +
> +static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
> +		    unsigned pte_access, int level,
> +		    gfn_t gfn, pfn_t pfn, bool speculative,
> +		    bool can_unsync, bool host_writable)
> +{
> +	u64 spte;
> +	int ret;
> +
> +	if (set_mmio_spte(sptep, gfn, pfn, pte_access))
> +		return 0;
> +
> +	ret = vcpu_adjust_access(vcpu, sptep, &pte_access, level, gfn,
> +				 can_unsync, host_writable);
> +	if (ret < 0)
> +		return 0;
> +
> +	spte = PT_PRESENT_MASK;
> +
> +	if (host_writable)
> +		spte |= SPTE_HOST_WRITEABLE;
> +
>  	if (!speculative)
>  		spte |= shadow_accessed_mask;
>
> @@ -2399,7 +2416,7 @@ out_access_adjust:
>
>  	if (mmu_spte_update(sptep, spte))
>  		kvm_flush_remote_tlbs(vcpu->kvm);
> -done:
> +
>  	return ret;
>  }
>
> --
> 1.7.7.6

--
			Gleb.
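A note on why the two suggested checks differ: vcpu_adjust_access() clears
ACC_WRITE_MASK when !host_writable, so a check placed before the call must
test host_writable itself, while a check placed after the call sees the
already-adjusted pte_access; that is also why only the second placement can
end up installing a read-only large-page mapping. A minimal sketch of the
two alternative placements inside set_spte() (an editorial illustration,
not code posted in this thread):

```c
	/*
	 * Alternative 1: before vcpu_adjust_access(). pte_access is
	 * still unadjusted, so host_writable is tested explicitly;
	 * no mapping is created and the guest simply refaults.
	 */
	if (host_writable && (pte_access & ACC_WRITE_MASK) &&
	    level > PT_PAGE_TABLE_LEVEL &&
	    has_wrprotected_page(vcpu->kvm, gfn, level))
		return 0;

	/*
	 * Alternative 2: after vcpu_adjust_access(). If the helper has
	 * already cleared ACC_WRITE_MASK, the condition is false and a
	 * read-only large-page mapping is installed instead.
	 */
	if ((pte_access & ACC_WRITE_MASK) && level > PT_PAGE_TABLE_LEVEL &&
	    has_wrprotected_page(vcpu->kvm, gfn, level))
		return 0;
```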
On 01/24/2013 06:36 PM, Gleb Natapov wrote:
> On Wed, Jan 23, 2013 at 06:06:36PM +0800, Xiao Guangrong wrote:
>> Introduce it to split the code of adjusting pte_access from the large
>> function of set_spte
>>
>> Signed-off-by: Xiao Guangrong <xiaoguangrong@linux.vnet.ibm.com>
>> ---
>>  arch/x86/kvm/mmu.c |   63 +++++++++++++++++++++++++++++++++-------------------
>>  1 files changed, 40 insertions(+), 23 deletions(-)
>>
>> diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
>> index af8bcb2..43b7e0c 100644
>> --- a/arch/x86/kvm/mmu.c
>> +++ b/arch/x86/kvm/mmu.c
>> @@ -2324,25 +2324,18 @@ static int mmu_need_write_protect(struct kvm_vcpu *vcpu, gfn_t gfn,
>>  	return 0;
>>  }
>>
>> -static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
>> -		    unsigned pte_access, int level,
>> -		    gfn_t gfn, pfn_t pfn, bool speculative,
>> -		    bool can_unsync, bool host_writable)
>> +/*
>> + * Return -1 if a race condition is detected, 1 if @gfn needs to be
>> + * write-protected, otherwise 0 is returned.
>> + */
> That's a little bit crafty.
>
> Isn't it better to handle the race condition in set_spte() explicitly?
> Something like:
>
> 	if (host_writable && (pte_access & ACC_WRITE_MASK) &&
> 	    level > PT_PAGE_TABLE_LEVEL &&
> 	    has_wrprotected_page(vcpu->kvm, gfn, level))
> 		return 0;
>
> before calling vcpu_adjust_access() in set_spte()?
>
> Or even:
>
> 	if ((pte_access & ACC_WRITE_MASK) && level > PT_PAGE_TABLE_LEVEL &&
> 	    has_wrprotected_page(vcpu->kvm, gfn, level))
> 		return 0;
>
> after calling vcpu_adjust_access().
>
> The latter will create a read-only large page mapping where one is not
> created now, but it shouldn't be a problem as far as I can see.

Yes. I like the latter way. Will update it.

Thanks for your suggestion, Gleb!
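To make the agreed-upon direction concrete, here is a minimal sketch of
set_spte() with Gleb's second suggestion applied (an illustration of the
discussion above, not the actual updated patch that followed):

```c
static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
		    unsigned pte_access, int level,
		    gfn_t gfn, pfn_t pfn, bool speculative,
		    bool can_unsync, bool host_writable)
{
	u64 spte;
	int ret;

	if (set_mmio_spte(sptep, gfn, pfn, pte_access))
		return 0;

	/*
	 * The helper now only adjusts pte_access and reports whether
	 * @gfn must be write-protected; it no longer detects the race.
	 */
	ret = vcpu_adjust_access(vcpu, sptep, &pte_access, level, gfn,
				 can_unsync, host_writable);

	/*
	 * Another vcpu may have created a write-protected large sp in
	 * the window between mapping_level() and acquiring mmu-lock.
	 * If pte_access was already made read-only above, this test is
	 * false and a read-only large mapping is installed instead.
	 */
	if ((pte_access & ACC_WRITE_MASK) && level > PT_PAGE_TABLE_LEVEL &&
	    has_wrprotected_page(vcpu->kvm, gfn, level))
		return 0;

	spte = PT_PRESENT_MASK;
	if (host_writable)
		spte |= SPTE_HOST_WRITEABLE;

	/* ... build the rest of the spte and install it as in the patch ... */

	return ret;
}
```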
```diff
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index af8bcb2..43b7e0c 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -2324,25 +2324,18 @@ static int mmu_need_write_protect(struct kvm_vcpu *vcpu, gfn_t gfn,
 	return 0;
 }
 
-static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
-		    unsigned pte_access, int level,
-		    gfn_t gfn, pfn_t pfn, bool speculative,
-		    bool can_unsync, bool host_writable)
+/*
+ * Return -1 if a race condition is detected, 1 if @gfn needs to be
+ * write-protected, otherwise 0 is returned.
+ */
+static int vcpu_adjust_access(struct kvm_vcpu *vcpu, u64 *sptep,
+			      unsigned *pte_access, int level, gfn_t gfn,
+			      bool can_unsync, bool host_writable)
 {
-	u64 spte;
-	int ret = 0;
-
-	if (set_mmio_spte(sptep, gfn, pfn, pte_access))
-		return 0;
+	if (!host_writable)
+		*pte_access &= ~ACC_WRITE_MASK;
 
-	spte = PT_PRESENT_MASK;
-
-	if (host_writable)
-		spte |= SPTE_HOST_WRITEABLE;
-	else
-		pte_access &= ~ACC_WRITE_MASK;
-
-	if (pte_access & ACC_WRITE_MASK) {
+	if (*pte_access & ACC_WRITE_MASK) {
 		/*
 		 * Other vcpu creates new sp in the window between
 		 * mapping_level() and acquiring mmu-lock. We can
@@ -2351,7 +2344,7 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
 		 */
 		if (level > PT_PAGE_TABLE_LEVEL &&
 		    has_wrprotected_page(vcpu->kvm, gfn, level))
-			goto done;
+			return -1;
 
 		/*
 		 * Optimization: for pte sync, if spte was writable the hash
@@ -2360,17 +2353,41 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
 		 * Same reasoning can be applied to dirty page accounting.
 		 */
 		if (!can_unsync && is_writable_pte(*sptep))
-			goto out_access_adjust;
+			return 0;
 
 		if (mmu_need_write_protect(vcpu, gfn, can_unsync)) {
 			pgprintk("%s: found shadow page for %llx, marking ro\n",
 				 __func__, gfn);
-			ret = 1;
-			pte_access &= ~ACC_WRITE_MASK;
+
+			*pte_access &= ~ACC_WRITE_MASK;
+			return 1;
 		}
 	}
 
-out_access_adjust:
+	return 0;
+}
+
+static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
+		    unsigned pte_access, int level,
+		    gfn_t gfn, pfn_t pfn, bool speculative,
+		    bool can_unsync, bool host_writable)
+{
+	u64 spte;
+	int ret;
+
+	if (set_mmio_spte(sptep, gfn, pfn, pte_access))
+		return 0;
+
+	ret = vcpu_adjust_access(vcpu, sptep, &pte_access, level, gfn,
+				 can_unsync, host_writable);
+	if (ret < 0)
+		return 0;
+
+	spte = PT_PRESENT_MASK;
+
+	if (host_writable)
+		spte |= SPTE_HOST_WRITEABLE;
+
 	if (!speculative)
 		spte |= shadow_accessed_mask;
 
@@ -2399,7 +2416,7 @@ out_access_adjust:
 
 	if (mmu_spte_update(sptep, spte))
 		kvm_flush_remote_tlbs(vcpu->kvm);
-done:
+
 	return ret;
 }
```
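For context on how the 0/1 return value is consumed: set_spte() reports to
its caller whether @gfn had to be write-protected, and the caller responds
by flushing remote TLBs. Roughly, in mmu_set_spte() of that era
(paraphrased for illustration; this hunk is not part of the patch above):

```c
	if (set_spte(vcpu, sptep, pte_access, level, gfn, pfn,
		     speculative, true /* can_unsync */, host_writable)) {
		/*
		 * The spte was made (or kept) read-only: emulate the
		 * faulting write and flush so that no stale writable
		 * translation survives on other vcpus.
		 */
		if (write_fault)
			*emulate = 1;
		kvm_flush_remote_tlbs(vcpu->kvm);
	}
```

The -1 case never reaches the caller; set_spte() converts it into "no spte
installed" by returning 0 early, so the guest simply refaults and retries.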
Introduce vcpu_adjust_access() to split the code that adjusts pte_access
out of the large set_spte() function.

Signed-off-by: Xiao Guangrong <xiaoguangrong@linux.vnet.ibm.com>
---
 arch/x86/kvm/mmu.c |   63 +++++++++++++++++++++++++++++++++-------------------
 1 files changed, 40 insertions(+), 23 deletions(-)