Message ID | 1252600738-9456-3-git-send-email-ieidus@redhat.com (mailing list archive)
State      | New, archived
On Thu, Sep 10, 2009 at 07:38:57PM +0300, Izik Eidus wrote:
> this flag notifies that the host physical page we are pointing to from
> the spte is write protected, and therefore we can't change its access
> to be writable unless we run get_user_pages(write = 1).
>
> (this is needed for change_pte support in kvm)
>
> Signed-off-by: Izik Eidus <ieidus@redhat.com>
> ---
>  arch/x86/kvm/mmu.c         |   15 +++++++++++----
>  arch/x86/kvm/paging_tmpl.h |   18 +++++++++++++++---
>  2 files changed, 26 insertions(+), 7 deletions(-)

[...]

> @@ -1754,7 +1756,7 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
>  		    unsigned pte_access, int user_fault,
>  		    int write_fault, int dirty, int level,
>  		    gfn_t gfn, pfn_t pfn, bool speculative,
> -		    bool can_unsync)
> +		    bool can_unsync, bool reset_host_protection)

bool host_pte_writeable ?

[...]

> @@ -558,6 +562,7 @@ static void FNAME(prefetch_page)(struct kvm_vcpu *vcpu,
>  static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
>  {
>  	int i, offset, nr_present;
> +	bool reset_host_protection;

Tab damaged.

[...]

>  	return !nr_present;

Looks good otherwise.
Marcelo Tosatti wrote:
> On Thu, Sep 10, 2009 at 07:38:57PM +0300, Izik Eidus wrote:
>> this flag notifies that the host physical page we are pointing to from
>> the spte is write protected, and therefore we can't change its access
>> to be writable unless we run get_user_pages(write = 1).
>>
>> (this is needed for change_pte support in kvm)
>>
>> Signed-off-by: Izik Eidus <ieidus@redhat.com>

[...]

>> -		    bool can_unsync)
>> +		    bool can_unsync, bool reset_host_protection)
>
> bool host_pte_writeable ?

Sure.
On 09/10/2009 07:38 PM, Izik Eidus wrote:
> this flag notifies that the host physical page we are pointing to from
> the spte is write protected, and therefore we can't change its access
> to be writable unless we run get_user_pages(write = 1).
>
> (this is needed for change_pte support in kvm)
>
> diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
> index 62d2f86..a7151b8 100644
> --- a/arch/x86/kvm/mmu.c
> +++ b/arch/x86/kvm/mmu.c
> @@ -156,6 +156,8 @@ module_param(oos_shadow, bool, 0644);
>  #define CREATE_TRACE_POINTS
>  #include "mmutrace.h"
>
> +#define SPTE_HOST_WRITEABLE (1ULL << PT_FIRST_AVAIL_BITS_SHIFT)
> +

Luckily, this bit is available on EPT too.

>  #define SHADOW_PT_INDEX(addr, level) PT64_INDEX(addr, level)

[...]

> @@ -595,9 +600,16 @@ static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
>
>  		nr_present++;
>  		pte_access = sp->role.access & FNAME(gpte_access)(vcpu, gpte);
> +		if (!(sp->spt[i] & SPTE_HOST_WRITEABLE)) {
> +			pte_access &= ~PT_WRITABLE_MASK;

pte_access uses ACC_ masks, not PT_ masks.
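For context on Avi's last point: in arch/x86/kvm/mmu.c of this era the ACC_* bits are defined on top of the PT_* bits (ACC_WRITE_MASK is PT_WRITABLE_MASK), so the flagged line is not a functional bug; the objection is that pte_access holds an ACC_* mask and should be manipulated with the matching names. A minimal sketch of the corrected hunk, assuming those definitions (illustrative, not the posted patch):

	if (!(sp->spt[i] & SPTE_HOST_WRITEABLE)) {
		/* pte_access is an ACC_* mask; clear the ACC_ write bit */
		pte_access &= ~ACC_WRITE_MASK;
		reset_host_protection = 0;
	} else {
		reset_host_protection = 1;
	}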
On Thu, Sep 10, 2009 at 07:38:57PM +0300, Izik Eidus wrote:
> this flag notifies that the host physical page we are pointing to from
> the spte is write protected, and therefore we can't change its access
> to be writable unless we run get_user_pages(write = 1).
>
> (this is needed for change_pte support in kvm)
>
> Signed-off-by: Izik Eidus <ieidus@redhat.com>

[...]

> @@ -595,9 +600,16 @@ static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
>
>  		nr_present++;
>  		pte_access = sp->role.access & FNAME(gpte_access)(vcpu, gpte);
> +		if (!(sp->spt[i] & SPTE_HOST_WRITEABLE)) {
> +			pte_access &= ~PT_WRITABLE_MASK;
> +			reset_host_protection = 0;
> +		} else {
> +			reset_host_protection = 1;
> +		}
>  		set_spte(vcpu, &sp->spt[i], pte_access, 0, 0,
>  			 is_dirty_gpte(gpte), PT_PAGE_TABLE_LEVEL, gfn,
> -			 spte_to_pfn(sp->spt[i]), true, false);
> +			 spte_to_pfn(sp->spt[i]), true, false,
> +			 reset_host_protection);

Why can't you use the writable bit in the spte? So that you can only
sync a writeable spte if it was writeable before, in sync_page?

Is there any other need for the extra bit?

>  	}
>
>  	return !nr_present;
Marcelo Tosatti wrote:
> Why can't you use the writable bit in the spte? So that you can only
> sync a writeable spte if it was writeable before, in sync_page?

I could, but then we would add overhead for read-only gptes that become
writable in the guest... If you prefer to take a fault when syncing a
gpte that went from read-only to writable, I can change it.

Which do you prefer?

> Is there any other need for the extra bit?

No
On Mon, Sep 14, 2009 at 07:51:16PM +0300, Izik Eidus wrote:
> Marcelo Tosatti wrote:
>> Why can't you use the writable bit in the spte? So that you can only
>> sync a writeable spte if it was writeable before, in sync_page?
>
> I could, but then we would add overhead for read-only gptes that become
> writable in the guest...
>
> Which do you prefer?

Oh yes, better keep the bit then.

>> Is there any other need for the extra bit?
>
> No
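To spell out the trade-off settled here: the hardware writable bit alone cannot distinguish "this spte is read-only because the guest pte is read-only" from "this spte is read-only because the host write-protected the page underneath us (e.g. KSM merged it)". Only the second case requires a new get_user_pages(write = 1). A sketch of the check sync_page needs, using a hypothetical helper name (illustrative, not part of the patch):

	/*
	 * Hypothetical helper: may sync_page hand out write access without
	 * going back through get_user_pages(write = 1)?
	 */
	static bool FNAME(may_resync_writable)(u64 spte, pt_element_t gpte)
	{
		/* the guest must permit writes through its pte... */
		if (!(gpte & PT_WRITABLE_MASK))
			return false;
		/*
		 * ...and the host page must still be writable.  A clear
		 * SPTE_HOST_WRITEABLE means the page may be shared (KSM) or
		 * COW-protected; making the spte writable here would let the
		 * guest scribble on a shared host page.
		 */
		return spte & SPTE_HOST_WRITEABLE;
	}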
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 62d2f86..a7151b8 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -156,6 +156,8 @@ module_param(oos_shadow, bool, 0644);
 #define CREATE_TRACE_POINTS
 #include "mmutrace.h"
 
+#define SPTE_HOST_WRITEABLE (1ULL << PT_FIRST_AVAIL_BITS_SHIFT)
+
 #define SHADOW_PT_INDEX(addr, level) PT64_INDEX(addr, level)
 
 struct kvm_rmap_desc {
@@ -1754,7 +1756,7 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
 		    unsigned pte_access, int user_fault,
 		    int write_fault, int dirty, int level,
 		    gfn_t gfn, pfn_t pfn, bool speculative,
-		    bool can_unsync)
+		    bool can_unsync, bool reset_host_protection)
 {
 	u64 spte;
 	int ret = 0;
@@ -1781,6 +1783,9 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
 		spte |= kvm_x86_ops->get_mt_mask(vcpu, gfn,
 			kvm_is_mmio_pfn(pfn));
 
+	if (reset_host_protection)
+		spte |= SPTE_HOST_WRITEABLE;
+
 	spte |= (u64)pfn << PAGE_SHIFT;
 
 	if ((pte_access & ACC_WRITE_MASK)
@@ -1826,7 +1831,8 @@ static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
 			 unsigned pt_access, unsigned pte_access,
 			 int user_fault, int write_fault, int dirty,
 			 int *ptwrite, int level, gfn_t gfn,
-			 pfn_t pfn, bool speculative)
+			 pfn_t pfn, bool speculative,
+			 bool reset_host_protection)
 {
 	int was_rmapped = 0;
 	int was_writeble = is_writeble_pte(*sptep);
@@ -1858,7 +1864,8 @@ static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
 	}
 
 	if (set_spte(vcpu, sptep, pte_access, user_fault, write_fault,
-		      dirty, level, gfn, pfn, speculative, true)) {
+		      dirty, level, gfn, pfn, speculative, true,
+		      reset_host_protection)) {
 		if (write_fault)
 			*ptwrite = 1;
 		kvm_x86_ops->tlb_flush(vcpu);
@@ -1906,7 +1913,7 @@ static int __direct_map(struct kvm_vcpu *vcpu, gpa_t v, int write,
 		if (iterator.level == level) {
 			mmu_set_spte(vcpu, iterator.sptep, ACC_ALL, ACC_ALL,
 				     0, write, 1, &pt_write,
-				     level, gfn, pfn, false);
+				     level, gfn, pfn, false, true);
 			++vcpu->stat.pf_fixed;
 			break;
 		}
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
index d2fec9c..c9256ee 100644
--- a/arch/x86/kvm/paging_tmpl.h
+++ b/arch/x86/kvm/paging_tmpl.h
@@ -273,9 +273,13 @@ static void FNAME(update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *page,
 	if (mmu_notifier_retry(vcpu, vcpu->arch.update_pte.mmu_seq))
 		return;
 	kvm_get_pfn(pfn);
+	/*
+	 * we call mmu_set_spte() with reset_host_protection = true because
+	 * vcpu->arch.update_pte.pfn was fetched from get_user_pages(write = 1).
+	 */
 	mmu_set_spte(vcpu, spte, page->role.access, pte_access, 0, 0,
 		     gpte & PT_DIRTY_MASK, NULL, PT_PAGE_TABLE_LEVEL,
-		     gpte_to_gfn(gpte), pfn, true);
+		     gpte_to_gfn(gpte), pfn, true, true);
 }
 
 /*
@@ -308,7 +312,7 @@ static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr,
 			     user_fault, write_fault,
 			     gw->ptes[gw->level-1] & PT_DIRTY_MASK,
 			     ptwrite, level,
-			     gw->gfn, pfn, false);
+			     gw->gfn, pfn, false, true);
 			break;
 		}
 
@@ -558,6 +562,7 @@ static void FNAME(prefetch_page)(struct kvm_vcpu *vcpu,
 static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
 {
 	int i, offset, nr_present;
+	bool reset_host_protection;
 
 	offset = nr_present = 0;
 
@@ -595,9 +600,16 @@ static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
 
 		nr_present++;
 		pte_access = sp->role.access & FNAME(gpte_access)(vcpu, gpte);
+		if (!(sp->spt[i] & SPTE_HOST_WRITEABLE)) {
+			pte_access &= ~PT_WRITABLE_MASK;
+			reset_host_protection = 0;
+		} else {
+			reset_host_protection = 1;
+		}
 		set_spte(vcpu, &sp->spt[i], pte_access, 0, 0,
 			 is_dirty_gpte(gpte), PT_PAGE_TABLE_LEVEL, gfn,
-			 spte_to_pfn(sp->spt[i]), true, false);
+			 spte_to_pfn(sp->spt[i]), true, false,
+			 reset_host_protection);
 	}
 
 	return !nr_present;
this flag notifies that the host physical page we are pointing to from
the spte is write protected, and therefore we can't change its access
to be writable unless we run get_user_pages(write = 1).

(this is needed for change_pte support in kvm)

Signed-off-by: Izik Eidus <ieidus@redhat.com>
---
 arch/x86/kvm/mmu.c         |   15 +++++++++++----
 arch/x86/kvm/paging_tmpl.h |   18 +++++++++++++++---
 2 files changed, 26 insertions(+), 7 deletions(-)
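As a reading aid, the invariant the new bit encodes can be restated as a single predicate (hypothetical helper, not part of the patch):

	/*
	 * SPTE_HOST_WRITEABLE set:   the pfn behind this spte came from
	 *                            get_user_pages(write = 1); the spte may
	 *                            be made writable at any time.
	 * SPTE_HOST_WRITEABLE clear: the host pte is write-protected (for
	 *                            example the page is KSM-shared); the spte
	 *                            must stay read-only until a write fault
	 *                            runs get_user_pages(write = 1) again and
	 *                            breaks COW.
	 */
	static inline bool spte_host_writable(u64 spte)
	{
		return spte & SPTE_HOST_WRITEABLE;
	}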