Message ID | 20190219200430.11130-10-jglisse@redhat.com (mailing list archive) |
---|---|
State | New, archived |
Series | mmu notifier provide context informations |
On 2/19/19 12:04 PM, jglisse@redhat.com wrote:
> From: Jérôme Glisse <jglisse@redhat.com>
>
> When notifying change for a range use MMU_NOTIFIER_USE_CHANGE_PTE flag
> for page table update that use set_pte_at_notify() and where the we are
> going either from read and write to read only with same pfn or read only
> to read and write with new pfn.
>
> Note that set_pte_at_notify() itself should only be use in rare cases
> ie we do not want to use it when we are updating a significant range of
> virtual addresses and thus a significant number of pte. Instead for
> those cases the event provided to mmu notifer invalidate_range_start()
> callback should be use for optimization.
>
> Changes since v1:
>     - Use the new unsigned flags field in struct mmu_notifier_range
>     - Use the new flags parameter to mmu_notifier_range_init()
>     - Explicitly list all the patterns where we can use change_pte()
>
> Signed-off-by: Jérôme Glisse <jglisse@redhat.com>
> Cc: Christian König <christian.koenig@amd.com>
> Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
> Cc: Jani Nikula <jani.nikula@linux.intel.com>
> Cc: Rodrigo Vivi <rodrigo.vivi@intel.com>
> Cc: Jan Kara <jack@suse.cz>
> Cc: Andrea Arcangeli <aarcange@redhat.com>
> Cc: Peter Xu <peterx@redhat.com>
> Cc: Felix Kuehling <Felix.Kuehling@amd.com>
> Cc: Jason Gunthorpe <jgg@mellanox.com>
> Cc: Ross Zwisler <zwisler@kernel.org>
> Cc: Dan Williams <dan.j.williams@intel.com>
> Cc: Paolo Bonzini <pbonzini@redhat.com>
> Cc: Radim Krčmář <rkrcmar@redhat.com>
> Cc: Michal Hocko <mhocko@kernel.org>
> Cc: Christian Koenig <christian.koenig@amd.com>
> Cc: Ralph Campbell <rcampbell@nvidia.com>
> Cc: John Hubbard <jhubbard@nvidia.com>
> Cc: kvm@vger.kernel.org
> Cc: dri-devel@lists.freedesktop.org
> Cc: linux-rdma@vger.kernel.org
> Cc: Arnd Bergmann <arnd@arndb.de>
> ---
>  include/linux/mmu_notifier.h | 34 ++++++++++++++++++++++++++++++++--
>  mm/ksm.c                     | 11 ++++++-----
>  mm/memory.c                  |  5 +++--
>  3 files changed, 41 insertions(+), 9 deletions(-)
>
> diff --git a/include/linux/mmu_notifier.h b/include/linux/mmu_notifier.h
> index b6c004bd9f6a..0230a4b06b46 100644
> --- a/include/linux/mmu_notifier.h
> +++ b/include/linux/mmu_notifier.h
> @@ -40,6 +40,26 @@ enum mmu_notifier_event {
>  	MMU_NOTIFY_SOFT_DIRTY,
>  };
>
> +/*
> + * @MMU_NOTIFIER_RANGE_BLOCKABLE: can the mmu notifier range_start/range_end
> + * callback block or not ? If set then the callback can block.
> + *
> + * @MMU_NOTIFIER_USE_CHANGE_PTE: only set when the page table it updated with
> + * the set_pte_at_notify() the valid patterns for this are:
> + *    - pte read and write to read only same pfn
> + *    - pte read only to read and write (pfn can change or stay the same)
> + *    - pte read only to read only with different pfn
> + *   It is illegal to set in any other circumstances.
> + *
> + * Note that set_pte_at_notify() should not be use outside of the above cases.
> + * When updating a range in batch (like write protecting a range) it is better
> + * to rely on invalidate_range_start() and struct mmu_notifier_range to infer
> + * the kind of update that is happening (as an example you can look at the
> + * mmu_notifier_range_update_to_read_only() function).
> + */
> +#define MMU_NOTIFIER_RANGE_BLOCKABLE (1 << 0)
> +#define MMU_NOTIFIER_USE_CHANGE_PTE (1 << 1)
> +
>  #ifdef CONFIG_MMU_NOTIFIER
>
>  /*
> @@ -55,8 +75,6 @@ struct mmu_notifier_mm {
>  	spinlock_t lock;
>  };
>
> -#define MMU_NOTIFIER_RANGE_BLOCKABLE (1 << 0)
> -
>  struct mmu_notifier_range {
>  	struct vm_area_struct *vma;
>  	struct mm_struct *mm;
> @@ -268,6 +286,12 @@ mmu_notifier_range_blockable(const struct mmu_notifier_range *range)
>  	return (range->flags & MMU_NOTIFIER_RANGE_BLOCKABLE);
>  }
>
> +static inline bool
> +mmu_notifier_range_use_change_pte(const struct mmu_notifier_range *range)
> +{
> +	return (range->flags & MMU_NOTIFIER_USE_CHANGE_PTE);
> +}
> +
>  static inline void mmu_notifier_release(struct mm_struct *mm)
>  {
>  	if (mm_has_notifiers(mm))
> @@ -509,6 +533,12 @@ mmu_notifier_range_blockable(const struct mmu_notifier_range *range)
>  	return true;
>  }
>
> +static inline bool
> +mmu_notifier_range_use_change_pte(const struct mmu_notifier_range *range)
> +{
> +	return false;
> +}
> +
>  static inline int mm_has_notifiers(struct mm_struct *mm)
>  {
>  	return 0;
> diff --git a/mm/ksm.c b/mm/ksm.c
> index b782fadade8f..41e51882f999 100644
> --- a/mm/ksm.c
> +++ b/mm/ksm.c
> @@ -1066,9 +1066,9 @@ static int write_protect_page(struct vm_area_struct *vma, struct page *page,
>
>  	BUG_ON(PageTransCompound(page));
>
> -	mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, vma, mm,
> -				pvmw.address,
> -				pvmw.address + PAGE_SIZE);
> +	mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR,
> +				MMU_NOTIFIER_USE_CHANGE_PTE, vma, mm,
> +				pvmw.address, pvmw.address + PAGE_SIZE);
>  	mmu_notifier_invalidate_range_start(&range);
>
>  	if (!page_vma_mapped_walk(&pvmw))
> @@ -1155,8 +1155,9 @@ static int replace_page(struct vm_area_struct *vma, struct page *page,
>  	if (!pmd)
>  		goto out;
>
> -	mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, vma, mm, addr,
> -				addr + PAGE_SIZE);
> +	mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR,
> +				MMU_NOTIFIER_USE_CHANGE_PTE,
> +				vma, mm, addr, addr + PAGE_SIZE);
>  	mmu_notifier_invalidate_range_start(&range);
>
>  	ptep = pte_offset_map_lock(mm, pmd, addr, &ptl);
> diff --git a/mm/memory.c b/mm/memory.c
> index 45dbc174a88c..cb71d3ff1b97 100644
> --- a/mm/memory.c
> +++ b/mm/memory.c
> @@ -2282,8 +2282,9 @@ static vm_fault_t wp_page_copy(struct vm_fault *vmf)
>
>  	__SetPageUptodate(new_page);
>
> -	mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, vma, mm,
> -				vmf->address & PAGE_MASK,
> +	mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR,
> +				MMU_NOTIFIER_USE_CHANGE_PTE,
> +				vma, mm, vmf->address & PAGE_MASK,
>  				(vmf->address & PAGE_MASK) + PAGE_SIZE);
>  	mmu_notifier_invalidate_range_start(&range);
>

Reviewed-by: Ralph Campbell <rcampbell@nvidia.com>
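For readers unfamiliar with the interface, the following is a minimal sketch (not part of the patch) of how a notifier consumer that mirrors CPU page tables could use the new flag. The functions mmu_notifier_range_use_change_pte(), mmu_notifier_range_blockable(), the struct mmu_notifier_ops callbacks and struct mmu_notifier_range fields come from the kernel and this series; my_mirror, my_mirror_unmap(), my_mirror_update_pte() and the surrounding driver are hypothetical names used only for illustration.

/*
 * Illustrative sketch only, not part of the patch: a hypothetical driver
 * that mirrors CPU page tables in device page tables. When the range is
 * updated through set_pte_at_notify() (MMU_NOTIFIER_USE_CHANGE_PTE set),
 * the new pte will also be delivered through ->change_pte(), so such a
 * driver might choose to skip tearing down the whole range here.
 */
#include <linux/errno.h>
#include <linux/mmu_notifier.h>

struct my_mirror {				/* hypothetical driver state */
	struct mmu_notifier notifier;
	/* ... device page-table bookkeeping ... */
};

/* hypothetical helpers, assumed to exist elsewhere in the driver */
void my_mirror_unmap(struct my_mirror *mirror, unsigned long start,
		     unsigned long end);
void my_mirror_update_pte(struct my_mirror *mirror, unsigned long address,
			  pte_t pte);

static int my_invalidate_range_start(struct mmu_notifier *mn,
				     const struct mmu_notifier_range *range)
{
	struct my_mirror *mirror = container_of(mn, struct my_mirror, notifier);

	/*
	 * Single pte update done with set_pte_at_notify(): the replacement
	 * pte arrives via ->change_pte() below, so the device mapping for
	 * the range does not need to be torn down here.
	 */
	if (mmu_notifier_range_use_change_pte(range))
		return 0;

	/* Caller cannot block: ask it to retry in a blockable context. */
	if (!mmu_notifier_range_blockable(range))
		return -EAGAIN;

	/* Drop device mappings for [start, end). */
	my_mirror_unmap(mirror, range->start, range->end);
	return 0;
}

static void my_change_pte(struct mmu_notifier *mn, struct mm_struct *mm,
			  unsigned long address, pte_t pte)
{
	struct my_mirror *mirror = container_of(mn, struct my_mirror, notifier);

	/* Update the single device pte in place with the new CPU pte. */
	my_mirror_update_pte(mirror, address, pte);
}

static const struct mmu_notifier_ops my_mmu_notifier_ops = {
	.invalidate_range_start	= my_invalidate_range_start,
	.change_pte		= my_change_pte,
};

This matches the intent described in the commit message: single-pte updates (KSM page replacement, write-protect of one page, COW in wp_page_copy()) go through set_pte_at_notify() with the flag set, while batched range updates should instead be classified through the event passed to invalidate_range_start().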