Message ID | 20230130125504.2509710-5-fengwei.yin@intel.com (mailing list archive)
---|---
State | New
Series | folio based filemap_map_pages()
On Mon, Jan 30, 2023 at 08:55:03PM +0800, Yin Fengwei wrote:
> +++ b/include/linux/mm.h
> @@ -1061,6 +1061,8 @@ static inline pte_t maybe_mkwrite(pte_t pte, struct vm_area_struct *vma)
>
>  vm_fault_t do_set_pmd(struct vm_fault *vmf, struct page *page);
>  void do_set_pte(struct vm_fault *vmf, struct page *page, unsigned long addr);
> +void do_set_pte_entry(struct vm_fault *vmf, struct page *page,
> +		unsigned long addr);

indentation

> -void do_set_pte(struct vm_fault *vmf, struct page *page, unsigned long addr)
> +void do_set_pte_entry(struct vm_fault *vmf, struct page *page,
> +		unsigned long addr)

ditto

>  {
>  	struct vm_area_struct *vma = vmf->vma;
>  	bool uffd_wp = pte_marker_uffd_wp(vmf->orig_pte);
> @@ -4276,6 +4277,16 @@ void do_set_pte(struct vm_fault *vmf, struct page *page, unsigned long addr)
>  	entry = maybe_mkwrite(pte_mkdirty(entry), vma);
>  	if (unlikely(uffd_wp))
>  		entry = pte_mkuffd_wp(entry);
> +	set_pte_at(vma->vm_mm, addr, vmf->pte, entry);

I'm not sure this is safe.  As soon as you call set_pte_at(), the page
can be found by GUP.  If it is, and it doesn't have rmap set up, aren't
things going to go horribly wrong?
On 1/30/2023 9:53 PM, Matthew Wilcox wrote:
> On Mon, Jan 30, 2023 at 08:55:03PM +0800, Yin Fengwei wrote:
>> +++ b/include/linux/mm.h
>> @@ -1061,6 +1061,8 @@ static inline pte_t maybe_mkwrite(pte_t pte, struct vm_area_struct *vma)
>>
>>  vm_fault_t do_set_pmd(struct vm_fault *vmf, struct page *page);
>>  void do_set_pte(struct vm_fault *vmf, struct page *page, unsigned long addr);
>> +void do_set_pte_entry(struct vm_fault *vmf, struct page *page,
>> +		unsigned long addr);
>
> indentation
>
>> -void do_set_pte(struct vm_fault *vmf, struct page *page, unsigned long addr)
>> +void do_set_pte_entry(struct vm_fault *vmf, struct page *page,
>> +		unsigned long addr)
>
> ditto
>
>>  {
>>  	struct vm_area_struct *vma = vmf->vma;
>>  	bool uffd_wp = pte_marker_uffd_wp(vmf->orig_pte);
>> @@ -4276,6 +4277,16 @@ void do_set_pte(struct vm_fault *vmf, struct page *page, unsigned long addr)
>>  	entry = maybe_mkwrite(pte_mkdirty(entry), vma);
>>  	if (unlikely(uffd_wp))
>>  		entry = pte_mkuffd_wp(entry);
>> +	set_pte_at(vma->vm_mm, addr, vmf->pte, entry);
>
> I'm not sure this is safe.  As soon as you call set_pte_at(), the page
> can be found by GUP.  If it is, and it doesn't have rmap set up, aren't
> things going to go horribly wrong?

Thanks a lot for pointing this out. I was not aware of the connection
between the sequence here and GUP. I will take care of this in the next
version by putting the rmap setup before set_pte_at().

Regards
Yin, Fengwei
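For illustration, here is a minimal sketch of the reordering described in the reply above. It only rearranges the helpers already used in the posted patch (see the diff below); it is not the actual follow-up version, just one possible way to make the rmap visible before the PTE is:

void do_set_pte(struct vm_fault *vmf, struct page *page, unsigned long addr)
{
	struct vm_area_struct *vma = vmf->vma;
	bool write = vmf->flags & FAULT_FLAG_WRITE;

	/*
	 * Set up the rmap and mm counters first, so that a concurrent
	 * GUP that finds the new PTE never sees a page without rmap.
	 */
	if (write && !(vma->vm_flags & VM_SHARED)) {
		/* copy-on-write page */
		inc_mm_counter(vma->vm_mm, MM_ANONPAGES);
		page_add_new_anon_rmap(page, vma, addr);
		lru_cache_add_inactive_or_unevictable(page, vma);
	} else {
		inc_mm_counter(vma->vm_mm, mm_counter_file(page));
		page_add_file_rmap(page, vma, false);
	}

	/* Only now install the PTE; do_set_pte_entry() ends in set_pte_at(). */
	do_set_pte_entry(vmf, page, addr);
}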
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 63c0e645f37c..f31426a40e05 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1061,6 +1061,8 @@ static inline pte_t maybe_mkwrite(pte_t pte, struct vm_area_struct *vma)
 
 vm_fault_t do_set_pmd(struct vm_fault *vmf, struct page *page);
 void do_set_pte(struct vm_fault *vmf, struct page *page, unsigned long addr);
+void do_set_pte_entry(struct vm_fault *vmf, struct page *page,
+		unsigned long addr);
 
 vm_fault_t finish_fault(struct vm_fault *vmf);
 vm_fault_t finish_mkwrite_fault(struct vm_fault *vmf);
diff --git a/mm/memory.c b/mm/memory.c
index 61ccd2d7e6a6..d0c27e11fab4 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -4256,7 +4256,8 @@ vm_fault_t do_set_pmd(struct vm_fault *vmf, struct page *page)
 }
 #endif
 
-void do_set_pte(struct vm_fault *vmf, struct page *page, unsigned long addr)
+void do_set_pte_entry(struct vm_fault *vmf, struct page *page,
+		unsigned long addr)
 {
 	struct vm_area_struct *vma = vmf->vma;
 	bool uffd_wp = pte_marker_uffd_wp(vmf->orig_pte);
@@ -4276,6 +4277,16 @@ void do_set_pte(struct vm_fault *vmf, struct page *page, unsigned long addr)
 	entry = maybe_mkwrite(pte_mkdirty(entry), vma);
 	if (unlikely(uffd_wp))
 		entry = pte_mkuffd_wp(entry);
+	set_pte_at(vma->vm_mm, addr, vmf->pte, entry);
+}
+
+void do_set_pte(struct vm_fault *vmf, struct page *page, unsigned long addr)
+{
+	struct vm_area_struct *vma = vmf->vma;
+	bool write = vmf->flags & FAULT_FLAG_WRITE;
+
+	do_set_pte_entry(vmf, page, addr);
+
 	/* copy-on-write page */
 	if (write && !(vma->vm_flags & VM_SHARED)) {
 		inc_mm_counter(vma->vm_mm, MM_ANONPAGES);
@@ -4285,7 +4296,6 @@ void do_set_pte(struct vm_fault *vmf, struct page *page, unsigned long addr)
 		inc_mm_counter(vma->vm_mm, mm_counter_file(page));
 		page_add_file_rmap(page, vma, false);
 	}
-	set_pte_at(vma->vm_mm, addr, vmf->pte, entry);
 }
 
 static bool vmf_pte_changed(struct vm_fault *vmf)
do_set_pte_entry() does the same as do_set_pte() but does not update the
mm_counter or add the file rmap. This allows the mm_counter updates and
rmap operations to be batched later.

Signed-off-by: Yin Fengwei <fengwei.yin@intel.com>
---
 include/linux/mm.h |  2 ++
 mm/memory.c        | 14 ++++++++++++--
 2 files changed, 14 insertions(+), 2 deletions(-)
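As a sketch of the batching this split is meant to enable, a fault-around style caller could install the PTEs for several pages of a folio with do_set_pte_entry() and fold the counter update into a single call. The function name map_folio_pages(), its loop structure, and the handling of vmf->pte are assumptions for illustration, not part of this series; PTE locking, error handling, and resetting vmf->pte are omitted:

/*
 * Hypothetical caller: map 'nr' consecutive pages of a file-backed
 * folio starting at 'addr', batching the mm_counter update.
 */
static void map_folio_pages(struct vm_fault *vmf, struct folio *folio,
		unsigned long addr, int nr)
{
	struct vm_area_struct *vma = vmf->vma;
	int i;

	for (i = 0; i < nr; i++, addr += PAGE_SIZE, vmf->pte++) {
		struct page *page = folio_page(folio, i);

		/* rmap before the PTE becomes visible (see review above) */
		page_add_file_rmap(page, vma, false);
		do_set_pte_entry(vmf, page, addr);
	}

	/* One batched counter update instead of one per page. */
	add_mm_counter(vma->vm_mm, mm_counter_file(&folio->page), nr);
}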