| Message ID | 20230331093937.945725-4-zhangpeng362@huawei.com |
|---|---|
| State | New |
| Series | userfaultfd: convert userfaultfd functions to use folios |
On 03/31/23 17:39, Peng Zhang wrote: > From: ZhangPeng <zhangpeng362@huawei.com> > > Replace copy_huge_page_from_user() with copy_folio_from_user(). > copy_folio_from_user() does the same as copy_huge_page_from_user(), but > takes in a folio instead of a page. Convert page_kaddr to kaddr in > copy_folio_from_user() to do indenting cleanup. > > Signed-off-by: ZhangPeng <zhangpeng362@huawei.com> > Reviewed-by: Sidhartha Kumar <sidhartha.kumar@oracle.com> > --- > include/linux/mm.h | 7 +++---- > mm/hugetlb.c | 5 ++--- > mm/memory.c | 26 ++++++++++++-------------- > mm/userfaultfd.c | 6 ++---- > 4 files changed, 19 insertions(+), 25 deletions(-) Thanks, Reviewed-by: Mike Kravetz <mike.kravetz@oracle.com>
On Fri, Mar 31, 2023 at 2:41 AM Peng Zhang <zhangpeng362@huawei.com> wrote: > > From: ZhangPeng <zhangpeng362@huawei.com> > > Replace copy_huge_page_from_user() with copy_folio_from_user(). > copy_folio_from_user() does the same as copy_huge_page_from_user(), but > takes in a folio instead of a page. Convert page_kaddr to kaddr in > copy_folio_from_user() to do indenting cleanup. > > Signed-off-by: ZhangPeng <zhangpeng362@huawei.com> > Reviewed-by: Sidhartha Kumar <sidhartha.kumar@oracle.com> > --- > include/linux/mm.h | 7 +++---- > mm/hugetlb.c | 5 ++--- > mm/memory.c | 26 ++++++++++++-------------- > mm/userfaultfd.c | 6 ++---- > 4 files changed, 19 insertions(+), 25 deletions(-) > > diff --git a/include/linux/mm.h b/include/linux/mm.h > index e249208f8fbe..cf4d773ca506 100644 > --- a/include/linux/mm.h > +++ b/include/linux/mm.h > @@ -3682,10 +3682,9 @@ extern void copy_user_huge_page(struct page *dst, struct page *src, > unsigned long addr_hint, > struct vm_area_struct *vma, > unsigned int pages_per_huge_page); > -extern long copy_huge_page_from_user(struct page *dst_page, > - const void __user *usr_src, > - unsigned int pages_per_huge_page, > - bool allow_pagefault); > +long copy_folio_from_user(struct folio *dst_folio, > + const void __user *usr_src, > + bool allow_pagefault); > > /** > * vma_is_special_huge - Are transhuge page-table entries considered special? > diff --git a/mm/hugetlb.c b/mm/hugetlb.c > index 7e4a80769c9e..aade1b513474 100644 > --- a/mm/hugetlb.c > +++ b/mm/hugetlb.c > @@ -6217,9 +6217,8 @@ int hugetlb_mfill_atomic_pte(pte_t *dst_pte, > goto out; > } > > - ret = copy_huge_page_from_user(&folio->page, > - (const void __user *) src_addr, > - pages_per_huge_page(h), false); > + ret = copy_folio_from_user(folio, (const void __user *) src_addr, > + false); > > /* fallback to copy_from_user outside mmap_lock */ > if (unlikely(ret)) { > diff --git a/mm/memory.c b/mm/memory.c > index 808f354bce65..4976422b6979 100644 > --- a/mm/memory.c > +++ b/mm/memory.c > @@ -5868,35 +5868,33 @@ void copy_user_huge_page(struct page *dst, struct page *src, > process_huge_page(addr_hint, pages_per_huge_page, copy_subpage, &arg); > } > > -long copy_huge_page_from_user(struct page *dst_page, > - const void __user *usr_src, > - unsigned int pages_per_huge_page, > - bool allow_pagefault) > +long copy_folio_from_user(struct folio *dst_folio, > + const void __user *usr_src, > + bool allow_pagefault) > { > - void *page_kaddr; > + void *kaddr; > unsigned long i, rc = 0; > - unsigned long ret_val = pages_per_huge_page * PAGE_SIZE; > + unsigned int nr_pages = folio_nr_pages(dst_folio); > + unsigned long ret_val = nr_pages * PAGE_SIZE; > struct page *subpage; > > - for (i = 0; i < pages_per_huge_page; i++) { > - subpage = nth_page(dst_page, i); > - page_kaddr = kmap_local_page(subpage); > + for (i = 0; i < nr_pages; i++) { > + subpage = folio_page(dst_folio, i); > + kaddr = kmap_local_page(subpage); > if (!allow_pagefault) > pagefault_disable(); > - rc = copy_from_user(page_kaddr, > - usr_src + i * PAGE_SIZE, PAGE_SIZE); > + rc = copy_from_user(kaddr, usr_src + i * PAGE_SIZE, PAGE_SIZE); > if (!allow_pagefault) > pagefault_enable(); > - kunmap_local(page_kaddr); > + kunmap_local(kaddr); > > ret_val -= (PAGE_SIZE - rc); > if (rc) > break; > > - flush_dcache_page(subpage); > - > cond_resched(); > } > + flush_dcache_folio(dst_folio); > return ret_val; > } Moving the flush_dcache_page() outside the loop to be flush_dcache_folio() changes the behavior of the function. 
Initially, if it fails to copy the entire page, the function breaks out of the loop and returns the number of unwritten bytes without flushing the page from the cache. Now if it fails, it will still flush out the page it failed on, as well as any later pages it may not have gotten to yet.
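For readers skimming the flattened diff above, the sketch below condenses the pre-patch flush placement that Vishal is describing, with a comment marking where the patch moves the flush. It is an illustration distilled from the quoted hunks, not the literal kernel source; pagefault_disable()/pagefault_enable() handling is elided and the function name is made up for the sketch.

```c
#include <linux/highmem.h>
#include <linux/mm.h>
#include <linux/sched.h>
#include <linux/uaccess.h>

/*
 * Pre-patch shape of the copy loop: each subpage is flushed only after it
 * copied in full, so a short copy leaves the failed subpage (and any later
 * ones) unflushed.
 */
static long copy_huge_page_from_user_sketch(struct page *dst_page,
					    const void __user *usr_src,
					    unsigned int pages_per_huge_page)
{
	unsigned long i, rc, ret_val = pages_per_huge_page * PAGE_SIZE;

	for (i = 0; i < pages_per_huge_page; i++) {
		void *kaddr = kmap_local_page(nth_page(dst_page, i));

		rc = copy_from_user(kaddr, usr_src + i * PAGE_SIZE, PAGE_SIZE);
		kunmap_local(kaddr);

		ret_val -= (PAGE_SIZE - rc);
		if (rc)
			break;	/* bail out before flushing the failed subpage */

		flush_dcache_page(nth_page(dst_page, i));
		cond_resched();
	}
	/*
	 * The patch drops the per-subpage flush above and instead calls
	 * flush_dcache_folio(dst_folio) here, after the loop, so the flush
	 * runs even when the loop broke out early.
	 */
	return ret_val;
}
```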
On 2023/4/7 10:28, Vishal Moola wrote: > On Fri, Mar 31, 2023 at 2:41 AM Peng Zhang <zhangpeng362@huawei.com> wrote: >> From: ZhangPeng <zhangpeng362@huawei.com> >> >> Replace copy_huge_page_from_user() with copy_folio_from_user(). >> copy_folio_from_user() does the same as copy_huge_page_from_user(), but >> takes in a folio instead of a page. Convert page_kaddr to kaddr in >> copy_folio_from_user() to do indenting cleanup. >> >> Signed-off-by: ZhangPeng <zhangpeng362@huawei.com> >> Reviewed-by: Sidhartha Kumar <sidhartha.kumar@oracle.com> >> --- >> include/linux/mm.h | 7 +++---- >> mm/hugetlb.c | 5 ++--- >> mm/memory.c | 26 ++++++++++++-------------- >> mm/userfaultfd.c | 6 ++---- >> 4 files changed, 19 insertions(+), 25 deletions(-) >> >> diff --git a/include/linux/mm.h b/include/linux/mm.h >> index e249208f8fbe..cf4d773ca506 100644 >> --- a/include/linux/mm.h >> +++ b/include/linux/mm.h >> @@ -3682,10 +3682,9 @@ extern void copy_user_huge_page(struct page *dst, struct page *src, >> unsigned long addr_hint, >> struct vm_area_struct *vma, >> unsigned int pages_per_huge_page); >> -extern long copy_huge_page_from_user(struct page *dst_page, >> - const void __user *usr_src, >> - unsigned int pages_per_huge_page, >> - bool allow_pagefault); >> +long copy_folio_from_user(struct folio *dst_folio, >> + const void __user *usr_src, >> + bool allow_pagefault); >> >> /** >> * vma_is_special_huge - Are transhuge page-table entries considered special? >> diff --git a/mm/hugetlb.c b/mm/hugetlb.c >> index 7e4a80769c9e..aade1b513474 100644 >> --- a/mm/hugetlb.c >> +++ b/mm/hugetlb.c >> @@ -6217,9 +6217,8 @@ int hugetlb_mfill_atomic_pte(pte_t *dst_pte, >> goto out; >> } >> >> - ret = copy_huge_page_from_user(&folio->page, >> - (const void __user *) src_addr, >> - pages_per_huge_page(h), false); >> + ret = copy_folio_from_user(folio, (const void __user *) src_addr, >> + false); >> >> /* fallback to copy_from_user outside mmap_lock */ >> if (unlikely(ret)) { >> diff --git a/mm/memory.c b/mm/memory.c >> index 808f354bce65..4976422b6979 100644 >> --- a/mm/memory.c >> +++ b/mm/memory.c >> @@ -5868,35 +5868,33 @@ void copy_user_huge_page(struct page *dst, struct page *src, >> process_huge_page(addr_hint, pages_per_huge_page, copy_subpage, &arg); >> } >> >> -long copy_huge_page_from_user(struct page *dst_page, >> - const void __user *usr_src, >> - unsigned int pages_per_huge_page, >> - bool allow_pagefault) >> +long copy_folio_from_user(struct folio *dst_folio, >> + const void __user *usr_src, >> + bool allow_pagefault) >> { >> - void *page_kaddr; >> + void *kaddr; >> unsigned long i, rc = 0; >> - unsigned long ret_val = pages_per_huge_page * PAGE_SIZE; >> + unsigned int nr_pages = folio_nr_pages(dst_folio); >> + unsigned long ret_val = nr_pages * PAGE_SIZE; >> struct page *subpage; >> >> - for (i = 0; i < pages_per_huge_page; i++) { >> - subpage = nth_page(dst_page, i); >> - page_kaddr = kmap_local_page(subpage); >> + for (i = 0; i < nr_pages; i++) { >> + subpage = folio_page(dst_folio, i); >> + kaddr = kmap_local_page(subpage); >> if (!allow_pagefault) >> pagefault_disable(); >> - rc = copy_from_user(page_kaddr, >> - usr_src + i * PAGE_SIZE, PAGE_SIZE); >> + rc = copy_from_user(kaddr, usr_src + i * PAGE_SIZE, PAGE_SIZE); >> if (!allow_pagefault) >> pagefault_enable(); >> - kunmap_local(page_kaddr); >> + kunmap_local(kaddr); >> >> ret_val -= (PAGE_SIZE - rc); >> if (rc) >> break; >> >> - flush_dcache_page(subpage); >> - >> cond_resched(); >> } >> + flush_dcache_folio(dst_folio); >> return ret_val; >> } > Moving 
the flush_dcache_page() outside the loop to be > flush_dcache_folio() changes the behavior of the function. > > Initially, if it fails to copy the entire page, the function breaks out > of the loop and returns the number of unwritten bytes without > flushing the page from the cache. Now if it fails, it will still flush > out the page it failed on, as well as any later pages it may not > have gotten to yet. Agreed. If it fails, could we just not flush the folio? Like this: long copy_folio_from_user(...) { ... for (i = 0; i < nr_pages; i++) { ... rc = copy_from_user(kaddr, usr_src + i * PAGE_SIZE, PAGE_SIZE); ... ret_val -= (PAGE_SIZE - rc); if (rc) - break; + return ret_val; cond_resched(); } flush_dcache_folio(dst_folio); return ret_val; } Thanks for your review. Best Regards, Peng
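Peng's inline pseudo-diff is hard to read after being flattened, so here is the full variant being proposed, assembled from the patch's mm/memory.c hunk plus the `return ret_val;` change above. It is a sketch of the proposal, not a tested replacement.

```c
#include <linux/cacheflush.h>
#include <linux/highmem.h>
#include <linux/mm.h>
#include <linux/sched.h>
#include <linux/uaccess.h>

/* Proposed variant: bail out before the folio-wide flush on a short copy. */
long copy_folio_from_user(struct folio *dst_folio, const void __user *usr_src,
			  bool allow_pagefault)
{
	unsigned int nr_pages = folio_nr_pages(dst_folio);
	unsigned long i, rc, ret_val = nr_pages * PAGE_SIZE;
	void *kaddr;

	for (i = 0; i < nr_pages; i++) {
		kaddr = kmap_local_page(folio_page(dst_folio, i));
		if (!allow_pagefault)
			pagefault_disable();
		rc = copy_from_user(kaddr, usr_src + i * PAGE_SIZE, PAGE_SIZE);
		if (!allow_pagefault)
			pagefault_enable();
		kunmap_local(kaddr);

		ret_val -= (PAGE_SIZE - rc);
		if (rc)
			return ret_val;	/* skip the flush entirely on failure */

		cond_resched();
	}
	flush_dcache_folio(dst_folio);	/* only reached after a full copy */
	return ret_val;
}
```

With this shape the dcache flush happens only when the whole folio was copied, which matches the pre-patch behaviour of never flushing past a failure.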
On 04/08/23 12:43, zhangpeng (AS) wrote: > On 2023/4/7 10:28, Vishal Moola wrote: > > > On Fri, Mar 31, 2023 at 2:41 AM Peng Zhang <zhangpeng362@huawei.com> wrote: > > > From: ZhangPeng <zhangpeng362@huawei.com> > > > > > > Replace copy_huge_page_from_user() with copy_folio_from_user(). > > > copy_folio_from_user() does the same as copy_huge_page_from_user(), but > > > takes in a folio instead of a page. Convert page_kaddr to kaddr in > > > copy_folio_from_user() to do indenting cleanup. > > > > > > Signed-off-by: ZhangPeng <zhangpeng362@huawei.com> > > > Reviewed-by: Sidhartha Kumar <sidhartha.kumar@oracle.com> > > > --- > > > - bool allow_pagefault) > > > +long copy_folio_from_user(struct folio *dst_folio, > > > + const void __user *usr_src, > > > + bool allow_pagefault) > > > { > > > - void *page_kaddr; > > > + void *kaddr; > > > unsigned long i, rc = 0; > > > - unsigned long ret_val = pages_per_huge_page * PAGE_SIZE; > > > + unsigned int nr_pages = folio_nr_pages(dst_folio); > > > + unsigned long ret_val = nr_pages * PAGE_SIZE; > > > struct page *subpage; > > > > > > - for (i = 0; i < pages_per_huge_page; i++) { > > > - subpage = nth_page(dst_page, i); > > > - page_kaddr = kmap_local_page(subpage); > > > + for (i = 0; i < nr_pages; i++) { > > > + subpage = folio_page(dst_folio, i); > > > + kaddr = kmap_local_page(subpage); > > > if (!allow_pagefault) > > > pagefault_disable(); > > > - rc = copy_from_user(page_kaddr, > > > - usr_src + i * PAGE_SIZE, PAGE_SIZE); > > > + rc = copy_from_user(kaddr, usr_src + i * PAGE_SIZE, PAGE_SIZE); > > > if (!allow_pagefault) > > > pagefault_enable(); > > > - kunmap_local(page_kaddr); > > > + kunmap_local(kaddr); > > > > > > ret_val -= (PAGE_SIZE - rc); > > > if (rc) > > > break; > > > > > > - flush_dcache_page(subpage); > > > - > > > cond_resched(); > > > } > > > + flush_dcache_folio(dst_folio); > > > return ret_val; > > > } > > Moving the flush_dcache_page() outside the loop to be > > flush_dcache_folio() changes the behavior of the function. > > > > Initially, if it fails to copy the entire page, the function breaks out > > of the loop and returns the number of unwritten bytes without > > flushing the page from the cache. Now if it fails, it will still flush > > out the page it failed on, as well as any later pages it may not > > have gotten to yet. > > Agreed. If it fails, could we just not flush the folio? I believe that should be OK. If returning an error, nobody should be depending on any part of the page being present or not in the cache.
On 4/11/2023 5:26 AM, Mike Kravetz wrote: > On 04/08/23 12:43, zhangpeng (AS) wrote: >> On 2023/4/7 10:28, Vishal Moola wrote: >> >>> On Fri, Mar 31, 2023 at 2:41 AM Peng Zhang <zhangpeng362@huawei.com> wrote: >>>> From: ZhangPeng <zhangpeng362@huawei.com> >>>> >>>> Replace copy_huge_page_from_user() with copy_folio_from_user(). >>>> copy_folio_from_user() does the same as copy_huge_page_from_user(), but >>>> takes in a folio instead of a page. Convert page_kaddr to kaddr in >>>> copy_folio_from_user() to do indenting cleanup. >>>> >>>> Signed-off-by: ZhangPeng <zhangpeng362@huawei.com> >>>> Reviewed-by: Sidhartha Kumar <sidhartha.kumar@oracle.com> >>>> --- >>>> - bool allow_pagefault) >>>> +long copy_folio_from_user(struct folio *dst_folio, >>>> + const void __user *usr_src, >>>> + bool allow_pagefault) >>>> { >>>> - void *page_kaddr; >>>> + void *kaddr; >>>> unsigned long i, rc = 0; >>>> - unsigned long ret_val = pages_per_huge_page * PAGE_SIZE; >>>> + unsigned int nr_pages = folio_nr_pages(dst_folio); >>>> + unsigned long ret_val = nr_pages * PAGE_SIZE; >>>> struct page *subpage; >>>> >>>> - for (i = 0; i < pages_per_huge_page; i++) { >>>> - subpage = nth_page(dst_page, i); >>>> - page_kaddr = kmap_local_page(subpage); >>>> + for (i = 0; i < nr_pages; i++) { >>>> + subpage = folio_page(dst_folio, i); >>>> + kaddr = kmap_local_page(subpage); >>>> if (!allow_pagefault) >>>> pagefault_disable(); >>>> - rc = copy_from_user(page_kaddr, >>>> - usr_src + i * PAGE_SIZE, PAGE_SIZE); >>>> + rc = copy_from_user(kaddr, usr_src + i * PAGE_SIZE, PAGE_SIZE); >>>> if (!allow_pagefault) >>>> pagefault_enable(); >>>> - kunmap_local(page_kaddr); >>>> + kunmap_local(kaddr); >>>> >>>> ret_val -= (PAGE_SIZE - rc); >>>> if (rc) >>>> break; >>>> >>>> - flush_dcache_page(subpage); >>>> - >>>> cond_resched(); >>>> } >>>> + flush_dcache_folio(dst_folio); >>>> return ret_val; >>>> } >>> Moving the flush_dcache_page() outside the loop to be >>> flush_dcache_folio() changes the behavior of the function. >>> >>> Initially, if it fails to copy the entire page, the function breaks out >>> of the loop and returns the number of unwritten bytes without >>> flushing the page from the cache. Now if it fails, it will still flush >>> out the page it failed on, as well as any later pages it may not >>> have gotten to yet. >> >> Agreed. If it fails, could we just not flush the folio? > > I believe that should be OK. If returning an error, nobody should be > depending on any part of the page being present or not in the cache. Maybe we should still flush the dcache, since this function returns the number of bytes copied successfully? Flushing the cache makes sure the copied pieces actually reach RAM. For the range that was not copied yet, flushing the cache or not makes no difference. Thanks. Regards Yin, Fengwei
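To make Fengwei's point concrete, one purely illustrative way to flush only what was actually copied (not something posted in the thread) would be to flush just the subpages before the failure point. The helper name and the `copied` parameter below are hypothetical.

```c
#include <linux/highmem.h>
#include <linux/mm.h>

/*
 * Illustrative only: flush just the subpages that copied in full, so the
 * successfully written bytes are pushed out while nothing at or past the
 * failure point is flushed.  `copied` would be the loop index reached
 * before copy_from_user() returned short.
 */
static void flush_copied_subpages(struct folio *dst_folio, unsigned long copied)
{
	unsigned long i;

	for (i = 0; i < copied; i++)
		flush_dcache_page(folio_page(dst_folio, i));
}
```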
On Thu, Apr 06, 2023 at 07:28:44PM -0700, Vishal Moola wrote: > > - flush_dcache_page(subpage); > > - > > cond_resched(); > > } > > + flush_dcache_folio(dst_folio); > > return ret_val; > > } > > Moving the flush_dcache_page() outside the loop to be > flush_dcache_folio() changes the behavior of the function. > > Initially, if it fails to copy the entire page, the function breaks out > of the loop and returns the number of unwritten bytes without > flushing the page from the cache. Now if it fails, it will still flush > out the page it failed on, as well as any later pages it may not > have gotten to yet. I'm not sure this is worth worrying about. Failing to copy the entire folio is unlikely, and if we do, flushing the entire folio instead of just a few pages in it is harmless. Plus I have patches which significantly optimise flush_dcache_folio() over flush_dcache_page() (for the majority of architectures) and so I think this change is actually beneficial in the long term.
On Tue, 11 Apr 2023 04:40:17 +0100 Matthew Wilcox <willy@infradead.org> wrote: > On Thu, Apr 06, 2023 at 07:28:44PM -0700, Vishal Moola wrote: > > > - flush_dcache_page(subpage); > > > - > > > cond_resched(); > > > } > > > + flush_dcache_folio(dst_folio); > > > return ret_val; > > > } > > > > Moving the flush_dcache_page() outside the loop to be > > flush_dcache_folio() changes the behavior of the function. > > > > Initially, if it fails to copy the entire page, the function breaks out > > of the loop and returns the number of unwritten bytes without > > flushing the page from the cache. Now if it fails, it will still flush > > out the page it failed on, as well as any later pages it may not > > have gotten to yet. > > I'm not sure this is worth worrying about. Failing to copy the entire > folio is unlikely, and if we do, flushing the entire folio instead of just > a few pages in it is harmless. Plus I have patches which significantly > optiise flush_dcache_folio() over flush_dcache_page() (for the majority > of architectures) and so I think this change is actually beneficial in > the long term. Thanks, I'll send the series in for the next merge window as-is. If others remain unhappy with the flushing issue, please propose something during the next -rc cycle.
diff --git a/include/linux/mm.h b/include/linux/mm.h
index e249208f8fbe..cf4d773ca506 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -3682,10 +3682,9 @@ extern void copy_user_huge_page(struct page *dst, struct page *src,
 				unsigned long addr_hint,
 				struct vm_area_struct *vma,
 				unsigned int pages_per_huge_page);
-extern long copy_huge_page_from_user(struct page *dst_page,
-				const void __user *usr_src,
-				unsigned int pages_per_huge_page,
-				bool allow_pagefault);
+long copy_folio_from_user(struct folio *dst_folio,
+			  const void __user *usr_src,
+			  bool allow_pagefault);
 
 /**
  * vma_is_special_huge - Are transhuge page-table entries considered special?
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 7e4a80769c9e..aade1b513474 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -6217,9 +6217,8 @@ int hugetlb_mfill_atomic_pte(pte_t *dst_pte,
 			goto out;
 		}
 
-		ret = copy_huge_page_from_user(&folio->page,
-						(const void __user *) src_addr,
-						pages_per_huge_page(h), false);
+		ret = copy_folio_from_user(folio, (const void __user *) src_addr,
+					   false);
 
 		/* fallback to copy_from_user outside mmap_lock */
 		if (unlikely(ret)) {
diff --git a/mm/memory.c b/mm/memory.c
index 808f354bce65..4976422b6979 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -5868,35 +5868,33 @@ void copy_user_huge_page(struct page *dst, struct page *src,
 	process_huge_page(addr_hint, pages_per_huge_page, copy_subpage, &arg);
 }
 
-long copy_huge_page_from_user(struct page *dst_page,
-				const void __user *usr_src,
-				unsigned int pages_per_huge_page,
-				bool allow_pagefault)
+long copy_folio_from_user(struct folio *dst_folio,
+			  const void __user *usr_src,
+			  bool allow_pagefault)
 {
-	void *page_kaddr;
+	void *kaddr;
 	unsigned long i, rc = 0;
-	unsigned long ret_val = pages_per_huge_page * PAGE_SIZE;
+	unsigned int nr_pages = folio_nr_pages(dst_folio);
+	unsigned long ret_val = nr_pages * PAGE_SIZE;
 	struct page *subpage;
 
-	for (i = 0; i < pages_per_huge_page; i++) {
-		subpage = nth_page(dst_page, i);
-		page_kaddr = kmap_local_page(subpage);
+	for (i = 0; i < nr_pages; i++) {
+		subpage = folio_page(dst_folio, i);
+		kaddr = kmap_local_page(subpage);
 		if (!allow_pagefault)
 			pagefault_disable();
-		rc = copy_from_user(page_kaddr,
-				usr_src + i * PAGE_SIZE, PAGE_SIZE);
+		rc = copy_from_user(kaddr, usr_src + i * PAGE_SIZE, PAGE_SIZE);
 		if (!allow_pagefault)
 			pagefault_enable();
-		kunmap_local(page_kaddr);
+		kunmap_local(kaddr);
 
 		ret_val -= (PAGE_SIZE - rc);
 		if (rc)
 			break;
 
-		flush_dcache_page(subpage);
-
 		cond_resched();
 	}
+	flush_dcache_folio(dst_folio);
 	return ret_val;
 }
 #endif /* CONFIG_TRANSPARENT_HUGEPAGE || CONFIG_HUGETLBFS */
diff --git a/mm/userfaultfd.c b/mm/userfaultfd.c
index 24d6ed7ff302..78fed9003cf7 100644
--- a/mm/userfaultfd.c
+++ b/mm/userfaultfd.c
@@ -422,10 +422,8 @@ static __always_inline ssize_t mfill_atomic_hugetlb(
 				mmap_read_unlock(dst_mm);
 				BUG_ON(!page);
 
-				err = copy_huge_page_from_user(page,
-						(const void __user *)src_addr,
-						vma_hpagesize / PAGE_SIZE,
-						true);
+				err = copy_folio_from_user(page_folio(page),
+						(const void __user *)src_addr, true);
 				if (unlikely(err)) {
 					err = -EFAULT;
 					goto out;
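For context on how the two call sites introduced by the patch fit together, here is a condensed illustration of the caller pattern: a first copy with page faults disabled while mmap_lock is held, then a retry with faults allowed after the lock has been dropped. `uffd_copy_hugetlb_sketch()` is a hypothetical helper written for this sketch; the real logic lives in hugetlb_mfill_atomic_pte() and mfill_atomic_hugetlb() and involves more state than shown here.

```c
#include <linux/errno.h>
#include <linux/mm.h>

/* Hypothetical helper condensing the two-phase copy from the patch. */
static int uffd_copy_hugetlb_sketch(struct mm_struct *dst_mm, struct folio *folio,
				    unsigned long src_addr)
{
	long left;

	/* Attempt 1: under mmap_lock, faults disabled (allow_pagefault == false). */
	left = copy_folio_from_user(folio, (const void __user *)src_addr, false);
	if (!left)
		return 0;

	/* Attempt 2: drop the lock and retry with faults allowed. */
	mmap_read_unlock(dst_mm);
	left = copy_folio_from_user(folio, (const void __user *)src_addr, true);

	return left ? -EFAULT : 0;
}
```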