Message ID | 20220816022102.582865-3-haiyue.wang@intel.com (mailing list archive) |
---|---|
State | Superseded |
Headers | show |
Series | fix follow_page related issues | expand |
Looks good, thanks. Reviewed-by: Alistair Popple <apopple@nvidia.com> Haiyue Wang <haiyue.wang@intel.com> writes: > The handling Non-LRU pages returned by follow_page() jumps directly, it > doesn't call put_page() to handle the reference count, since 'FOLL_GET' > flag for follow_page() has get_page() called. Fix the zone device page > check by handling the page reference count correctly before returning. > > And as David reviewed, "device pages are never PageKsm pages". Drop this > zone device page check for break_ksm(). > > Fixes: 3218f8712d6b ("mm: handling Non-LRU pages returned by vm_normal_pages") > Signed-off-by: Haiyue Wang <haiyue.wang@intel.com> > Reviewed-by: "Huang, Ying" <ying.huang@intel.com> > Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com> > --- > mm/huge_memory.c | 4 ++-- > mm/ksm.c | 12 +++++++++--- > mm/migrate.c | 19 ++++++++++++------- > 3 files changed, 23 insertions(+), 12 deletions(-) > > diff --git a/mm/huge_memory.c b/mm/huge_memory.c > index 8a7c1b344abe..b2ba17c3dcd7 100644 > --- a/mm/huge_memory.c > +++ b/mm/huge_memory.c > @@ -2963,10 +2963,10 @@ static int split_huge_pages_pid(int pid, unsigned long vaddr_start, > /* FOLL_DUMP to ignore special (like zero) pages */ > page = follow_page(vma, addr, FOLL_GET | FOLL_DUMP); > > - if (IS_ERR_OR_NULL(page) || is_zone_device_page(page)) > + if (IS_ERR_OR_NULL(page)) > continue; > > - if (!is_transparent_hugepage(page)) > + if (is_zone_device_page(page) || !is_transparent_hugepage(page)) > goto next; > > total++; > diff --git a/mm/ksm.c b/mm/ksm.c > index 42ab153335a2..e26f57fc1f0e 100644 > --- a/mm/ksm.c > +++ b/mm/ksm.c > @@ -475,7 +475,7 @@ static int break_ksm(struct vm_area_struct *vma, unsigned long addr) > cond_resched(); > page = follow_page(vma, addr, > FOLL_GET | FOLL_MIGRATION | FOLL_REMOTE); > - if (IS_ERR_OR_NULL(page) || is_zone_device_page(page)) > + if (IS_ERR_OR_NULL(page)) > break; > if (PageKsm(page)) > ret = handle_mm_fault(vma, addr, > @@ -560,12 +560,15 @@ static 
struct page *get_mergeable_page(struct rmap_item *rmap_item) > goto out; > > page = follow_page(vma, addr, FOLL_GET); > - if (IS_ERR_OR_NULL(page) || is_zone_device_page(page)) > + if (IS_ERR_OR_NULL(page)) > goto out; > + if (is_zone_device_page(page)) > + goto out_putpage; > if (PageAnon(page)) { > flush_anon_page(vma, page, addr); > flush_dcache_page(page); > } else { > +out_putpage: > put_page(page); > out: > page = NULL; > @@ -2308,11 +2311,13 @@ static struct rmap_item *scan_get_next_rmap_item(struct page **page) > if (ksm_test_exit(mm)) > break; > *page = follow_page(vma, ksm_scan.address, FOLL_GET); > - if (IS_ERR_OR_NULL(*page) || is_zone_device_page(*page)) { > + if (IS_ERR_OR_NULL(*page)) { > ksm_scan.address += PAGE_SIZE; > cond_resched(); > continue; > } > + if (is_zone_device_page(*page)) > + goto next_page; > if (PageAnon(*page)) { > flush_anon_page(vma, *page, ksm_scan.address); > flush_dcache_page(*page); > @@ -2327,6 +2332,7 @@ static struct rmap_item *scan_get_next_rmap_item(struct page **page) > mmap_read_unlock(mm); > return rmap_item; > } > +next_page: > put_page(*page); > ksm_scan.address += PAGE_SIZE; > cond_resched(); > diff --git a/mm/migrate.c b/mm/migrate.c > index 581dfaad9257..44e05ce41d49 100644 > --- a/mm/migrate.c > +++ b/mm/migrate.c > @@ -1672,9 +1672,12 @@ static int add_page_for_migration(struct mm_struct *mm, unsigned long addr, > goto out; > > err = -ENOENT; > - if (!page || is_zone_device_page(page)) > + if (!page) > goto out; > > + if (is_zone_device_page(page)) > + goto out_putpage; > + > err = 0; > if (page_to_nid(page) == node) > goto out_putpage; > @@ -1868,13 +1871,15 @@ static void do_pages_stat_array(struct mm_struct *mm, unsigned long nr_pages, > if (IS_ERR(page)) > goto set_status; > > - if (page && !is_zone_device_page(page)) { > + err = -ENOENT; > + if (!page) > + goto set_status; > + > + if (!is_zone_device_page(page)) > err = page_to_nid(page); > - if (foll_flags & FOLL_GET) > - put_page(page); > - } else { > - 
err = -ENOENT; > - } > + > + if (foll_flags & FOLL_GET) > + put_page(page); > set_status: > *status = err;
On 2022/8/16 10:21, Haiyue Wang wrote: > The handling Non-LRU pages returned by follow_page() jumps directly, it > doesn't call put_page() to handle the reference count, since 'FOLL_GET' > flag for follow_page() has get_page() called. Fix the zone device page > check by handling the page reference count correctly before returning. > > And as David reviewed, "device pages are never PageKsm pages". Drop this > zone device page check for break_ksm(). > > Fixes: 3218f8712d6b ("mm: handling Non-LRU pages returned by vm_normal_pages") > Signed-off-by: Haiyue Wang <haiyue.wang@intel.com> > Reviewed-by: "Huang, Ying" <ying.huang@intel.com> > Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com> Thanks for your fixing. LGTM with one nit below. But I have no strong opinion on it. So with or without fixing below nit: Reviewed-by: Miaohe Lin <linmiaohe@huawei.com> > --- > mm/huge_memory.c | 4 ++-- > mm/ksm.c | 12 +++++++++--- > mm/migrate.c | 19 ++++++++++++------- > 3 files changed, 23 insertions(+), 12 deletions(-) > > diff --git a/mm/huge_memory.c b/mm/huge_memory.c > index 8a7c1b344abe..b2ba17c3dcd7 100644 > --- a/mm/huge_memory.c > +++ b/mm/huge_memory.c > @@ -2963,10 +2963,10 @@ static int split_huge_pages_pid(int pid, unsigned long vaddr_start, > /* FOLL_DUMP to ignore special (like zero) pages */ > page = follow_page(vma, addr, FOLL_GET | FOLL_DUMP); > > - if (IS_ERR_OR_NULL(page) || is_zone_device_page(page)) > + if (IS_ERR_OR_NULL(page)) > continue; > > - if (!is_transparent_hugepage(page)) > + if (is_zone_device_page(page) || !is_transparent_hugepage(page)) !is_transparent_hugepage should already do the work here? IIRC, zone_device_page can't be a transhuge page anyway. And only transparent_hugepage is cared here. Thanks, Miaohe Lin
On 17.08.22 04:34, Miaohe Lin wrote: > On 2022/8/16 10:21, Haiyue Wang wrote: >> The handling Non-LRU pages returned by follow_page() jumps directly, it >> doesn't call put_page() to handle the reference count, since 'FOLL_GET' >> flag for follow_page() has get_page() called. Fix the zone device page >> check by handling the page reference count correctly before returning. >> >> And as David reviewed, "device pages are never PageKsm pages". Drop this >> zone device page check for break_ksm(). >> >> Fixes: 3218f8712d6b ("mm: handling Non-LRU pages returned by vm_normal_pages") >> Signed-off-by: Haiyue Wang <haiyue.wang@intel.com> >> Reviewed-by: "Huang, Ying" <ying.huang@intel.com> >> Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com> > > Thanks for your fixing. LGTM with one nit below. But I have no strong opinion on it. > So with or without fixing below nit: > > Reviewed-by: Miaohe Lin <linmiaohe@huawei.com> > >> --- >> mm/huge_memory.c | 4 ++-- >> mm/ksm.c | 12 +++++++++--- >> mm/migrate.c | 19 ++++++++++++------- >> 3 files changed, 23 insertions(+), 12 deletions(-) >> >> diff --git a/mm/huge_memory.c b/mm/huge_memory.c >> index 8a7c1b344abe..b2ba17c3dcd7 100644 >> --- a/mm/huge_memory.c >> +++ b/mm/huge_memory.c >> @@ -2963,10 +2963,10 @@ static int split_huge_pages_pid(int pid, unsigned long vaddr_start, >> /* FOLL_DUMP to ignore special (like zero) pages */ >> page = follow_page(vma, addr, FOLL_GET | FOLL_DUMP); >> >> - if (IS_ERR_OR_NULL(page) || is_zone_device_page(page)) >> + if (IS_ERR_OR_NULL(page)) >> continue; >> >> - if (!is_transparent_hugepage(page)) >> + if (is_zone_device_page(page) || !is_transparent_hugepage(page)) > > !is_transparent_hugepage should already do the work here? IIRC, zone_device_page can't be > a transhuge page anyway. And only transparent_hugepage is cared here. I agree. Can we avoid sending a new version of a patch series as reply to another patch series (previous version)? Acked-by: David Hildenbrand <david@redhat.com>
> -----Original Message----- > From: David Hildenbrand <david@redhat.com> > Sent: Tuesday, August 23, 2022 18:07 > To: Miaohe Lin <linmiaohe@huawei.com>; Wang, Haiyue <haiyue.wang@intel.com>; linux-mm@kvack.org; > linux-kernel@vger.kernel.org > Cc: akpm@linux-foundation.org; apopple@nvidia.com; Huang, Ying <ying.huang@intel.com>; > songmuchun@bytedance.com; naoya.horiguchi@linux.dev; alex.sierra@amd.com; Felix Kuehling > <Felix.Kuehling@amd.com> > Subject: Re: [PATCH v6 2/2] mm: fix the handling Non-LRU pages returned by follow_page > > On 17.08.22 04:34, Miaohe Lin wrote: > > On 2022/8/16 10:21, Haiyue Wang wrote: > >> The handling Non-LRU pages returned by follow_page() jumps directly, it > >> doesn't call put_page() to handle the reference count, since 'FOLL_GET' > >> flag for follow_page() has get_page() called. Fix the zone device page > >> check by handling the page reference count correctly before returning. > >> > >> And as David reviewed, "device pages are never PageKsm pages". Drop this > >> zone device page check for break_ksm(). > >> > >> Fixes: 3218f8712d6b ("mm: handling Non-LRU pages returned by vm_normal_pages") > >> Signed-off-by: Haiyue Wang <haiyue.wang@intel.com> > >> Reviewed-by: "Huang, Ying" <ying.huang@intel.com> > >> Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com> > > > > Thanks for your fixing. LGTM with one nit below. But I have no strong opinion on it. 
> > So with or without fixing below nit: > > > > Reviewed-by: Miaohe Lin <linmiaohe@huawei.com> > > > >> --- > >> mm/huge_memory.c | 4 ++-- > >> mm/ksm.c | 12 +++++++++--- > >> mm/migrate.c | 19 ++++++++++++------- > >> 3 files changed, 23 insertions(+), 12 deletions(-) > >> > >> diff --git a/mm/huge_memory.c b/mm/huge_memory.c > >> index 8a7c1b344abe..b2ba17c3dcd7 100644 > >> --- a/mm/huge_memory.c > >> +++ b/mm/huge_memory.c > >> @@ -2963,10 +2963,10 @@ static int split_huge_pages_pid(int pid, unsigned long vaddr_start, > >> /* FOLL_DUMP to ignore special (like zero) pages */ > >> page = follow_page(vma, addr, FOLL_GET | FOLL_DUMP); > >> > >> - if (IS_ERR_OR_NULL(page) || is_zone_device_page(page)) > >> + if (IS_ERR_OR_NULL(page)) > >> continue; > >> > >> - if (!is_transparent_hugepage(page)) > >> + if (is_zone_device_page(page) || !is_transparent_hugepage(page)) > > > > !is_transparent_hugepage should already do the work here? IIRC, zone_device_page can't be > > a transhuge page anyway. And only transparent_hugepage is cared here. > > I agree. OK, will remove it in next version. > > Can we avoid sending a new version of a patch series as reply to another > patch series (previous version)? Don't use '--in-reply-to=' when running 'git send-email' ? > > > Acked-by: David Hildenbrand <david@redhat.com> > > -- > Thanks, > > David / dhildenb
On 23.08.22 15:26, Wang, Haiyue wrote: >> -----Original Message----- >> From: David Hildenbrand <david@redhat.com> >> Sent: Tuesday, August 23, 2022 18:07 >> To: Miaohe Lin <linmiaohe@huawei.com>; Wang, Haiyue <haiyue.wang@intel.com>; linux-mm@kvack.org; >> linux-kernel@vger.kernel.org >> Cc: akpm@linux-foundation.org; apopple@nvidia.com; Huang, Ying <ying.huang@intel.com>; >> songmuchun@bytedance.com; naoya.horiguchi@linux.dev; alex.sierra@amd.com; Felix Kuehling >> <Felix.Kuehling@amd.com> >> Subject: Re: [PATCH v6 2/2] mm: fix the handling Non-LRU pages returned by follow_page >> >> On 17.08.22 04:34, Miaohe Lin wrote: >>> On 2022/8/16 10:21, Haiyue Wang wrote: >>>> The handling Non-LRU pages returned by follow_page() jumps directly, it >>>> doesn't call put_page() to handle the reference count, since 'FOLL_GET' >>>> flag for follow_page() has get_page() called. Fix the zone device page >>>> check by handling the page reference count correctly before returning. >>>> >>>> And as David reviewed, "device pages are never PageKsm pages". Drop this >>>> zone device page check for break_ksm(). >>>> >>>> Fixes: 3218f8712d6b ("mm: handling Non-LRU pages returned by vm_normal_pages") >>>> Signed-off-by: Haiyue Wang <haiyue.wang@intel.com> >>>> Reviewed-by: "Huang, Ying" <ying.huang@intel.com> >>>> Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com> >>> >>> Thanks for your fixing. LGTM with one nit below. But I have no strong opinion on it. 
>>> So with or without fixing below nit: >>> >>> Reviewed-by: Miaohe Lin <linmiaohe@huawei.com> >>> >>>> --- >>>> mm/huge_memory.c | 4 ++-- >>>> mm/ksm.c | 12 +++++++++--- >>>> mm/migrate.c | 19 ++++++++++++------- >>>> 3 files changed, 23 insertions(+), 12 deletions(-) >>>> >>>> diff --git a/mm/huge_memory.c b/mm/huge_memory.c >>>> index 8a7c1b344abe..b2ba17c3dcd7 100644 >>>> --- a/mm/huge_memory.c >>>> +++ b/mm/huge_memory.c >>>> @@ -2963,10 +2963,10 @@ static int split_huge_pages_pid(int pid, unsigned long vaddr_start, >>>> /* FOLL_DUMP to ignore special (like zero) pages */ >>>> page = follow_page(vma, addr, FOLL_GET | FOLL_DUMP); >>>> >>>> - if (IS_ERR_OR_NULL(page) || is_zone_device_page(page)) >>>> + if (IS_ERR_OR_NULL(page)) >>>> continue; >>>> >>>> - if (!is_transparent_hugepage(page)) >>>> + if (is_zone_device_page(page) || !is_transparent_hugepage(page)) >>> >>> !is_transparent_hugepage should already do the work here? IIRC, zone_device_page can't be >>> a transhuge page anyway. And only transparent_hugepage is cared here. >> >> I agree. > > OK, will remove it in next version. > >> >> Can we avoid sending a new version of a patch series as reply to another >> patch series (previous version)? > > Don't use '--in-reply-to=' when running 'git send-email' ? Yes. That makes it easier to identify versions of patch series, without having to dig down into an ever-growing thread.
> -----Original Message----- > From: David Hildenbrand <david@redhat.com> > Sent: Tuesday, August 23, 2022 21:28 > To: Wang, Haiyue <haiyue.wang@intel.com>; Miaohe Lin <linmiaohe@huawei.com>; linux-mm@kvack.org; > linux-kernel@vger.kernel.org > Cc: akpm@linux-foundation.org; apopple@nvidia.com; Huang, Ying <ying.huang@intel.com>; > songmuchun@bytedance.com; naoya.horiguchi@linux.dev; alex.sierra@amd.com; Felix Kuehling > <Felix.Kuehling@amd.com> > Subject: Re: [PATCH v6 2/2] mm: fix the handling Non-LRU pages returned by follow_page > > On 23.08.22 15:26, Wang, Haiyue wrote: > >> -----Original Message----- > >> From: David Hildenbrand <david@redhat.com> > >> Sent: Tuesday, August 23, 2022 18:07 > >> To: Miaohe Lin <linmiaohe@huawei.com>; Wang, Haiyue <haiyue.wang@intel.com>; linux-mm@kvack.org; > >> linux-kernel@vger.kernel.org > >> Cc: akpm@linux-foundation.org; apopple@nvidia.com; Huang, Ying <ying.huang@intel.com>; > >> songmuchun@bytedance.com; naoya.horiguchi@linux.dev; alex.sierra@amd.com; Felix Kuehling > >> <Felix.Kuehling@amd.com> > >> Subject: Re: [PATCH v6 2/2] mm: fix the handling Non-LRU pages returned by follow_page > >> > >> On 17.08.22 04:34, Miaohe Lin wrote: > >>> On 2022/8/16 10:21, Haiyue Wang wrote: > >>>> The handling Non-LRU pages returned by follow_page() jumps directly, it > >>>> doesn't call put_page() to handle the reference count, since 'FOLL_GET' > >>>> flag for follow_page() has get_page() called. Fix the zone device page > >>>> check by handling the page reference count correctly before returning. > >>>> > >>>> And as David reviewed, "device pages are never PageKsm pages". Drop this > >>>> zone device page check for break_ksm(). > >>>> > >>>> Fixes: 3218f8712d6b ("mm: handling Non-LRU pages returned by vm_normal_pages") > >>>> Signed-off-by: Haiyue Wang <haiyue.wang@intel.com> > >>>> Reviewed-by: "Huang, Ying" <ying.huang@intel.com> > >>>> Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com> > >>> > >>> Thanks for your fixing. 
LGTM with one nit below. But I have no strong opinion on it. > >>> So with or without fixing below nit: > >>> > >>> Reviewed-by: Miaohe Lin <linmiaohe@huawei.com> > >>> > >>>> --- > >>>> mm/huge_memory.c | 4 ++-- > >>>> mm/ksm.c | 12 +++++++++--- > >>>> mm/migrate.c | 19 ++++++++++++------- > >>>> 3 files changed, 23 insertions(+), 12 deletions(-) > >>>> > >>>> diff --git a/mm/huge_memory.c b/mm/huge_memory.c > >>>> index 8a7c1b344abe..b2ba17c3dcd7 100644 > >>>> --- a/mm/huge_memory.c > >>>> +++ b/mm/huge_memory.c > >>>> @@ -2963,10 +2963,10 @@ static int split_huge_pages_pid(int pid, unsigned long vaddr_start, > >>>> /* FOLL_DUMP to ignore special (like zero) pages */ > >>>> page = follow_page(vma, addr, FOLL_GET | FOLL_DUMP); > >>>> > >>>> - if (IS_ERR_OR_NULL(page) || is_zone_device_page(page)) > >>>> + if (IS_ERR_OR_NULL(page)) > >>>> continue; > >>>> > >>>> - if (!is_transparent_hugepage(page)) > >>>> + if (is_zone_device_page(page) || !is_transparent_hugepage(page)) > >>> > >>> !is_transparent_hugepage should already do the work here? IIRC, zone_device_page can't be > >>> a transhuge page anyway. And only transparent_hugepage is cared here. > >> > >> I agree. > > > > OK, will remove it in next version. > > > >> > >> Can we avoid sending a new version of a patch series as reply to another > >> patch series (previous version)? > > > > Don't use '--in-reply-to=' when running 'git send-email' ? > > Yes. That makes it easier top identify versions of patch series, without > having to dig down into an ever-growing thread. Got it, thanks. ;-) > > -- > Thanks, > > David / dhildenb
diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 8a7c1b344abe..b2ba17c3dcd7 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -2963,10 +2963,10 @@ static int split_huge_pages_pid(int pid, unsigned long vaddr_start, /* FOLL_DUMP to ignore special (like zero) pages */ page = follow_page(vma, addr, FOLL_GET | FOLL_DUMP); - if (IS_ERR_OR_NULL(page) || is_zone_device_page(page)) + if (IS_ERR_OR_NULL(page)) continue; - if (!is_transparent_hugepage(page)) + if (is_zone_device_page(page) || !is_transparent_hugepage(page)) goto next; total++; diff --git a/mm/ksm.c b/mm/ksm.c index 42ab153335a2..e26f57fc1f0e 100644 --- a/mm/ksm.c +++ b/mm/ksm.c @@ -475,7 +475,7 @@ static int break_ksm(struct vm_area_struct *vma, unsigned long addr) cond_resched(); page = follow_page(vma, addr, FOLL_GET | FOLL_MIGRATION | FOLL_REMOTE); - if (IS_ERR_OR_NULL(page) || is_zone_device_page(page)) + if (IS_ERR_OR_NULL(page)) break; if (PageKsm(page)) ret = handle_mm_fault(vma, addr, @@ -560,12 +560,15 @@ static struct page *get_mergeable_page(struct rmap_item *rmap_item) goto out; page = follow_page(vma, addr, FOLL_GET); - if (IS_ERR_OR_NULL(page) || is_zone_device_page(page)) + if (IS_ERR_OR_NULL(page)) goto out; + if (is_zone_device_page(page)) + goto out_putpage; if (PageAnon(page)) { flush_anon_page(vma, page, addr); flush_dcache_page(page); } else { +out_putpage: put_page(page); out: page = NULL; @@ -2308,11 +2311,13 @@ static struct rmap_item *scan_get_next_rmap_item(struct page **page) if (ksm_test_exit(mm)) break; *page = follow_page(vma, ksm_scan.address, FOLL_GET); - if (IS_ERR_OR_NULL(*page) || is_zone_device_page(*page)) { + if (IS_ERR_OR_NULL(*page)) { ksm_scan.address += PAGE_SIZE; cond_resched(); continue; } + if (is_zone_device_page(*page)) + goto next_page; if (PageAnon(*page)) { flush_anon_page(vma, *page, ksm_scan.address); flush_dcache_page(*page); @@ -2327,6 +2332,7 @@ static struct rmap_item *scan_get_next_rmap_item(struct page **page) mmap_read_unlock(mm); return 
rmap_item; } +next_page: put_page(*page); ksm_scan.address += PAGE_SIZE; cond_resched(); diff --git a/mm/migrate.c b/mm/migrate.c index 581dfaad9257..44e05ce41d49 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -1672,9 +1672,12 @@ static int add_page_for_migration(struct mm_struct *mm, unsigned long addr, goto out; err = -ENOENT; - if (!page || is_zone_device_page(page)) + if (!page) goto out; + if (is_zone_device_page(page)) + goto out_putpage; + err = 0; if (page_to_nid(page) == node) goto out_putpage; @@ -1868,13 +1871,15 @@ static void do_pages_stat_array(struct mm_struct *mm, unsigned long nr_pages, if (IS_ERR(page)) goto set_status; - if (page && !is_zone_device_page(page)) { + err = -ENOENT; + if (!page) + goto set_status; + + if (!is_zone_device_page(page)) err = page_to_nid(page); - if (foll_flags & FOLL_GET) - put_page(page); - } else { - err = -ENOENT; - } + + if (foll_flags & FOLL_GET) + put_page(page); set_status: *status = err;