| Message ID | 20230306092259.3507807-6-fengwei.yin@intel.com |
|---|---|
| State | New |
| Series | batched remove rmap in try_to_unmap_one() |
On 2023/3/6 at 5:22 PM, Yin Fengwei wrote:
[snip]
> +bool pvmw_walk_skip_or_end_on_next(struct page_vma_mapped_walk *pvmw)
> +{
> +	unsigned long address = pvmw->address + PAGE_SIZE;
> +
> +	if (address >= vma_address_end(pvmw))
> +		return true;

If vma_address_end() is exactly equal to the next address (pvmw->address +
PAGE_SIZE), does this mean we skip unmapping the last page here? If so,
'>' would be enough: 'if (address > vma_address_end(pvmw))'.

I may have misunderstood, please correct me.
On Mon, 2023-03-06 at 20:39 +0800, haoxin wrote:
> On 2023/3/6 at 5:22 PM, Yin Fengwei wrote:
> [snip]
> > +bool pvmw_walk_skip_or_end_on_next(struct page_vma_mapped_walk *pvmw)
> > +{
> > +	unsigned long address = pvmw->address + PAGE_SIZE;
> > +
> > +	if (address >= vma_address_end(pvmw))
> > +		return true;
>
> If vma_address_end() is exactly equal to the next address (pvmw->address +
> PAGE_SIZE), does this mean we skip unmapping the last page here? If so,
> '>' would be enough: 'if (address > vma_address_end(pvmw))'.
>
> I may have misunderstood, please correct me.

This check will only be reached after the last PTE has already been
handled, so the last page is not skipped.

Thanks.

Regards
Yin, Fengwei
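To make the ordering concrete, here is a small stand-alone illustration (plain
user-space C, not the kernel code; the addresses and the printout are made up):
the current pte is handled before the "will the next pte end the walk?" check,
so with an exclusive end address and ">=" the batch is flushed right after the
last page has been processed and nothing is skipped.

```c
#include <stdio.h>

#define PAGE_SIZE 0x1000UL

int main(void)
{
        unsigned long start = 0x1000, end = 0x4000; /* three pages, end is exclusive */
        int batched = 0;

        for (unsigned long addr = start; addr < end; addr += PAGE_SIZE) {
                batched++;                          /* "unmap" the current pte first */
                if (addr + PAGE_SIZE >= end) {      /* next pte would end the walk */
                        printf("flush %d page(s); last page handled at 0x%lx\n",
                               batched, addr);
                        batched = 0;
                }
        }
        return 0;
}
```

Running it prints "flush 3 page(s); last page handled at 0x3000", i.e. all
three pages are handled before the single flush.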
diff --git a/include/linux/rmap.h b/include/linux/rmap.h
index d2569b42e21a..18193d1d5a8e 100644
--- a/include/linux/rmap.h
+++ b/include/linux/rmap.h
@@ -424,6 +424,7 @@ static inline void page_vma_mapped_walk_done(struct page_vma_mapped_walk *pvmw)
 }
 
 bool page_vma_mapped_walk(struct page_vma_mapped_walk *pvmw);
+bool pvmw_walk_skip_or_end_on_next(struct page_vma_mapped_walk *pvmw);
 
 /*
  * Used by swapoff to help locate where page is expected in vma.
diff --git a/mm/page_vma_mapped.c b/mm/page_vma_mapped.c
index 4e448cfbc6ef..19e997dfb5c6 100644
--- a/mm/page_vma_mapped.c
+++ b/mm/page_vma_mapped.c
@@ -291,6 +291,36 @@ bool page_vma_mapped_walk(struct page_vma_mapped_walk *pvmw)
 	return false;
 }
 
+/**
+ * pvmw_walk_skip_or_end_on_next - check if next pte will be skipped or
+ *                                 end the walk
+ * @pvmw: pointer to struct page_vma_mapped_walk.
+ *
+ * This function can only be called with correct pte lock hold
+ */
+bool pvmw_walk_skip_or_end_on_next(struct page_vma_mapped_walk *pvmw)
+{
+	unsigned long address = pvmw->address + PAGE_SIZE;
+
+	if (address >= vma_address_end(pvmw))
+		return true;
+
+	if ((address & (PMD_SIZE - PAGE_SIZE)) == 0)
+		return true;
+
+	if (pte_none(*pvmw->pte))
+		return true;
+
+	pvmw->pte++;
+	if (!check_pte(pvmw)) {
+		pvmw->pte--;
+		return true;
+	}
+	pvmw->pte--;
+
+	return false;
+}
+
 /**
  * page_mapped_in_vma - check whether a page is really mapped in a VMA
  * @page: the page to test
diff --git a/mm/rmap.c b/mm/rmap.c
index bb3fcb8df579..a64e9cbb52dd 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -1741,6 +1741,26 @@ static bool try_to_unmap_one_page(struct folio *folio,
 	return false;
 }
 
+static void folio_remove_rmap_and_update_count(struct folio *folio,
+		struct page *start, struct vm_area_struct *vma, int count)
+{
+	if (count == 0)
+		return;
+
+	/*
+	 * No need to call mmu_notifier_invalidate_range() it has be
+	 * done above for all cases requiring it to happen under page
+	 * table lock before mmu_notifier_invalidate_range_end()
+	 *
+	 * See Documentation/mm/mmu_notifier.rst
+	 */
+	folio_remove_rmap_range(folio, start, count, vma,
+					folio_test_hugetlb(folio));
+	if (vma->vm_flags & VM_LOCKED)
+		mlock_drain_local();
+	folio_ref_sub(folio, count);
+}
+
 /*
  * @arg: enum ttu_flags will be passed to this argument
  */
@@ -1748,10 +1768,11 @@ static bool try_to_unmap_one(struct folio *folio, struct vm_area_struct *vma,
 		     unsigned long address, void *arg)
 {
 	DEFINE_FOLIO_VMA_WALK(pvmw, folio, vma, address, 0);
-	struct page *subpage;
+	struct page *start = NULL;
 	bool ret = true;
 	struct mmu_notifier_range range;
 	enum ttu_flags flags = (enum ttu_flags)(long)arg;
+	int count = 0;
 
 	/*
 	 * When racing against e.g. zap_pte_range() on another cpu,
@@ -1812,26 +1833,31 @@ static bool try_to_unmap_one(struct folio *folio, struct vm_area_struct *vma,
 			break;
 		}
 
-		subpage = folio_page(folio,
+		if (!start)
+			start = folio_page(folio,
 				pte_pfn(*pvmw.pte) - folio_pfn(folio));
 		ret = try_to_unmap_one_page(folio, vma,
 						range, pvmw, address, flags);
 		if (!ret) {
+			folio_remove_rmap_and_update_count(folio,
+							start, vma, count);
 			page_vma_mapped_walk_done(&pvmw);
 			break;
 		}
+		count++;
 
 		/*
-		 * No need to call mmu_notifier_invalidate_range() it has be
-		 * done above for all cases requiring it to happen under page
-		 * table lock before mmu_notifier_invalidate_range_end()
-		 *
-		 * See Documentation/mm/mmu_notifier.rst
+		 * If next pte will be skipped in page_vma_mapped_walk() or
+		 * the walk will end at it, batched remove rmap and update
+		 * page refcount. We can't do it after page_vma_mapped_walk()
+		 * return false because the pte lock will not be hold.
 		 */
-		page_remove_rmap(subpage, vma, false);
-		if (vma->vm_flags & VM_LOCKED)
-			mlock_drain_local();
-		folio_put(folio);
+		if (pvmw_walk_skip_or_end_on_next(&pvmw)) {
+			folio_remove_rmap_and_update_count(folio,
+							start, vma, count);
+			count = 0;
+			start = NULL;
+		}
 	}
 
 	mmu_notifier_invalidate_range_end(&range);
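One detail in pvmw_walk_skip_or_end_on_next() above is easy to miss:
`(address & (PMD_SIZE - PAGE_SIZE)) == 0` is true exactly when the
(page-aligned) next address is PMD-aligned, i.e. the next pte sits under a
different page table and the current pte lock is about to be dropped. A
stand-alone check of that arithmetic, assuming 4 KiB pages and 2 MiB PMDs
(not kernel code, just the bit test):

```c
#include <stdio.h>

#define PAGE_SIZE 0x1000UL      /* 4 KiB */
#define PMD_SIZE  0x200000UL    /* 2 MiB */

int main(void)
{
        /* page-aligned candidate "next" addresses */
        unsigned long next[] = { 0x1ff000, 0x200000, 0x201000, 0x400000 };

        for (int i = 0; i < 4; i++) {
                /* true exactly when next[i] is a multiple of PMD_SIZE */
                int new_pmd = (next[i] & (PMD_SIZE - PAGE_SIZE)) == 0;

                printf("0x%06lx -> %s\n", next[i],
                       new_pmd ? "new PMD, pte lock will be dropped" : "same PMD");
        }
        return 0;
}
```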
If unmapping one page fails, or the vma walk will skip the next pte, or the
vma walk will end at the next pte, remove the rmap for the batched pages and
update the folio refcount in one go.

Signed-off-by: Yin Fengwei <fengwei.yin@intel.com>
---
 include/linux/rmap.h |  1 +
 mm/page_vma_mapped.c | 30 +++++++++++++++++++++++++++
 mm/rmap.c            | 48 ++++++++++++++++++++++++++++++++++----------
 3 files changed, 68 insertions(+), 11 deletions(-)
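As a rough mental model of the control flow described above, here is a
simplified user-space sketch (hypothetical helpers, not the kernel APIs):
ptes are "unmapped" one by one, but the rmap removal and refcount update are
deferred and flushed in a single call whenever the next pte would end the
walk or cross into a new PMD.

```c
#include <stdbool.h>
#include <stdio.h>

#define NPAGES          8
#define PTES_PER_PMD    4       /* toy value; real x86-64 has 512 */

/* stands in for folio_remove_rmap_range() + folio_ref_sub() */
static void flush_batch(int start, int count)
{
        if (count == 0)
                return;
        printf("remove rmap for pages [%d, %d), drop %d refs\n",
               start, start + count, count);
}

/* mirrors the idea of pvmw_walk_skip_or_end_on_next(): the walk ends,
 * or the next pte starts a new PMD and the pte lock will be dropped */
static bool next_skips_or_ends(int i)
{
        return i + 1 >= NPAGES || (i + 1) % PTES_PER_PMD == 0;
}

int main(void)
{
        int start = -1, count = 0;

        for (int i = 0; i < NPAGES; i++) {
                if (start < 0)
                        start = i;      /* first page of the current batch */
                count++;                /* pte i "unmapped", update deferred */

                if (next_skips_or_ends(i)) {
                        flush_batch(start, count);      /* one call per run */
                        start = -1;
                        count = 0;
                }
        }
        return 0;
}
```

With 8 pages and a 4-pte "PMD" this prints two flushes, [0, 4) and [4, 8),
instead of eight per-page updates.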