| Message ID | 20250115033808.40641-4-21cnbao@gmail.com (mailing list archive) |
|---|---|
| State | New, archived |
| Series | mm: batched unmap lazyfree large folios during reclamation |
Hi,

> 		unsigned long hsz = 0;
> 
> @@ -1780,6 +1800,16 @@ static bool try_to_unmap_one(struct folio *folio, struct vm_area_struct *vma,
> 				hugetlb_vma_unlock_write(vma);
> 			}
> 			pteval = huge_ptep_clear_flush(vma, address, pvmw.pte);
> +		} else if (folio_test_large(folio) && !(flags & TTU_HWPOISON) &&
> +			   can_batch_unmap_folio_ptes(address, folio, pvmw.pte)) {
> +			nr_pages = folio_nr_pages(folio);
> +			flush_cache_range(vma, range.start, range.end);
> +			pteval = get_and_clear_full_ptes(mm, address, pvmw.pte, nr_pages, 0);
> +			if (should_defer_flush(mm, flags))
> +				set_tlb_ubc_flush_pending(mm, pteval, address,
> +						address + folio_size(folio));
> +			else
> +				flush_tlb_range(vma, range.start, range.end);
> 		} else {

I have some fixes [1] that will collide with this series. I'm currently
preparing a v2, and am not 100% sure when the fixes will get queued+merged.

I'll base them against mm-stable for now, and send them out based on
that, to avoid the conflicts here (should all be fairly easy to resolve
from a quick glimpse).

So we might have to refresh this series here if the fixes go in first.

[1] https://lkml.kernel.org/r/20250129115411.2077152-1-david@redhat.com
On Tue, 4 Feb 2025 12:38:31 +0100 David Hildenbrand <david@redhat.com> wrote:

> Hi,
> 
> > 		unsigned long hsz = 0;
> > 
> > @@ -1780,6 +1800,16 @@ static bool try_to_unmap_one(struct folio *folio, struct vm_area_struct *vma,
> > 				hugetlb_vma_unlock_write(vma);
> > 			}
> > 			pteval = huge_ptep_clear_flush(vma, address, pvmw.pte);
> > +		} else if (folio_test_large(folio) && !(flags & TTU_HWPOISON) &&
> > +			   can_batch_unmap_folio_ptes(address, folio, pvmw.pte)) {
> > +			nr_pages = folio_nr_pages(folio);
> > +			flush_cache_range(vma, range.start, range.end);
> > +			pteval = get_and_clear_full_ptes(mm, address, pvmw.pte, nr_pages, 0);
> > +			if (should_defer_flush(mm, flags))
> > +				set_tlb_ubc_flush_pending(mm, pteval, address,
> > +						address + folio_size(folio));
> > +			else
> > +				flush_tlb_range(vma, range.start, range.end);
> > 		} else {
> 
> I have some fixes [1] that will collide with this series. I'm currently
> preparing a v2, and am not 100% sure when the fixes will get queued+merged.
> 
> I'll base them against mm-stable for now, and send them out based on
> that, to avoid the conflicts here (should all be fairly easy to resolve
> from a quick glimpse).
> 
> So we might have to refresh this series here if the fixes go in first.
> 
> [1] https://lkml.kernel.org/r/20250129115411.2077152-1-david@redhat.com

It doesn't look like "mm: fixes for device-exclusive entries (hmm)" will be
backportable(?) but yes, we should aim to stage your fixes against mainline
and ahead of other changes to at least make life easier for anyone who
chooses to backport your fixes into an earlier kernel.
On Wed, Feb 5, 2025 at 12:38 AM David Hildenbrand <david@redhat.com> wrote:
>
> Hi,
>
> > 		unsigned long hsz = 0;
> > 
> > @@ -1780,6 +1800,16 @@ static bool try_to_unmap_one(struct folio *folio, struct vm_area_struct *vma,
> > 				hugetlb_vma_unlock_write(vma);
> > 			}
> > 			pteval = huge_ptep_clear_flush(vma, address, pvmw.pte);
> > +		} else if (folio_test_large(folio) && !(flags & TTU_HWPOISON) &&
> > +			   can_batch_unmap_folio_ptes(address, folio, pvmw.pte)) {
> > +			nr_pages = folio_nr_pages(folio);
> > +			flush_cache_range(vma, range.start, range.end);
> > +			pteval = get_and_clear_full_ptes(mm, address, pvmw.pte, nr_pages, 0);
> > +			if (should_defer_flush(mm, flags))
> > +				set_tlb_ubc_flush_pending(mm, pteval, address,
> > +						address + folio_size(folio));
> > +			else
> > +				flush_tlb_range(vma, range.start, range.end);
> > 		} else {
>
> I have some fixes [1] that will collide with this series. I'm currently
> preparing a v2, and am not 100% sure when the fixes will get queued+merged.
>
> I'll base them against mm-stable for now, and send them out based on
> that, to avoid the conflicts here (should all be fairly easy to resolve
> from a quick glimpse).
>
> So we might have to refresh this series here if the fixes go in first.

I assume you're referring to "[PATCH v1 08/12] mm/rmap: handle
device-exclusive entries correctly in try_to_unmap_one()". It looks
straightforward to resolve the conflict. If your patch is applied first,
I'll send a rebase.

>
> [1] https://lkml.kernel.org/r/20250129115411.2077152-1-david@redhat.com
>
> --
> Cheers,
>
> David / dhildenb
>

Thanks
Barry
diff --git a/mm/rmap.c b/mm/rmap.c
index abeb9fcec384..be1978d2712d 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -1642,6 +1642,25 @@ void folio_remove_rmap_pmd(struct folio *folio, struct page *page,
 #endif
 }
 
+/* We support batch unmapping of PTEs for lazyfree large folios */
+static inline bool can_batch_unmap_folio_ptes(unsigned long addr,
+			struct folio *folio, pte_t *ptep)
+{
+	const fpb_t fpb_flags = FPB_IGNORE_DIRTY | FPB_IGNORE_SOFT_DIRTY;
+	int max_nr = folio_nr_pages(folio);
+	pte_t pte = ptep_get(ptep);
+
+	if (!folio_test_anon(folio) || folio_test_swapbacked(folio))
+		return false;
+	if (pte_none(pte) || pte_unused(pte) || !pte_present(pte))
+		return false;
+	if (pte_pfn(pte) != folio_pfn(folio))
+		return false;
+
+	return folio_pte_batch(folio, addr, ptep, pte, max_nr, fpb_flags, NULL,
+			       NULL, NULL) == max_nr;
+}
+
 /*
  * @arg: enum ttu_flags will be passed to this argument
  */
@@ -1655,6 +1674,7 @@ static bool try_to_unmap_one(struct folio *folio, struct vm_area_struct *vma,
 	bool anon_exclusive, ret = true;
 	struct mmu_notifier_range range;
 	enum ttu_flags flags = (enum ttu_flags)(long)arg;
+	int nr_pages = 1;
 	unsigned long pfn;
 	unsigned long hsz = 0;
 
@@ -1780,6 +1800,16 @@ static bool try_to_unmap_one(struct folio *folio, struct vm_area_struct *vma,
 				hugetlb_vma_unlock_write(vma);
 			}
 			pteval = huge_ptep_clear_flush(vma, address, pvmw.pte);
+		} else if (folio_test_large(folio) && !(flags & TTU_HWPOISON) &&
+			   can_batch_unmap_folio_ptes(address, folio, pvmw.pte)) {
+			nr_pages = folio_nr_pages(folio);
+			flush_cache_range(vma, range.start, range.end);
+			pteval = get_and_clear_full_ptes(mm, address, pvmw.pte, nr_pages, 0);
+			if (should_defer_flush(mm, flags))
+				set_tlb_ubc_flush_pending(mm, pteval, address,
+						address + folio_size(folio));
+			else
+				flush_tlb_range(vma, range.start, range.end);
 		} else {
 			flush_cache_page(vma, address, pfn);
 			/* Nuke the page table entry. */
@@ -1875,7 +1905,7 @@ static bool try_to_unmap_one(struct folio *folio, struct vm_area_struct *vma,
 				 * redirtied either using the page table or a previously
 				 * obtained GUP reference.
 				 */
-				set_pte_at(mm, address, pvmw.pte, pteval);
+				set_ptes(mm, address, pvmw.pte, pteval, nr_pages);
 				folio_set_swapbacked(folio);
 				goto walk_abort;
 			} else if (ref_count != 1 + map_count) {
@@ -1888,10 +1918,10 @@ static bool try_to_unmap_one(struct folio *folio, struct vm_area_struct *vma,
 				 * We'll come back here later and detect if the folio was
 				 * dirtied when the additional reference is gone.
 				 */
-				set_pte_at(mm, address, pvmw.pte, pteval);
+				set_ptes(mm, address, pvmw.pte, pteval, nr_pages);
 				goto walk_abort;
 			}
-			dec_mm_counter(mm, MM_ANONPAGES);
+			add_mm_counter(mm, MM_ANONPAGES, -nr_pages);
 			goto discard;
 		}
 
@@ -1943,13 +1973,18 @@ static bool try_to_unmap_one(struct folio *folio, struct vm_area_struct *vma,
 			dec_mm_counter(mm, mm_counter_file(folio));
 		}
 discard:
-		if (unlikely(folio_test_hugetlb(folio)))
+		if (unlikely(folio_test_hugetlb(folio))) {
 			hugetlb_remove_rmap(folio);
-		else
-			folio_remove_rmap_pte(folio, subpage, vma);
+		} else {
+			folio_remove_rmap_ptes(folio, subpage, nr_pages, vma);
+			folio_ref_sub(folio, nr_pages - 1);
+		}
 		if (vma->vm_flags & VM_LOCKED)
 			mlock_drain_local();
 		folio_put(folio);
+		/* We have already batched the entire folio */
+		if (nr_pages > 1)
+			goto walk_done;
 		continue;
 walk_abort:
 		ret = false;
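For readers skimming the diff, the eligibility test in can_batch_unmap_folio_ptes()
reduces to: the folio must be lazyfree anonymous (anon and not swapbacked), the PTE
at hand must be present and map the folio's first page, and folio_pte_batch() must
find folio_nr_pages() consecutive PTEs covering the whole folio. Below is a minimal
user-space sketch of that check, not kernel code; the names fake_pte, fake_folio and
can_batch_unmap are invented for illustration and are not kernel API.

#include <stdbool.h>
#include <stdio.h>

/* Illustrative stand-ins for kernel types; not real kernel structures. */
struct fake_pte {
	bool present;
	unsigned long pfn;
};

struct fake_folio {
	bool anon;
	bool swapbacked;		/* false => lazyfree (MADV_FREE) anon memory */
	unsigned long first_pfn;
	int nr_pages;
};

/*
 * Sketch of the eligibility test: the whole folio may be unmapped in one
 * batch only if it is lazyfree anon and is mapped by nr_pages consecutive,
 * present PTEs starting at the PTE passed in.
 */
static bool can_batch_unmap(const struct fake_folio *folio,
			    const struct fake_pte *ptes, int nr_ptes)
{
	if (!folio->anon || folio->swapbacked)
		return false;
	if (nr_ptes < folio->nr_pages)
		return false;
	if (!ptes[0].present || ptes[0].pfn != folio->first_pfn)
		return false;

	/* Simplified folio_pte_batch(): require consecutive present pfns. */
	for (int i = 1; i < folio->nr_pages; i++) {
		if (!ptes[i].present || ptes[i].pfn != folio->first_pfn + i)
			return false;
	}
	return true;
}

int main(void)
{
	struct fake_folio folio = {
		.anon = true, .swapbacked = false,
		.first_pfn = 0x1000, .nr_pages = 4,
	};
	struct fake_pte ptes[4] = {
		{ true, 0x1000 }, { true, 0x1001 },
		{ true, 0x1002 }, { true, 0x1003 },
	};

	printf("fully mapped lazyfree folio: %d\n",
	       can_batch_unmap(&folio, ptes, 4));	/* 1: batch the unmap */

	ptes[2].present = false;	/* e.g. one subpage already unmapped */
	printf("partially mapped folio:      %d\n",
	       can_batch_unmap(&folio, ptes, 4));	/* 0: fall back to per-PTE path */
	return 0;
}

The all-or-nothing requirement (folio_pte_batch() == max_nr) is what allows the
later "if (nr_pages > 1) goto walk_done" in the diff: either the entire folio is
unmapped in one shot, or the existing per-PTE path runs unchanged.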