@@ -1642,6 +1642,25 @@ void folio_remove_rmap_pmd(struct folio *folio, struct page *page,
#endif
}
+/*
+ * can_batch_unmap_folio_ptes - can a whole large folio be unmapped in one batch?
+ * @addr:  virtual address mapped by @ptep
+ * @folio: folio being unmapped
+ * @ptep:  page table entry that maps @addr
+ *
+ * We support batch unmapping of PTEs for lazyfree large folios only, i.e.
+ * folios that are anonymous and not swap-backed.  Batching is allowed when
+ * @ptep is a present PTE pointing at the first page of @folio and
+ * folio_pte_batch() reports that all folio_nr_pages(@folio) entries starting
+ * at @ptep belong to the folio (differences in the dirty and soft-dirty bits
+ * are ignored via the FPB_IGNORE_* flags).
+ *
+ * Return: true if the entire folio can be unmapped as one batch starting at
+ * @ptep, false if the caller must handle the PTE individually.
+ */
+static inline bool can_batch_unmap_folio_ptes(unsigned long addr,
+			struct folio *folio, pte_t *ptep)
+{
+	const fpb_t fpb_flags = FPB_IGNORE_DIRTY | FPB_IGNORE_SOFT_DIRTY;
+	/* First address that is no longer covered by the page table of @ptep. */
+	const unsigned long pt_end = (addr + PMD_SIZE) & PMD_MASK;
+	int max_nr = folio_nr_pages(folio);
+	pte_t pte = ptep_get(ptep);
+
+	if (!folio_test_anon(folio) || folio_test_swapbacked(folio))
+		return false;
+	if (pte_none(pte) || pte_unused(pte) || !pte_present(pte))
+		return false;
+	/* Only batch when @ptep maps the very first page of the folio. */
+	if (pte_pfn(pte) != folio_pfn(folio))
+		return false;
+
+	/*
+	 * The batch scan below is asked to look at up to max_nr consecutive
+	 * PTEs starting at @ptep.  If the folio is mapped at an address where
+	 * it straddles a PMD boundary, those entries do not all live in the
+	 * same page table, so scanning them would run off the end of the table.
+	 * A folio that cannot be fully covered here cannot be batched anyway.
+	 *
+	 * NOTE(review): the range should presumably also be limited to the end
+	 * of the VMA being walked; that cannot be checked here because no vma
+	 * is passed in - confirm against the caller.
+	 */
+	if ((unsigned long)max_nr > ((pt_end - addr) >> PAGE_SHIFT))
+		return false;
+
+	return folio_pte_batch(folio, addr, ptep, pte, max_nr, fpb_flags, NULL,
+			       NULL, NULL) == max_nr;
+}
+
/*
* @arg: enum ttu_flags will be passed to this argument
*/
@@ -1655,6 +1674,7 @@ static bool try_to_unmap_one(struct folio *folio, struct vm_area_struct *vma,
bool anon_exclusive, ret = true;
struct mmu_notifier_range range;
enum ttu_flags flags = (enum ttu_flags)(long)arg;
+ int nr_pages = 1;
unsigned long pfn;
unsigned long hsz = 0;
@@ -1780,6 +1800,15 @@ static bool try_to_unmap_one(struct folio *folio, struct vm_area_struct *vma,
hugetlb_vma_unlock_write(vma);
}
pteval = huge_ptep_clear_flush(vma, address, pvmw.pte);
+ } else if (folio_test_large(folio) && !(flags & TTU_HWPOISON) &&
+ can_batch_unmap_folio_ptes(address, folio, pvmw.pte)) {
+ nr_pages = folio_nr_pages(folio);
+ flush_cache_range(vma, range.start, range.end);
+ pteval = get_and_clear_full_ptes(mm, address, pvmw.pte, nr_pages, 0);
+ if (should_defer_flush(mm, flags))
+ set_tlb_ubc_flush_pending(mm, pteval, address, folio_size(folio));
+ else
+ flush_tlb_range(vma, range.start, range.end);
} else {
flush_cache_page(vma, address, pfn);
/* Nuke the page table entry. */
@@ -1875,7 +1904,7 @@ static bool try_to_unmap_one(struct folio *folio, struct vm_area_struct *vma,
* redirtied either using the page table or a previously
* obtained GUP reference.
*/
- set_pte_at(mm, address, pvmw.pte, pteval);
+ set_ptes(mm, address, pvmw.pte, pteval, nr_pages);
folio_set_swapbacked(folio);
goto walk_abort;
} else if (ref_count != 1 + map_count) {
@@ -1888,10 +1917,10 @@ static bool try_to_unmap_one(struct folio *folio, struct vm_area_struct *vma,
* We'll come back here later and detect if the folio was
* dirtied when the additional reference is gone.
*/
- set_pte_at(mm, address, pvmw.pte, pteval);
+ set_ptes(mm, address, pvmw.pte, pteval, nr_pages);
goto walk_abort;
}
- dec_mm_counter(mm, MM_ANONPAGES);
+ add_mm_counter(mm, MM_ANONPAGES, -nr_pages);
goto discard;
}
@@ -1943,13 +1972,18 @@ static bool try_to_unmap_one(struct folio *folio, struct vm_area_struct *vma,
dec_mm_counter(mm, mm_counter_file(folio));
}
discard:
- if (unlikely(folio_test_hugetlb(folio)))
+ if (unlikely(folio_test_hugetlb(folio))) {
hugetlb_remove_rmap(folio);
- else
- folio_remove_rmap_pte(folio, subpage, vma);
+ } else {
+ folio_remove_rmap_ptes(folio, subpage, nr_pages, vma);
+ folio_ref_sub(folio, nr_pages - 1);
+ }
if (vma->vm_flags & VM_LOCKED)
mlock_drain_local();
folio_put(folio);
+ /* We have already batched the entire folio */
+ if (nr_pages > 1)
+ goto walk_done;
continue;
walk_abort:
ret = false;