@@ -1538,17 +1538,204 @@ static bool try_to_unmap_one_hugetlb(struct folio *folio,
return ret;
}
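+
+/*
+ * Unmap one PTE that maps @folio in @vma at @address, as found by the
+ * caller's page_vma_mapped_walk().  Covers the hwpoison, pte_unused(),
+ * anonymous (including MADV_FREE) and file-backed cases.  Returns false
+ * if the PTE could not be unmapped; in that case the walk has already
+ * been completed with page_vma_mapped_walk_done() and the caller should
+ * stop iterating.
+ */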
+static bool try_to_unmap_one_page(struct folio *folio,
+ struct vm_area_struct *vma, struct mmu_notifier_range range,
+ struct page_vma_mapped_walk pvmw, unsigned long address,
+ enum ttu_flags flags)
+{
+ bool anon_exclusive, ret = true;
+ struct page *subpage;
+ struct mm_struct *mm = vma->vm_mm;
+ pte_t pteval;
+
+ subpage = folio_page(folio,
+ pte_pfn(*pvmw.pte) - folio_pfn(folio));
+ anon_exclusive = folio_test_anon(folio) &&
+ PageAnonExclusive(subpage);
+
+ flush_cache_page(vma, address, pte_pfn(*pvmw.pte));
+ /* Nuke the page table entry. */
+ if (should_defer_flush(mm, flags)) {
+ /*
+ * We clear the PTE but do not flush so potentially
+ * a remote CPU could still be writing to the folio.
+ * If the entry was previously clean then the
+ * architecture must guarantee that a clear->dirty
+ * transition on a cached TLB entry is written through
+ * and traps if the PTE is unmapped.
+ */
+ pteval = ptep_get_and_clear(mm, address, pvmw.pte);
+
+ set_tlb_ubc_flush_pending(mm, pte_dirty(pteval));
+ } else {
+ pteval = ptep_clear_flush(vma, address, pvmw.pte);
+ }
+
+ /*
+ * Now the pte is cleared. If this pte was uffd-wp armed,
+ * we may want to replace a none pte with a marker pte if
+ * it's file-backed, so we don't lose the tracking info.
+ */
+ pte_install_uffd_wp_if_needed(vma, address, pvmw.pte, pteval);
+
+ /* Set the dirty flag on the folio now the pte is gone. */
+ if (pte_dirty(pteval))
+ folio_mark_dirty(folio);
+
+ /* Update high watermark before we lower rss */
+ update_hiwater_rss(mm);
+
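+	/*
+	 * Poisoned page: leave a hwpoison swap entry in place so that a
+	 * later access faults and is handled as a memory error rather
+	 * than touching the bad page again.
+	 */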
+	if (PageHWPoison(subpage) && (flags & TTU_HWPOISON)) {
+ pteval = swp_entry_to_pte(make_hwpoison_entry(subpage));
+ dec_mm_counter(mm, mm_counter(&folio->page));
+ set_pte_at(mm, address, pvmw.pte, pteval);
+ } else if (pte_unused(pteval) && !userfaultfd_armed(vma)) {
+ /*
+ * The guest indicated that the page content is of no
+ * interest anymore. Simply discard the pte, vmscan
+ * will take care of the rest.
+ * A future reference will then fault in a new zero
+ * page. When userfaultfd is active, we must not drop
+ * this page though, as its main user (postcopy
+ * migration) will not expect userfaults on already
+ * copied pages.
+ */
+ dec_mm_counter(mm, mm_counter(&folio->page));
+ /* We have to invalidate as we cleared the pte */
+ mmu_notifier_invalidate_range(mm, address,
+ address + PAGE_SIZE);
+ } else if (folio_test_anon(folio)) {
+ swp_entry_t entry = { .val = page_private(subpage) };
+ pte_t swp_pte;
+ /*
+ * Store the swap location in the pte.
+ * See handle_pte_fault() ...
+ */
+ if (unlikely(folio_test_swapbacked(folio) !=
+ folio_test_swapcache(folio))) {
+ WARN_ON_ONCE(1);
+ ret = false;
+ /* We have to invalidate as we cleared the pte */
+ mmu_notifier_invalidate_range(mm, address,
+ address + PAGE_SIZE);
+ page_vma_mapped_walk_done(&pvmw);
+ goto discard;
+ }
+
+ /* MADV_FREE page check */
+ if (!folio_test_swapbacked(folio)) {
+ int ref_count, map_count;
+
+ /*
+ * Synchronize with gup_pte_range():
+ * - clear PTE; barrier; read refcount
+ * - inc refcount; barrier; read PTE
+ */
+ smp_mb();
+
+ ref_count = folio_ref_count(folio);
+ map_count = folio_mapcount(folio);
+
+ /*
+ * Order reads for page refcount and dirty flag
+ * (see comments in __remove_mapping()).
+ */
+ smp_rmb();
+
+ /*
+ * The only page refs must be one from isolation
+ * plus the rmap(s) (dropped by discard:).
+ */
+ if (ref_count == 1 + map_count &&
+ !folio_test_dirty(folio)) {
+ /* Invalidate as we cleared the pte */
+ mmu_notifier_invalidate_range(mm,
+ address, address + PAGE_SIZE);
+ dec_mm_counter(mm, MM_ANONPAGES);
+ goto discard;
+ }
+
+ /*
+ * If the folio was redirtied, it cannot be
+ * discarded. Remap the page to page table.
+ */
+ set_pte_at(mm, address, pvmw.pte, pteval);
+ folio_set_swapbacked(folio);
+ ret = false;
+ page_vma_mapped_walk_done(&pvmw);
+ goto discard;
+ }
+
+ if (swap_duplicate(entry) < 0) {
+ set_pte_at(mm, address, pvmw.pte, pteval);
+ ret = false;
+ page_vma_mapped_walk_done(&pvmw);
+ goto discard;
+ }
+ if (arch_unmap_one(mm, vma, address, pteval) < 0) {
+ swap_free(entry);
+ set_pte_at(mm, address, pvmw.pte, pteval);
+ ret = false;
+ page_vma_mapped_walk_done(&pvmw);
+ goto discard;
+ }
+
+ /* See page_try_share_anon_rmap(): clear PTE first. */
+ if (anon_exclusive &&
+ page_try_share_anon_rmap(subpage)) {
+ swap_free(entry);
+ set_pte_at(mm, address, pvmw.pte, pteval);
+ ret = false;
+ page_vma_mapped_walk_done(&pvmw);
+ goto discard;
+ }
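+		/*
+		 * Make sure this mm is on init_mm.mmlist so that swapoff's
+		 * try_to_unuse() can find and resolve its swap entries later.
+		 */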
+ if (list_empty(&mm->mmlist)) {
+ spin_lock(&mmlist_lock);
+ if (list_empty(&mm->mmlist))
+ list_add(&mm->mmlist, &init_mm.mmlist);
+ spin_unlock(&mmlist_lock);
+ }
+ dec_mm_counter(mm, MM_ANONPAGES);
+ inc_mm_counter(mm, MM_SWAPENTS);
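+		/*
+		 * Encode the swap entry as a PTE and carry over the
+		 * anon-exclusive, soft-dirty and uffd-wp bits from the
+		 * old mapping.
+		 */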
+ swp_pte = swp_entry_to_pte(entry);
+ if (anon_exclusive)
+ swp_pte = pte_swp_mkexclusive(swp_pte);
+ if (pte_soft_dirty(pteval))
+ swp_pte = pte_swp_mksoft_dirty(swp_pte);
+ if (pte_uffd_wp(pteval))
+ swp_pte = pte_swp_mkuffd_wp(swp_pte);
+ set_pte_at(mm, address, pvmw.pte, swp_pte);
+ /* Invalidate as we cleared the pte */
+ mmu_notifier_invalidate_range(mm, address,
+ address + PAGE_SIZE);
+ } else {
+ /*
+ * This is a locked file-backed folio,
+ * so it cannot be removed from the page
+ * cache and replaced by a new folio before
+ * mmu_notifier_invalidate_range_end, so no
+ * concurrent thread might update its page table
+ * to point at a new folio while a device is
+ * still using this folio.
+ *
+ * See Documentation/mm/mmu_notifier.rst
+ */
+ dec_mm_counter(mm, mm_counter_file(&folio->page));
+ }
+
+discard:
+ return ret;
+}
+
/*
* @arg: enum ttu_flags will be passed to this argument
*/
static bool try_to_unmap_one(struct folio *folio, struct vm_area_struct *vma,
unsigned long address, void *arg)
{
- struct mm_struct *mm = vma->vm_mm;
DEFINE_FOLIO_VMA_WALK(pvmw, folio, vma, address, 0);
- pte_t pteval;
struct page *subpage;
- bool anon_exclusive, ret = true;
+ bool ret = true;
struct mmu_notifier_range range;
enum ttu_flags flags = (enum ttu_flags)(long)arg;
@@ -1613,179 +1800,11 @@ static bool try_to_unmap_one(struct folio *folio, struct vm_area_struct *vma,
subpage = folio_page(folio,
pte_pfn(*pvmw.pte) - folio_pfn(folio));
- anon_exclusive = folio_test_anon(folio) &&
- PageAnonExclusive(subpage);
-
- flush_cache_page(vma, address, pte_pfn(*pvmw.pte));
- /* Nuke the page table entry. */
- if (should_defer_flush(mm, flags)) {
- /*
- * We clear the PTE but do not flush so potentially
- * a remote CPU could still be writing to the folio.
- * If the entry was previously clean then the
- * architecture must guarantee that a clear->dirty
- * transition on a cached TLB entry is written through
- * and traps if the PTE is unmapped.
- */
- pteval = ptep_get_and_clear(mm, address, pvmw.pte);
-
- set_tlb_ubc_flush_pending(mm, pte_dirty(pteval));
- } else {
- pteval = ptep_clear_flush(vma, address, pvmw.pte);
- }
-
- /*
- * Now the pte is cleared. If this pte was uffd-wp armed,
- * we may want to replace a none pte with a marker pte if
- * it's file-backed, so we don't lose the tracking info.
- */
- pte_install_uffd_wp_if_needed(vma, address, pvmw.pte, pteval);
-
- /* Set the dirty flag on the folio now the pte is gone. */
- if (pte_dirty(pteval))
- folio_mark_dirty(folio);
-
- /* Update high watermark before we lower rss */
- update_hiwater_rss(mm);
-
- if (PageHWPoison(subpage) && (flags & TTU_HWPOISON)) {
- pteval = swp_entry_to_pte(make_hwpoison_entry(subpage));
- dec_mm_counter(mm, mm_counter(&folio->page));
- set_pte_at(mm, address, pvmw.pte, pteval);
- } else if (pte_unused(pteval) && !userfaultfd_armed(vma)) {
- /*
- * The guest indicated that the page content is of no
- * interest anymore. Simply discard the pte, vmscan
- * will take care of the rest.
- * A future reference will then fault in a new zero
- * page. When userfaultfd is active, we must not drop
- * this page though, as its main user (postcopy
- * migration) will not expect userfaults on already
- * copied pages.
- */
- dec_mm_counter(mm, mm_counter(&folio->page));
- /* We have to invalidate as we cleared the pte */
- mmu_notifier_invalidate_range(mm, address,
- address + PAGE_SIZE);
- } else if (folio_test_anon(folio)) {
- swp_entry_t entry = { .val = page_private(subpage) };
- pte_t swp_pte;
- /*
- * Store the swap location in the pte.
- * See handle_pte_fault() ...
- */
- if (unlikely(folio_test_swapbacked(folio) !=
- folio_test_swapcache(folio))) {
- WARN_ON_ONCE(1);
- ret = false;
- /* We have to invalidate as we cleared the pte */
- mmu_notifier_invalidate_range(mm, address,
- address + PAGE_SIZE);
- page_vma_mapped_walk_done(&pvmw);
- break;
- }
-
- /* MADV_FREE page check */
- if (!folio_test_swapbacked(folio)) {
- int ref_count, map_count;
-
- /*
- * Synchronize with gup_pte_range():
- * - clear PTE; barrier; read refcount
- * - inc refcount; barrier; read PTE
- */
- smp_mb();
-
- ref_count = folio_ref_count(folio);
- map_count = folio_mapcount(folio);
-
- /*
- * Order reads for page refcount and dirty flag
- * (see comments in __remove_mapping()).
- */
- smp_rmb();
-
- /*
- * The only page refs must be one from isolation
- * plus the rmap(s) (dropped by discard:).
- */
- if (ref_count == 1 + map_count &&
- !folio_test_dirty(folio)) {
- /* Invalidate as we cleared the pte */
- mmu_notifier_invalidate_range(mm,
- address, address + PAGE_SIZE);
- dec_mm_counter(mm, MM_ANONPAGES);
- goto discard;
- }
-
- /*
- * If the folio was redirtied, it cannot be
- * discarded. Remap the page to page table.
- */
- set_pte_at(mm, address, pvmw.pte, pteval);
- folio_set_swapbacked(folio);
- ret = false;
- page_vma_mapped_walk_done(&pvmw);
- break;
- }
-
- if (swap_duplicate(entry) < 0) {
- set_pte_at(mm, address, pvmw.pte, pteval);
- ret = false;
- page_vma_mapped_walk_done(&pvmw);
- break;
- }
- if (arch_unmap_one(mm, vma, address, pteval) < 0) {
- swap_free(entry);
- set_pte_at(mm, address, pvmw.pte, pteval);
- ret = false;
- page_vma_mapped_walk_done(&pvmw);
- break;
- }
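+		/*
+		 * The helper finishes the page table walk itself when it
+		 * fails to unmap the PTE, so just stop iterating here.
+		 */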
+ ret = try_to_unmap_one_page(folio, vma,
+ range, pvmw, address, flags);
+ if (!ret)
+ break;
- /* See page_try_share_anon_rmap(): clear PTE first. */
- if (anon_exclusive &&
- page_try_share_anon_rmap(subpage)) {
- swap_free(entry);
- set_pte_at(mm, address, pvmw.pte, pteval);
- ret = false;
- page_vma_mapped_walk_done(&pvmw);
- break;
- }
- if (list_empty(&mm->mmlist)) {
- spin_lock(&mmlist_lock);
- if (list_empty(&mm->mmlist))
- list_add(&mm->mmlist, &init_mm.mmlist);
- spin_unlock(&mmlist_lock);
- }
- dec_mm_counter(mm, MM_ANONPAGES);
- inc_mm_counter(mm, MM_SWAPENTS);
- swp_pte = swp_entry_to_pte(entry);
- if (anon_exclusive)
- swp_pte = pte_swp_mkexclusive(swp_pte);
- if (pte_soft_dirty(pteval))
- swp_pte = pte_swp_mksoft_dirty(swp_pte);
- if (pte_uffd_wp(pteval))
- swp_pte = pte_swp_mkuffd_wp(swp_pte);
- set_pte_at(mm, address, pvmw.pte, swp_pte);
- /* Invalidate as we cleared the pte */
- mmu_notifier_invalidate_range(mm, address,
- address + PAGE_SIZE);
- } else {
- /*
- * This is a locked file-backed folio,
- * so it cannot be removed from the page
- * cache and replaced by a new folio before
- * mmu_notifier_invalidate_range_end, so no
- * concurrent thread might update its page table
- * to point at a new folio while a device is
- * still using this folio.
- *
- * See Documentation/mm/mmu_notifier.rst
- */
- dec_mm_counter(mm, mm_counter_file(&folio->page));
- }
-discard:
/*
* No need to call mmu_notifier_invalidate_range() it has be
* done above for all cases requiring it to happen under page
No functional change. Just code reorganized.

Signed-off-by: Yin Fengwei <fengwei.yin@intel.com>
---
 mm/rmap.c | 369 ++++++++++++++++++++++++++++--------------------------
 1 file changed, 194 insertions(+), 175 deletions(-)