diff mbox series

[v8,2/3] mm/rmap: integrate PMD-mapped folio splitting into pagewalk loop

Message ID 20240614015138.31461-3-ioworker0@gmail.com (mailing list archive)
State New
Headers show
Series Reclaim lazyfree THP without splitting | expand

Commit Message

Lance Yang June 14, 2024, 1:51 a.m. UTC
In preparation for supporting try_to_unmap_one() to unmap PMD-mapped
folios, start the pagewalk first, then call split_huge_pmd_address() to
split the folio.

Suggested-by: David Hildenbrand <david@redhat.com>
Suggested-by: Baolin Wang <baolin.wang@linux.alibaba.com>
Signed-off-by: Lance Yang <ioworker0@gmail.com>
---
 include/linux/huge_mm.h |  6 ++++++
 include/linux/rmap.h    | 24 +++++++++++++++++++++++
 mm/huge_memory.c        | 42 +++++++++++++++++++++--------------------
 mm/rmap.c               | 21 +++++++++++++++------
 4 files changed, 67 insertions(+), 26 deletions(-)

Comments

David Hildenbrand June 14, 2024, 7:34 a.m. UTC | #1
On 14.06.24 03:51, Lance Yang wrote:
> In preparation for supporting try_to_unmap_one() to unmap PMD-mapped
> folios, start the pagewalk first, then call split_huge_pmd_address() to
> split the folio.
> 
> Suggested-by: David Hildenbrand <david@redhat.com>
> Suggested-by: Baolin Wang <baolin.wang@linux.alibaba.com>
> Signed-off-by: Lance Yang <ioworker0@gmail.com>
> ---

Would have converted that VM_BUG_ON to a VM_WARN_ON_ONCE, but it's just 
moving code, so no big deal.

Thanks!

Acked-by: David Hildenbrand <david@redhat.com>
Lance Yang June 14, 2024, 7:46 a.m. UTC | #2
On Fri, Jun 14, 2024 at 3:34 PM David Hildenbrand <david@redhat.com> wrote:
>
> On 14.06.24 03:51, Lance Yang wrote:
> > In preparation for supporting try_to_unmap_one() to unmap PMD-mapped
> > folios, start the pagewalk first, then call split_huge_pmd_address() to
> > split the folio.
> >
> > Suggested-by: David Hildenbrand <david@redhat.com>
> > Suggested-by: Baolin Wang <baolin.wang@linux.alibaba.com>
> > Signed-off-by: Lance Yang <ioworker0@gmail.com>
> > ---
>
> Would have converted that VM_BUG_ON to a VM_WARN_ON_ONCE, but it's just
> moving code, so no big deal.

OK, let’s leave it as is for now ;)

>
> Thanks!
>
> Acked-by: David Hildenbrand <david@redhat.com>

Thanks for taking time to review!
Lance

>
> --
> Cheers,
>
> David / dhildenb
>
Zi Yan June 14, 2024, 2:26 p.m. UTC | #3
On 13 Jun 2024, at 21:51, Lance Yang wrote:

> In preparation for supporting try_to_unmap_one() to unmap PMD-mapped
> folios, start the pagewalk first, then call split_huge_pmd_address() to
> split the folio.
>
> Suggested-by: David Hildenbrand <david@redhat.com>
> Suggested-by: Baolin Wang <baolin.wang@linux.alibaba.com>
> Signed-off-by: Lance Yang <ioworker0@gmail.com>
> ---
>  include/linux/huge_mm.h |  6 ++++++
>  include/linux/rmap.h    | 24 +++++++++++++++++++++++
>  mm/huge_memory.c        | 42 +++++++++++++++++++++--------------------
>  mm/rmap.c               | 21 +++++++++++++++------
>  4 files changed, 67 insertions(+), 26 deletions(-)

Thanks.

Acked-by: Zi Yan <ziy@nvidia.com>

--
Best Regards,
Yan, Zi
Lance Yang June 14, 2024, 2:41 p.m. UTC | #4
On Fri, Jun 14, 2024 at 10:26 PM Zi Yan <ziy@nvidia.com> wrote:
>
> On 13 Jun 2024, at 21:51, Lance Yang wrote:
>
> > In preparation for supporting try_to_unmap_one() to unmap PMD-mapped
> > folios, start the pagewalk first, then call split_huge_pmd_address() to
> > split the folio.
> >
> > Suggested-by: David Hildenbrand <david@redhat.com>
> > Suggested-by: Baolin Wang <baolin.wang@linux.alibaba.com>
> > Signed-off-by: Lance Yang <ioworker0@gmail.com>
> > ---
> >  include/linux/huge_mm.h |  6 ++++++
> >  include/linux/rmap.h    | 24 +++++++++++++++++++++++
> >  mm/huge_memory.c        | 42 +++++++++++++++++++++--------------------
> >  mm/rmap.c               | 21 +++++++++++++++------
> >  4 files changed, 67 insertions(+), 26 deletions(-)
>
> Thanks.
>
> Acked-by: Zi Yan <ziy@nvidia.com>

Thanks for taking time to review!
Lance

>
> --
> Best Regards,
> Yan, Zi
diff mbox series

Patch

diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
index 7ad41de5eaea..9f720b0731c4 100644
--- a/include/linux/huge_mm.h
+++ b/include/linux/huge_mm.h
@@ -428,6 +428,9 @@  static inline bool thp_migration_supported(void)
 	return IS_ENABLED(CONFIG_ARCH_ENABLE_THP_MIGRATION);
 }
 
+void split_huge_pmd_locked(struct vm_area_struct *vma, unsigned long address,
+			   pmd_t *pmd, bool freeze, struct folio *folio);
+
 #else /* CONFIG_TRANSPARENT_HUGEPAGE */
 
 static inline bool folio_test_pmd_mappable(struct folio *folio)
@@ -490,6 +493,9 @@  static inline void __split_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd,
 		unsigned long address, bool freeze, struct folio *folio) {}
 static inline void split_huge_pmd_address(struct vm_area_struct *vma,
 		unsigned long address, bool freeze, struct folio *folio) {}
+static inline void split_huge_pmd_locked(struct vm_area_struct *vma,
+					 unsigned long address, pmd_t *pmd,
+					 bool freeze, struct folio *folio) {}
 
 #define split_huge_pud(__vma, __pmd, __address)	\
 	do { } while (0)
diff --git a/include/linux/rmap.h b/include/linux/rmap.h
index 0fd9bebce54c..d1c5e2d694b2 100644
--- a/include/linux/rmap.h
+++ b/include/linux/rmap.h
@@ -703,6 +703,30 @@  static inline void page_vma_mapped_walk_done(struct page_vma_mapped_walk *pvmw)
 		spin_unlock(pvmw->ptl);
 }
 
+/**
+ * page_vma_mapped_walk_restart - Restart the page table walk.
+ * @pvmw: Pointer to struct page_vma_mapped_walk.
+ *
+ * It restarts the page table walk when changes occur in the page
+ * table, such as splitting a PMD. Ensures that the PTL held during
+ * the previous walk is released and resets the state to allow for
+ * a new walk starting at the current address stored in pvmw->address.
+ */
+static inline void
+page_vma_mapped_walk_restart(struct page_vma_mapped_walk *pvmw)
+{
+	WARN_ON_ONCE(!pvmw->pmd && !pvmw->pte);
+
+	if (likely(pvmw->ptl))
+		spin_unlock(pvmw->ptl);
+	else
+		WARN_ON_ONCE(1);
+
+	pvmw->ptl = NULL;
+	pvmw->pmd = NULL;
+	pvmw->pte = NULL;
+}
+
 bool page_vma_mapped_walk(struct page_vma_mapped_walk *pvmw);
 
 /*
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 70d20fefc6db..e766d3f3a302 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -2582,6 +2582,27 @@  static void __split_huge_pmd_locked(struct vm_area_struct *vma, pmd_t *pmd,
 	pmd_populate(mm, pmd, pgtable);
 }
 
+void split_huge_pmd_locked(struct vm_area_struct *vma, unsigned long address,
+			   pmd_t *pmd, bool freeze, struct folio *folio)
+{
+	VM_WARN_ON_ONCE(folio && !folio_test_pmd_mappable(folio));
+	VM_WARN_ON_ONCE(!IS_ALIGNED(address, HPAGE_PMD_SIZE));
+	VM_WARN_ON_ONCE(folio && !folio_test_locked(folio));
+	VM_BUG_ON(freeze && !folio);
+
+	/*
+	 * When the caller requests to set up a migration entry, we
+	 * require a folio to check the PMD against. Otherwise, there
+	 * is a risk of replacing the wrong folio.
+	 */
+	if (pmd_trans_huge(*pmd) || pmd_devmap(*pmd) ||
+	    is_pmd_migration_entry(*pmd)) {
+		if (folio && folio != pmd_folio(*pmd))
+			return;
+		__split_huge_pmd_locked(vma, pmd, address, freeze);
+	}
+}
+
 void __split_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd,
 		unsigned long address, bool freeze, struct folio *folio)
 {
@@ -2593,26 +2614,7 @@  void __split_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd,
 				(address & HPAGE_PMD_MASK) + HPAGE_PMD_SIZE);
 	mmu_notifier_invalidate_range_start(&range);
 	ptl = pmd_lock(vma->vm_mm, pmd);
-
-	/*
-	 * If caller asks to setup a migration entry, we need a folio to check
-	 * pmd against. Otherwise we can end up replacing wrong folio.
-	 */
-	VM_BUG_ON(freeze && !folio);
-	VM_WARN_ON_ONCE(folio && !folio_test_locked(folio));
-
-	if (pmd_trans_huge(*pmd) || pmd_devmap(*pmd) ||
-	    is_pmd_migration_entry(*pmd)) {
-		/*
-		 * It's safe to call pmd_page when folio is set because it's
-		 * guaranteed that pmd is present.
-		 */
-		if (folio && folio != pmd_folio(*pmd))
-			goto out;
-		__split_huge_pmd_locked(vma, pmd, range.start, freeze);
-	}
-
-out:
+	split_huge_pmd_locked(vma, range.start, pmd, freeze, folio);
 	spin_unlock(ptl);
 	mmu_notifier_invalidate_range_end(&range);
 }
diff --git a/mm/rmap.c b/mm/rmap.c
index 2d778725e4f5..dacf24bc82f0 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -1642,9 +1642,6 @@  static bool try_to_unmap_one(struct folio *folio, struct vm_area_struct *vma,
 	if (flags & TTU_SYNC)
 		pvmw.flags = PVMW_SYNC;
 
-	if (flags & TTU_SPLIT_HUGE_PMD)
-		split_huge_pmd_address(vma, address, false, folio);
-
 	/*
 	 * For THP, we have to assume the worse case ie pmd for invalidation.
 	 * For hugetlb, it could be much worse if we need to do pud
@@ -1670,9 +1667,6 @@  static bool try_to_unmap_one(struct folio *folio, struct vm_area_struct *vma,
 	mmu_notifier_invalidate_range_start(&range);
 
 	while (page_vma_mapped_walk(&pvmw)) {
-		/* Unexpected PMD-mapped THP? */
-		VM_BUG_ON_FOLIO(!pvmw.pte, folio);
-
 		/*
 		 * If the folio is in an mlock()d vma, we must not swap it out.
 		 */
@@ -1684,6 +1678,21 @@  static bool try_to_unmap_one(struct folio *folio, struct vm_area_struct *vma,
 			goto walk_abort;
 		}
 
+		if (!pvmw.pte && (flags & TTU_SPLIT_HUGE_PMD)) {
+			/*
+			 * We temporarily have to drop the PTL and start once
+			 * again from that now-PTE-mapped page table.
+			 */
+			split_huge_pmd_locked(vma, pvmw.address, pvmw.pmd,
+					      false, folio);
+			flags &= ~TTU_SPLIT_HUGE_PMD;
+			page_vma_mapped_walk_restart(&pvmw);
+			continue;
+		}
+
+		/* Unexpected PMD-mapped THP? */
+		VM_BUG_ON_FOLIO(!pvmw.pte, folio);
+
 		pfn = pte_pfn(ptep_get(pvmw.pte));
 		subpage = folio_page(folio, pfn - folio_pfn(folio));
 		address = pvmw.address;