[RFC,18/26] hugetlb: use struct hugetlb_pte for walk_hugetlb_range

Message ID 20220624173656.2033256-19-jthoughton@google.com (mailing list archive)
State New
Series hugetlb: Introduce HugeTLB high-granularity mapping

Commit Message

James Houghton June 24, 2022, 5:36 p.m. UTC
Although this change is large, it is somewhat straightforward. Before,
all users of walk_hugetlb_range could get the size of the PTE just by
checking the hmask or the mm_walk struct. With HGM, that information is
held in the hugetlb_pte struct, so we provide that instead of the raw
pte_t*.

Signed-off-by: James Houghton <jthoughton@google.com>
---
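
For reference, the hugetlb_pte struct and its size helpers, introduced
earlier in this series, look roughly like the following (a paraphrased
sketch for readers of this patch, not the authoritative definition):

struct hugetlb_pte {
	pte_t *ptep;
	unsigned int shift;
};

static inline unsigned long hugetlb_pte_size(const struct hugetlb_pte *hpte)
{
	return 1UL << hpte->shift;
}

static inline unsigned long hugetlb_pte_mask(const struct hugetlb_pte *hpte)
{
	return ~(hugetlb_pte_size(hpte) - 1);
}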
 arch/s390/mm/gmap.c      |  8 ++++++--
 fs/proc/task_mmu.c       | 35 +++++++++++++++++++----------------
 include/linux/pagewalk.h |  3 ++-
 mm/damon/vaddr.c         | 34 ++++++++++++++++++----------------
 mm/hmm.c                 |  7 ++++---
 mm/mempolicy.c           | 11 ++++++++---
 mm/mincore.c             |  4 ++--
 mm/mprotect.c            |  6 +++---
 mm/pagewalk.c            | 18 ++++++++++++++++--
 9 files changed, 78 insertions(+), 48 deletions(-)
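
To illustrate the interface change, here is a minimal sketch (not part
of this patch) of a hugetlb_entry callback under the new signature;
hugetlb_pte_present_leaf(), hugetlb_ptep_get(), and hugetlb_pte_size()
are helpers this series introduces:

static int example_hugetlb_entry(struct hugetlb_pte *hpte,
				 unsigned long addr, unsigned long next,
				 struct mm_walk *walk)
{
	/* The mapping size now comes from the hugetlb_pte, not hmask. */
	if (hugetlb_pte_present_leaf(hpte))
		pr_info("present entry of size %lu at 0x%lx\n",
			hugetlb_pte_size(hpte), addr);
	return 0;
}

static const struct mm_walk_ops example_ops = {
	.hugetlb_entry = example_hugetlb_entry,
};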

Patch

diff --git a/arch/s390/mm/gmap.c b/arch/s390/mm/gmap.c
index b8ae4a4aa2ba..518cebfd72cd 100644
--- a/arch/s390/mm/gmap.c
+++ b/arch/s390/mm/gmap.c
@@ -2620,10 +2620,14 @@  static int __s390_enable_skey_pmd(pmd_t *pmd, unsigned long addr,
 	return 0;
 }
 
-static int __s390_enable_skey_hugetlb(pte_t *pte, unsigned long addr,
-				      unsigned long hmask, unsigned long next,
+static int __s390_enable_skey_hugetlb(struct hugetlb_pte *hpte,
+				      unsigned long addr, unsigned long next,
 				      struct mm_walk *walk)
 {
-	pmd_t *pmd = (pmd_t *)pte;
+	pmd_t *pmd = (pmd_t *)hpte->ptep;
 	unsigned long start, end;
 	struct page *page = pmd_page(*pmd);
+
+	if (!hugetlb_pte_present_leaf(hpte) ||
+	    hugetlb_pte_size(hpte) != PMD_SIZE)
+		return -EINVAL;
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 2d04e3470d4c..b2d683f99fa9 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -714,18 +714,19 @@  static void show_smap_vma_flags(struct seq_file *m, struct vm_area_struct *vma)
 }
 
 #ifdef CONFIG_HUGETLB_PAGE
-static int smaps_hugetlb_range(pte_t *pte, unsigned long hmask,
+static int smaps_hugetlb_range(struct hugetlb_pte *hpte,
 				 unsigned long addr, unsigned long end,
 				 struct mm_walk *walk)
 {
 	struct mem_size_stats *mss = walk->private;
 	struct vm_area_struct *vma = walk->vma;
 	struct page *page = NULL;
+	pte_t pte = hugetlb_ptep_get(hpte);
 
-	if (pte_present(*pte)) {
-		page = vm_normal_page(vma, addr, *pte);
-	} else if (is_swap_pte(*pte)) {
-		swp_entry_t swpent = pte_to_swp_entry(*pte);
+	if (hugetlb_pte_present_leaf(hpte)) {
+		page = vm_normal_page(vma, addr, pte);
+	} else if (is_swap_pte(pte)) {
+		swp_entry_t swpent = pte_to_swp_entry(pte);
 
 		if (is_pfn_swap_entry(swpent))
 			page = pfn_swap_entry_to_page(swpent);
@@ -734,9 +735,9 @@  static int smaps_hugetlb_range(pte_t *pte, unsigned long hmask,
 		int mapcount = page_mapcount(page);
 
 		if (mapcount >= 2)
-			mss->shared_hugetlb += huge_page_size(hstate_vma(vma));
+			mss->shared_hugetlb += hugetlb_pte_size(hpte);
 		else
-			mss->private_hugetlb += huge_page_size(hstate_vma(vma));
+			mss->private_hugetlb += hugetlb_pte_size(hpte);
 	}
 	return 0;
 }
@@ -1535,7 +1536,7 @@  static int pagemap_pmd_range(pmd_t *pmdp, unsigned long addr, unsigned long end,
 
 #ifdef CONFIG_HUGETLB_PAGE
 /* This function walks within one hugetlb entry in the single call */
-static int pagemap_hugetlb_range(pte_t *ptep, unsigned long hmask,
+static int pagemap_hugetlb_range(struct hugetlb_pte *hpte,
 				 unsigned long addr, unsigned long end,
 				 struct mm_walk *walk)
 {
@@ -1543,13 +1544,13 @@  static int pagemap_hugetlb_range(pte_t *ptep, unsigned long hmask,
 	struct vm_area_struct *vma = walk->vma;
 	u64 flags = 0, frame = 0;
 	int err = 0;
-	pte_t pte;
+	unsigned long hmask = hugetlb_pte_mask(hpte);
 
 	if (vma->vm_flags & VM_SOFTDIRTY)
 		flags |= PM_SOFT_DIRTY;
 
-	pte = huge_ptep_get(ptep);
-	if (pte_present(pte)) {
+	if (hugetlb_pte_present_leaf(hpte)) {
+		pte_t pte = hugetlb_ptep_get(hpte);
 		struct page *page = pte_page(pte);
 
 		if (!PageAnon(page))
@@ -1565,7 +1566,7 @@  static int pagemap_hugetlb_range(pte_t *ptep, unsigned long hmask,
 		if (pm->show_pfn)
 			frame = pte_pfn(pte) +
 				((addr & ~hmask) >> PAGE_SHIFT);
-	} else if (pte_swp_uffd_wp_any(pte)) {
+	} else if (pte_swp_uffd_wp_any(hugetlb_ptep_get(hpte))) {
 		flags |= PM_UFFD_WP;
 	}
 
@@ -1869,17 +1870,19 @@  static int gather_pte_stats(pmd_t *pmd, unsigned long addr,
 	return 0;
 }
 #ifdef CONFIG_HUGETLB_PAGE
-static int gather_hugetlb_stats(pte_t *pte, unsigned long hmask,
-		unsigned long addr, unsigned long end, struct mm_walk *walk)
+static int gather_hugetlb_stats(struct hugetlb_pte *hpte, unsigned long addr,
+		unsigned long end, struct mm_walk *walk)
 {
-	pte_t huge_pte = huge_ptep_get(pte);
+	pte_t huge_pte = hugetlb_ptep_get(hpte);
 	struct numa_maps *md;
 	struct page *page;
 
-	if (!pte_present(huge_pte))
+	if (!hugetlb_pte_present_leaf(hpte))
 		return 0;
 
 	page = pte_page(huge_pte);
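+	/*
+	 * With HGM, a present entry may map a tail page of the hugepage;
+	 * only gather stats once, when we see the head page.
+	 */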
+	if (page != compound_head(page))
+		return 0;
 
 	md = walk->private;
 	gather_stats(page, md, pte_dirty(huge_pte), 1);
diff --git a/include/linux/pagewalk.h b/include/linux/pagewalk.h
index ac7b38ad5903..0d21e25df37f 100644
--- a/include/linux/pagewalk.h
+++ b/include/linux/pagewalk.h
@@ -3,6 +3,7 @@ 
 #define _LINUX_PAGEWALK_H
 
 #include <linux/mm.h>
+#include <linux/hugetlb.h>
 
 struct mm_walk;
 
@@ -47,7 +48,7 @@  struct mm_walk_ops {
 			 unsigned long next, struct mm_walk *walk);
 	int (*pte_hole)(unsigned long addr, unsigned long next,
 			int depth, struct mm_walk *walk);
-	int (*hugetlb_entry)(pte_t *pte, unsigned long hmask,
+	int (*hugetlb_entry)(struct hugetlb_pte *hpte,
 			     unsigned long addr, unsigned long next,
 			     struct mm_walk *walk);
 	int (*test_walk)(unsigned long addr, unsigned long next,
diff --git a/mm/damon/vaddr.c b/mm/damon/vaddr.c
index 59e1653799f8..ce50b937dcf2 100644
--- a/mm/damon/vaddr.c
+++ b/mm/damon/vaddr.c
@@ -324,14 +324,15 @@  static int damon_mkold_pmd_entry(pmd_t *pmd, unsigned long addr,
 }
 
 #ifdef CONFIG_HUGETLB_PAGE
-static void damon_hugetlb_mkold(pte_t *pte, struct mm_struct *mm,
+static void damon_hugetlb_mkold(struct hugetlb_pte *hpte, struct mm_struct *mm,
 				struct vm_area_struct *vma, unsigned long addr)
 {
 	bool referenced = false;
-	pte_t entry = huge_ptep_get(pte);
+	pte_t entry = hugetlb_ptep_get(hpte);
 	struct page *page = pte_page(entry);
+	struct page *hpage = compound_head(page);
 
-	get_page(page);
+	get_page(hpage);
 
 	if (pte_young(entry)) {
 		referenced = true;
@@ -342,18 +343,18 @@  static void damon_hugetlb_mkold(pte_t *pte, struct mm_struct *mm,
 
 #ifdef CONFIG_MMU_NOTIFIER
 	if (mmu_notifier_clear_young(mm, addr,
-				     addr + huge_page_size(hstate_vma(vma))))
+				     addr + hugetlb_pte_size(hpte)))
 		referenced = true;
 #endif /* CONFIG_MMU_NOTIFIER */
 
 	if (referenced)
-		set_page_young(page);
+		set_page_young(hpage);
 
-	set_page_idle(page);
-	put_page(page);
+	set_page_idle(hpage);
+	put_page(hpage);
 }
 
-static int damon_mkold_hugetlb_entry(pte_t *pte, unsigned long hmask,
+static int damon_mkold_hugetlb_entry(struct hugetlb_pte *hpte,
 				     unsigned long addr, unsigned long end,
 				     struct mm_walk *walk)
 {
@@ -361,12 +362,12 @@  static int damon_mkold_hugetlb_entry(pte_t *pte, unsigned long hmask,
 	spinlock_t *ptl;
 	pte_t entry;
 
-	ptl = huge_pte_lock(h, walk->mm, pte);
-	entry = huge_ptep_get(pte);
+	ptl = huge_pte_lock_shift(hpte->shift, walk->mm, hpte->ptep);
+	entry = huge_ptep_get(hpte->ptep);
 	if (!pte_present(entry))
 		goto out;
 
-	damon_hugetlb_mkold(pte, walk->mm, walk->vma, addr);
+	damon_hugetlb_mkold(hpte, walk->mm, walk->vma, addr);
 
 out:
 	spin_unlock(ptl);
@@ -474,31 +475,32 @@  static int damon_young_pmd_entry(pmd_t *pmd, unsigned long addr,
 }
 
 #ifdef CONFIG_HUGETLB_PAGE
-static int damon_young_hugetlb_entry(pte_t *pte, unsigned long hmask,
+static int damon_young_hugetlb_entry(struct hugetlb_pte *hpte,
 				     unsigned long addr, unsigned long end,
 				     struct mm_walk *walk)
 {
 	struct damon_young_walk_private *priv = walk->private;
 	struct hstate *h = hstate_vma(walk->vma);
-	struct page *page;
+	struct page *page, *hpage;
 	spinlock_t *ptl;
 	pte_t entry;
 
-	ptl = huge_pte_lock(h, walk->mm, pte);
-	entry = huge_ptep_get(pte);
+	ptl = huge_pte_lock_shift(hpte->shift, walk->mm, hpte->ptep);
+	entry = huge_ptep_get(hpte->ptep);
 	if (!pte_present(entry))
 		goto out;
 
 	page = pte_page(entry);
-	get_page(page);
+	hpage = compound_head(page);
+	get_page(hpage);
 
-	if (pte_young(entry) || !page_is_idle(page) ||
+	if (pte_young(entry) || !page_is_idle(hpage) ||
 	    mmu_notifier_test_young(walk->mm, addr)) {
 		*priv->page_sz = huge_page_size(h);
 		priv->young = true;
 	}
 
-	put_page(page);
+	put_page(hpage);
 
 out:
 	spin_unlock(ptl);
diff --git a/mm/hmm.c b/mm/hmm.c
index 3fd3242c5e50..1ad5d76fa8be 100644
--- a/mm/hmm.c
+++ b/mm/hmm.c
@@ -472,7 +472,7 @@  static int hmm_vma_walk_pud(pud_t *pudp, unsigned long start, unsigned long end,
 #endif
 
 #ifdef CONFIG_HUGETLB_PAGE
-static int hmm_vma_walk_hugetlb_entry(pte_t *pte, unsigned long hmask,
+static int hmm_vma_walk_hugetlb_entry(struct hugetlb_pte *hpte,
 				      unsigned long start, unsigned long end,
 				      struct mm_walk *walk)
 {
@@ -483,11 +483,12 @@  static int hmm_vma_walk_hugetlb_entry(pte_t *pte, unsigned long hmask,
 	unsigned int required_fault;
 	unsigned long pfn_req_flags;
 	unsigned long cpu_flags;
+	unsigned long hmask = hugetlb_pte_mask(hpte);
 	spinlock_t *ptl;
 	pte_t entry;
 
-	ptl = huge_pte_lock(hstate_vma(vma), walk->mm, pte);
-	entry = huge_ptep_get(pte);
+	ptl = huge_pte_lock_shift(hpte->shift, walk->mm, hpte->ptep);
+	entry = huge_ptep_get(hpte->ptep);
 
 	i = (start - range->start) >> PAGE_SHIFT;
 	pfn_req_flags = range->hmm_pfns[i];
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index d39b01fd52fe..a1d82db7c19f 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -559,7 +559,7 @@  static int queue_pages_pte_range(pmd_t *pmd, unsigned long addr,
 	return addr != end ? -EIO : 0;
 }
 
-static int queue_pages_hugetlb(pte_t *pte, unsigned long hmask,
+static int queue_pages_hugetlb(struct hugetlb_pte *hpte,
 			       unsigned long addr, unsigned long end,
 			       struct mm_walk *walk)
 {
@@ -571,8 +571,13 @@  static int queue_pages_hugetlb(pte_t *pte, unsigned long hmask,
 	spinlock_t *ptl;
 	pte_t entry;
 
-	ptl = huge_pte_lock(hstate_vma(walk->vma), walk->mm, pte);
-	entry = huge_ptep_get(pte);
+	/* We don't migrate high-granularity HugeTLB mappings for now. */
+	if (hugetlb_pte_size(hpte) !=
+			huge_page_size(hstate_vma(walk->vma)))
+		return -EINVAL;
+
+	ptl = hugetlb_pte_lock(walk->mm, hpte);
+	entry = hugetlb_ptep_get(hpte);
 	if (!pte_present(entry))
 		goto unlock;
 	page = pte_page(entry);
diff --git a/mm/mincore.c b/mm/mincore.c
index fa200c14185f..dc1717dc6a2c 100644
--- a/mm/mincore.c
+++ b/mm/mincore.c
@@ -22,7 +22,7 @@ 
 #include <linux/uaccess.h>
 #include "swap.h"
 
-static int mincore_hugetlb(pte_t *pte, unsigned long hmask, unsigned long addr,
+static int mincore_hugetlb(struct hugetlb_pte *hpte, unsigned long addr,
 			unsigned long end, struct mm_walk *walk)
 {
 #ifdef CONFIG_HUGETLB_PAGE
@@ -33,7 +33,7 @@  static int mincore_hugetlb(pte_t *pte, unsigned long hmask, unsigned long addr,
 	 * Hugepages under user process are always in RAM and never
 	 * swapped out, but theoretically it needs to be checked.
 	 */
-	present = pte && !huge_pte_none(huge_ptep_get(pte));
+	present = hpte->ptep && !hugetlb_pte_none(hpte);
 	for (; addr != end; vec++, addr += PAGE_SIZE)
 		*vec = present;
 	walk->private = vec;
diff --git a/mm/mprotect.c b/mm/mprotect.c
index ba5592655ee3..9c5a35a1c0eb 100644
--- a/mm/mprotect.c
+++ b/mm/mprotect.c
@@ -476,12 +476,12 @@  static int prot_none_pte_entry(pte_t *pte, unsigned long addr,
 		0 : -EACCES;
 }
 
-static int prot_none_hugetlb_entry(pte_t *pte, unsigned long hmask,
+static int prot_none_hugetlb_entry(struct hugetlb_pte *hpte,
 				   unsigned long addr, unsigned long next,
 				   struct mm_walk *walk)
 {
-	return pfn_modify_allowed(pte_pfn(*pte), *(pgprot_t *)(walk->private)) ?
-		0 : -EACCES;
+	return pfn_modify_allowed(pte_pfn(*hpte->ptep),
+			*(pgprot_t *)(walk->private)) ? 0 : -EACCES;
 }
 
 static int prot_none_test(unsigned long addr, unsigned long next,
diff --git a/mm/pagewalk.c b/mm/pagewalk.c
index 9b3db11a4d1d..f8e24a0a0179 100644
--- a/mm/pagewalk.c
+++ b/mm/pagewalk.c
@@ -3,6 +3,7 @@ 
 #include <linux/highmem.h>
 #include <linux/sched.h>
 #include <linux/hugetlb.h>
+#include <linux/minmax.h>
 
 /*
  * We want to know the real level where a entry is located ignoring any
@@ -301,13 +302,26 @@  static int walk_hugetlb_range(unsigned long addr, unsigned long end,
 	pte_t *pte;
 	const struct mm_walk_ops *ops = walk->ops;
 	int err = 0;
+	struct hugetlb_pte hpte;
 
 	do {
-		next = hugetlb_entry_end(h, addr, end);
 		pte = huge_pte_offset(walk->mm, addr & hmask, sz);
+		if (!pte) {
+			next = hugetlb_entry_end(h, addr, end);
+		} else {
+			hugetlb_pte_populate(&hpte, pte, huge_page_shift(h));
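+			/*
+			 * For HGM VMAs, walk down to the entry that
+			 * actually maps this address, stopping early
+			 * if we reach a none entry.
+			 */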
+			if (hugetlb_hgm_enabled(vma)) {
+				err = hugetlb_walk_to(walk->mm, &hpte, addr,
+						      PAGE_SIZE,
+						      /*stop_at_none=*/true);
+				if (err)
+					break;
+			}
+			next = min(addr + hugetlb_pte_size(&hpte), end);
+		}
 
 		if (pte)
-			err = ops->hugetlb_entry(pte, hmask, addr, next, walk);
+			err = ops->hugetlb_entry(&hpte, addr, next, walk);
 		else if (ops->pte_hole)
 			err = ops->pte_hole(addr, next, -1, walk);