diff mbox series

[RFC,07/19] mm: hugetlb: Refactor vma_*_reservation functions

Message ID 508025e09425a98d52b17cfbdc07340ae05e3e32.1686077275.git.ackerleytng@google.com (mailing list archive)
State New
Headers show
Series hugetlb support for KVM guest_mem | expand

Commit Message

Ackerley Tng June 6, 2023, 7:03 p.m. UTC
vma_*_reservation functions rely on vma_resv_map(), which assumes the
hugetlbfs concept of the resv_map being stored in a specific field of
the inode.

This refactor enables vma_*_reservation functions, now renamed
resv_map_*_reservation, to be used with non-hugetlbfs filesystems,
further decoupling hugetlb from hugetlbfs.

Signed-off-by: Ackerley Tng <ackerleytng@google.com>
---
 mm/hugetlb.c | 184 +++++++++++++++++++++++++++------------------------
 1 file changed, 99 insertions(+), 85 deletions(-)
diff mbox series

Patch

diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index d16c6417b90f..d943f83d15a9 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -2643,89 +2643,81 @@  static void return_unused_surplus_pages(struct hstate *h,
 
 
 /*
- * vma_needs_reservation, vma_commit_reservation and vma_end_reservation
- * are used by the huge page allocation routines to manage reservations.
+ * resv_map_needs_reservation, resv_map_commit_reservation and
+ * resv_map_end_reservation are used by the huge page allocation routines to
+ * manage reservations.
  *
- * vma_needs_reservation is called to determine if the huge page at addr
- * within the vma has an associated reservation.  If a reservation is
- * needed, the value 1 is returned.  The caller is then responsible for
- * managing the global reservation and subpool usage counts.  After
- * the huge page has been allocated, vma_commit_reservation is called
- * to add the page to the reservation map.  If the page allocation fails,
- * the reservation must be ended instead of committed.  vma_end_reservation
- * is called in such cases.
+ * resv_map_needs_reservation is called to determine if the huge page at
+ * resv_index within resv has an associated reservation.  If a reservation is
+ * needed, the value 1 is returned.  The caller is then responsible for managing
+ * the global reservation and subpool usage counts.  After the huge page has
+ * been allocated, resv_map_commit_reservation is called to add the page to the
+ * reservation map.  If the page allocation fails, the reservation must be ended
+ * instead of committed.  resv_map_end_reservation is called in such cases.
  *
- * In the normal case, vma_commit_reservation returns the same value
- * as the preceding vma_needs_reservation call.  The only time this
- * is not the case is if a reserve map was changed between calls.  It
- * is the responsibility of the caller to notice the difference and
- * take appropriate action.
+ * In the normal case, resv_map_commit_reservation returns the same value as the
+ * preceding resv_map_needs_reservation call.  The only time this is not the
+ * case is if a reserve map was changed between calls.  It is the responsibility
+ * of the caller to notice the difference and take appropriate action.
  *
- * vma_add_reservation is used in error paths where a reservation must
- * be restored when a newly allocated huge page must be freed.  It is
- * to be called after calling vma_needs_reservation to determine if a
- * reservation exists.
+ * resv_map_add_reservation is used in error paths where a reservation must be
+ * restored when a newly allocated huge page must be freed.  It is to be called
+ * after calling resv_map_needs_reservation to determine if a reservation
+ * exists.
  *
- * vma_del_reservation is used in error paths where an entry in the reserve
- * map was created during huge page allocation and must be removed.  It is to
- * be called after calling vma_needs_reservation to determine if a reservation
+ * resv_map_del_reservation is used in error paths where an entry in the reserve
+ * map was created during huge page allocation and must be removed.  It is to be
+ * called after calling resv_map_needs_reservation to determine if a reservation
  * exists.
  */
-enum vma_resv_mode {
-	VMA_NEEDS_RESV,
-	VMA_COMMIT_RESV,
-	VMA_END_RESV,
-	VMA_ADD_RESV,
-	VMA_DEL_RESV,
+enum resv_map_resv_mode {
+	RESV_MAP_NEEDS_RESV,
+	RESV_MAP_COMMIT_RESV,
+	RESV_MAP_END_RESV,
+	RESV_MAP_ADD_RESV,
+	RESV_MAP_DEL_RESV,
 };
-static long __vma_reservation_common(struct hstate *h,
-				struct vm_area_struct *vma, unsigned long addr,
-				enum vma_resv_mode mode)
+static long __resv_map_reservation_common(struct resv_map *resv, pgoff_t resv_index,
+					  bool may_be_shared_mapping,
+					  enum resv_map_resv_mode mode)
 {
-	struct resv_map *resv;
-	pgoff_t idx;
 	long ret;
 	long dummy_out_regions_needed;
 
-	resv = vma_resv_map(vma);
-	if (!resv)
-		return 1;
-
-	idx = vma_hugecache_offset(h, vma, addr);
 	switch (mode) {
-	case VMA_NEEDS_RESV:
-		ret = region_chg(resv, idx, idx + 1, &dummy_out_regions_needed);
+	case RESV_MAP_NEEDS_RESV:
+		ret = region_chg(resv, resv_index, resv_index + 1, &dummy_out_regions_needed);
 		/* We assume that vma_reservation_* routines always operate on
 		 * 1 page, and that adding to resv map a 1 page entry can only
 		 * ever require 1 region.
 		 */
 		VM_BUG_ON(dummy_out_regions_needed != 1);
 		break;
-	case VMA_COMMIT_RESV:
-		ret = region_add(resv, idx, idx + 1, 1, NULL, NULL);
+	case RESV_MAP_COMMIT_RESV:
+		ret = region_add(resv, resv_index, resv_index + 1, 1, NULL, NULL);
 		/* region_add calls of range 1 should never fail. */
 		VM_BUG_ON(ret < 0);
 		break;
-	case VMA_END_RESV:
-		region_abort(resv, idx, idx + 1, 1);
+	case RESV_MAP_END_RESV:
+		region_abort(resv, resv_index, resv_index + 1, 1);
 		ret = 0;
 		break;
-	case VMA_ADD_RESV:
-		if (vma->vm_flags & VM_MAYSHARE) {
-			ret = region_add(resv, idx, idx + 1, 1, NULL, NULL);
+	case RESV_MAP_ADD_RESV:
+		if (may_be_shared_mapping) {
+			ret = region_add(resv, resv_index, resv_index + 1, 1, NULL, NULL);
 			/* region_add calls of range 1 should never fail. */
 			VM_BUG_ON(ret < 0);
 		} else {
-			region_abort(resv, idx, idx + 1, 1);
-			ret = region_del(resv, idx, idx + 1);
+			region_abort(resv, resv_index, resv_index + 1, 1);
+			ret = region_del(resv, resv_index, resv_index + 1);
 		}
 		break;
-	case VMA_DEL_RESV:
-		if (vma->vm_flags & VM_MAYSHARE) {
-			region_abort(resv, idx, idx + 1, 1);
-			ret = region_del(resv, idx, idx + 1);
+	case RESV_MAP_DEL_RESV:
+		if (may_be_shared_mapping) {
+			region_abort(resv, resv_index, resv_index + 1, 1);
+			ret = region_del(resv, resv_index, resv_index + 1);
 		} else {
-			ret = region_add(resv, idx, idx + 1, 1, NULL, NULL);
+			ret = region_add(resv, resv_index, resv_index + 1, 1, NULL, NULL);
 			/* region_add calls of range 1 should never fail. */
 			VM_BUG_ON(ret < 0);
 		}
@@ -2734,7 +2726,7 @@  static long __vma_reservation_common(struct hstate *h,
 		BUG();
 	}
 
-	if (vma->vm_flags & VM_MAYSHARE || mode == VMA_DEL_RESV)
+	if (may_be_shared_mapping || mode == RESV_MAP_DEL_RESV)
 		return ret;
 	/*
 	 * We know private mapping must have HPAGE_RESV_OWNER set.
@@ -2758,34 +2750,39 @@  static long __vma_reservation_common(struct hstate *h,
 	return ret;
 }
 
-static long vma_needs_reservation(struct hstate *h,
-			struct vm_area_struct *vma, unsigned long addr)
+static long resv_map_needs_reservation(struct resv_map *resv, pgoff_t resv_index,
+				       bool may_be_shared_mapping)
 {
-	return __vma_reservation_common(h, vma, addr, VMA_NEEDS_RESV);
+	return __resv_map_reservation_common(
+		resv, resv_index, may_be_shared_mapping, RESV_MAP_NEEDS_RESV);
 }
 
-static long vma_commit_reservation(struct hstate *h,
-			struct vm_area_struct *vma, unsigned long addr)
+static long resv_map_commit_reservation(struct resv_map *resv, pgoff_t resv_index,
+					bool may_be_shared_mapping)
 {
-	return __vma_reservation_common(h, vma, addr, VMA_COMMIT_RESV);
+	return __resv_map_reservation_common(
+		resv, resv_index, may_be_shared_mapping, RESV_MAP_COMMIT_RESV);
 }
 
-static void vma_end_reservation(struct hstate *h,
-			struct vm_area_struct *vma, unsigned long addr)
+static void resv_map_end_reservation(struct resv_map *resv, pgoff_t resv_index,
+				     bool may_be_shared_mapping)
 {
-	(void)__vma_reservation_common(h, vma, addr, VMA_END_RESV);
+	(void)__resv_map_reservation_common(
+		resv, resv_index, may_be_shared_mapping, RESV_MAP_END_RESV);
 }
 
-static long vma_add_reservation(struct hstate *h,
-			struct vm_area_struct *vma, unsigned long addr)
+static long resv_map_add_reservation(struct resv_map *resv, pgoff_t resv_index,
+				     bool may_be_shared_mapping)
 {
-	return __vma_reservation_common(h, vma, addr, VMA_ADD_RESV);
+	return __resv_map_reservation_common(
+		resv, resv_index, may_be_shared_mapping, RESV_MAP_ADD_RESV);
 }
 
-static long vma_del_reservation(struct hstate *h,
-			struct vm_area_struct *vma, unsigned long addr)
+static long resv_map_del_reservation(struct resv_map *resv, pgoff_t resv_index,
+				     bool may_be_shared_mapping)
 {
-	return __vma_reservation_common(h, vma, addr, VMA_DEL_RESV);
+	return __resv_map_reservation_common(
+		resv, resv_index, may_be_shared_mapping, RESV_MAP_DEL_RESV);
 }
 
 /*
@@ -2811,7 +2808,12 @@  static long vma_del_reservation(struct hstate *h,
 void restore_reserve_on_error(struct hstate *h, struct vm_area_struct *vma,
 			unsigned long address, struct folio *folio)
 {
-	long rc = vma_needs_reservation(h, vma, address);
+	long rc;
+	struct resv_map *resv = vma_resv_map(vma);
+	pgoff_t resv_index = vma_hugecache_offset(h, vma, address);
+	bool may_share = vma->vm_flags & VM_MAYSHARE;
+
+	rc = resv_map_needs_reservation(resv, resv_index, may_share);
 
 	if (folio_test_hugetlb_restore_reserve(folio)) {
 		if (unlikely(rc < 0))
@@ -2828,9 +2830,9 @@  void restore_reserve_on_error(struct hstate *h, struct vm_area_struct *vma,
 			 */
 			folio_clear_hugetlb_restore_reserve(folio);
 		else if (rc)
-			(void)vma_add_reservation(h, vma, address);
+			(void)resv_map_add_reservation(resv, resv_index, may_share);
 		else
-			vma_end_reservation(h, vma, address);
+			resv_map_end_reservation(resv, resv_index, may_share);
 	} else {
 		if (!rc) {
 			/*
@@ -2841,7 +2843,7 @@  void restore_reserve_on_error(struct hstate *h, struct vm_area_struct *vma,
 			 * Remove the entry so that a subsequent allocation
 			 * does not consume a reservation.
 			 */
-			rc = vma_del_reservation(h, vma, address);
+			rc = resv_map_del_reservation(resv, resv_index, may_share);
 			if (rc < 0)
 				/*
 				 * VERY rare out of memory condition.  Since
@@ -2855,7 +2857,7 @@  void restore_reserve_on_error(struct hstate *h, struct vm_area_struct *vma,
 		} else if (rc < 0) {
 			/*
 			 * Rare out of memory condition from
-			 * vma_needs_reservation call.  Memory allocation is
+			 * resv_map_needs_reservation call.  Memory allocation is
 			 * only attempted if a new entry is needed.  Therefore,
 			 * this implies there is not an entry in the
 			 * reserve map.
@@ -2877,7 +2879,7 @@  void restore_reserve_on_error(struct hstate *h, struct vm_area_struct *vma,
 			/*
 			 * No reservation present, do nothing
 			 */
-			 vma_end_reservation(h, vma, address);
+			 resv_map_end_reservation(resv, resv_index, may_share);
 	}
 }
 
@@ -3019,13 +3021,17 @@  struct folio *alloc_hugetlb_folio_from_subpool(
 	struct hugetlb_cgroup *h_cg = NULL;
 	bool deferred_reserve;
 
+	struct resv_map *resv = vma_resv_map(vma);
+	pgoff_t resv_index = vma_hugecache_offset(h, vma, addr);
+	bool may_share = vma->vm_flags & VM_MAYSHARE;
+
 	idx = hstate_index(h);
 	/*
 	 * Examine the region/reserve map to determine if the process
 	 * has a reservation for the page to be allocated.  A return
 	 * code of zero indicates a reservation exists (no change).
 	 */
-	map_chg = gbl_chg = vma_needs_reservation(h, vma, addr);
+	map_chg = gbl_chg = resv_map_needs_reservation(resv, resv_index, may_share);
 	if (map_chg < 0)
 		return ERR_PTR(-ENOMEM);
 
@@ -3039,7 +3045,7 @@  struct folio *alloc_hugetlb_folio_from_subpool(
 	if (map_chg || avoid_reserve) {
 		gbl_chg = hugepage_subpool_get_pages(spool, 1);
 		if (gbl_chg < 0) {
-			vma_end_reservation(h, vma, addr);
+			resv_map_end_reservation(resv, resv_index, may_share);
 			return ERR_PTR(-ENOSPC);
 		}
 
@@ -3104,11 +3110,11 @@  struct folio *alloc_hugetlb_folio_from_subpool(
 
 	hugetlb_set_folio_subpool(folio, spool);
 
-	map_commit = vma_commit_reservation(h, vma, addr);
+	map_commit = resv_map_commit_reservation(resv, resv_index, may_share);
 	if (unlikely(map_chg > map_commit)) {
 		/*
 		 * The page was added to the reservation map between
-		 * vma_needs_reservation and vma_commit_reservation.
+		 * resv_map_needs_reservation and resv_map_commit_reservation.
 		 * This indicates a race with hugetlb_reserve_pages.
 		 * Adjust for the subpool count incremented above AND
 		 * in hugetlb_reserve_pages for the same page.  Also,
@@ -3134,7 +3140,7 @@  struct folio *alloc_hugetlb_folio_from_subpool(
 out_subpool_put:
 	if (map_chg || avoid_reserve)
 		hugepage_subpool_put_pages(spool, 1);
-	vma_end_reservation(h, vma, addr);
+	resv_map_end_reservation(resv, resv_index, may_share);
 	return ERR_PTR(-ENOSPC);
 }
 
@@ -5901,12 +5907,16 @@  static vm_fault_t hugetlb_no_page(struct mm_struct *mm,
 	 * the spinlock.
 	 */
 	if ((flags & FAULT_FLAG_WRITE) && !(vma->vm_flags & VM_SHARED)) {
-		if (vma_needs_reservation(h, vma, haddr) < 0) {
+		struct resv_map *resv = vma_resv_map(vma);
+		pgoff_t resv_index = vma_hugecache_offset(h, vma, address);
+		bool may_share = vma->vm_flags & VM_MAYSHARE;
+
+		if (resv_map_needs_reservation(resv, resv_index, may_share) < 0) {
 			ret = VM_FAULT_OOM;
 			goto backout_unlocked;
 		}
 		/* Just decrements count, does not deallocate */
-		vma_end_reservation(h, vma, haddr);
+		resv_map_end_reservation(resv, resv_index, may_share);
 	}
 
 	ptl = huge_pte_lock(h, mm, ptep);
@@ -6070,12 +6080,16 @@  vm_fault_t hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma,
 	 */
 	if ((flags & (FAULT_FLAG_WRITE|FAULT_FLAG_UNSHARE)) &&
 	    !(vma->vm_flags & VM_MAYSHARE) && !huge_pte_write(entry)) {
-		if (vma_needs_reservation(h, vma, haddr) < 0) {
+		struct resv_map *resv = vma_resv_map(vma);
+		pgoff_t resv_index = vma_hugecache_offset(h, vma, address);
+		bool may_share = vma->vm_flags & VM_MAYSHARE;
+
+		if (resv_map_needs_reservation(resv, resv_index, may_share) < 0) {
 			ret = VM_FAULT_OOM;
 			goto out_mutex;
 		}
 		/* Just decrements count, does not deallocate */
-		vma_end_reservation(h, vma, haddr);
+		resv_map_end_reservation(resv, resv_index, may_share);
 
 		pagecache_folio = filemap_lock_folio(mapping, idx);
 	}