diff mbox series

[RFC,04/19] mm: hugetlb: Decouple hstate, subpool from inode

Message ID d1c8d97fbeab33b147b104ed18299b510c50ab40.1686077275.git.ackerleytng@google.com (mailing list archive)
State New
Headers show
Series hugetlb support for KVM guest_mem | expand

Commit Message

Ackerley Tng June 6, 2023, 7:03 p.m. UTC
Retrieving the hstate and subpool from an inode, via hstate_inode() and
subpool_inode() respectively, is a hugetlbfs concept.

hugetlb should be agnostic of hugetlbfs, so hugetlb accounting
functions should accept hstate (required) and subpool (can be NULL)
independently of inode.

inode is still a parameter for these accounting functions since the
inode's block counts need to be updated during accounting.

The inode's resv_map will also still need to be updated if not NULL.

Signed-off-by: Ackerley Tng <ackerleytng@google.com>
---
 fs/hugetlbfs/inode.c    | 59 ++++++++++++++++++++++++++++-------------
 include/linux/hugetlb.h | 32 +++++++++++++++++-----
 mm/hugetlb.c            | 49 ++++++++++++++++++++--------------
 3 files changed, 95 insertions(+), 45 deletions(-)
diff mbox series

Patch

diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index 4f25df31ae80..0fc49b6252e4 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -164,7 +164,7 @@  static int hugetlbfs_file_mmap(struct file *file, struct vm_area_struct *vma)
 	file_accessed(file);
 
 	ret = -ENOMEM;
-	if (!hugetlb_reserve_pages(inode,
+	if (!hugetlb_reserve_pages(h, subpool_inode(inode), inode,
 				vma->vm_pgoff >> huge_page_order(h),
 				len >> huge_page_shift(h), vma,
 				vma->vm_flags))
@@ -550,14 +550,18 @@  hugetlb_vmdelete_list(struct rb_root_cached *root, pgoff_t start, pgoff_t end,
 	}
 }
 
-/*
+/*
+ * Remove folio from page_cache and userspace mappings. Also unreserves pages,
+ * updating hstate @h, subpool @spool (if not NULL), @inode block info and
+ * @inode's resv_map (if not NULL).
+ *
  * Called with hugetlb fault mutex held.
  * Returns true if page was actually removed, false otherwise.
  */
-static bool remove_inode_single_folio(struct hstate *h, struct inode *inode,
-					struct address_space *mapping,
-					struct folio *folio, pgoff_t index,
-					bool truncate_op)
+static bool remove_mapping_single_folio(
+	struct address_space *mapping, struct folio *folio, pgoff_t index,
+	struct hstate *h, struct hugepage_subpool *spool, struct inode *inode,
+	bool truncate_op)
 {
 	bool ret = false;
 
@@ -582,9 +586,8 @@  static bool remove_inode_single_folio(struct hstate *h, struct inode *inode,
 	hugetlb_delete_from_page_cache(folio);
 	ret = true;
 	if (!truncate_op) {
-		if (unlikely(hugetlb_unreserve_pages(inode, index,
-							index + 1, 1)))
-			hugetlb_fix_reserve_counts(inode);
+		if (unlikely(hugetlb_unreserve_pages(h, spool, inode, index, index + 1, 1)))
+			hugetlb_fix_reserve_counts(h, spool);
 	}
 
 	folio_unlock(folio);
@@ -592,7 +595,14 @@  static bool remove_inode_single_folio(struct hstate *h, struct inode *inode,
 }
 
 /*
- * remove_inode_hugepages handles two distinct cases: truncation and hole
+ * Remove hugetlb page mappings from @mapping between offsets [@lstart, @lend).
+ * Also updates reservations in:
+ * + hstate @h (required)
+ * + subpool @spool (can be NULL)
+ * + resv_map in @inode (can be NULL)
+ * and updates blocks in @inode (required)
+ *
+ * remove_mapping_hugepages handles two distinct cases: truncation and hole
  * punch.  There are subtle differences in operation for each case.
  *
  * truncation is indicated by end of range being LLONG_MAX
@@ -611,10 +621,10 @@  static bool remove_inode_single_folio(struct hstate *h, struct inode *inode,
  * Note: If the passed end of range value is beyond the end of file, but
  * not LLONG_MAX this routine still performs a hole punch operation.
  */
-void remove_inode_hugepages(struct inode *inode, loff_t lstart, loff_t lend)
+void remove_mapping_hugepages(struct address_space *mapping,
+			      struct hstate *h, struct hugepage_subpool *spool,
+			      struct inode *inode, loff_t lstart, loff_t lend)
 {
-	struct hstate *h = hstate_inode(inode);
-	struct address_space *mapping = &inode->i_data;
 	const pgoff_t start = lstart >> huge_page_shift(h);
 	const pgoff_t end = lend >> huge_page_shift(h);
 	struct folio_batch fbatch;
@@ -636,8 +646,8 @@  void remove_inode_hugepages(struct inode *inode, loff_t lstart, loff_t lend)
 			/*
 			 * Remove folio that was part of folio_batch.
 			 */
-			if (remove_inode_single_folio(h, inode, mapping, folio,
-							index, truncate_op))
+			if (remove_mapping_single_folio(mapping, folio, index,
+							h, spool, inode, truncate_op))
 				freed++;
 
 			mutex_unlock(&hugetlb_fault_mutex_table[hash]);
@@ -647,7 +657,16 @@  void remove_inode_hugepages(struct inode *inode, loff_t lstart, loff_t lend)
 	}
 
 	if (truncate_op)
-		(void)hugetlb_unreserve_pages(inode, start, LONG_MAX, freed);
+		(void)hugetlb_unreserve_pages(h, spool, inode, start, LONG_MAX, freed);
+}
+
+/* hugetlbfs wrapper: derives mapping, hstate and subpool from @inode. */
+void remove_inode_hugepages(struct inode *inode, loff_t lstart, loff_t lend)
+{
+	struct hstate *h = hstate_inode(inode);
+	struct hugepage_subpool *spool = subpool_inode(inode);
+
+	remove_mapping_hugepages(&inode->i_data, h, spool, inode, lstart, lend);
 }
 
 static void hugetlbfs_evict_inode(struct inode *inode)
@@ -1548,6 +1567,7 @@  struct file *hugetlb_file_setup(const char *name, size_t size,
 	struct vfsmount *mnt;
 	int hstate_idx;
 	struct file *file;
+	struct hstate *h;
 
 	hstate_idx = get_hstate_idx(page_size_log);
 	if (hstate_idx < 0)
@@ -1578,9 +1598,10 @@  struct file *hugetlb_file_setup(const char *name, size_t size,
 	inode->i_size = size;
 	clear_nlink(inode);
 
-	if (!hugetlb_reserve_pages(inode, 0,
-			size >> huge_page_shift(hstate_inode(inode)), NULL,
-			acctflag))
+	h = hstate_inode(inode);
+	if (!hugetlb_reserve_pages(h, subpool_inode(inode), inode, 0,
+				   size >> huge_page_shift(h), NULL,
+				   acctflag))
 		file = ERR_PTR(-ENOMEM);
 	else
 		file = alloc_file_pseudo(inode, mnt, name, O_RDWR,
diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index 1483020b412b..2457d7a21974 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -166,11 +166,13 @@  int hugetlb_mcopy_atomic_pte(struct mm_struct *dst_mm, pte_t *dst_pte,
 				struct page **pagep,
 				bool wp_copy);
 #endif /* CONFIG_USERFAULTFD */
-bool hugetlb_reserve_pages(struct inode *inode, long from, long to,
-						struct vm_area_struct *vma,
-						vm_flags_t vm_flags);
-long hugetlb_unreserve_pages(struct inode *inode, long start, long end,
-						long freed);
+bool hugetlb_reserve_pages(struct hstate *h, struct hugepage_subpool *spool,
+			   struct inode *inode,
+			   long from, long to,
+			   struct vm_area_struct *vma,
+			   vm_flags_t vm_flags);
+long hugetlb_unreserve_pages(struct hstate *h, struct hugepage_subpool *spool,
+			     struct inode *inode, long start, long end, long freed);
 bool isolate_hugetlb(struct folio *folio, struct list_head *list);
 int get_hwpoison_hugetlb_folio(struct folio *folio, bool *hugetlb, bool unpoison);
 int get_huge_page_for_hwpoison(unsigned long pfn, int flags,
@@ -178,7 +180,7 @@  int get_huge_page_for_hwpoison(unsigned long pfn, int flags,
 void folio_putback_active_hugetlb(struct folio *folio);
 void move_hugetlb_state(struct folio *old_folio, struct folio *new_folio, int reason);
 void free_huge_page(struct page *page);
-void hugetlb_fix_reserve_counts(struct inode *inode);
+void hugetlb_fix_reserve_counts(struct hstate *h, struct hugepage_subpool *spool);
 extern struct mutex *hugetlb_fault_mutex_table;
 u32 hugetlb_fault_mutex_hash(struct address_space *mapping, pgoff_t idx);
 
@@ -259,6 +261,9 @@  void hugetlb_unshare_all_pmds(struct vm_area_struct *vma);
 void hugetlb_zero_partial_page(struct hstate *h, struct address_space *mapping,
 			       loff_t start, loff_t end);
 
+void remove_mapping_hugepages(struct address_space *mapping,
+			      struct hstate *h, struct hugepage_subpool *spool,
+			      struct inode *inode, loff_t lstart, loff_t lend);
 void remove_inode_hugepages(struct inode *inode, loff_t lstart, loff_t lend);
 
 #else /* !CONFIG_HUGETLB_PAGE */
@@ -472,6 +477,9 @@  static inline void hugetlb_unshare_all_pmds(struct vm_area_struct *vma) { }
 static inline void hugetlb_zero_partial_page(
 	struct hstate *h, struct address_space *mapping, loff_t start, loff_t end) {}
 
+static inline void remove_mapping_hugepages(
+	struct address_space *mapping, struct hstate *h, struct hugepage_subpool *spool,
+	struct inode *inode, loff_t lstart, loff_t lend) {}
 static inline void remove_inode_hugepages(struct inode *inode, loff_t lstart, loff_t lend) {}
 
 #endif /* !CONFIG_HUGETLB_PAGE */
@@ -554,6 +562,12 @@  static inline struct hstate *hstate_inode(struct inode *i)
 {
 	return HUGETLBFS_SB(i->i_sb)->hstate;
 }
+
+static inline struct hugepage_subpool *subpool_inode(struct inode *inode)
+{
+	return HUGETLBFS_SB(inode->i_sb)->spool;
+}
+
 #else /* !CONFIG_HUGETLBFS */
 
 #define is_file_hugepages(file)			false
@@ -568,6 +582,12 @@  static inline struct hstate *hstate_inode(struct inode *i)
 {
 	return NULL;
 }
+
+static inline struct hugepage_subpool *subpool_inode(struct inode *inode)
+{
+	return NULL;
+}
+
 #endif /* !CONFIG_HUGETLBFS */
 
 #ifdef HAVE_ARCH_HUGETLB_UNMAPPED_AREA
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 9c9262833b4f..9da419b930df 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -247,11 +247,6 @@  static long hugepage_subpool_put_pages(struct hugepage_subpool *spool,
 	return ret;
 }
 
-static inline struct hugepage_subpool *subpool_inode(struct inode *inode)
-{
-	return HUGETLBFS_SB(inode->i_sb)->spool;
-}
-
 static inline struct hugepage_subpool *subpool_vma(struct vm_area_struct *vma)
 {
 	return subpool_inode(file_inode(vma->vm_file));
@@ -898,16 +893,13 @@  static long region_del(struct resv_map *resv, long f, long t)
  * appear as a "reserved" entry instead of simply dangling with incorrect
  * counts.
  */
-void hugetlb_fix_reserve_counts(struct inode *inode)
+void hugetlb_fix_reserve_counts(struct hstate *h, struct hugepage_subpool *spool)
 {
-	struct hugepage_subpool *spool = subpool_inode(inode);
 	long rsv_adjust;
 	bool reserved = false;
 
 	rsv_adjust = hugepage_subpool_get_pages(spool, 1);
 	if (rsv_adjust > 0) {
-		struct hstate *h = hstate_inode(inode);
-
 		if (!hugetlb_acct_memory(h, 1))
 			reserved = true;
 	} else if (!rsv_adjust) {
@@ -6762,15 +6754,22 @@  long hugetlb_change_protection(struct vm_area_struct *vma,
 	return pages > 0 ? (pages << h->order) : pages;
 }
 
-/* Return true if reservation was successful, false otherwise.  */
-bool hugetlb_reserve_pages(struct inode *inode,
-					long from, long to,
-					struct vm_area_struct *vma,
-					vm_flags_t vm_flags)
+/*
+ * Reserves pages between vma indices @from and @to by handling accounting in:
+ * + hstate @h (required)
+ * + subpool @spool (can be NULL)
+ * + @inode (required if @vma is NULL)
+ *
+ * Will setup resv_map in @vma if necessary.
+ * Return true if reservation was successful, false otherwise.
+ */
+bool hugetlb_reserve_pages(struct hstate *h, struct hugepage_subpool *spool,
+			   struct inode *inode,
+			   long from, long to,
+			   struct vm_area_struct *vma,
+			   vm_flags_t vm_flags)
 {
 	long chg = -1, add = -1;
-	struct hstate *h = hstate_inode(inode);
-	struct hugepage_subpool *spool = subpool_inode(inode);
 	struct resv_map *resv_map;
 	struct hugetlb_cgroup *h_cg = NULL;
 	long gbl_reserve, regions_needed = 0;
@@ -6921,13 +6920,23 @@  bool hugetlb_reserve_pages(struct inode *inode,
 	return false;
 }
 
-long hugetlb_unreserve_pages(struct inode *inode, long start, long end,
-								long freed)
+/*
+ * Unreserves pages between vma indices @start and @end by handling accounting
+ * in:
+ * + hstate @h (required)
+ * + subpool @spool (can be NULL)
+ * + @inode (required)
+ * + resv_map in @inode (can be NULL)
+ *
+ * @freed is the number of pages freed, for updating inode->i_blocks.
+ *
+ * Returns 0 on success.
+ */
+long hugetlb_unreserve_pages(struct hstate *h, struct hugepage_subpool *spool,
+			     struct inode *inode, long start, long end, long freed)
 {
-	struct hstate *h = hstate_inode(inode);
 	struct resv_map *resv_map = inode_resv_map(inode);
 	long chg = 0;
-	struct hugepage_subpool *spool = subpool_inode(inode);
 	long gbl_reserve;
 
 	/*