[7/7] khugepaged: Use a folio throughout hpage_collapse_scan_file()

Message ID: 20240403171838.1445826-8-willy@infradead.org
State: New
Series: khugepaged folio conversions

Commit Message

Matthew Wilcox April 3, 2024, 5:18 p.m. UTC
Replace the use of pages with folios.  Saves a few calls to
compound_head() and removes some uses of obsolete functions.

Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
---
 include/trace/events/huge_memory.h |  6 +++---
 mm/khugepaged.c                    | 33 +++++++++++++++---------------
 2 files changed, 19 insertions(+), 20 deletions(-)
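
For context on the compound_head() savings: the legacy page accessors must resolve a possible tail page to its head on every call, while the folio accessors start from the head by construction. A minimal sketch of the pattern, condensed from the in-tree helpers (include/linux/page_ref.h and friends; the exact bodies in the tree are split across more wrappers):

/* Legacy accessor: must first find the head page via page_folio(),
 * the folio-era spelling of compound_head(). */
static inline int page_count(const struct page *page)
{
	return folio_ref_count(page_folio(page));
}

/* Folio accessor: a folio is never a tail page, so the lookup is
 * unnecessary and the read is direct. */
static inline int folio_ref_count(const struct folio *folio)
{
	return atomic_read(&folio->page._refcount);
}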

Comments

Vishal Moola April 5, 2024, 9:21 p.m. UTC | #1
On Wed, Apr 03, 2024 at 06:18:36PM +0100, Matthew Wilcox (Oracle) wrote:
> Replace the use of pages with folios.  Saves a few calls to
> compound_head() and removes some uses of obsolete functions.
> 
> Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>

Reviewed-by: Vishal Moola (Oracle) <vishal.moola@gmail.com>
David Hildenbrand April 8, 2024, 8:07 p.m. UTC | #2
On 03.04.24 19:18, Matthew Wilcox (Oracle) wrote:
> Replace the use of pages with folios.  Saves a few calls to
> compound_head() and removes some uses of obsolete functions.
> 
> Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
> ---

[...]

>   
> -		if (page_count(page) !=
> -		    1 + page_mapcount(page) + page_has_private(page)) {
> +		if (folio_ref_count(folio) !=
> +		    1 + folio_mapcount(folio) + folio_test_private(folio)) {
>   			result = SCAN_PAGE_COUNT;

Just stumbled over that: page_has_private() would have checked 
PG_private and PG_private_2. The corresponding replacement would have 
been folio_has_private(). folio_test_private() only checks PG_private.

Are we sure that we no longer have to check PG_private_2 here? pagecache 
... so I have no clue :)
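
For reference, the difference between the two predicates, condensed from include/linux/page-flags.h (illustrative, not verbatim):

/* The old helper tests both flags at once ... */
#define PAGE_FLAGS_PRIVATE	(1UL << PG_private | 1UL << PG_private_2)

static inline int page_has_private(struct page *page)
{
	return !!(page->flags & PAGE_FLAGS_PRIVATE);
}

/* ... while folio_test_private() tests only PG_private;
 * folio_has_private() is the folio analogue of the two-flag check. */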
Matthew Wilcox April 8, 2024, 8:28 p.m. UTC | #3
On Mon, Apr 08, 2024 at 10:07:01PM +0200, David Hildenbrand wrote:
> On 03.04.24 19:18, Matthew Wilcox (Oracle) wrote:
> > Replace the use of pages with folios.  Saves a few calls to
> > compound_head() and removes some uses of obsolete functions.
> > 
> > Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
> > ---
> 
> [...]
> 
> > -		if (page_count(page) !=
> > -		    1 + page_mapcount(page) + page_has_private(page)) {
> > +		if (folio_ref_count(folio) !=
> > +		    1 + folio_mapcount(folio) + folio_test_private(folio)) {
> >   			result = SCAN_PAGE_COUNT;
> 
> Just stumbled over that: page_has_private() would have checked PG_private
> and PG_private_2. The corresponding replacement would have been
> folio_has_private(). folio_test_private() only checks PG_private.
> 
> Are we sure that we no longer have to check PG_private_2 here? pagecache ...
> so I have no clue :)

Oh man.  Vishal just asked me about this in our meeting and I struggled
to explain the whole horrid history.  The short answer is that this
almost certainly fixes a bug.

We have a rule (which most filesystem developers don't know about) that
attaching private data to the folio should increase its refcount by one.
This is handled by folio_attach_private().  But there's no corresponding
rule that setting PG_private_2 should also increment the refcount.
So checking PG_private_2 is wrong here because a folio with PG_private_2
set and PG_private clear will not have its refcount increased.
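
In code terms, the invariant being relied on is the one folio_attach_private() maintains; a condensed sketch from include/linux/mm.h:

static inline void folio_attach_private(struct folio *folio, void *data)
{
	folio_get(folio);	/* the private data holds one folio reference */
	folio->private = data;
	folio_set_private(folio);
}

So the scan's expected count, 1 + folio_mapcount(folio) + folio_test_private(folio), adds up exactly the references that are guaranteed to exist: one from the page cache, one per mapping, and one for attached private data. Nothing guarantees an extra reference for PG_private_2, so counting it could only make the check expect a reference that isn't there and fail with SCAN_PAGE_COUNT spuriously.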

There are longer versions of this answer, and if you keep asking, I'll
give them ;-P

Patch

diff --git a/include/trace/events/huge_memory.h b/include/trace/events/huge_memory.h
index dc6eeef2d3da..ab576898a126 100644
--- a/include/trace/events/huge_memory.h
+++ b/include/trace/events/huge_memory.h
@@ -174,10 +174,10 @@  TRACE_EVENT(mm_collapse_huge_page_swapin,
 
 TRACE_EVENT(mm_khugepaged_scan_file,
 
-	TP_PROTO(struct mm_struct *mm, struct page *page, struct file *file,
+	TP_PROTO(struct mm_struct *mm, struct folio *folio, struct file *file,
 		 int present, int swap, int result),
 
-	TP_ARGS(mm, page, file, present, swap, result),
+	TP_ARGS(mm, folio, file, present, swap, result),
 
 	TP_STRUCT__entry(
 		__field(struct mm_struct *, mm)
@@ -190,7 +190,7 @@  TRACE_EVENT(mm_khugepaged_scan_file,
 
 	TP_fast_assign(
 		__entry->mm = mm;
-		__entry->pfn = page ? page_to_pfn(page) : -1;
+		__entry->pfn = folio ? folio_pfn(folio) : -1;
 		__assign_str(filename, file->f_path.dentry->d_iname);
 		__entry->present = present;
 		__entry->swap = swap;
diff --git a/mm/khugepaged.c b/mm/khugepaged.c
index 0b0053fb30c0..ef2871aaeb43 100644
--- a/mm/khugepaged.c
+++ b/mm/khugepaged.c
@@ -2203,7 +2203,7 @@  static int hpage_collapse_scan_file(struct mm_struct *mm, unsigned long addr,
 				    struct file *file, pgoff_t start,
 				    struct collapse_control *cc)
 {
-	struct page *page = NULL;
+	struct folio *folio = NULL;
 	struct address_space *mapping = file->f_mapping;
 	XA_STATE(xas, &mapping->i_pages, start);
 	int present, swap;
@@ -2215,11 +2215,11 @@  static int hpage_collapse_scan_file(struct mm_struct *mm, unsigned long addr,
 	memset(cc->node_load, 0, sizeof(cc->node_load));
 	nodes_clear(cc->alloc_nmask);
 	rcu_read_lock();
-	xas_for_each(&xas, page, start + HPAGE_PMD_NR - 1) {
-		if (xas_retry(&xas, page))
+	xas_for_each(&xas, folio, start + HPAGE_PMD_NR - 1) {
+		if (xas_retry(&xas, folio))
 			continue;
 
-		if (xa_is_value(page)) {
+		if (xa_is_value(folio)) {
 			++swap;
 			if (cc->is_khugepaged &&
 			    swap > khugepaged_max_ptes_swap) {
@@ -2234,11 +2234,9 @@  static int hpage_collapse_scan_file(struct mm_struct *mm, unsigned long addr,
 		 * TODO: khugepaged should compact smaller compound pages
 		 * into a PMD sized page
 		 */
-		if (PageTransCompound(page)) {
-			struct page *head = compound_head(page);
-
-			result = compound_order(head) == HPAGE_PMD_ORDER &&
-					head->index == start
+		if (folio_test_large(folio)) {
+			result = folio_order(folio) == HPAGE_PMD_ORDER &&
+					folio->index == start
 					/* Maybe PMD-mapped */
 					? SCAN_PTE_MAPPED_HUGEPAGE
 					: SCAN_PAGE_COMPOUND;
@@ -2251,28 +2249,29 @@  static int hpage_collapse_scan_file(struct mm_struct *mm, unsigned long addr,
 			break;
 		}
 
-		node = page_to_nid(page);
+		node = folio_nid(folio);
 		if (hpage_collapse_scan_abort(node, cc)) {
 			result = SCAN_SCAN_ABORT;
 			break;
 		}
 		cc->node_load[node]++;
 
-		if (!PageLRU(page)) {
+		if (!folio_test_lru(folio)) {
 			result = SCAN_PAGE_LRU;
 			break;
 		}
 
-		if (page_count(page) !=
-		    1 + page_mapcount(page) + page_has_private(page)) {
+		if (folio_ref_count(folio) !=
+		    1 + folio_mapcount(folio) + folio_test_private(folio)) {
 			result = SCAN_PAGE_COUNT;
 			break;
 		}
 
 		/*
-		 * We probably should check if the page is referenced here, but
-		 * nobody would transfer pte_young() to PageReferenced() for us.
-		 * And rmap walk here is just too costly...
+		 * We probably should check if the folio is referenced
+		 * here, but nobody would transfer pte_young() to
+		 * folio_test_referenced() for us.  And rmap walk here
+		 * is just too costly...
 		 */
 
 		present++;
@@ -2294,7 +2293,7 @@  static int hpage_collapse_scan_file(struct mm_struct *mm, unsigned long addr,
 		}
 	}
 
-	trace_mm_khugepaged_scan_file(mm, page, file, present, swap, result);
+	trace_mm_khugepaged_scan_file(mm, folio, file, present, swap, result);
 	return result;
 }
 #else