[07/10] mm/khugepaged: minor reorderings in collapse_shmem()

Message ID alpine.LSU.2.11.1811261526400.2275@eggly.anvils (mailing list archive)
State New, archived
Series huge_memory,khugepaged tmpfs split/collapse fixes

Commit Message

Hugh Dickins Nov. 26, 2018, 11:27 p.m. UTC
Several cleanups in collapse_shmem(): most of which probably do not
really matter, beyond doing things in a more familiar and reassuring
order.  Simplify the failure gotos in the main loop, and on success
update stats while interrupts still disabled from the last iteration.

Fixes: f3f0e1d2150b2 ("khugepaged: add support of collapse for tmpfs/shmem pages")
Signed-off-by: Hugh Dickins <hughd@google.com>
Cc: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Cc: stable@vger.kernel.org # 4.8+
---
 mm/khugepaged.c | 72 ++++++++++++++++++++++---------------------------
 1 file changed, 32 insertions(+), 40 deletions(-)

Comments

Kirill A. Shutemov Nov. 27, 2018, 7:59 a.m. UTC | #1
On Mon, Nov 26, 2018 at 03:27:52PM -0800, Hugh Dickins wrote:
> Several cleanups in collapse_shmem(): most of which probably do not
> really matter, beyond doing things in a more familiar and reassuring
> order.  Simplify the failure gotos in the main loop, and on success
> update stats while interrupts still disabled from the last iteration.
> 
> Fixes: f3f0e1d2150b2 ("khugepaged: add support of collapse for tmpfs/shmem pages")
> Signed-off-by: Hugh Dickins <hughd@google.com>
> Cc: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
> Cc: stable@vger.kernel.org # 4.8+
> ---
>  mm/khugepaged.c | 72 ++++++++++++++++++++++---------------------------
>  1 file changed, 32 insertions(+), 40 deletions(-)
> 
> diff --git a/mm/khugepaged.c b/mm/khugepaged.c
> index 1c402d33547e..9d4e9ff1af95 100644
> --- a/mm/khugepaged.c
> +++ b/mm/khugepaged.c
> @@ -1329,10 +1329,10 @@ static void collapse_shmem(struct mm_struct *mm,
>  		goto out;
>  	}
>  
> +	__SetPageLocked(new_page);
> +	__SetPageSwapBacked(new_page);
>  	new_page->index = start;
>  	new_page->mapping = mapping;
> -	__SetPageSwapBacked(new_page);
> -	__SetPageLocked(new_page);
>  	BUG_ON(!page_ref_freeze(new_page, 1));
>  
>  	/*
> @@ -1366,13 +1366,13 @@ static void collapse_shmem(struct mm_struct *mm,
>  			if (index == start) {
>  				if (!xas_next_entry(&xas, end - 1)) {
>  					result = SCAN_TRUNCATED;
> -					break;
> +					goto xa_locked;
>  				}
>  				xas_set(&xas, index);
>  			}
>  			if (!shmem_charge(mapping->host, 1)) {
>  				result = SCAN_FAIL;
> -				break;
> +				goto xa_locked;
>  			}
>  			xas_store(&xas, new_page + (index % HPAGE_PMD_NR));
>  			nr_none++;
> @@ -1387,13 +1387,12 @@ static void collapse_shmem(struct mm_struct *mm,
>  				result = SCAN_FAIL;
>  				goto xa_unlocked;
>  			}
> -			xas_lock_irq(&xas);
> -			xas_set(&xas, index);
>  		} else if (trylock_page(page)) {
>  			get_page(page);
> +			xas_unlock_irq(&xas);
>  		} else {
>  			result = SCAN_PAGE_LOCK;
> -			break;
> +			goto xa_locked;
>  		}
>  
>  		/*

I'm puzzled by the locking change here.

Isn't the change responsible for the bug you are fixing in 09/10?

IIRC, my intent for the locking scheme was to protect against a
truncate-repopulate race.

What am I missing?

The rest of the patch *looks* okay, but I found it hard to follow.
Splitting it up would make it easier.
Hugh Dickins Nov. 27, 2018, 8:23 p.m. UTC | #2
On Tue, 27 Nov 2018, Kirill A. Shutemov wrote:
> On Mon, Nov 26, 2018 at 03:27:52PM -0800, Hugh Dickins wrote:
> > Several cleanups in collapse_shmem(): most of which probably do not
> > really matter, beyond doing things in a more familiar and reassuring
> > order.  Simplify the failure gotos in the main loop, and on success
> > update stats while interrupts still disabled from the last iteration.
> > 
> > Fixes: f3f0e1d2150b2 ("khugepaged: add support of collapse for tmpfs/shmem pages")
> > Signed-off-by: Hugh Dickins <hughd@google.com>
> > Cc: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
> > Cc: stable@vger.kernel.org # 4.8+
> > ---
> >  mm/khugepaged.c | 72 ++++++++++++++++++++++---------------------------
> >  1 file changed, 32 insertions(+), 40 deletions(-)
> > 
> > diff --git a/mm/khugepaged.c b/mm/khugepaged.c
> > index 1c402d33547e..9d4e9ff1af95 100644
> > --- a/mm/khugepaged.c
> > +++ b/mm/khugepaged.c
> > @@ -1329,10 +1329,10 @@ static void collapse_shmem(struct mm_struct *mm,
> >  		goto out;
> >  	}
> >  
> > +	__SetPageLocked(new_page);
> > +	__SetPageSwapBacked(new_page);
> >  	new_page->index = start;
> >  	new_page->mapping = mapping;
> > -	__SetPageSwapBacked(new_page);
> > -	__SetPageLocked(new_page);
> >  	BUG_ON(!page_ref_freeze(new_page, 1));
> >  
> >  	/*
> > @@ -1366,13 +1366,13 @@ static void collapse_shmem(struct mm_struct *mm,
> >  			if (index == start) {
> >  				if (!xas_next_entry(&xas, end - 1)) {
> >  					result = SCAN_TRUNCATED;
> > -					break;
> > +					goto xa_locked;
> >  				}
> >  				xas_set(&xas, index);
> >  			}
> >  			if (!shmem_charge(mapping->host, 1)) {
> >  				result = SCAN_FAIL;
> > -				break;
> > +				goto xa_locked;
> >  			}
> >  			xas_store(&xas, new_page + (index % HPAGE_PMD_NR));
> >  			nr_none++;
> > @@ -1387,13 +1387,12 @@ static void collapse_shmem(struct mm_struct *mm,
> >  				result = SCAN_FAIL;
> >  				goto xa_unlocked;
> >  			}
> > -			xas_lock_irq(&xas);
> > -			xas_set(&xas, index);
> >  		} else if (trylock_page(page)) {
> >  			get_page(page);
> > +			xas_unlock_irq(&xas);
> >  		} else {
> >  			result = SCAN_PAGE_LOCK;
> > -			break;
> > +			goto xa_locked;
> >  		}
> >  
> >  		/*
> 
> I'm puzzled by the locking change here.

The locking change here is to not re-get xas_lock_irq (in shmem_getpage
case) just before we drop it anyway: you point out that it used to cover
		/*
		 * The page must be locked, so we can drop the i_pages lock
		 * without racing with truncate.
		 */
		VM_BUG_ON_PAGE(!PageLocked(page), page);
		VM_BUG_ON_PAGE(!PageUptodate(page), page);
		VM_BUG_ON_PAGE(PageTransCompound(page), page);
		if (page_mapping(page) != mapping) {
but now does not.

But the comment you wrote there originally (ah, git blame shows
that Matthew has made it say i_pages lock instead of tree_lock),
"The page must be locked, so we can drop...", was correct all along,
I'm just following what it says.

It would be wrong if the trylock_page came after the xas_unlock_irq,
but it comes before (as before): holding i_pages lock across the
lookup makes sure we look up the right page (no RCU racing) and
trylock_page makes sure that it cannot be truncated or hole-punched
or migrated or whatever from that point on - so can drop i_pages lock.
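
Put as a condensed sketch (not the exact loop - the shmem_getpage and
failure branches are elided, and the lock is really taken back in the
"less messy" loop before the main for loop):

		xas_lock_irq(&xas);		/* held across the lookup */
		page = xas_next(&xas);		/* right page found, no RCU racing */
		if (trylock_page(page)) {
			get_page(page);		/* page locked and pinned before lock dropped */
			xas_unlock_irq(&xas);	/* truncate/hole-punch/migrate now excluded */
		} else {
			result = SCAN_PAGE_LOCK;
			goto xa_locked;		/* bail out with i_pages lock still held */
		}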

Actually, I think we could VM_BUG_ON(page_mapping(page) != mapping),
couldn't we? Not that I propose to make such a change at this stage.

> 
> Isn't the change responsible for the bug you are fixing in 09/10?

In which I relaxed the VM_BUG_ON_PAGE(PageTransCompound(page), page)
that appears in the sequence above.

Well, what I was thinking of in 9/10 was a THP being inserted at some
stage between selecting this range for collapse and reaching the last
(usually first) xas_lock_irq(&xas) in the "This will be less messy..."
loop above: I don't see any locking against that possibility.  (And it
has to be that initial xas_lock_irq(&xas), because once the PageLocked
head of new_page is inserted in the i_pages tree, there is no more
chance for truncation and a competing THP to be inserted there.)
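
To spell that window out (an illustrative interleaving, not an observed
trace):

	khugepaged				another task
	----------				------------
	scan selects this range for collapse
						hole punch / truncate the range
						fault repopulates it with a THP
	collapse_shmem()
	  takes the first xas_lock_irq(&xas)
	  in the "This will be less messy..."
	  loop; the lookup then finds a
	  compound page in the range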

So 9/10 would be required anyway; but you're thinking that the page
we looked up under i_pages lock and got trylock_page on, could then
become Compound once i_pages lock is dropped?  I don't think so: pages
don't become Compound after they've left the page allocator, do they?
And if we ever manage to change that, I'm pretty sure it would be with
page locks held and page refcounts frozen.

> 
> IIRC, my intent for the locking scheme was to protect against a
> truncate-repopulate race.
> 
> What am I missing?

The stage in between selecting the range for collapse, and getting
the initial i_pages lock?  Pages not becoming Compound underneath
you, with or without page lock, with or without i_pages lock?  Page
lock being sufficient protection against truncation and migration?

> 
> The rest of the patch *looks* okay, but I found it hard to follow.
> Splitting it up would make it easier.

It needs some time, I admit: thanks a lot for persisting with it.
And thanks (to you and to Matthew) for the speedy Acks elsewhere.

Hugh
Kirill A. Shutemov Nov. 28, 2018, 10:59 a.m. UTC | #3
On Tue, Nov 27, 2018 at 12:23:32PM -0800, Hugh Dickins wrote:
> On Tue, 27 Nov 2018, Kirill A. Shutemov wrote:
> > On Mon, Nov 26, 2018 at 03:27:52PM -0800, Hugh Dickins wrote:
> > > Several cleanups in collapse_shmem(): most of which probably do not
> > > really matter, beyond doing things in a more familiar and reassuring
> > > order.  Simplify the failure gotos in the main loop, and on success
> > > update stats while interrupts still disabled from the last iteration.
> > > 
> > > Fixes: f3f0e1d2150b2 ("khugepaged: add support of collapse for tmpfs/shmem pages")
> > > Signed-off-by: Hugh Dickins <hughd@google.com>
> > > Cc: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
> > > Cc: stable@vger.kernel.org # 4.8+
> > > ---
> > >  mm/khugepaged.c | 72 ++++++++++++++++++++++---------------------------
> > >  1 file changed, 32 insertions(+), 40 deletions(-)
> > > 
> > > diff --git a/mm/khugepaged.c b/mm/khugepaged.c
> > > index 1c402d33547e..9d4e9ff1af95 100644
> > > --- a/mm/khugepaged.c
> > > +++ b/mm/khugepaged.c
> > > @@ -1329,10 +1329,10 @@ static void collapse_shmem(struct mm_struct *mm,
> > >  		goto out;
> > >  	}
> > >  
> > > +	__SetPageLocked(new_page);
> > > +	__SetPageSwapBacked(new_page);
> > >  	new_page->index = start;
> > >  	new_page->mapping = mapping;
> > > -	__SetPageSwapBacked(new_page);
> > > -	__SetPageLocked(new_page);
> > >  	BUG_ON(!page_ref_freeze(new_page, 1));
> > >  
> > >  	/*
> > > @@ -1366,13 +1366,13 @@ static void collapse_shmem(struct mm_struct *mm,
> > >  			if (index == start) {
> > >  				if (!xas_next_entry(&xas, end - 1)) {
> > >  					result = SCAN_TRUNCATED;
> > > -					break;
> > > +					goto xa_locked;
> > >  				}
> > >  				xas_set(&xas, index);
> > >  			}
> > >  			if (!shmem_charge(mapping->host, 1)) {
> > >  				result = SCAN_FAIL;
> > > -				break;
> > > +				goto xa_locked;
> > >  			}
> > >  			xas_store(&xas, new_page + (index % HPAGE_PMD_NR));
> > >  			nr_none++;
> > > @@ -1387,13 +1387,12 @@ static void collapse_shmem(struct mm_struct *mm,
> > >  				result = SCAN_FAIL;
> > >  				goto xa_unlocked;
> > >  			}
> > > -			xas_lock_irq(&xas);
> > > -			xas_set(&xas, index);
> > >  		} else if (trylock_page(page)) {
> > >  			get_page(page);
> > > +			xas_unlock_irq(&xas);
> > >  		} else {
> > >  			result = SCAN_PAGE_LOCK;
> > > -			break;
> > > +			goto xa_locked;
> > >  		}
> > >  
> > >  		/*
> > 
> > I'm puzzled by the locking change here.
> 
> The locking change here is to not re-get xas_lock_irq (in shmem_getpage
> case) just before we drop it anyway: you point out that it used to cover
> 		/*
> 		 * The page must be locked, so we can drop the i_pages lock
> 		 * without racing with truncate.
> 		 */
> 		VM_BUG_ON_PAGE(!PageLocked(page), page);
> 		VM_BUG_ON_PAGE(!PageUptodate(page), page);
> 		VM_BUG_ON_PAGE(PageTransCompound(page), page);
> 		if (page_mapping(page) != mapping) {
> but now does not.
> 
> But the comment you wrote there originally (ah, git blame shows
> that Matthew has made it say i_pages lock instead of tree_lock),
> "The page must be locked, so we can drop...", was correct all along,
> I'm just following what it says.
> 
> It would be wrong if the trylock_page came after the xas_unlock_irq,
> but it comes before (as before): holding i_pages lock across the
> lookup makes sure we look up the right page (no RCU racing) and
> trylock_page makes sure that it cannot be truncated or hole-punched
> or migrated or whatever from that point on - so can drop i_pages lock.

You are right. I confused myself.

> Actually, I think we could VM_BUG_ON(page_mapping(page) != mapping),
> couldn't we? Not that I propose to make such a change at this stage.

Yeah, it should be safe. We may put a WARN there.

> > Isn't the change responsible for the bug you are fixing in 09/10?
> 
> In which I relaxed the VM_BUG_ON_PAGE(PageTransCompound(page), page)
> that appears in the sequence above.
> 
> Well, what I was thinking of in 9/10 was a THP being inserted at some
> stage between selecting this range for collapse and reaching the last
> (usually first) xas_lock_irq(&xas) in the "This will be less messy..."
> loop above: I don't see any locking against that possibility.  (And it
> has to be that initial xas_lock_irq(&xas), because once the PageLocked
> head of new_page is inserted in the i_pages tree, there is no more
> chance for truncation and a competing THP to be inserted there.)
> 
> So 9/10 would be required anyway; but you're thinking that the page
> we looked up under i_pages lock and got trylock_page on, could then
> become Compound once i_pages lock is dropped?  I don't think so: pages
> don't become Compound after they've left the page allocator, do they?
> And if we ever manage to change that, I'm pretty sure it would be with
> page locks held and page refcounts frozen.
> > IIRC, my intent for the locking scheme was to protect against a
> > truncate-repopulate race.
> > 
> > What am I missing?
> 
> The stage in between selecting the range for collapse, and getting
> the initial i_pages lock?  Pages not becoming Compound underneath
> you, with or without page lock, with or without i_pages lock?  Page
> lock being sufficient protection against truncation and migration?

Agreed on all fronts. Sorry for the noise.

> > The rest of the patch *looks* okay, but I found it hard to follow.
> > Splitting it up would make it easier.
> 
> It needs some time, I admit: thanks a lot for persisting with it.
> And thanks (to you and to Matthew) for the speedy Acks elsewhere.
> 
> Hugh
Hugh Dickins Nov. 28, 2018, 7:40 p.m. UTC | #4
On Wed, 28 Nov 2018, Kirill A. Shutemov wrote:
> On Tue, Nov 27, 2018 at 12:23:32PM -0800, Hugh Dickins wrote:
> 
> > Actually, I think we could VM_BUG_ON(page_mapping(page) != mapping),
> > couldn't we? Not that I propose to make such a change at this stage.
> 
> Yeah, it should be safe. We may put a WARN there.

Later yes, but for now I'm leaving the patch unchanged -
been burnt before by last minute changes that didn't turn out so well!

> Agreed on all fronts. Sorry for the noise.

No problem at all: it's important that you challenge what looked wrong.
This time around, I was the one with the advantage of recent familiarity.

> 
> > > The rest of the patch *looks* okay, but I found it hard to follow.
> > > Splitting it up would make it easier.
> > 
> > It needs some time, I admit: thanks a lot for persisting with it.
> > And thanks (to you and to Matthew) for the speedy Acks elsewhere.
> > 
> > Hugh
> 
> -- 
>  Kirill A. Shutemov

Thanks again,
Hugh

Patch

diff --git a/mm/khugepaged.c b/mm/khugepaged.c
index 1c402d33547e..9d4e9ff1af95 100644
--- a/mm/khugepaged.c
+++ b/mm/khugepaged.c
@@ -1329,10 +1329,10 @@  static void collapse_shmem(struct mm_struct *mm,
 		goto out;
 	}
 
+	__SetPageLocked(new_page);
+	__SetPageSwapBacked(new_page);
 	new_page->index = start;
 	new_page->mapping = mapping;
-	__SetPageSwapBacked(new_page);
-	__SetPageLocked(new_page);
 	BUG_ON(!page_ref_freeze(new_page, 1));
 
 	/*
@@ -1366,13 +1366,13 @@  static void collapse_shmem(struct mm_struct *mm,
 			if (index == start) {
 				if (!xas_next_entry(&xas, end - 1)) {
 					result = SCAN_TRUNCATED;
-					break;
+					goto xa_locked;
 				}
 				xas_set(&xas, index);
 			}
 			if (!shmem_charge(mapping->host, 1)) {
 				result = SCAN_FAIL;
-				break;
+				goto xa_locked;
 			}
 			xas_store(&xas, new_page + (index % HPAGE_PMD_NR));
 			nr_none++;
@@ -1387,13 +1387,12 @@  static void collapse_shmem(struct mm_struct *mm,
 				result = SCAN_FAIL;
 				goto xa_unlocked;
 			}
-			xas_lock_irq(&xas);
-			xas_set(&xas, index);
 		} else if (trylock_page(page)) {
 			get_page(page);
+			xas_unlock_irq(&xas);
 		} else {
 			result = SCAN_PAGE_LOCK;
-			break;
+			goto xa_locked;
 		}
 
 		/*
@@ -1408,11 +1407,10 @@  static void collapse_shmem(struct mm_struct *mm,
 			result = SCAN_TRUNCATED;
 			goto out_unlock;
 		}
-		xas_unlock_irq(&xas);
 
 		if (isolate_lru_page(page)) {
 			result = SCAN_DEL_PAGE_LRU;
-			goto out_isolate_failed;
+			goto out_unlock;
 		}
 
 		if (page_mapped(page))
@@ -1432,7 +1430,9 @@  static void collapse_shmem(struct mm_struct *mm,
 		 */
 		if (!page_ref_freeze(page, 3)) {
 			result = SCAN_PAGE_COUNT;
-			goto out_lru;
+			xas_unlock_irq(&xas);
+			putback_lru_page(page);
+			goto out_unlock;
 		}
 
 		/*
@@ -1444,24 +1444,26 @@  static void collapse_shmem(struct mm_struct *mm,
 		/* Finally, replace with the new page. */
 		xas_store(&xas, new_page + (index % HPAGE_PMD_NR));
 		continue;
-out_lru:
-		xas_unlock_irq(&xas);
-		putback_lru_page(page);
-out_isolate_failed:
-		unlock_page(page);
-		put_page(page);
-		goto xa_unlocked;
 out_unlock:
 		unlock_page(page);
 		put_page(page);
-		break;
+		goto xa_unlocked;
 	}
-	xas_unlock_irq(&xas);
 
+	__inc_node_page_state(new_page, NR_SHMEM_THPS);
+	if (nr_none) {
+		struct zone *zone = page_zone(new_page);
+
+		__mod_node_page_state(zone->zone_pgdat, NR_FILE_PAGES, nr_none);
+		__mod_node_page_state(zone->zone_pgdat, NR_SHMEM, nr_none);
+	}
+
+xa_locked:
+	xas_unlock_irq(&xas);
 xa_unlocked:
+
 	if (result == SCAN_SUCCEED) {
 		struct page *page, *tmp;
-		struct zone *zone = page_zone(new_page);
 
 		/*
 		 * Replacing old pages with new one has succeeded, now we
@@ -1476,11 +1478,11 @@  static void collapse_shmem(struct mm_struct *mm,
 			copy_highpage(new_page + (page->index % HPAGE_PMD_NR),
 					page);
 			list_del(&page->lru);
-			unlock_page(page);
-			page_ref_unfreeze(page, 1);
 			page->mapping = NULL;
+			page_ref_unfreeze(page, 1);
 			ClearPageActive(page);
 			ClearPageUnevictable(page);
+			unlock_page(page);
 			put_page(page);
 			index++;
 		}
@@ -1489,28 +1491,17 @@  static void collapse_shmem(struct mm_struct *mm,
 			index++;
 		}
 
-		local_irq_disable();
-		__inc_node_page_state(new_page, NR_SHMEM_THPS);
-		if (nr_none) {
-			__mod_node_page_state(zone->zone_pgdat, NR_FILE_PAGES, nr_none);
-			__mod_node_page_state(zone->zone_pgdat, NR_SHMEM, nr_none);
-		}
-		local_irq_enable();
-
-		/*
-		 * Remove pte page tables, so we can re-fault
-		 * the page as huge.
-		 */
-		retract_page_tables(mapping, start);
-
 		/* Everything is ready, let's unfreeze the new_page */
-		set_page_dirty(new_page);
 		SetPageUptodate(new_page);
 		page_ref_unfreeze(new_page, HPAGE_PMD_NR);
+		set_page_dirty(new_page);
 		mem_cgroup_commit_charge(new_page, memcg, false, true);
 		lru_cache_add_anon(new_page);
-		unlock_page(new_page);
 
+		/*
+		 * Remove pte page tables, so we can re-fault the page as huge.
+		 */
+		retract_page_tables(mapping, start);
 		*hpage = NULL;
 
 		khugepaged_pages_collapsed++;
@@ -1543,8 +1534,8 @@  static void collapse_shmem(struct mm_struct *mm,
 			xas_store(&xas, page);
 			xas_pause(&xas);
 			xas_unlock_irq(&xas);
-			putback_lru_page(page);
 			unlock_page(page);
+			putback_lru_page(page);
 			xas_lock_irq(&xas);
 		}
 		VM_BUG_ON(nr_none);
@@ -1553,9 +1544,10 @@  static void collapse_shmem(struct mm_struct *mm,
 		/* Unfreeze new_page, caller would take care about freeing it */
 		page_ref_unfreeze(new_page, 1);
 		mem_cgroup_cancel_charge(new_page, memcg, true);
-		unlock_page(new_page);
 		new_page->mapping = NULL;
 	}
+
+	unlock_page(new_page);
 out:
 	VM_BUG_ON(!list_empty(&pagelist));
 	/* TODO: tracepoints */