[v1,2/5] mm/rmap: introduce and use hugetlb_remove_rmap()

Message ID 20231128145205.215026-3-david@redhat.com (mailing list archive)
State New
Series mm/rmap: separate hugetlb rmap handling

Commit Message

David Hildenbrand Nov. 28, 2023, 2:52 p.m. UTC
hugetlb rmap handling differs quite a lot from "ordinary" rmap code. We
don't want this hugetlb special-casing in the rmap functions, as
we're special casing the callers already. Let's simply use a separate
function for hugetlb.

Let's introduce and use hugetlb_remove_rmap() and remove the hugetlb
code from page_remove_rmap(). This effectively removes one check on the
small-folio path as well.

While this is a cleanup, this will also make it easier to change rmap
handling for partially-mappable folios.

Note: all possible candidates that need care are the page_remove_rmap()
      calls that pass compound=true.

Signed-off-by: David Hildenbrand <david@redhat.com>
---
 include/linux/rmap.h |  5 +++++
 mm/hugetlb.c         |  4 ++--
 mm/rmap.c            | 17 ++++++++---------
 3 files changed, 15 insertions(+), 11 deletions(-)
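
In outline (a condensed sketch of the change, simplified from the diff below
rather than copied verbatim), the hugetlb branch at the top of
page_remove_rmap() moves into a dedicated inline helper that the hugetlb
call sites use directly:

/* Before: every caller, hugetlb or not, runs through this check. */
void page_remove_rmap(struct page *page, struct vm_area_struct *vma,
                bool compound)
{
        struct folio *folio = page_folio(page);

        /* Hugetlb pages are not counted in NR_*MAPPED */
        if (unlikely(folio_test_hugetlb(folio))) {
                /* hugetlb pages are always mapped with pmds */
                atomic_dec(&folio->_entire_mapcount);
                return;
        }
        /* ... ordinary small-folio / THP handling ... */
}

/*
 * After: hugetlb call sites switch to the dedicated helper, and the check
 * above disappears from the generic path entirely.
 */
static inline void hugetlb_remove_rmap(struct folio *folio)
{
        atomic_dec(&folio->_entire_mapcount);
}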

Comments

Peter Xu Nov. 28, 2023, 4:08 p.m. UTC | #1
On Tue, Nov 28, 2023 at 03:52:02PM +0100, David Hildenbrand wrote:
> hugetlb rmap handling differs quite a lot from "ordinary" rmap code. We
> don't want this hugetlb special-casing in the rmap functions, as
> we're special casing the callers already. Let's simply use a separate
> function for hugetlb.

We were special casing some, not all...

And IIUC the suggestion from the community is that we reduce that "special
casing" instead of adding more?  To be explicit, see below.

> 
> Let's introduce and use hugetlb_remove_rmap() and remove the hugetlb
> code from page_remove_rmap(). This effectively removes one check on the
> small-folio path as well.
> 
> While this is a cleanup, this will also make it easier to change rmap
> handling for partially-mappable folios.
> 
> Note: all possible candidates that need care are the page_remove_rmap()
>       calls that pass compound=true.
> 
> Signed-off-by: David Hildenbrand <david@redhat.com>
> ---
>  include/linux/rmap.h |  5 +++++
>  mm/hugetlb.c         |  4 ++--
>  mm/rmap.c            | 17 ++++++++---------
>  3 files changed, 15 insertions(+), 11 deletions(-)
> 
> diff --git a/include/linux/rmap.h b/include/linux/rmap.h
> index 4c5bfeb05463..e8d1dc1d5361 100644
> --- a/include/linux/rmap.h
> +++ b/include/linux/rmap.h
> @@ -208,6 +208,11 @@ void hugetlb_add_anon_rmap(struct folio *, struct vm_area_struct *,
>  void hugetlb_add_new_anon_rmap(struct folio *, struct vm_area_struct *,
>  		unsigned long address);
>  
> +static inline void hugetlb_remove_rmap(struct folio *folio)
> +{
> +	atomic_dec(&folio->_entire_mapcount);
> +}
> +
>  static inline void __page_dup_rmap(struct page *page, bool compound)
>  {
>  	if (compound) {
> diff --git a/mm/hugetlb.c b/mm/hugetlb.c
> index 4cfa0679661e..d17bb53b19ff 100644
> --- a/mm/hugetlb.c
> +++ b/mm/hugetlb.c
> @@ -5669,7 +5669,7 @@ void __unmap_hugepage_range(struct mmu_gather *tlb, struct vm_area_struct *vma,
>  					make_pte_marker(PTE_MARKER_UFFD_WP),
>  					sz);
>  		hugetlb_count_sub(pages_per_huge_page(h), mm);
> -		page_remove_rmap(page, vma, true);
> +		hugetlb_remove_rmap(page_folio(page));
>  
>  		spin_unlock(ptl);
>  		tlb_remove_page_size(tlb, page, huge_page_size(h));
> @@ -5980,7 +5980,7 @@ static vm_fault_t hugetlb_wp(struct mm_struct *mm, struct vm_area_struct *vma,
>  
>  		/* Break COW or unshare */
>  		huge_ptep_clear_flush(vma, haddr, ptep);
> -		page_remove_rmap(&old_folio->page, vma, true);
> +		hugetlb_remove_rmap(old_folio);
>  		hugetlb_add_new_anon_rmap(new_folio, vma, haddr);
>  		if (huge_pte_uffd_wp(pte))
>  			newpte = huge_pte_mkuffd_wp(newpte);
> diff --git a/mm/rmap.c b/mm/rmap.c
> index 112467c30b2c..5037581b79ec 100644
> --- a/mm/rmap.c
> +++ b/mm/rmap.c
> @@ -1440,13 +1440,6 @@ void page_remove_rmap(struct page *page, struct vm_area_struct *vma,
>  
>  	VM_BUG_ON_PAGE(compound && !PageHead(page), page);
>  
> -	/* Hugetlb pages are not counted in NR_*MAPPED */
> -	if (unlikely(folio_test_hugetlb(folio))) {
> -		/* hugetlb pages are always mapped with pmds */
> -		atomic_dec(&folio->_entire_mapcount);
> -		return;
> -	}

Fundamentally, in the ideal world where hugetlb can be merged into generic
mm, I'd imagine exactly this being dropped here, meanwhile...

> -
>  	/* Is page being unmapped by PTE? Is this its last map to be removed? */
>  	if (likely(!compound)) {
>  		last = atomic_add_negative(-1, &page->_mapcount);
> @@ -1804,7 +1797,10 @@ static bool try_to_unmap_one(struct folio *folio, struct vm_area_struct *vma,
>  			dec_mm_counter(mm, mm_counter_file(&folio->page));
>  		}
>  discard:
> -		page_remove_rmap(subpage, vma, folio_test_hugetlb(folio));
> +		if (unlikely(folio_test_hugetlb(folio)))
> +			hugetlb_remove_rmap(folio);
> +		else
> +			page_remove_rmap(subpage, vma, false);

... rather than moving the hugetlb_* handling even further up the stack, we
should hopefully be able to keep this one as a generic API.

I worry this patch is adding more hugetlb-specific paths even higher up the
call stack, making it harder to generalize, going the other way round from
what we wanted per previous discussions.

That said, I have indeed read mostly nothing yet of the recent rmap patches,
so I may be missing some important goal here.

>  		if (vma->vm_flags & VM_LOCKED)
>  			mlock_drain_local();
>  		folio_put(folio);
> @@ -2157,7 +2153,10 @@ static bool try_to_migrate_one(struct folio *folio, struct vm_area_struct *vma,
>  			 */
>  		}
>  
> -		page_remove_rmap(subpage, vma, folio_test_hugetlb(folio));
> +		if (unlikely(folio_test_hugetlb(folio)))
> +			hugetlb_remove_rmap(folio);
> +		else
> +			page_remove_rmap(subpage, vma, false);
>  		if (vma->vm_flags & VM_LOCKED)
>  			mlock_drain_local();
>  		folio_put(folio);
> -- 
> 2.41.0
> 
> 

Thanks,

Peter Xu Nov. 28, 2023, 5:13 p.m. UTC | #2
On Tue, Nov 28, 2023 at 05:39:35PM +0100, David Hildenbrand wrote:
> Quoting from the cover letter:
> 
> "We have hugetlb special-casing/checks in the callers in all cases either
> way already in place: it doesn't make too much sense to call generic-looking
> functions that end up doing hugetlb specific things from hugetlb
> special-cases."

I'll take this one as an example: I think one goal (of my understanding of
the mm community) is to make the generic looking functions keep being
generic, dropping any function named as "*hugetlb*" if possible one day
within that generic implementation.  I said that in my previous reply.

Having that "*hugetlb*" code already in the code base may or may not be a
good reason to further move it upward the stack.

Strong feelings?  No, I don't have any.  I'm not knowledgeable enough to do so.

Thanks,

David Hildenbrand Nov. 28, 2023, 5:42 p.m. UTC | #3
On 28.11.23 18:13, Peter Xu wrote:
> On Tue, Nov 28, 2023 at 05:39:35PM +0100, David Hildenbrand wrote:
>> Quoting from the cover letter:
>>
>> "We have hugetlb special-casing/checks in the callers in all cases either
>> way already in place: it doesn't make too much sense to call generic-looking
>> functions that end up doing hugetlb specific things from hugetlb
>> special-cases."
> 
> I'll take this one as an example: I think one goal (of my understanding of
> the mm community) is to make the generic looking functions keep being
> generic, dropping any function named as "*hugetlb*" if possible one day
> within that generic implementation.  I said that in my previous reply.

Yes, and I am one of the people asking for that. However, only where it 
makes sense (e.g., like page table walking, GUP as you said), and only 
when it is actually unified.

I don't think that rmap handling or fault handling will ever be 
completely unified to that extreme, and it might also not be desirable. 
Just like we have separate paths for anon and file in areas where they 
are reasonably different.

What doesn't make sense is using patterns like:

	page_remove_rmap(subpage, vma, folio_test_hugetlb(folio));

and then, inside page_remove_rmap(), having an initial folio_test_hugetlb()
check that does something completely different.

So each and every one calling page_remove_rmap (and knowing that it's
certainly not a hugetlb folio) has to run through that check.

Then, we have functions like page_add_file_rmap() that look like they 
can be used for hugetlb, but hugetlb is smart enough and only calls 
page_dup_file_rmap(), because it doesn't want to touch any file/anon 
counters. And to handle that we would have to add folio_test_hugetlb() 
inside there, which adds the same overhead as above, trying to unify without
unifying.
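
Just to illustrate (a purely hypothetical sketch, nothing that is proposed
anywhere in this series), that kind of "unification" would end up looking
like:

void page_add_file_rmap(struct page *page, struct vm_area_struct *vma,
                bool compound)
{
        struct folio *folio = page_folio(page);

        /* hugetlb must not touch the file/anon counters ... */
        if (unlikely(folio_test_hugetlb(folio))) {
                atomic_inc(&folio->_entire_mapcount);
                return;
        }
        /* ... ordinary file-backed accounting ... */
}

... while every non-hugetlb caller would pay for the extra check.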

Once we're in the area of folio_add_file_rmap_range(), it all stops 
making sense, because there is no way we could possibly partially map a 
folio today. (and if we can in the future, we might still want separate 
handling, because most callers know with which pages they are dealing; see below)

Last but not least, it's all inconsistent right now with 
hugetlb_add_anon_rmap()/hugetlb_add_new_anon_rmap() being there because 
they differ reasonably well from the "ordinary" counterparts.

> 
> Having that "*hugetlb*" code already in the code base may or may not be a
> good reason to further move it upward the stack.

If you see a path forward in the foreseeable future where the rmap calling
code wouldn't already have to special-case hugetlb, I'd be interested in
that.

hugetlb.c knows that it's dealing with hugetlb pages.

huge_memory.c knows that it's dealing with PMD-mapped thp.

memory.c knows that it's dealing with PTE-mapped thp or small folios.

Only migrate.c (and, e.g., try_to_unmap() in rmap.c) handles different
types. But there is plenty of hugetlb special-casing in there that I
don't really see going away.

> 
> Strong feelings?  No, I don't have any.  I'm not knowledgeable enough to do so.

I'm sure you are, so I'm trusting your judgment :)

I don't think that going in the other direction and, e.g., removing
hugetlb_add_anon_rmap / hugetlb_add_new_anon_rmap would be a reasonable
unification. It will only make things slower and the individual
functions more complicated.

Peter Xu Nov. 28, 2023, 7:48 p.m. UTC | #4
On Tue, Nov 28, 2023 at 06:42:44PM +0100, David Hildenbrand wrote:
> On 28.11.23 18:13, Peter Xu wrote:
> > On Tue, Nov 28, 2023 at 05:39:35PM +0100, David Hildenbrand wrote:
> > > Quoting from the cover letter:
> > > 
> > > "We have hugetlb special-casing/checks in the callers in all cases either
> > > way already in place: it doesn't make too much sense to call generic-looking
> > > functions that end up doing hugetlb specific things from hugetlb
> > > special-cases."
> > 
> > I'll take this one as an example: I think one goal (of my understanding of
> > the mm community) is to make the generic looking functions keep being
> > generic, dropping any function named as "*hugetlb*" if possible one day
> > within that generic implementation.  I said that in my previous reply.
> 
> Yes, and I am one of the people asking for that. However, only where it
> makes sense (e.g., like page table walking, GUP as you said), and only when
> it is actually unified.
> 
> I don't think that rmap handling or fault handling will ever be completely
> unified to that extreme, and it might also not be desirable. Just like we
> have separate paths for anon and file in areas where they are reasonably
> different.

Yes, I haven't checked further in that direction; for me that'll come after
the pgtable work, if that can first move on smoothly. It'll also depend on
whether merging the pgtable changes would be good enough so that we can
move on to consider small mappings for hugetlb; until then we need to
settle a final mapcount solution for hugetlb.

> 
> What doesn't make sense is using patterns like:
> 
> 	page_remove_rmap(subpage, vma, folio_test_hugetlb(folio));
> 
> and then, inside page_remove_rmap(), having an initial folio_test_hugetlb()
> check that does something completely different.

IIUC the above "folio_test_hugetlb(folio)" pattern can become "false" for
hugetlb if we decide to do mapcounts for small hugetlb mappings.  If that
happens, I think something like this patch _may_ need to be reverted again,
more or less. Or we start to copy some of page_remove_rmap() into the new
hugetlb rmap API.

> 
> So each and every one calling page_remove_rmap (and knowing that it's
> certainly not a hugetlb folio) has to run through that check.

Note that right after this change is applied, hugetlb already starts to lose
something it had in common with generic compound folios, where
page_remove_rmap() had:

	VM_BUG_ON_PAGE(compound && !PageHead(page), page);

That sanity check, which is still logically applicable, goes away
immediately for hugetlb.
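
A dedicated helper could of course carry its own sanity check instead; with
a folio argument the PageHead part is implied by the type, so something like
the sketch below (not what the posted patch does) would be the remaining
useful check:

static inline void hugetlb_remove_rmap(struct folio *folio)
{
        VM_WARN_ON_FOLIO(!folio_test_hugetlb(folio), folio);

        atomic_dec(&folio->_entire_mapcount);
}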

> 
> Then, we have functions like page_add_file_rmap() that look like they can be
> used for hugetlb, but hugetlb is smart enough and only calls
> page_dup_file_rmap(), because it doesn't want to touch any file/anon
> counters. And to handle that we would have to add folio_test_hugetlb()
> inside there, which adds the same overhead as above, trying to unify without
> unifying.
> 
> Once we're in the area of folio_add_file_rmap_range(), it all stops making
> sense, because there is no way we could possibly partially map a folio
> today. (and if we can in the future, we might still want separate handling,
> because most callers know with which pages they are dealing; see below)
> 
> Last but not least, it's all inconsistent right now with
> hugetlb_add_anon_rmap()/hugetlb_add_new_anon_rmap() being there because they
> differ reasonably well from the "ordinary" counterparts.

Taking hugepage_add_new_anon_rmap() as an example, IMHO it still shares a lot
with the !hugetlb code, and maybe the two can already be cleaned up into
something common for a large mapping:

void folio_add_new_anon_rmap(struct folio *folio, struct vm_area_struct *vma,
		unsigned long address)
{
	int nr;

	VM_BUG_ON_VMA(address < vma->vm_start || address >= vma->vm_end, vma);

	if (folio_test_hugetlb(folio)) {
		folio_clear_hugetlb_restore_reserve(folio);
	} else {
		__folio_set_swapbacked(folio);
		atomic_set(&folio->_nr_pages_mapped, COMPOUND_MAPPED);
		nr = folio_nr_pages(folio);
		__lruvec_stat_mod_folio(folio, NR_ANON_THPS, nr);
		__lruvec_stat_mod_folio(folio, NR_ANON_MAPPED, nr);
	}

	/* increment count (starts at -1) */
	atomic_set(&folio->_entire_mapcount, 0);
	__folio_set_anon(folio, vma, address, true);
	SetPageAnonExclusive(&folio->page);
}

For folio_add_file_rmap_range(): would it work if we just pass the hugetlb
folio range into it?  Would that make things much more difficult for the
recent work on large folios from you or anyone else?
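
I.e., hypothetically something like the call below -- assuming the internals
were taught to skip the file counters for hugetlb, which the current
implementation does not do:

	folio_add_file_rmap_range(folio, &folio->page, folio_nr_pages(folio),
				  vma, true);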

> I don't think that going in the other direction and, e.g., removing
> hugetlb_add_anon_rmap / hugetlb_add_new_anon_rmap would be a reasonable
> unification. It will only make things slower and the individual
> functions more complicated.

IIUC, so far the performance difference from which helper is used should be
minimal.

As I mentioned, I sincerely don't know whether this patch is good or not, as
I don't know enough about everything that is happening around it.  It's just
that I'd still think twice before giving hugetlb its own rmap API, because
from a high level it's going the other way round to me.  So I still want to
raise this as a pure question.

Thanks,

Patch

diff --git a/include/linux/rmap.h b/include/linux/rmap.h
index 4c5bfeb05463..e8d1dc1d5361 100644
--- a/include/linux/rmap.h
+++ b/include/linux/rmap.h
@@ -208,6 +208,11 @@  void hugetlb_add_anon_rmap(struct folio *, struct vm_area_struct *,
 void hugetlb_add_new_anon_rmap(struct folio *, struct vm_area_struct *,
 		unsigned long address);
 
+static inline void hugetlb_remove_rmap(struct folio *folio)
+{
+	atomic_dec(&folio->_entire_mapcount);
+}
+
 static inline void __page_dup_rmap(struct page *page, bool compound)
 {
 	if (compound) {
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 4cfa0679661e..d17bb53b19ff 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -5669,7 +5669,7 @@  void __unmap_hugepage_range(struct mmu_gather *tlb, struct vm_area_struct *vma,
 					make_pte_marker(PTE_MARKER_UFFD_WP),
 					sz);
 		hugetlb_count_sub(pages_per_huge_page(h), mm);
-		page_remove_rmap(page, vma, true);
+		hugetlb_remove_rmap(page_folio(page));
 
 		spin_unlock(ptl);
 		tlb_remove_page_size(tlb, page, huge_page_size(h));
@@ -5980,7 +5980,7 @@  static vm_fault_t hugetlb_wp(struct mm_struct *mm, struct vm_area_struct *vma,
 
 		/* Break COW or unshare */
 		huge_ptep_clear_flush(vma, haddr, ptep);
-		page_remove_rmap(&old_folio->page, vma, true);
+		hugetlb_remove_rmap(old_folio);
 		hugetlb_add_new_anon_rmap(new_folio, vma, haddr);
 		if (huge_pte_uffd_wp(pte))
 			newpte = huge_pte_mkuffd_wp(newpte);
diff --git a/mm/rmap.c b/mm/rmap.c
index 112467c30b2c..5037581b79ec 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -1440,13 +1440,6 @@  void page_remove_rmap(struct page *page, struct vm_area_struct *vma,
 
 	VM_BUG_ON_PAGE(compound && !PageHead(page), page);
 
-	/* Hugetlb pages are not counted in NR_*MAPPED */
-	if (unlikely(folio_test_hugetlb(folio))) {
-		/* hugetlb pages are always mapped with pmds */
-		atomic_dec(&folio->_entire_mapcount);
-		return;
-	}
-
 	/* Is page being unmapped by PTE? Is this its last map to be removed? */
 	if (likely(!compound)) {
 		last = atomic_add_negative(-1, &page->_mapcount);
@@ -1804,7 +1797,10 @@  static bool try_to_unmap_one(struct folio *folio, struct vm_area_struct *vma,
 			dec_mm_counter(mm, mm_counter_file(&folio->page));
 		}
 discard:
-		page_remove_rmap(subpage, vma, folio_test_hugetlb(folio));
+		if (unlikely(folio_test_hugetlb(folio)))
+			hugetlb_remove_rmap(folio);
+		else
+			page_remove_rmap(subpage, vma, false);
 		if (vma->vm_flags & VM_LOCKED)
 			mlock_drain_local();
 		folio_put(folio);
@@ -2157,7 +2153,10 @@  static bool try_to_migrate_one(struct folio *folio, struct vm_area_struct *vma,
 			 */
 		}
 
-		page_remove_rmap(subpage, vma, folio_test_hugetlb(folio));
+		if (unlikely(folio_test_hugetlb(folio)))
+			hugetlb_remove_rmap(folio);
+		else
+			page_remove_rmap(subpage, vma, false);
 		if (vma->vm_flags & VM_LOCKED)
 			mlock_drain_local();
 		folio_put(folio);