[v1,15/39] mm/huge_memory: batch rmap operations in __split_huge_pmd_locked()

Message ID 20231211155652.131054-16-david@redhat.com
State New
Series mm/rmap: interface overhaul

Commit Message

David Hildenbrand Dec. 11, 2023, 3:56 p.m. UTC
Let's use folio_add_anon_rmap_ptes(), batching the rmap operations.

While at it, use more folio operations (but only in the code branch we're
touching), use VM_WARN_ON_FOLIO(), and pass RMAP_EXCLUSIVE instead of
manually setting PageAnonExclusive.

We should never see non-anon pages on that branch: otherwise, the
existing page_add_anon_rmap() call would have been flawed already.

Signed-off-by: David Hildenbrand <david@redhat.com>
---
 mm/huge_memory.c | 23 +++++++++++++++--------
 1 file changed, 15 insertions(+), 8 deletions(-)
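
For illustration, here is the before/after shape of the rmap update in
condensed form (all identifiers are taken from the patch below; the
PTE-construction work inside the remap loop is elided, so this is a sketch,
not the literal kernel source):

	/* Before: one rmap update per subpage, inside the PTE remap loop. */
	for (i = 0; i < HPAGE_PMD_NR; i++) {
		...
		if (anon_exclusive)
			SetPageAnonExclusive(page + i);
		page_add_anon_rmap(page + i, vma, addr, RMAP_NONE);
	}

	/* After: one batched call before the loop; the RMAP_EXCLUSIVE flag
	 * replaces the manual per-subpage SetPageAnonExclusive(). */
	if (!freeze) {
		rmap_t rmap_flags = RMAP_NONE;

		folio_ref_add(folio, HPAGE_PMD_NR - 1);
		if (anon_exclusive)
			rmap_flags |= RMAP_EXCLUSIVE;
		folio_add_anon_rmap_ptes(folio, page, HPAGE_PMD_NR,
					 vma, haddr, rmap_flags);
	}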

Comments

Yin Fengwei Dec. 15, 2023, 2:27 a.m. UTC | #1
On 12/11/2023 11:56 PM, David Hildenbrand wrote:
> Let's use folio_add_anon_rmap_ptes(), batching the rmap operations.
> 
> While at it, use more folio operations (but only in the code branch we're
> touching), use VM_WARN_ON_FOLIO(), and pass RMAP_EXCLUSIVE instead of
> manually setting PageAnonExclusive.
> 
> We should never see non-anon pages on that branch: otherwise, the
> existing page_add_anon_rmap() call would have been flawed already.
> 
> Signed-off-by: David Hildenbrand <david@redhat.com>
Reviewed-by: Yin Fengwei <fengwei.yin@intel.com>
Ryan Roberts Dec. 18, 2023, 4:22 p.m. UTC | #3
On 11/12/2023 15:56, David Hildenbrand wrote:
> Let's use folio_add_anon_rmap_ptes(), batching the rmap operations.
> 
> While at it, use more folio operations (but only in the code branch we're
> touching), use VM_WARN_ON_FOLIO(), and pass RMAP_EXCLUSIVE instead of
> manually setting PageAnonExclusive.
> 
> We should never see non-anon pages on that branch: otherwise, the
> existing page_add_anon_rmap() call would have been flawed already.
> 
> Signed-off-by: David Hildenbrand <david@redhat.com>
> ---
>  mm/huge_memory.c | 23 +++++++++++++++--------
>  1 file changed, 15 insertions(+), 8 deletions(-)
> 
> diff --git a/mm/huge_memory.c b/mm/huge_memory.c
> index 1f5634b2f374..82ad68fe0d12 100644
> --- a/mm/huge_memory.c
> +++ b/mm/huge_memory.c
> @@ -2398,6 +2398,7 @@ static void __split_huge_pmd_locked(struct vm_area_struct *vma, pmd_t *pmd,
>  		unsigned long haddr, bool freeze)
>  {
>  	struct mm_struct *mm = vma->vm_mm;
> +	struct folio *folio;
>  	struct page *page;
>  	pgtable_t pgtable;
>  	pmd_t old_pmd, _pmd;
> @@ -2493,16 +2494,18 @@ static void __split_huge_pmd_locked(struct vm_area_struct *vma, pmd_t *pmd,
>  		uffd_wp = pmd_swp_uffd_wp(old_pmd);
>  	} else {
>  		page = pmd_page(old_pmd);
> +		folio = page_folio(page);
>  		if (pmd_dirty(old_pmd)) {
>  			dirty = true;
> -			SetPageDirty(page);
> +			folio_set_dirty(folio);
>  		}
>  		write = pmd_write(old_pmd);
>  		young = pmd_young(old_pmd);
>  		soft_dirty = pmd_soft_dirty(old_pmd);
>  		uffd_wp = pmd_uffd_wp(old_pmd);
>  
> -		VM_BUG_ON_PAGE(!page_count(page), page);
> +		VM_WARN_ON_FOLIO(!folio_ref_count(folio), folio);
> +		VM_WARN_ON_FOLIO(!folio_test_anon(folio), folio);

Is this warning really correct? File-backed memory can be PMD-mapped with
CONFIG_READ_ONLY_THP_FOR_FS, so presumably it can also need to be remapped
as PTEs? Although I guess if we did have a file-backed folio, it definitely
wouldn't be correct to call page_add_anon_rmap() /
folio_add_anon_rmap_ptes()...

>  
>  		/*
>  		 * Without "freeze", we'll simply split the PMD, propagating the
> @@ -2519,11 +2522,18 @@ static void __split_huge_pmd_locked(struct vm_area_struct *vma, pmd_t *pmd,
>  		 *
>  		 * See page_try_share_anon_rmap(): invalidate PMD first.
>  		 */
> -		anon_exclusive = PageAnon(page) && PageAnonExclusive(page);
> +		anon_exclusive = PageAnonExclusive(page);
>  		if (freeze && anon_exclusive && page_try_share_anon_rmap(page))
>  			freeze = false;
> -		if (!freeze)
> -			page_ref_add(page, HPAGE_PMD_NR - 1);
> +		if (!freeze) {
> +			rmap_t rmap_flags = RMAP_NONE;
> +
> +			folio_ref_add(folio, HPAGE_PMD_NR - 1);
> +			if (anon_exclusive)
> +				rmap_flags |= RMAP_EXCLUSIVE;
> +			folio_add_anon_rmap_ptes(folio, page, HPAGE_PMD_NR,
> +						 vma, haddr, rmap_flags);
> +		}
>  	}
>  
>  	/*
> @@ -2566,8 +2576,6 @@ static void __split_huge_pmd_locked(struct vm_area_struct *vma, pmd_t *pmd,
>  			entry = mk_pte(page + i, READ_ONCE(vma->vm_page_prot));
>  			if (write)
>  				entry = pte_mkwrite(entry, vma);
> -			if (anon_exclusive)
> -				SetPageAnonExclusive(page + i);
>  			if (!young)
>  				entry = pte_mkold(entry);
>  			/* NOTE: this may set soft-dirty too on some archs */
> @@ -2577,7 +2585,6 @@ static void __split_huge_pmd_locked(struct vm_area_struct *vma, pmd_t *pmd,
>  				entry = pte_mksoft_dirty(entry);
>  			if (uffd_wp)
>  				entry = pte_mkuffd_wp(entry);
> -			page_add_anon_rmap(page + i, vma, addr, RMAP_NONE);
>  		}
>  		VM_BUG_ON(!pte_none(ptep_get(pte)));
>  		set_pte_at(mm, addr, pte, entry);
David Hildenbrand Dec. 18, 2023, 5:03 p.m. UTC | #4
On 18.12.23 17:22, Ryan Roberts wrote:
> On 11/12/2023 15:56, David Hildenbrand wrote:
>> Let's use folio_add_anon_rmap_ptes(), batching the rmap operations.
>>
>> While at it, use more folio operations (but only in the code branch we're
>> touching), use VM_WARN_ON_FOLIO(), and pass RMAP_EXCLUSIVE instead of
>> manually setting PageAnonExclusive.
>>
>> We should never see non-anon pages on that branch: otherwise, the
>> existing page_add_anon_rmap() call would have been flawed already.
>>
>> Signed-off-by: David Hildenbrand <david@redhat.com>
>> ---
>>   mm/huge_memory.c | 23 +++++++++++++++--------
>>   1 file changed, 15 insertions(+), 8 deletions(-)
>>
>> diff --git a/mm/huge_memory.c b/mm/huge_memory.c
>> index 1f5634b2f374..82ad68fe0d12 100644
>> --- a/mm/huge_memory.c
>> +++ b/mm/huge_memory.c
>> @@ -2398,6 +2398,7 @@ static void __split_huge_pmd_locked(struct vm_area_struct *vma, pmd_t *pmd,
>>   		unsigned long haddr, bool freeze)
>>   {
>>   	struct mm_struct *mm = vma->vm_mm;
>> +	struct folio *folio;
>>   	struct page *page;
>>   	pgtable_t pgtable;
>>   	pmd_t old_pmd, _pmd;
>> @@ -2493,16 +2494,18 @@ static void __split_huge_pmd_locked(struct vm_area_struct *vma, pmd_t *pmd,
>>   		uffd_wp = pmd_swp_uffd_wp(old_pmd);
>>   	} else {
>>   		page = pmd_page(old_pmd);
>> +		folio = page_folio(page);
>>   		if (pmd_dirty(old_pmd)) {
>>   			dirty = true;
>> -			SetPageDirty(page);
>> +			folio_set_dirty(folio);
>>   		}
>>   		write = pmd_write(old_pmd);
>>   		young = pmd_young(old_pmd);
>>   		soft_dirty = pmd_soft_dirty(old_pmd);
>>   		uffd_wp = pmd_uffd_wp(old_pmd);
>>   
>> -		VM_BUG_ON_PAGE(!page_count(page), page);
>> +		VM_WARN_ON_FOLIO(!folio_ref_count(folio), folio);
>> +		VM_WARN_ON_FOLIO(!folio_test_anon(folio), folio);
> 
> Is this warning really correct? File-backed memory can be PMD-mapped with
> CONFIG_READ_ONLY_THP_FOR_FS, so presumably it can also need to be remapped
> as PTEs? Although I guess if we did have a file-backed folio, it definitely
> wouldn't be correct to call page_add_anon_rmap() /
> folio_add_anon_rmap_ptes()...

Yes, see the patch description where I spell that out.

PTE-remapping a file-backed folio will simply zap the PMD and refault from
the page cache after creating a page table.

So this is anon-only code.
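
To make David's point concrete, here is a rough sketch of the surrounding
control flow (paraphrased and heavily simplified, not the literal kernel
code; vma_is_anonymous() and pmdp_huge_clear_flush() are the upstream
helpers involved):

	static void __split_huge_pmd_locked(struct vm_area_struct *vma,
			pmd_t *pmd, unsigned long haddr, bool freeze)
	{
		...
		if (!vma_is_anonymous(vma)) {
			/* File-backed mapping: just zap the PMD and return;
			 * later faults repopulate the PTEs from the page
			 * cache. */
			old_pmd = pmdp_huge_clear_flush(vma, haddr, pmd);
			...
			return;
		}
		...
		/* From here on, including the branch this patch touches,
		 * only anonymous folios are ever seen. */
	}
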
Ryan Roberts Dec. 19, 2023, 8:42 a.m. UTC | #5
On 18/12/2023 17:03, David Hildenbrand wrote:
> On 18.12.23 17:22, Ryan Roberts wrote:
>> On 11/12/2023 15:56, David Hildenbrand wrote:
>>> Let's use folio_add_anon_rmap_ptes(), batching the rmap operations.
>>>
>>> While at it, use more folio operations (but only in the code branch we're
>>> touching), use VM_WARN_ON_FOLIO(), and pass RMAP_EXCLUSIVE instead of
>>> manually setting PageAnonExclusive.
>>>
>>> We should never see non-anon pages on that branch: otherwise, the
>>> existing page_add_anon_rmap() call would have been flawed already.
>>>
>>> Signed-off-by: David Hildenbrand <david@redhat.com>
>>> ---
>>>   mm/huge_memory.c | 23 +++++++++++++++--------
>>>   1 file changed, 15 insertions(+), 8 deletions(-)
>>>
>>> diff --git a/mm/huge_memory.c b/mm/huge_memory.c
>>> index 1f5634b2f374..82ad68fe0d12 100644
>>> --- a/mm/huge_memory.c
>>> +++ b/mm/huge_memory.c
>>> @@ -2398,6 +2398,7 @@ static void __split_huge_pmd_locked(struct vm_area_struct *vma, pmd_t *pmd,
>>>           unsigned long haddr, bool freeze)
>>>   {
>>>       struct mm_struct *mm = vma->vm_mm;
>>> +    struct folio *folio;
>>>       struct page *page;
>>>       pgtable_t pgtable;
>>>       pmd_t old_pmd, _pmd;
>>> @@ -2493,16 +2494,18 @@ static void __split_huge_pmd_locked(struct vm_area_struct *vma, pmd_t *pmd,
>>>           uffd_wp = pmd_swp_uffd_wp(old_pmd);
>>>       } else {
>>>           page = pmd_page(old_pmd);
>>> +        folio = page_folio(page);
>>>           if (pmd_dirty(old_pmd)) {
>>>               dirty = true;
>>> -            SetPageDirty(page);
>>> +            folio_set_dirty(folio);
>>>           }
>>>           write = pmd_write(old_pmd);
>>>           young = pmd_young(old_pmd);
>>>           soft_dirty = pmd_soft_dirty(old_pmd);
>>>           uffd_wp = pmd_uffd_wp(old_pmd);
>>>
>>> -        VM_BUG_ON_PAGE(!page_count(page), page);
>>> +        VM_WARN_ON_FOLIO(!folio_ref_count(folio), folio);
>>> +        VM_WARN_ON_FOLIO(!folio_test_anon(folio), folio);
>>
>> Is this warning really correct? File-backed memory can be PMD-mapped with
>> CONFIG_READ_ONLY_THP_FOR_FS, so presumably it can also need to be remapped
>> as PTEs? Although I guess if we did have a file-backed folio, it definitely
>> wouldn't be correct to call page_add_anon_rmap() /
>> folio_add_anon_rmap_ptes()...
> 
> Yes, see the patch description where I spell that out.

Oh god, how did I miss that... sorry!

> 
> PTE-remapping a file-backed folio will simply zap the PMD and refault from
> the page cache after creating a page table.


Yep, that makes sense.

> 
> So this is anon-only code.
>

Patch

diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 1f5634b2f374..82ad68fe0d12 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -2398,6 +2398,7 @@  static void __split_huge_pmd_locked(struct vm_area_struct *vma, pmd_t *pmd,
 		unsigned long haddr, bool freeze)
 {
 	struct mm_struct *mm = vma->vm_mm;
+	struct folio *folio;
 	struct page *page;
 	pgtable_t pgtable;
 	pmd_t old_pmd, _pmd;
@@ -2493,16 +2494,18 @@  static void __split_huge_pmd_locked(struct vm_area_struct *vma, pmd_t *pmd,
 		uffd_wp = pmd_swp_uffd_wp(old_pmd);
 	} else {
 		page = pmd_page(old_pmd);
+		folio = page_folio(page);
 		if (pmd_dirty(old_pmd)) {
 			dirty = true;
-			SetPageDirty(page);
+			folio_set_dirty(folio);
 		}
 		write = pmd_write(old_pmd);
 		young = pmd_young(old_pmd);
 		soft_dirty = pmd_soft_dirty(old_pmd);
 		uffd_wp = pmd_uffd_wp(old_pmd);
 
-		VM_BUG_ON_PAGE(!page_count(page), page);
+		VM_WARN_ON_FOLIO(!folio_ref_count(folio), folio);
+		VM_WARN_ON_FOLIO(!folio_test_anon(folio), folio);
 
 		/*
 		 * Without "freeze", we'll simply split the PMD, propagating the
@@ -2519,11 +2522,18 @@  static void __split_huge_pmd_locked(struct vm_area_struct *vma, pmd_t *pmd,
 		 *
 		 * See page_try_share_anon_rmap(): invalidate PMD first.
 		 */
-		anon_exclusive = PageAnon(page) && PageAnonExclusive(page);
+		anon_exclusive = PageAnonExclusive(page);
 		if (freeze && anon_exclusive && page_try_share_anon_rmap(page))
 			freeze = false;
-		if (!freeze)
-			page_ref_add(page, HPAGE_PMD_NR - 1);
+		if (!freeze) {
+			rmap_t rmap_flags = RMAP_NONE;
+
+			folio_ref_add(folio, HPAGE_PMD_NR - 1);
+			if (anon_exclusive)
+				rmap_flags |= RMAP_EXCLUSIVE;
+			folio_add_anon_rmap_ptes(folio, page, HPAGE_PMD_NR,
+						 vma, haddr, rmap_flags);
+		}
 	}
 
 	/*
@@ -2566,8 +2576,6 @@  static void __split_huge_pmd_locked(struct vm_area_struct *vma, pmd_t *pmd,
 			entry = mk_pte(page + i, READ_ONCE(vma->vm_page_prot));
 			if (write)
 				entry = pte_mkwrite(entry, vma);
-			if (anon_exclusive)
-				SetPageAnonExclusive(page + i);
 			if (!young)
 				entry = pte_mkold(entry);
 			/* NOTE: this may set soft-dirty too on some archs */
@@ -2577,7 +2585,6 @@  static void __split_huge_pmd_locked(struct vm_area_struct *vma, pmd_t *pmd,
 				entry = pte_mksoft_dirty(entry);
 			if (uffd_wp)
 				entry = pte_mkuffd_wp(entry);
-			page_add_anon_rmap(page + i, vma, addr, RMAP_NONE);
 		}
 		VM_BUG_ON(!pte_none(ptep_get(pte)));
 		set_pte_at(mm, addr, pte, entry);