diff mbox series

[v1,1/9] mm/memory: factor out zapping of present pte into zap_present_pte()

Message ID 20240129143221.263763-2-david@redhat.com (mailing list archive)
State New
Headers show
Series mm/memory: optimize unmap/zap with PTE-mapped THP | expand

Commit Message

David Hildenbrand Jan. 29, 2024, 2:32 p.m. UTC
Let's prepare for further changes by factoring out processing of present
PTEs.

Signed-off-by: David Hildenbrand <david@redhat.com>
---
 mm/memory.c | 92 ++++++++++++++++++++++++++++++-----------------------
 1 file changed, 52 insertions(+), 40 deletions(-)

Comments

Ryan Roberts Jan. 30, 2024, 8:13 a.m. UTC | #1
On 29/01/2024 14:32, David Hildenbrand wrote:
> Let's prepare for further changes by factoring out processing of present
> PTEs.
> 
> Signed-off-by: David Hildenbrand <david@redhat.com>
> ---
>  mm/memory.c | 92 ++++++++++++++++++++++++++++++-----------------------
>  1 file changed, 52 insertions(+), 40 deletions(-)
> 
> diff --git a/mm/memory.c b/mm/memory.c
> index b05fd28dbce1..50a6c79c78fc 100644
> --- a/mm/memory.c
> +++ b/mm/memory.c
> @@ -1532,13 +1532,61 @@ zap_install_uffd_wp_if_needed(struct vm_area_struct *vma,
>  	pte_install_uffd_wp_if_needed(vma, addr, pte, pteval);
>  }
>  
> +static inline void zap_present_pte(struct mmu_gather *tlb,
> +		struct vm_area_struct *vma, pte_t *pte, pte_t ptent,
> +		unsigned long addr, struct zap_details *details,
> +		int *rss, bool *force_flush, bool *force_break)
> +{
> +	struct mm_struct *mm = tlb->mm;
> +	bool delay_rmap = false;
> +	struct folio *folio;

You need to init this to NULL; otherwise it's a random value when calling
should_zap_folio() if vm_normal_page() returns NULL.

> +	struct page *page;
> +
> +	page = vm_normal_page(vma, addr, ptent);
> +	if (page)
> +		folio = page_folio(page);
> +
> +	if (unlikely(!should_zap_folio(details, folio)))
> +		return;
> +	ptent = ptep_get_and_clear_full(mm, addr, pte, tlb->fullmm);
> +	arch_check_zapped_pte(vma, ptent);
> +	tlb_remove_tlb_entry(tlb, pte, addr);
> +	zap_install_uffd_wp_if_needed(vma, addr, pte, details, ptent);
> +	if (unlikely(!page)) {
> +		ksm_might_unmap_zero_page(mm, ptent);
> +		return;
> +	}
> +
> +	if (!folio_test_anon(folio)) {
> +		if (pte_dirty(ptent)) {
> +			folio_mark_dirty(folio);
> +			if (tlb_delay_rmap(tlb)) {
> +				delay_rmap = true;
> +				*force_flush = true;
> +			}
> +		}
> +		if (pte_young(ptent) && likely(vma_has_recency(vma)))
> +			folio_mark_accessed(folio);
> +	}
> +	rss[mm_counter(folio)]--;
> +	if (!delay_rmap) {
> +		folio_remove_rmap_pte(folio, page, vma);
> +		if (unlikely(page_mapcount(page) < 0))
> +			print_bad_pte(vma, addr, ptent, page);
> +	}
> +	if (unlikely(__tlb_remove_page(tlb, page, delay_rmap))) {
> +		*force_flush = true;
> +		*force_break = true;
> +	}
> +}
> +
>  static unsigned long zap_pte_range(struct mmu_gather *tlb,
>  				struct vm_area_struct *vma, pmd_t *pmd,
>  				unsigned long addr, unsigned long end,
>  				struct zap_details *details)
>  {
> +	bool force_flush = false, force_break = false;
>  	struct mm_struct *mm = tlb->mm;
> -	int force_flush = 0;
>  	int rss[NR_MM_COUNTERS];
>  	spinlock_t *ptl;
>  	pte_t *start_pte;
> @@ -1565,45 +1613,9 @@ static unsigned long zap_pte_range(struct mmu_gather *tlb,
>  			break;
>  
>  		if (pte_present(ptent)) {
> -			unsigned int delay_rmap;
> -
> -			page = vm_normal_page(vma, addr, ptent);
> -			if (page)
> -				folio = page_folio(page);
> -
> -			if (unlikely(!should_zap_folio(details, folio)))
> -				continue;
> -			ptent = ptep_get_and_clear_full(mm, addr, pte,
> -							tlb->fullmm);
> -			arch_check_zapped_pte(vma, ptent);
> -			tlb_remove_tlb_entry(tlb, pte, addr);
> -			zap_install_uffd_wp_if_needed(vma, addr, pte, details,
> -						      ptent);
> -			if (unlikely(!page)) {
> -				ksm_might_unmap_zero_page(mm, ptent);
> -				continue;
> -			}
> -
> -			delay_rmap = 0;
> -			if (!folio_test_anon(folio)) {
> -				if (pte_dirty(ptent)) {
> -					folio_mark_dirty(folio);
> -					if (tlb_delay_rmap(tlb)) {
> -						delay_rmap = 1;
> -						force_flush = 1;
> -					}
> -				}
> -				if (pte_young(ptent) && likely(vma_has_recency(vma)))
> -					folio_mark_accessed(folio);
> -			}
> -			rss[mm_counter(folio)]--;
> -			if (!delay_rmap) {
> -				folio_remove_rmap_pte(folio, page, vma);
> -				if (unlikely(page_mapcount(page) < 0))
> -					print_bad_pte(vma, addr, ptent, page);
> -			}
> -			if (unlikely(__tlb_remove_page(tlb, page, delay_rmap))) {
> -				force_flush = 1;
> +			zap_present_pte(tlb, vma, pte, ptent, addr, details,
> +					rss, &force_flush, &force_break);
> +			if (unlikely(force_break)) {
>  				addr += PAGE_SIZE;
>  				break;
>  			}
David Hildenbrand Jan. 30, 2024, 8:41 a.m. UTC | #2
On 30.01.24 09:13, Ryan Roberts wrote:
> On 29/01/2024 14:32, David Hildenbrand wrote:
>> Let's prepare for further changes by factoring out processing of present
>> PTEs.
>>
>> Signed-off-by: David Hildenbrand <david@redhat.com>
>> ---
>>   mm/memory.c | 92 ++++++++++++++++++++++++++++++-----------------------
>>   1 file changed, 52 insertions(+), 40 deletions(-)
>>
>> diff --git a/mm/memory.c b/mm/memory.c
>> index b05fd28dbce1..50a6c79c78fc 100644
>> --- a/mm/memory.c
>> +++ b/mm/memory.c
>> @@ -1532,13 +1532,61 @@ zap_install_uffd_wp_if_needed(struct vm_area_struct *vma,
>>   	pte_install_uffd_wp_if_needed(vma, addr, pte, pteval);
>>   }
>>   
>> +static inline void zap_present_pte(struct mmu_gather *tlb,
>> +		struct vm_area_struct *vma, pte_t *pte, pte_t ptent,
>> +		unsigned long addr, struct zap_details *details,
>> +		int *rss, bool *force_flush, bool *force_break)
>> +{
>> +	struct mm_struct *mm = tlb->mm;
>> +	bool delay_rmap = false;
>> +	struct folio *folio;
> 
> You need to init this to NULL otherwise its a random value when calling
> should_zap_folio() if vm_normal_page() returns NULL.

Right, and we can stop setting it to NULL in the original function. 
Patch #2 changes these checks, which is why it's only a problem in this 
patch.

Will fix, thanks!
Ryan Roberts Jan. 30, 2024, 8:46 a.m. UTC | #3
On 30/01/2024 08:41, David Hildenbrand wrote:
> On 30.01.24 09:13, Ryan Roberts wrote:
>> On 29/01/2024 14:32, David Hildenbrand wrote:
>>> Let's prepare for further changes by factoring out processing of present
>>> PTEs.
>>>
>>> Signed-off-by: David Hildenbrand <david@redhat.com>
>>> ---
>>>   mm/memory.c | 92 ++++++++++++++++++++++++++++++-----------------------
>>>   1 file changed, 52 insertions(+), 40 deletions(-)
>>>
>>> diff --git a/mm/memory.c b/mm/memory.c
>>> index b05fd28dbce1..50a6c79c78fc 100644
>>> --- a/mm/memory.c
>>> +++ b/mm/memory.c
>>> @@ -1532,13 +1532,61 @@ zap_install_uffd_wp_if_needed(struct vm_area_struct
>>> *vma,
>>>       pte_install_uffd_wp_if_needed(vma, addr, pte, pteval);
>>>   }
>>>   +static inline void zap_present_pte(struct mmu_gather *tlb,
>>> +        struct vm_area_struct *vma, pte_t *pte, pte_t ptent,
>>> +        unsigned long addr, struct zap_details *details,
>>> +        int *rss, bool *force_flush, bool *force_break)
>>> +{
>>> +    struct mm_struct *mm = tlb->mm;
>>> +    bool delay_rmap = false;
>>> +    struct folio *folio;
>>
>> You need to init this to NULL otherwise its a random value when calling
>> should_zap_folio() if vm_normal_page() returns NULL.
> 
> Right, and we can stop setting it to NULL in the original function. Patch #2
> changes these checks, which is why it's only a problem in this patch.

Yeah I only noticed that after sending out this reply and moving to the next
patch. Still worth fixing this intermediate state I think.

> 
> Will fix, thanks!
>
David Hildenbrand Jan. 30, 2024, 8:49 a.m. UTC | #4
On 30.01.24 09:46, Ryan Roberts wrote:
> On 30/01/2024 08:41, David Hildenbrand wrote:
>> On 30.01.24 09:13, Ryan Roberts wrote:
>>> On 29/01/2024 14:32, David Hildenbrand wrote:
>>>> Let's prepare for further changes by factoring out processing of present
>>>> PTEs.
>>>>
>>>> Signed-off-by: David Hildenbrand <david@redhat.com>
>>>> ---
>>>>    mm/memory.c | 92 ++++++++++++++++++++++++++++++-----------------------
>>>>    1 file changed, 52 insertions(+), 40 deletions(-)
>>>>
>>>> diff --git a/mm/memory.c b/mm/memory.c
>>>> index b05fd28dbce1..50a6c79c78fc 100644
>>>> --- a/mm/memory.c
>>>> +++ b/mm/memory.c
>>>> @@ -1532,13 +1532,61 @@ zap_install_uffd_wp_if_needed(struct vm_area_struct
>>>> *vma,
>>>>        pte_install_uffd_wp_if_needed(vma, addr, pte, pteval);
>>>>    }
>>>>    +static inline void zap_present_pte(struct mmu_gather *tlb,
>>>> +        struct vm_area_struct *vma, pte_t *pte, pte_t ptent,
>>>> +        unsigned long addr, struct zap_details *details,
>>>> +        int *rss, bool *force_flush, bool *force_break)
>>>> +{
>>>> +    struct mm_struct *mm = tlb->mm;
>>>> +    bool delay_rmap = false;
>>>> +    struct folio *folio;
>>>
>>> You need to init this to NULL otherwise its a random value when calling
>>> should_zap_folio() if vm_normal_page() returns NULL.
>>
>> Right, and we can stop setting it to NULL in the original function. Patch #2
>> changes these checks, which is why it's only a problem in this patch.
> 
> Yeah I only noticed that after sending out this reply and moving to the next
> patch. Still worth fixing this intermediate state I think.

Absolutely, I didn't do patch-by-patch compilation yet (I suspect the 
compiler would complain).
diff mbox series

Patch

diff --git a/mm/memory.c b/mm/memory.c
index b05fd28dbce1..50a6c79c78fc 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -1532,13 +1532,61 @@  zap_install_uffd_wp_if_needed(struct vm_area_struct *vma,
 	pte_install_uffd_wp_if_needed(vma, addr, pte, pteval);
 }
 
+static inline void zap_present_pte(struct mmu_gather *tlb,
+		struct vm_area_struct *vma, pte_t *pte, pte_t ptent,
+		unsigned long addr, struct zap_details *details,
+		int *rss, bool *force_flush, bool *force_break)
+{
+	struct mm_struct *mm = tlb->mm;
+	bool delay_rmap = false;
+	struct folio *folio = NULL;	/* NULL if no normal page */
+	struct page *page;
+
+	page = vm_normal_page(vma, addr, ptent);
+	if (page)
+		folio = page_folio(page);
+
+	if (unlikely(!should_zap_folio(details, folio)))
+		return;
+	ptent = ptep_get_and_clear_full(mm, addr, pte, tlb->fullmm);
+	arch_check_zapped_pte(vma, ptent);
+	tlb_remove_tlb_entry(tlb, pte, addr);
+	zap_install_uffd_wp_if_needed(vma, addr, pte, details, ptent);
+	if (unlikely(!page)) {
+		ksm_might_unmap_zero_page(mm, ptent);
+		return;
+	}
+
+	if (!folio_test_anon(folio)) {
+		if (pte_dirty(ptent)) {
+			folio_mark_dirty(folio);
+			if (tlb_delay_rmap(tlb)) {
+				delay_rmap = true;
+				*force_flush = true;
+			}
+		}
+		if (pte_young(ptent) && likely(vma_has_recency(vma)))
+			folio_mark_accessed(folio);
+	}
+	rss[mm_counter(folio)]--;
+	if (!delay_rmap) {
+		folio_remove_rmap_pte(folio, page, vma);
+		if (unlikely(page_mapcount(page) < 0))
+			print_bad_pte(vma, addr, ptent, page);
+	}
+	if (unlikely(__tlb_remove_page(tlb, page, delay_rmap))) {
+		*force_flush = true;
+		*force_break = true;
+	}
+}
+
 static unsigned long zap_pte_range(struct mmu_gather *tlb,
 				struct vm_area_struct *vma, pmd_t *pmd,
 				unsigned long addr, unsigned long end,
 				struct zap_details *details)
 {
+	bool force_flush = false, force_break = false;
 	struct mm_struct *mm = tlb->mm;
-	int force_flush = 0;
 	int rss[NR_MM_COUNTERS];
 	spinlock_t *ptl;
 	pte_t *start_pte;
@@ -1565,45 +1613,9 @@  static unsigned long zap_pte_range(struct mmu_gather *tlb,
 			break;
 
 		if (pte_present(ptent)) {
-			unsigned int delay_rmap;
-
-			page = vm_normal_page(vma, addr, ptent);
-			if (page)
-				folio = page_folio(page);
-
-			if (unlikely(!should_zap_folio(details, folio)))
-				continue;
-			ptent = ptep_get_and_clear_full(mm, addr, pte,
-							tlb->fullmm);
-			arch_check_zapped_pte(vma, ptent);
-			tlb_remove_tlb_entry(tlb, pte, addr);
-			zap_install_uffd_wp_if_needed(vma, addr, pte, details,
-						      ptent);
-			if (unlikely(!page)) {
-				ksm_might_unmap_zero_page(mm, ptent);
-				continue;
-			}
-
-			delay_rmap = 0;
-			if (!folio_test_anon(folio)) {
-				if (pte_dirty(ptent)) {
-					folio_mark_dirty(folio);
-					if (tlb_delay_rmap(tlb)) {
-						delay_rmap = 1;
-						force_flush = 1;
-					}
-				}
-				if (pte_young(ptent) && likely(vma_has_recency(vma)))
-					folio_mark_accessed(folio);
-			}
-			rss[mm_counter(folio)]--;
-			if (!delay_rmap) {
-				folio_remove_rmap_pte(folio, page, vma);
-				if (unlikely(page_mapcount(page) < 0))
-					print_bad_pte(vma, addr, ptent, page);
-			}
-			if (unlikely(__tlb_remove_page(tlb, page, delay_rmap))) {
-				force_flush = 1;
+			zap_present_pte(tlb, vma, pte, ptent, addr, details,
+					rss, &force_flush, &force_break);
+			if (unlikely(force_break)) {
 				addr += PAGE_SIZE;
 				break;
 			}