diff mbox series

[v1,2/4] mm: Clean up apply_to_pte_range() routine

Message ID 93102722541b1daf541fce9fb316a1a2614d8c86.1744037648.git.agordeev@linux.ibm.com (mailing list archive)
State New
Headers show
Series mm: Fix apply_to_pte_range() vs lazy MMU mode | expand

Commit Message

Alexander Gordeev April 7, 2025, 3:11 p.m. UTC
Swap the nesting order of the 'create' and 'mm == &init_mm' checks, and
move the page table mask modification out of the atomic context.

Signed-off-by: Alexander Gordeev <agordeev@linux.ibm.com>
---
 mm/memory.c | 28 +++++++++++++++++-----------
 1 file changed, 17 insertions(+), 11 deletions(-)

Comments

Nicholas Piggin April 11, 2025, 6:46 a.m. UTC | #1
On Tue Apr 8, 2025 at 1:11 AM AEST, Alexander Gordeev wrote:
> Reverse 'create' vs 'mm == &init_mm' conditions and move
> page table mask modification out of the atomic context.
>
> Signed-off-by: Alexander Gordeev <agordeev@linux.ibm.com>
> ---
>  mm/memory.c | 28 +++++++++++++++++-----------
>  1 file changed, 17 insertions(+), 11 deletions(-)
>
> diff --git a/mm/memory.c b/mm/memory.c
> index 2d8c265fc7d6..f0201c8ec1ce 100644
> --- a/mm/memory.c
> +++ b/mm/memory.c
> @@ -2915,24 +2915,28 @@ static int apply_to_pte_range(struct mm_struct *mm, pmd_t *pmd,
>  				     pte_fn_t fn, void *data, bool create,
>  				     pgtbl_mod_mask *mask)
>  {
> +	int err = create ? -ENOMEM : -EINVAL;

Could you make this a new variable instead of reusing
existing err? 'const int pte_err' or something?

>  	pte_t *pte, *mapped_pte;
> -	int err = 0;
>  	spinlock_t *ptl;
>  
> -	if (create) {
> -		mapped_pte = pte = (mm == &init_mm) ?
> -			pte_alloc_kernel_track(pmd, addr, mask) :
> -			pte_alloc_map_lock(mm, pmd, addr, &ptl);
> +	if (mm == &init_mm) {
> +		if (create)
> +			pte = pte_alloc_kernel_track(pmd, addr, mask);
> +		else
> +			pte = pte_offset_kernel(pmd, addr);
>  		if (!pte)
> -			return -ENOMEM;
> +			return err;
>  	} else {
> -		mapped_pte = pte = (mm == &init_mm) ?
> -			pte_offset_kernel(pmd, addr) :
> -			pte_offset_map_lock(mm, pmd, addr, &ptl);
> +		if (create)
> +			pte = pte_alloc_map_lock(mm, pmd, addr, &ptl);
> +		else
> +			pte = pte_offset_map_lock(mm, pmd, addr, &ptl);
>  		if (!pte)
> -			return -EINVAL;
> +			return err;
> +		mapped_pte = pte;
>  	}
>  
> +	err = 0;
>  	arch_enter_lazy_mmu_mode();
>  
>  	if (fn) {
> @@ -2944,12 +2948,14 @@ static int apply_to_pte_range(struct mm_struct *mm, pmd_t *pmd,
>  			}
>  		} while (addr += PAGE_SIZE, addr != end);
>  	}
> -	*mask |= PGTBL_PTE_MODIFIED;
>  
>  	arch_leave_lazy_mmu_mode();
>  
>  	if (mm != &init_mm)
>  		pte_unmap_unlock(mapped_pte, ptl);
> +
> +	*mask |= PGTBL_PTE_MODIFIED;

Is this done just because we might as well, i.e. to do less work in
the critical section?

Reviewed-by: Nicholas Piggin <npiggin@gmail.com>

> +
>  	return err;
>  }
>
Alexander Gordeev April 14, 2025, 2:17 p.m. UTC | #2
On Fri, Apr 11, 2025 at 04:46:58PM +1000, Nicholas Piggin wrote:
> On Tue Apr 8, 2025 at 1:11 AM AEST, Alexander Gordeev wrote:
> > Reverse 'create' vs 'mm == &init_mm' conditions and move
> > page table mask modification out of the atomic context.
> >
> > Signed-off-by: Alexander Gordeev <agordeev@linux.ibm.com>
> > ---
> >  mm/memory.c | 28 +++++++++++++++++-----------
> >  1 file changed, 17 insertions(+), 11 deletions(-)
> >
> > diff --git a/mm/memory.c b/mm/memory.c
> > index 2d8c265fc7d6..f0201c8ec1ce 100644
> > --- a/mm/memory.c
> > +++ b/mm/memory.c
> > @@ -2915,24 +2915,28 @@ static int apply_to_pte_range(struct mm_struct *mm, pmd_t *pmd,
> >  				     pte_fn_t fn, void *data, bool create,
> >  				     pgtbl_mod_mask *mask)
> >  {
> > +	int err = create ? -ENOMEM : -EINVAL;
> 
> Could you make this a new variable instead of reusing
> existing err? 'const int pte_err' or something?

Will do, when/if I repost the series.

...

> > @@ -2944,12 +2948,14 @@ static int apply_to_pte_range(struct mm_struct *mm, pmd_t *pmd,
> >  			}
> >  		} while (addr += PAGE_SIZE, addr != end);
> >  	}
> > -	*mask |= PGTBL_PTE_MODIFIED;
> >  
> >  	arch_leave_lazy_mmu_mode();
> >  
> >  	if (mm != &init_mm)
> >  		pte_unmap_unlock(mapped_pte, ptl);
> > +
> > +	*mask |= PGTBL_PTE_MODIFIED;
> 
> This is done just because we might as well? Less work in critical
> section?

Yes.

Thanks!
diff mbox series

Patch

diff --git a/mm/memory.c b/mm/memory.c
index 2d8c265fc7d6..f0201c8ec1ce 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -2915,24 +2915,28 @@  static int apply_to_pte_range(struct mm_struct *mm, pmd_t *pmd,
 				     pte_fn_t fn, void *data, bool create,
 				     pgtbl_mod_mask *mask)
 {
+	int err = create ? -ENOMEM : -EINVAL;
 	pte_t *pte, *mapped_pte;
-	int err = 0;
 	spinlock_t *ptl;
 
-	if (create) {
-		mapped_pte = pte = (mm == &init_mm) ?
-			pte_alloc_kernel_track(pmd, addr, mask) :
-			pte_alloc_map_lock(mm, pmd, addr, &ptl);
+	if (mm == &init_mm) {
+		if (create)
+			pte = pte_alloc_kernel_track(pmd, addr, mask);
+		else
+			pte = pte_offset_kernel(pmd, addr);
 		if (!pte)
-			return -ENOMEM;
+			return err;
 	} else {
-		mapped_pte = pte = (mm == &init_mm) ?
-			pte_offset_kernel(pmd, addr) :
-			pte_offset_map_lock(mm, pmd, addr, &ptl);
+		if (create)
+			pte = pte_alloc_map_lock(mm, pmd, addr, &ptl);
+		else
+			pte = pte_offset_map_lock(mm, pmd, addr, &ptl);
 		if (!pte)
-			return -EINVAL;
+			return err;
+		mapped_pte = pte;
 	}
 
+	err = 0;
 	arch_enter_lazy_mmu_mode();
 
 	if (fn) {
@@ -2944,12 +2948,14 @@  static int apply_to_pte_range(struct mm_struct *mm, pmd_t *pmd,
 			}
 		} while (addr += PAGE_SIZE, addr != end);
 	}
-	*mask |= PGTBL_PTE_MODIFIED;
 
 	arch_leave_lazy_mmu_mode();
 
 	if (mm != &init_mm)
 		pte_unmap_unlock(mapped_pte, ptl);
+
+	*mask |= PGTBL_PTE_MODIFIED;
+
 	return err;
 }