diff mbox series

[5/9] mm/madvise: define and use madvise_behavior struct for madvise_do_behavior()

Message ID 20250310172318.653630-6-sj@kernel.org (mailing list archive)
State New
Headers show
Series mm/madvise: batch tlb flushes for MADV_DONTNEED and MADV_FREE | expand

Commit Message

SeongJae Park March 10, 2025, 5:23 p.m. UTC
To implement batched tlb flushes for MADV_DONTNEED[_LOCKED] and
MADV_FREE, an mmu_gather object in addition to the behavior integer need
to be passed to the internal logics.  Using a struct can make it easy
without increasing the number of parameters of all code paths towards
the internal logic.  Define a struct for the purpose and use it on the
code path that starts from madvise_do_behavior() and ends on
madvise_dontneed_free().

Signed-off-by: SeongJae Park <sj@kernel.org>
---
 mm/madvise.c | 36 ++++++++++++++++++++++++------------
 1 file changed, 24 insertions(+), 12 deletions(-)

Comments

Lorenzo Stoakes March 11, 2025, 12:17 p.m. UTC | #1
On Mon, Mar 10, 2025 at 10:23:14AM -0700, SeongJae Park wrote:
> To implement batched tlb flushes for MADV_DONTNEED[_LOCKED] and
> MADV_FREE, an mmu_gather object in addition to the behavior integer need
> to be passed to the internal logics.  Using a struct can make it easy
> without increasing the number of parameters of all code paths towards
> the internal logic.  Define a struct for the purpose and use it on the
> code path that starts from madvise_do_behavior() and ends on
> madvise_dontneed_free().

Oh a helper struct! I like these!

Nitty but...

I wonder if we should just add the the mmu_gather field immediately even if
it isn't used yet?

Also I feel like this patch and 6 should be swapped around, as you are
laying the groundwork here for patch 7 but then doing something unrelated
in 6, unless I'm missing something.

Also maybe add a bit in commit msg about changing the madvise_walk_vmas()
visitor type signature (I wonder if that'd be better as a typedef tbh?)

However, this change looks fine aside from nits (and you know, helper
struct and I'm sold obviously ;) so:

Reviewed-by: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>

>
> Signed-off-by: SeongJae Park <sj@kernel.org>
> ---
>  mm/madvise.c | 36 ++++++++++++++++++++++++------------
>  1 file changed, 24 insertions(+), 12 deletions(-)
>
> diff --git a/mm/madvise.c b/mm/madvise.c
> index 469c25690a0e..ba2a78795207 100644
> --- a/mm/madvise.c
> +++ b/mm/madvise.c
> @@ -890,11 +890,16 @@ static bool madvise_dontneed_free_valid_vma(struct vm_area_struct *vma,
>  	return true;
>  }
>
> +struct madvise_behavior {
> +	int behavior;
> +};
> +
>  static long madvise_dontneed_free(struct vm_area_struct *vma,
>  				  struct vm_area_struct **prev,
>  				  unsigned long start, unsigned long end,
> -				  int behavior)
> +				  struct madvise_behavior *madv_behavior)

Nitty, but not sure about the need for 'madv_' here. I think keeping this as
'behavior' is fine, as the type is very clear.

>  {
> +	int behavior = madv_behavior->behavior;
>  	struct mm_struct *mm = vma->vm_mm;
>
>  	*prev = vma;
> @@ -1249,8 +1254,10 @@ static long madvise_guard_remove(struct vm_area_struct *vma,
>  static int madvise_vma_behavior(struct vm_area_struct *vma,
>  				struct vm_area_struct **prev,
>  				unsigned long start, unsigned long end,
> -				unsigned long behavior)
> +				void *behavior_arg)
>  {
> +	struct madvise_behavior *arg = behavior_arg;
> +	int behavior = arg->behavior;
>  	int error;
>  	struct anon_vma_name *anon_name;
>  	unsigned long new_flags = vma->vm_flags;
> @@ -1270,7 +1277,7 @@ static int madvise_vma_behavior(struct vm_area_struct *vma,
>  	case MADV_FREE:
>  	case MADV_DONTNEED:
>  	case MADV_DONTNEED_LOCKED:
> -		return madvise_dontneed_free(vma, prev, start, end, behavior);
> +		return madvise_dontneed_free(vma, prev, start, end, arg);
>  	case MADV_NORMAL:
>  		new_flags = new_flags & ~VM_RAND_READ & ~VM_SEQ_READ;
>  		break;
> @@ -1487,10 +1494,10 @@ static bool process_madvise_remote_valid(int behavior)
>   */
>  static
>  int madvise_walk_vmas(struct mm_struct *mm, unsigned long start,
> -		      unsigned long end, unsigned long arg,
> +		      unsigned long end, void *arg,
>  		      int (*visit)(struct vm_area_struct *vma,
>  				   struct vm_area_struct **prev, unsigned long start,
> -				   unsigned long end, unsigned long arg))
> +				   unsigned long end, void *arg))
>  {
>  	struct vm_area_struct *vma;
>  	struct vm_area_struct *prev;
> @@ -1548,7 +1555,7 @@ int madvise_walk_vmas(struct mm_struct *mm, unsigned long start,
>  static int madvise_vma_anon_name(struct vm_area_struct *vma,
>  				 struct vm_area_struct **prev,
>  				 unsigned long start, unsigned long end,
> -				 unsigned long anon_name)
> +				 void *anon_name)
>  {
>  	int error;
>
> @@ -1557,7 +1564,7 @@ static int madvise_vma_anon_name(struct vm_area_struct *vma,
>  		return -EBADF;
>
>  	error = madvise_update_vma(vma, prev, start, end, vma->vm_flags,
> -				   (struct anon_vma_name *)anon_name);
> +				   anon_name);
>
>  	/*
>  	 * madvise() returns EAGAIN if kernel resources, such as
> @@ -1589,7 +1596,7 @@ int madvise_set_anon_name(struct mm_struct *mm, unsigned long start,
>  	if (end == start)
>  		return 0;
>
> -	return madvise_walk_vmas(mm, start, end, (unsigned long)anon_name,
> +	return madvise_walk_vmas(mm, start, end, anon_name,
>  				 madvise_vma_anon_name);
>  }
>  #endif /* CONFIG_ANON_VMA_NAME */
> @@ -1673,8 +1680,10 @@ static bool is_madvise_populate(int behavior)
>  }
>
>  static int madvise_do_behavior(struct mm_struct *mm,
> -		unsigned long start, size_t len_in, int behavior)
> +		unsigned long start, size_t len_in,
> +		struct madvise_behavior *madv_behavior)
>  {
> +	int behavior = madv_behavior->behavior;
>  	struct blk_plug plug;
>  	unsigned long end;
>  	int error;
> @@ -1688,7 +1697,7 @@ static int madvise_do_behavior(struct mm_struct *mm,
>  	if (is_madvise_populate(behavior))
>  		error = madvise_populate(mm, start, end, behavior);
>  	else
> -		error = madvise_walk_vmas(mm, start, end, behavior,
> +		error = madvise_walk_vmas(mm, start, end, madv_behavior,
>  					  madvise_vma_behavior);
>  	blk_finish_plug(&plug);
>  	return error;
> @@ -1769,13 +1778,14 @@ static int madvise_do_behavior(struct mm_struct *mm,
>  int do_madvise(struct mm_struct *mm, unsigned long start, size_t len_in, int behavior)
>  {
>  	int error;
> +	struct madvise_behavior madv_behavior = {.behavior = behavior};
>
>  	if (madvise_should_skip(start, len_in, behavior, &error))
>  		return error;
>  	error = madvise_lock(mm, behavior);
>  	if (error)
>  		return error;
> -	error = madvise_do_behavior(mm, start, len_in, behavior);
> +	error = madvise_do_behavior(mm, start, len_in, &madv_behavior);
>  	madvise_unlock(mm, behavior);
>
>  	return error;
> @@ -1792,6 +1802,7 @@ static ssize_t vector_madvise(struct mm_struct *mm, struct iov_iter *iter,
>  {
>  	ssize_t ret = 0;
>  	size_t total_len;
> +	struct madvise_behavior madv_behavior = {.behavior = behavior};
>
>  	total_len = iov_iter_count(iter);
>
> @@ -1807,7 +1818,8 @@ static ssize_t vector_madvise(struct mm_struct *mm, struct iov_iter *iter,
>  		if (madvise_should_skip(start, len_in, behavior, &error))
>  			ret = error;
>  		else
> -			ret = madvise_do_behavior(mm, start, len_in, behavior);
> +			ret = madvise_do_behavior(mm, start, len_in,
> +					&madv_behavior);
>  		/*
>  		 * An madvise operation is attempting to restart the syscall,
>  		 * but we cannot proceed as it would not be correct to repeat
> --
> 2.39.5
diff mbox series

Patch

diff --git a/mm/madvise.c b/mm/madvise.c
index 469c25690a0e..ba2a78795207 100644
--- a/mm/madvise.c
+++ b/mm/madvise.c
@@ -890,11 +890,16 @@  static bool madvise_dontneed_free_valid_vma(struct vm_area_struct *vma,
 	return true;
 }
 
+struct madvise_behavior {
+	int behavior;
+};
+
 static long madvise_dontneed_free(struct vm_area_struct *vma,
 				  struct vm_area_struct **prev,
 				  unsigned long start, unsigned long end,
-				  int behavior)
+				  struct madvise_behavior *madv_behavior)
 {
+	int behavior = madv_behavior->behavior;
 	struct mm_struct *mm = vma->vm_mm;
 
 	*prev = vma;
@@ -1249,8 +1254,10 @@  static long madvise_guard_remove(struct vm_area_struct *vma,
 static int madvise_vma_behavior(struct vm_area_struct *vma,
 				struct vm_area_struct **prev,
 				unsigned long start, unsigned long end,
-				unsigned long behavior)
+				void *behavior_arg)
 {
+	struct madvise_behavior *arg = behavior_arg;
+	int behavior = arg->behavior;
 	int error;
 	struct anon_vma_name *anon_name;
 	unsigned long new_flags = vma->vm_flags;
@@ -1270,7 +1277,7 @@  static int madvise_vma_behavior(struct vm_area_struct *vma,
 	case MADV_FREE:
 	case MADV_DONTNEED:
 	case MADV_DONTNEED_LOCKED:
-		return madvise_dontneed_free(vma, prev, start, end, behavior);
+		return madvise_dontneed_free(vma, prev, start, end, arg);
 	case MADV_NORMAL:
 		new_flags = new_flags & ~VM_RAND_READ & ~VM_SEQ_READ;
 		break;
@@ -1487,10 +1494,10 @@  static bool process_madvise_remote_valid(int behavior)
  */
 static
 int madvise_walk_vmas(struct mm_struct *mm, unsigned long start,
-		      unsigned long end, unsigned long arg,
+		      unsigned long end, void *arg,
 		      int (*visit)(struct vm_area_struct *vma,
 				   struct vm_area_struct **prev, unsigned long start,
-				   unsigned long end, unsigned long arg))
+				   unsigned long end, void *arg))
 {
 	struct vm_area_struct *vma;
 	struct vm_area_struct *prev;
@@ -1548,7 +1555,7 @@  int madvise_walk_vmas(struct mm_struct *mm, unsigned long start,
 static int madvise_vma_anon_name(struct vm_area_struct *vma,
 				 struct vm_area_struct **prev,
 				 unsigned long start, unsigned long end,
-				 unsigned long anon_name)
+				 void *anon_name)
 {
 	int error;
 
@@ -1557,7 +1564,7 @@  static int madvise_vma_anon_name(struct vm_area_struct *vma,
 		return -EBADF;
 
 	error = madvise_update_vma(vma, prev, start, end, vma->vm_flags,
-				   (struct anon_vma_name *)anon_name);
+				   anon_name);
 
 	/*
 	 * madvise() returns EAGAIN if kernel resources, such as
@@ -1589,7 +1596,7 @@  int madvise_set_anon_name(struct mm_struct *mm, unsigned long start,
 	if (end == start)
 		return 0;
 
-	return madvise_walk_vmas(mm, start, end, (unsigned long)anon_name,
+	return madvise_walk_vmas(mm, start, end, anon_name,
 				 madvise_vma_anon_name);
 }
 #endif /* CONFIG_ANON_VMA_NAME */
@@ -1673,8 +1680,10 @@  static bool is_madvise_populate(int behavior)
 }
 
 static int madvise_do_behavior(struct mm_struct *mm,
-		unsigned long start, size_t len_in, int behavior)
+		unsigned long start, size_t len_in,
+		struct madvise_behavior *madv_behavior)
 {
+	int behavior = madv_behavior->behavior;
 	struct blk_plug plug;
 	unsigned long end;
 	int error;
@@ -1688,7 +1697,7 @@  static int madvise_do_behavior(struct mm_struct *mm,
 	if (is_madvise_populate(behavior))
 		error = madvise_populate(mm, start, end, behavior);
 	else
-		error = madvise_walk_vmas(mm, start, end, behavior,
+		error = madvise_walk_vmas(mm, start, end, madv_behavior,
 					  madvise_vma_behavior);
 	blk_finish_plug(&plug);
 	return error;
@@ -1769,13 +1778,14 @@  static int madvise_do_behavior(struct mm_struct *mm,
 int do_madvise(struct mm_struct *mm, unsigned long start, size_t len_in, int behavior)
 {
 	int error;
+	struct madvise_behavior madv_behavior = {.behavior = behavior};
 
 	if (madvise_should_skip(start, len_in, behavior, &error))
 		return error;
 	error = madvise_lock(mm, behavior);
 	if (error)
 		return error;
-	error = madvise_do_behavior(mm, start, len_in, behavior);
+	error = madvise_do_behavior(mm, start, len_in, &madv_behavior);
 	madvise_unlock(mm, behavior);
 
 	return error;
@@ -1792,6 +1802,7 @@  static ssize_t vector_madvise(struct mm_struct *mm, struct iov_iter *iter,
 {
 	ssize_t ret = 0;
 	size_t total_len;
+	struct madvise_behavior madv_behavior = {.behavior = behavior};
 
 	total_len = iov_iter_count(iter);
 
@@ -1807,7 +1818,8 @@  static ssize_t vector_madvise(struct mm_struct *mm, struct iov_iter *iter,
 		if (madvise_should_skip(start, len_in, behavior, &error))
 			ret = error;
 		else
-			ret = madvise_do_behavior(mm, start, len_in, behavior);
+			ret = madvise_do_behavior(mm, start, len_in,
+					&madv_behavior);
 		/*
 		 * An madvise operation is attempting to restart the syscall,
 		 * but we cannot proceed as it would not be correct to repeat