diff mbox series

[V2,1/3] sched/numa: Apply the scan delay to every vma instead of tasks

Message ID 1aebc55030925998a3df3cafb79c5cd28b199ea8.1675159422.git.raghavendra.kt@amd.com (mailing list archive)
State New
Headers show
Series sched/numa: Enhance vma scanning | expand

Commit Message

Raghavendra K T Feb. 1, 2023, 8:02 a.m. UTC
From: Mel Gorman <mgorman@techsingularity.net>

 Avoid scanning new or very short-lived VMAs.

(Raghavendra: Add initialization in vm_area_dup())

Signed-off-by: Mel Gorman <mgorman@techsingularity.net>
Signed-off-by: Raghavendra K T <raghavendra.kt@amd.com>
---
 include/linux/mm.h       |  9 +++++++++
 include/linux/mm_types.h |  7 +++++++
 kernel/fork.c            |  2 ++
 kernel/sched/fair.c      | 17 +++++++++++++++++
 4 files changed, 35 insertions(+)

Comments

Peter Zijlstra Feb. 3, 2023, 10:24 a.m. UTC | #1
On Wed, Feb 01, 2023 at 01:32:20PM +0530, Raghavendra K T wrote:
> From: Mel Gorman <mgorman@techsingularity.net>
> 
>  Avoid scanning new or very short-lived VMAs.
> 
> (Raghavendra: Add initialization in vm_area_dup())

Given this is a performance centric patch -- some sort of qualification
/ justification would be much appreciated.

Also, perhaps explain the rationale for the actual heuristics chosen.

> Signed-off-by: Mel Gorman <mgorman@techsingularity.net>
> Signed-off-by: Raghavendra K T <raghavendra.kt@amd.com>
> ---
>  include/linux/mm.h       |  9 +++++++++
>  include/linux/mm_types.h |  7 +++++++
>  kernel/fork.c            |  2 ++
>  kernel/sched/fair.c      | 17 +++++++++++++++++
>  4 files changed, 35 insertions(+)
> 
> diff --git a/include/linux/mm.h b/include/linux/mm.h
> index 974ccca609d2..74d9df1d8982 100644
> --- a/include/linux/mm.h
> +++ b/include/linux/mm.h
> @@ -611,6 +611,14 @@ struct vm_operations_struct {
>  					  unsigned long addr);
>  };
>  
> +#ifdef CONFIG_NUMA_BALANCING
> +#define vma_numab_init(vma) do { (vma)->numab = NULL; } while (0)
> +#define vma_numab_free(vma) do { kfree((vma)->numab); } while (0)
> +#else
> +static inline void vma_numab_init(struct vm_area_struct *vma) {}
> +static inline void vma_numab_free(struct vm_area_struct *vma) {}
> +#endif /* CONFIG_NUMA_BALANCING */

I'm tripping over the inconsistency of macros and functions here. I'd
suggest making both cases functions.


> diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
> index 500e536796ca..e84f95a77321 100644
> --- a/include/linux/mm_types.h
> +++ b/include/linux/mm_types.h
> @@ -435,6 +435,10 @@ struct anon_vma_name {
>  	char name[];
>  };
>  
> +struct vma_numab {
> +	unsigned long next_scan;
> +};

I'm not sure what a numab is; contraction of new-kebab, something else?

While I appreciate short names, they'd ideally also make sense. If we
cannot come up with a better one, perhaps elucidate the reader with a
comment.

> +
>  /*
>   * This struct describes a virtual memory area. There is one of these
>   * per VM-area/task. A VM area is any part of the process virtual memory
> @@ -504,6 +508,9 @@ struct vm_area_struct {

> diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
> index e4a0b8bd941c..060b241ce3c5 100644
> --- a/kernel/sched/fair.c
> +++ b/kernel/sched/fair.c
> @@ -3015,6 +3015,23 @@ static void task_numa_work(struct callback_head *work)
>  		if (!vma_is_accessible(vma))
>  			continue;
>  
> +		/* Initialise new per-VMA NUMAB state. */
> +		if (!vma->numab) {
> +			vma->numab = kzalloc(sizeof(struct vma_numab), GFP_KERNEL);
> +			if (!vma->numab)
> +				continue;
> +
> +			vma->numab->next_scan = now +
> +				msecs_to_jiffies(sysctl_numa_balancing_scan_delay);
> +		}
> +
> +		/*
> +		 * After the first scan is complete, delay the balancing scan
> +		 * for new VMAs.
> +		 */
> +		if (mm->numa_scan_seq && time_before(jiffies, vma->numab->next_scan))
> +			continue;

I think I sorta see why, but I'm thinking it would be good to include
more of the why in that comment.
Raghavendra K T Feb. 4, 2023, 5:19 p.m. UTC | #2
On 2/3/2023 3:54 PM, Peter Zijlstra wrote:
> On Wed, Feb 01, 2023 at 01:32:20PM +0530, Raghavendra K T wrote:
>> From: Mel Gorman <mgorman@techsingularity.net>
>>
>>   Avoid scanning new or very short-lived VMAs.
>>
>> (Raghavendra: Add initialization in vm_area_dup())
> 
> Given this is a performance centric patch -- some sort of qualification
> / justification would be much appreciated.
> 

Thank you Peter for the review.
Sure, I will add more detailed results to the cover letter and a summary to
the patch commit message.

> Also, perhaps explain the rationale for the actual heuristics chosen.
> 

Sure, I will add more detail in V3.

>> Signed-off-by: Mel Gorman <mgorman@techsingularity.net>
>> Signed-off-by: Raghavendra K T <raghavendra.kt@amd.com>
>> ---
>>   include/linux/mm.h       |  9 +++++++++
>>   include/linux/mm_types.h |  7 +++++++
>>   kernel/fork.c            |  2 ++
>>   kernel/sched/fair.c      | 17 +++++++++++++++++
>>   4 files changed, 35 insertions(+)
>>
>> diff --git a/include/linux/mm.h b/include/linux/mm.h
>> index 974ccca609d2..74d9df1d8982 100644
>> --- a/include/linux/mm.h
>> +++ b/include/linux/mm.h
>> @@ -611,6 +611,14 @@ struct vm_operations_struct {
>>   					  unsigned long addr);
>>   };
>>   
>> +#ifdef CONFIG_NUMA_BALANCING
>> +#define vma_numab_init(vma) do { (vma)->numab = NULL; } while (0)
>> +#define vma_numab_free(vma) do { kfree((vma)->numab); } while (0)
>> +#else
>> +static inline void vma_numab_init(struct vm_area_struct *vma) {}
>> +static inline void vma_numab_free(struct vm_area_struct *vma) {}
>> +#endif /* CONFIG_NUMA_BALANCING */
> 
> I'm tripping over the inconsistency of macros and functions here. I'd
> suggest making both cases functions.
> 
> 

Sure, will do that.

>> diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
>> index 500e536796ca..e84f95a77321 100644
>> --- a/include/linux/mm_types.h
>> +++ b/include/linux/mm_types.h
>> @@ -435,6 +435,10 @@ struct anon_vma_name {
>>   	char name[];
>>   };
>>   
>> +struct vma_numab {
>> +	unsigned long next_scan;
>> +};
> 
> I'm not sure what a numab is; contraction of new-kebab, something else?
> 
> While I appreciate short names, they'd ideally also make sense. If we
> cannot come up with a better one, perhaps elucidate the reader with a
> comment.

Agreed. How about vma_numab_state or vma_numab_info, as an abbreviation for
vma_numa_balancing_state / vma_numa_balancing_info?

> 
>> +
>>   /*
>>    * This struct describes a virtual memory area. There is one of these
>>    * per VM-area/task. A VM area is any part of the process virtual memory
>> @@ -504,6 +508,9 @@ struct vm_area_struct {
> 
>> diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
>> index e4a0b8bd941c..060b241ce3c5 100644
>> --- a/kernel/sched/fair.c
>> +++ b/kernel/sched/fair.c
>> @@ -3015,6 +3015,23 @@ static void task_numa_work(struct callback_head *work)
>>   		if (!vma_is_accessible(vma))
>>   			continue;
>>   
>> +		/* Initialise new per-VMA NUMAB state. */
>> +		if (!vma->numab) {
>> +			vma->numab = kzalloc(sizeof(struct vma_numab), GFP_KERNEL);
>> +			if (!vma->numab)
>> +				continue;
>> +
>> +			vma->numab->next_scan = now +
>> +				msecs_to_jiffies(sysctl_numa_balancing_scan_delay);
>> +		}
>> +
>> +		/*
>> +		 * After the first scan is complete, delay the balancing scan
>> +		 * for new VMAs.
>> +		 */
>> +		if (mm->numa_scan_seq && time_before(jiffies, vma->numab->next_scan))
>> +			continue;
> 
> I think I sorta see why, but I'm thinking it would be good to include
> more of the why in that comment.

Sure. Will add something along the lines of: "scanning the VMAs of
short-lived tasks adds more overhead than benefit...."
diff mbox series

Patch

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 974ccca609d2..74d9df1d8982 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -611,6 +611,14 @@  struct vm_operations_struct {
 					  unsigned long addr);
 };
 
+#ifdef CONFIG_NUMA_BALANCING
+#define vma_numab_init(vma) do { (vma)->numab = NULL; } while (0)
+#define vma_numab_free(vma) do { kfree((vma)->numab); } while (0)
+#else
+static inline void vma_numab_init(struct vm_area_struct *vma) {}
+static inline void vma_numab_free(struct vm_area_struct *vma) {}
+#endif /* CONFIG_NUMA_BALANCING */
+
 static inline void vma_init(struct vm_area_struct *vma, struct mm_struct *mm)
 {
 	static const struct vm_operations_struct dummy_vm_ops = {};
@@ -619,6 +627,7 @@  static inline void vma_init(struct vm_area_struct *vma, struct mm_struct *mm)
 	vma->vm_mm = mm;
 	vma->vm_ops = &dummy_vm_ops;
 	INIT_LIST_HEAD(&vma->anon_vma_chain);
+	vma_numab_init(vma);
 }
 
 static inline void vma_set_anonymous(struct vm_area_struct *vma)
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 500e536796ca..e84f95a77321 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -435,6 +435,10 @@  struct anon_vma_name {
 	char name[];
 };
 
+struct vma_numab {
+	unsigned long next_scan;
+};
+
 /*
  * This struct describes a virtual memory area. There is one of these
  * per VM-area/task. A VM area is any part of the process virtual memory
@@ -504,6 +508,9 @@  struct vm_area_struct {
 #endif
 #ifdef CONFIG_NUMA
 	struct mempolicy *vm_policy;	/* NUMA policy for the VMA */
+#endif
+#ifdef CONFIG_NUMA_BALANCING
+	struct vma_numab *numab;	/* NUMA Balancing state */
 #endif
 	struct vm_userfaultfd_ctx vm_userfaultfd_ctx;
 } __randomize_layout;
diff --git a/kernel/fork.c b/kernel/fork.c
index 08969f5aa38d..ac6f0477cf6e 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -474,6 +474,7 @@  struct vm_area_struct *vm_area_dup(struct vm_area_struct *orig)
 		 */
 		*new = data_race(*orig);
 		INIT_LIST_HEAD(&new->anon_vma_chain);
+		vma_numab_init(new);
 		dup_anon_vma_name(orig, new);
 	}
 	return new;
@@ -481,6 +482,7 @@  struct vm_area_struct *vm_area_dup(struct vm_area_struct *orig)
 
 void vm_area_free(struct vm_area_struct *vma)
 {
+	vma_numab_free(vma);
 	free_anon_vma_name(vma);
 	kmem_cache_free(vm_area_cachep, vma);
 }
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index e4a0b8bd941c..060b241ce3c5 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -3015,6 +3015,23 @@  static void task_numa_work(struct callback_head *work)
 		if (!vma_is_accessible(vma))
 			continue;
 
+		/* Initialise new per-VMA NUMAB state. */
+		if (!vma->numab) {
+			vma->numab = kzalloc(sizeof(struct vma_numab), GFP_KERNEL);
+			if (!vma->numab)
+				continue;
+
+			vma->numab->next_scan = now +
+				msecs_to_jiffies(sysctl_numa_balancing_scan_delay);
+		}
+
+		/*
+		 * After the first scan is complete, delay the balancing scan
+		 * for new VMAs.
+		 */
+		if (mm->numa_scan_seq && time_before(jiffies, vma->numab->next_scan))
+			continue;
+
 		do {
 			start = max(start, vma->vm_start);
 			end = ALIGN(start + (pages << PAGE_SHIFT), HPAGE_SIZE);