
[2/3] mm/page_alloc: Avoid changing pcp->high decaying when adjusting CONFIG_PCP_BATCH_SCALE_MAX

Message ID 20240707094956.94654-3-laoar.shao@gmail.com
State New
Series mm/page_alloc: Introduce a new sysctl knob vm.pcp_batch_scale_max

Commit Message

Yafang Shao July 7, 2024, 9:49 a.m. UTC
When adjusting the CONFIG_PCP_BATCH_SCALE_MAX configuration from its
default value of 5 to a lower value, such as 0, it's important to ensure
that the pcp->high decaying is not inadvertently slowed down. Similarly,
when increasing CONFIG_PCP_BATCH_SCALE_MAX to a larger value, like 6, we
must avoid inadvertently increasing the number of pages freed in
free_pcppages_bulk() as a result of this change.

So the following improvements are made:
- hardcode the default value of 5 to avoid modifying the pcp->high decay
- refactor free_pcppages_bulk() into multiple steps, with each step
  processing a fixed batch size of pages

Suggested-by: "Huang, Ying" <ying.huang@intel.com>
Signed-off-by: Yafang Shao <laoar.shao@gmail.com>
---
 mm/page_alloc.c | 15 +++++++++++----
 1 file changed, 11 insertions(+), 4 deletions(-)
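
As a worked example of the cap (illustrative numbers, not taken from the
patch): with pcp->count = 10000, pcp->high = 8000, batch = 63 and
high_min = 0, the cap term is 10000 - (63 << 5) = 7984 while the
1/8-decay term is 8000 - (8000 >> 3) = 7000, so max3() picks 7984 and
to_drain = 10000 - 7984 = 2016 = batch << 5. The number of pages freed
per invocation thus stays capped at batch << 5 pages no matter how the
batch scale is tuned.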

Comments

Huang, Ying July 10, 2024, 1:51 a.m. UTC | #1
Yafang Shao <laoar.shao@gmail.com> writes:

> When adjusting the CONFIG_PCP_BATCH_SCALE_MAX configuration from its
> default value of 5 to a lower value, such as 0, it's important to ensure
> that the pcp->high decaying is not inadvertently slowed down. Similarly,
> when increasing CONFIG_PCP_BATCH_SCALE_MAX to a larger value, like 6, we
> must avoid inadvertently increasing the number of pages freed in
> free_pcppages_bulk() as a result of this change.
>
> So the following improvements are made:
> - hardcode the default value of 5 to avoid modifying the pcp->high decay
> - refactor free_pcppages_bulk() into multiple steps, with each step
>   processing a fixed batch size of pages

This is confusing.  You don't change free_pcppages_bulk() itself.  I
guess what you mean is to split the free_pcppages_bulk() call into
multiple batch-sized steps.

>
> Suggested-by: "Huang, Ying" <ying.huang@intel.com>
> Signed-off-by: Yafang Shao <laoar.shao@gmail.com>
> ---
>  mm/page_alloc.c | 15 +++++++++++----
>  1 file changed, 11 insertions(+), 4 deletions(-)
>
> diff --git a/mm/page_alloc.c b/mm/page_alloc.c
> index 8e2f4e1ab4f2..2b76754a48e0 100644
> --- a/mm/page_alloc.c
> +++ b/mm/page_alloc.c
> @@ -2247,7 +2247,7 @@ static int rmqueue_bulk(struct zone *zone, unsigned int order,
>  int decay_pcp_high(struct zone *zone, struct per_cpu_pages *pcp)
>  {
>  	int high_min, to_drain, batch;
> -	int todo = 0;
> +	int todo = 0, count = 0;
>  
>  	high_min = READ_ONCE(pcp->high_min);
>  	batch = READ_ONCE(pcp->batch);
> @@ -2257,18 +2257,25 @@ int decay_pcp_high(struct zone *zone, struct per_cpu_pages *pcp)
>  	 * control latency.  This caps pcp->high decrement too.
>  	 */
>  	if (pcp->high > high_min) {
> -		pcp->high = max3(pcp->count - (batch << CONFIG_PCP_BATCH_SCALE_MAX),
> +		/* When tuning the pcp batch scale value, we want to ensure that
> +		 * the pcp->high decay rate is not slowed down. Therefore, we
> +		 * hard-code the historical default scale value of 5 here to
> +		 * prevent any unintended effects.
> +		 */

This is a good description of the history.  But, in the resulting code,
it's not easy for people to connect the code with the pcp batch scale
directly.  How about something like the following:

We will decay 1/8 of pcp->high each time in general, so that the idle
PCP pages can be returned to the buddy system in a timely manner.  To
control the max latency of the decay, we also constrain the number of
pages freed each time.
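
In the code, that might land as something like this comment block (a
guess at the final wording, following kernel multi-line comment style):

	/*
	 * Decay pcp->high by 1/8 each time in general, so that idle PCP
	 * pages can be returned to the buddy system in a timely manner.
	 * To control the max latency of the decay, also cap the number
	 * of pages freed in each call.
	 */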

> +		pcp->high = max3(pcp->count - (batch << 5),
>  				 pcp->high - (pcp->high >> 3), high_min);
>  		if (pcp->high > high_min)
>  			todo++;
>  	}
>  
>  	to_drain = pcp->count - pcp->high;
> -	if (to_drain > 0) {
> +	while (count < to_drain) {
>  		spin_lock(&pcp->lock);
> -		free_pcppages_bulk(zone, to_drain, pcp, 0);
> +		free_pcppages_bulk(zone, batch, pcp, 0);

"to_drain - count" may < batch.

>  		spin_unlock(&pcp->lock);
> +		count += batch;
>  		todo++;
> +		cond_resched();
>  	}
>  
>  	return todo;

--
Best Regards,
Huang, Ying
Yafang Shao July 10, 2024, 2:07 a.m. UTC | #2
On Wed, Jul 10, 2024 at 9:53 AM Huang, Ying <ying.huang@intel.com> wrote:
>
> Yafang Shao <laoar.shao@gmail.com> writes:
>
> > When adjusting the CONFIG_PCP_BATCH_SCALE_MAX configuration from its
> > default value of 5 to a lower value, such as 0, it's important to ensure
> > that the pcp->high decaying is not inadvertently slowed down. Similarly,
> > when increasing CONFIG_PCP_BATCH_SCALE_MAX to a larger value, like 6, we
> > must avoid inadvertently increasing the number of pages freed in
> > free_pcppages_bulk() as a result of this change.
> >
> > So the following improvements are made:
> > - hardcode the default value of 5 to avoid modifying the pcp->high decay
> > - refactor free_pcppages_bulk() into multiple steps, with each step
> >   processing a fixed batch size of pages
>
> This is confusing.  You don't change free_pcppages_bulk() itself.  I
> guess what you mean is to split the free_pcppages_bulk() call into
> multiple batch-sized steps.

will change it.

>
> >
> > Suggested-by: "Huang, Ying" <ying.huang@intel.com>
> > Signed-off-by: Yafang Shao <laoar.shao@gmail.com>
> > ---
> >  mm/page_alloc.c | 15 +++++++++++----
> >  1 file changed, 11 insertions(+), 4 deletions(-)
> >
> > diff --git a/mm/page_alloc.c b/mm/page_alloc.c
> > index 8e2f4e1ab4f2..2b76754a48e0 100644
> > --- a/mm/page_alloc.c
> > +++ b/mm/page_alloc.c
> > @@ -2247,7 +2247,7 @@ static int rmqueue_bulk(struct zone *zone, unsigned int order,
> >  int decay_pcp_high(struct zone *zone, struct per_cpu_pages *pcp)
> >  {
> >       int high_min, to_drain, batch;
> > -     int todo = 0;
> > +     int todo = 0, count = 0;
> >
> >       high_min = READ_ONCE(pcp->high_min);
> >       batch = READ_ONCE(pcp->batch);
> > @@ -2257,18 +2257,25 @@ int decay_pcp_high(struct zone *zone, struct per_cpu_pages *pcp)
> >        * control latency.  This caps pcp->high decrement too.
> >        */
> >       if (pcp->high > high_min) {
> > -             pcp->high = max3(pcp->count - (batch << CONFIG_PCP_BATCH_SCALE_MAX),
> > +             /* When tuning the pcp batch scale value, we want to ensure that
> > +              * the pcp->high decay rate is not slowed down. Therefore, we
> > +              * hard-code the historical default scale value of 5 here to
> > +              * prevent any unintended effects.
> > +              */
>
> This is a good description of the history.  But, in the resulting code,
> it's not easy for people to connect the code with the pcp batch scale
> directly.  How about something like the following:
>
> We will decay 1/8 of pcp->high each time in general, so that the idle
> PCP pages can be returned to the buddy system in a timely manner.  To
> control the max latency of the decay, we also constrain the number of
> pages freed each time.

Thanks for your suggestion.
will change it.

>
> > +             pcp->high = max3(pcp->count - (batch << 5),
> >                                pcp->high - (pcp->high >> 3), high_min);
> >               if (pcp->high > high_min)
> >                       todo++;
> >       }
> >
> >       to_drain = pcp->count - pcp->high;
> > -     if (to_drain > 0) {
> > +     while (count < to_drain) {
> >               spin_lock(&pcp->lock);
> > -             free_pcppages_bulk(zone, to_drain, pcp, 0);
> > +             free_pcppages_bulk(zone, batch, pcp, 0);
>
> "to_drain - count" may < batch.

Nice catch. will fix it.
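
Something like the following untested sketch, clamping the last chunk
with min(), is what I have in mind:

	to_drain = pcp->count - pcp->high;
	while (count < to_drain) {
		/* Last chunk may be smaller than one batch. */
		int n = min(to_drain - count, batch);

		spin_lock(&pcp->lock);
		free_pcppages_bulk(zone, n, pcp, 0);
		spin_unlock(&pcp->lock);
		count += n;
		todo++;
		cond_resched();
	}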

>
> >               spin_unlock(&pcp->lock);
> > +             count += batch;
> >               todo++;
> > +             cond_resched();
> >       }
> >
> >       return todo;
>
> --
> Best Regards,
> Huang, Ying

Patch

diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 8e2f4e1ab4f2..2b76754a48e0 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -2247,7 +2247,7 @@ static int rmqueue_bulk(struct zone *zone, unsigned int order,
 int decay_pcp_high(struct zone *zone, struct per_cpu_pages *pcp)
 {
 	int high_min, to_drain, batch;
-	int todo = 0;
+	int todo = 0, count = 0;
 
 	high_min = READ_ONCE(pcp->high_min);
 	batch = READ_ONCE(pcp->batch);
@@ -2257,18 +2257,25 @@ int decay_pcp_high(struct zone *zone, struct per_cpu_pages *pcp)
 	 * control latency.  This caps pcp->high decrement too.
 	 */
 	if (pcp->high > high_min) {
-		pcp->high = max3(pcp->count - (batch << CONFIG_PCP_BATCH_SCALE_MAX),
+		/* When tuning the pcp batch scale value, we want to ensure that
+		 * the pcp->high decay rate is not slowed down. Therefore, we
+		 * hard-code the historical default scale value of 5 here to
+		 * prevent any unintended effects.
+		 */
+		pcp->high = max3(pcp->count - (batch << 5),
 				 pcp->high - (pcp->high >> 3), high_min);
 		if (pcp->high > high_min)
 			todo++;
 	}
 
 	to_drain = pcp->count - pcp->high;
-	if (to_drain > 0) {
+	while (count < to_drain) {
 		spin_lock(&pcp->lock);
-		free_pcppages_bulk(zone, to_drain, pcp, 0);
+		free_pcppages_bulk(zone, batch, pcp, 0);
 		spin_unlock(&pcp->lock);
+		count += batch;
 		todo++;
+		cond_resched();
 	}
 
 	return todo;