diff mbox series

[22/25] mm, compaction: Sample pageblocks for free pages

Message ID 20190104125011.16071-23-mgorman@techsingularity.net (mailing list archive)
State New, archived
Headers show
Series Increase success rates and reduce latency of compaction v2 | expand

Commit Message

Mel Gorman Jan. 4, 2019, 12:50 p.m. UTC
Once fast searching finishes, there is a possibility that the linear
scanner is scanning full blocks found by the fast scanner earlier. This
patch uses an adaptive stride to sample pageblocks for free pages. The
more consecutive full pageblocks encountered, the larger the stride until
a pageblock with free pages is found. The scanners might meet slightly
sooner but it is an acceptable risk given that the search of the free
lists may still encounter the pages and adjust the cached PFN of the free
scanner accordingly.

In terms of latency and success rates, the impact is not obvious but the
free scan rate is reduced by 87% on a 1-socket machine and 92% on a
2-socket machine. It's also the first time in the series where the number
of pages scanned by the migration scanner is greater than the free scanner
due to the increased search efficiency.

Signed-off-by: Mel Gorman <mgorman@techsingularity.net>
---
 mm/compaction.c | 27 +++++++++++++++++++++------
 1 file changed, 21 insertions(+), 6 deletions(-)

Comments

Vlastimil Babka Jan. 18, 2019, 10:38 a.m. UTC | #1
On 1/4/19 1:50 PM, Mel Gorman wrote:
> Once fast searching finishes, there is a possibility that the linear
> scanner is scanning full blocks found by the fast scanner earlier. This
> patch uses an adaptive stride to sample pageblocks for free pages. The
> more consecutive full pageblocks encountered, the larger the stride until
> a pageblock with free pages is found. The scanners might meet slightly
> sooner but it is an acceptable risk given that the search of the free
> lists may still encounter the pages and adjust the cached PFN of the free
> scanner accordingly.
> 
> In terms of latency and success rates, the impact is not obvious but the
> free scan rate is reduced by 87% on a 1-socket machine and 92% on a
> 2-socket machine. It's also the first time in the series where the number
> of pages scanned by the migration scanner is greater than the free scanner
> due to the increased search efficiency.
> 
> Signed-off-by: Mel Gorman <mgorman@techsingularity.net>

OK, I admit this is quite counterintuitive to me. I would have expected
this change to result in meeting scanners much more sooner, while
missing many free pages (especially when starting with stride 32 for
async compaction). I would have expected that pageblocks that we already
depleted are marked for skipping, while freeing pages by reclaim
scatters them randomly in the remaining ones, and this will then miss
many. But you have benchmarking data so I won't object :)

> ---
>  mm/compaction.c | 27 +++++++++++++++++++++------
>  1 file changed, 21 insertions(+), 6 deletions(-)
> 
> diff --git a/mm/compaction.c b/mm/compaction.c
> index 652e249168b1..cc532e81a7b7 100644
> --- a/mm/compaction.c
> +++ b/mm/compaction.c
> @@ -441,6 +441,7 @@ static unsigned long isolate_freepages_block(struct compact_control *cc,
>  				unsigned long *start_pfn,
>  				unsigned long end_pfn,
>  				struct list_head *freelist,
> +				unsigned int stride,
>  				bool strict)
>  {
>  	int nr_scanned = 0, total_isolated = 0;
> @@ -450,10 +451,14 @@ static unsigned long isolate_freepages_block(struct compact_control *cc,
>  	unsigned long blockpfn = *start_pfn;
>  	unsigned int order;
>  
> +	/* Strict mode is for isolation, speed is secondary */
> +	if (strict)
> +		stride = 1;

Why not just call this from strict context with stride 1, instead of
passing 0 and then changing it to 1.

> +
>  	cursor = pfn_to_page(blockpfn);
>  
>  	/* Isolate free pages. */
> -	for (; blockpfn < end_pfn; blockpfn++, cursor++) {
> +	for (; blockpfn < end_pfn; blockpfn += stride, cursor += stride) {
>  		int isolated;
>  		struct page *page = cursor;
>  
> @@ -624,7 +629,7 @@ isolate_freepages_range(struct compact_control *cc,
>  			break;
>  
>  		isolated = isolate_freepages_block(cc, &isolate_start_pfn,
> -						block_end_pfn, &freelist, true);
> +					block_end_pfn, &freelist, 0, true);
>  
>  		/*
>  		 * In strict mode, isolate_freepages_block() returns 0 if
> @@ -1139,7 +1144,7 @@ fast_isolate_around(struct compact_control *cc, unsigned long pfn, unsigned long
>  
>  	/* Scan before */
>  	if (start_pfn != pfn) {
> -		isolate_freepages_block(cc, &start_pfn, pfn, &cc->freepages, false);
> +		isolate_freepages_block(cc, &start_pfn, pfn, &cc->freepages, 1, false);
>  		if (cc->nr_freepages >= cc->nr_migratepages)
>  			return;
>  	}
> @@ -1147,7 +1152,7 @@ fast_isolate_around(struct compact_control *cc, unsigned long pfn, unsigned long
>  	/* Scan after */
>  	start_pfn = pfn + nr_isolated;
>  	if (start_pfn != end_pfn)
> -		isolate_freepages_block(cc, &start_pfn, end_pfn, &cc->freepages, false);
> +		isolate_freepages_block(cc, &start_pfn, end_pfn, &cc->freepages, 1, false);
>  
>  	/* Skip this pageblock in the future as it's full or nearly full */
>  	if (cc->nr_freepages < cc->nr_migratepages)
> @@ -1333,7 +1338,9 @@ static void isolate_freepages(struct compact_control *cc)
>  	unsigned long isolate_start_pfn; /* exact pfn we start at */
>  	unsigned long block_end_pfn;	/* end of current pageblock */
>  	unsigned long low_pfn;	     /* lowest pfn scanner is able to scan */
> +	unsigned long nr_isolated;
>  	struct list_head *freelist = &cc->freepages;
> +	unsigned int stride;
>  
>  	/* Try a small search of the free lists for a candidate */
>  	isolate_start_pfn = fast_isolate_freepages(cc);
> @@ -1356,6 +1363,7 @@ static void isolate_freepages(struct compact_control *cc)
>  	block_end_pfn = min(block_start_pfn + pageblock_nr_pages,
>  						zone_end_pfn(zone));
>  	low_pfn = pageblock_end_pfn(cc->migrate_pfn);
> +	stride = cc->mode == MIGRATE_ASYNC ? COMPACT_CLUSTER_MAX : 1;
>  
>  	/*
>  	 * Isolate free pages until enough are available to migrate the
> @@ -1387,8 +1395,8 @@ static void isolate_freepages(struct compact_control *cc)
>  			continue;
>  
>  		/* Found a block suitable for isolating free pages from. */
> -		isolate_freepages_block(cc, &isolate_start_pfn, block_end_pfn,
> -					freelist, false);
> +		nr_isolated = isolate_freepages_block(cc, &isolate_start_pfn,
> +					block_end_pfn, freelist, stride, false);
>  
>  		/* Update the skip hint if the full pageblock was scanned */
>  		if (isolate_start_pfn == block_end_pfn)
> @@ -1412,6 +1420,13 @@ static void isolate_freepages(struct compact_control *cc)
>  			 */
>  			break;
>  		}
> +
> +		/* Adjust stride depending on isolation */
> +		if (nr_isolated) {
> +			stride = 1;
> +			continue;
> +		}

If we hit a free page with a large stride, wouldn't it make sense to
reset it to 1 immediately in the same pageblock, and possibly also start
over from its beginning, if the assumption is that free pages appear
close together?

> +		stride = min_t(unsigned int, COMPACT_CLUSTER_MAX, stride << 1);
>  	}
>  
>  	/*
>
Mel Gorman Jan. 18, 2019, 1:44 p.m. UTC | #2
On Fri, Jan 18, 2019 at 11:38:38AM +0100, Vlastimil Babka wrote:
> On 1/4/19 1:50 PM, Mel Gorman wrote:
> > Once fast searching finishes, there is a possibility that the linear
> > scanner is scanning full blocks found by the fast scanner earlier. This
> > patch uses an adaptive stride to sample pageblocks for free pages. The
> > more consecutive full pageblocks encountered, the larger the stride until
> > a pageblock with free pages is found. The scanners might meet slightly
> > sooner but it is an acceptable risk given that the search of the free
> > lists may still encounter the pages and adjust the cached PFN of the free
> > scanner accordingly.
> > 
> > In terms of latency and success rates, the impact is not obvious but the
> > free scan rate is reduced by 87% on a 1-socket machine and 92% on a
> > 2-socket machine. It's also the first time in the series where the number
> > of pages scanned by the migration scanner is greater than the free scanner
> > due to the increased search efficiency.
> > 
> > Signed-off-by: Mel Gorman <mgorman@techsingularity.net>
> 
> OK, I admit this is quite counterintuitive to me. I would have expected
> this change to result in meeting scanners much more sooner, while
> missing many free pages (especially when starting with stride 32 for
> async compaction). I would have expected that pageblocks that we already
> depleted are marked for skipping, while freeing pages by reclaim
> scatters them randomly in the remaining ones, and this will then miss
> many. But you have benchmarking data so I won't object :)
> 

So, it comes down to probabilities to some extent which we cannot
really calculate because they are a function of the reference string
for allocations and frees in combination with compaction activity both
of which depend on the workload.

Fundamentally, the key is that compaction typically moves data from lower
addresses to higher addresses. The longer compaction is running, the more
packed the higher addresses become until there are no free pages. When
the fast search fails and the linear search starts, it has to proceed
through a large number of pageblocks that have been tightly packed one
page at a time. However, the location of the free pages doesn't change
very much so the locationwhere compaction finds a target and when the
scanners meet doesn't change by very much at all.

Now, with sampling, some candidates might be missed depending on the
size of the stride and the scanners meet fractionally sooner but the
difference is very marginal. The difference is that we skip over heavily
packed pageblocks much quicker.

Does that help the counterintuitive nature of the patch?

> > ---
> >  mm/compaction.c | 27 +++++++++++++++++++++------
> >  1 file changed, 21 insertions(+), 6 deletions(-)
> > 
> > diff --git a/mm/compaction.c b/mm/compaction.c
> > index 652e249168b1..cc532e81a7b7 100644
> > --- a/mm/compaction.c
> > +++ b/mm/compaction.c
> > @@ -441,6 +441,7 @@ static unsigned long isolate_freepages_block(struct compact_control *cc,
> >  				unsigned long *start_pfn,
> >  				unsigned long end_pfn,
> >  				struct list_head *freelist,
> > +				unsigned int stride,
> >  				bool strict)
> >  {
> >  	int nr_scanned = 0, total_isolated = 0;
> > @@ -450,10 +451,14 @@ static unsigned long isolate_freepages_block(struct compact_control *cc,
> >  	unsigned long blockpfn = *start_pfn;
> >  	unsigned int order;
> >  
> > +	/* Strict mode is for isolation, speed is secondary */
> > +	if (strict)
> > +		stride = 1;
> 
> Why not just call this from strict context with stride 1, instead of
> passing 0 and then changing it to 1.

No particular reason other than I wanted to make it clear that strict
mode shouldn't play games with stride. I can change it if you prefer.

> > @@ -1412,6 +1420,13 @@ static void isolate_freepages(struct compact_control *cc)
> >  			 */
> >  			break;
> >  		}
> > +
> > +		/* Adjust stride depending on isolation */
> > +		if (nr_isolated) {
> > +			stride = 1;
> > +			continue;
> > +		}
> 
> If we hit a free page with a large stride, wouldn't it make sense to
> reset it to 1 immediately in the same pageblock, and possibly also start
> over from its beginning, if the assumption is that free pages appear
> close together?
> 

I felt that the likely benefit would be marginal and without additional
complexity, we end up scanning the same pageblock twice. I didn't think
the marginal upside was worth it.
diff mbox series

Patch

diff --git a/mm/compaction.c b/mm/compaction.c
index 652e249168b1..cc532e81a7b7 100644
--- a/mm/compaction.c
+++ b/mm/compaction.c
@@ -441,6 +441,7 @@  static unsigned long isolate_freepages_block(struct compact_control *cc,
 				unsigned long *start_pfn,
 				unsigned long end_pfn,
 				struct list_head *freelist,
+				unsigned int stride,
 				bool strict)
 {
 	int nr_scanned = 0, total_isolated = 0;
@@ -450,10 +451,14 @@  static unsigned long isolate_freepages_block(struct compact_control *cc,
 	unsigned long blockpfn = *start_pfn;
 	unsigned int order;
 
+	/* Strict mode is for isolation, speed is secondary */
+	if (strict)
+		stride = 1;
+
 	cursor = pfn_to_page(blockpfn);
 
 	/* Isolate free pages. */
-	for (; blockpfn < end_pfn; blockpfn++, cursor++) {
+	for (; blockpfn < end_pfn; blockpfn += stride, cursor += stride) {
 		int isolated;
 		struct page *page = cursor;
 
@@ -624,7 +629,7 @@  isolate_freepages_range(struct compact_control *cc,
 			break;
 
 		isolated = isolate_freepages_block(cc, &isolate_start_pfn,
-						block_end_pfn, &freelist, true);
+					block_end_pfn, &freelist, 0, true);
 
 		/*
 		 * In strict mode, isolate_freepages_block() returns 0 if
@@ -1139,7 +1144,7 @@  fast_isolate_around(struct compact_control *cc, unsigned long pfn, unsigned long
 
 	/* Scan before */
 	if (start_pfn != pfn) {
-		isolate_freepages_block(cc, &start_pfn, pfn, &cc->freepages, false);
+		isolate_freepages_block(cc, &start_pfn, pfn, &cc->freepages, 1, false);
 		if (cc->nr_freepages >= cc->nr_migratepages)
 			return;
 	}
@@ -1147,7 +1152,7 @@  fast_isolate_around(struct compact_control *cc, unsigned long pfn, unsigned long
 	/* Scan after */
 	start_pfn = pfn + nr_isolated;
 	if (start_pfn != end_pfn)
-		isolate_freepages_block(cc, &start_pfn, end_pfn, &cc->freepages, false);
+		isolate_freepages_block(cc, &start_pfn, end_pfn, &cc->freepages, 1, false);
 
 	/* Skip this pageblock in the future as it's full or nearly full */
 	if (cc->nr_freepages < cc->nr_migratepages)
@@ -1333,7 +1338,9 @@  static void isolate_freepages(struct compact_control *cc)
 	unsigned long isolate_start_pfn; /* exact pfn we start at */
 	unsigned long block_end_pfn;	/* end of current pageblock */
 	unsigned long low_pfn;	     /* lowest pfn scanner is able to scan */
+	unsigned long nr_isolated;
 	struct list_head *freelist = &cc->freepages;
+	unsigned int stride;
 
 	/* Try a small search of the free lists for a candidate */
 	isolate_start_pfn = fast_isolate_freepages(cc);
@@ -1356,6 +1363,7 @@  static void isolate_freepages(struct compact_control *cc)
 	block_end_pfn = min(block_start_pfn + pageblock_nr_pages,
 						zone_end_pfn(zone));
 	low_pfn = pageblock_end_pfn(cc->migrate_pfn);
+	stride = cc->mode == MIGRATE_ASYNC ? COMPACT_CLUSTER_MAX : 1;
 
 	/*
 	 * Isolate free pages until enough are available to migrate the
@@ -1387,8 +1395,8 @@  static void isolate_freepages(struct compact_control *cc)
 			continue;
 
 		/* Found a block suitable for isolating free pages from. */
-		isolate_freepages_block(cc, &isolate_start_pfn, block_end_pfn,
-					freelist, false);
+		nr_isolated = isolate_freepages_block(cc, &isolate_start_pfn,
+					block_end_pfn, freelist, stride, false);
 
 		/* Update the skip hint if the full pageblock was scanned */
 		if (isolate_start_pfn == block_end_pfn)
@@ -1412,6 +1420,13 @@  static void isolate_freepages(struct compact_control *cc)
 			 */
 			break;
 		}
+
+		/* Adjust stride depending on isolation */
+		if (nr_isolated) {
+			stride = 1;
+			continue;
+		}
+		stride = min_t(unsigned int, COMPACT_CLUSTER_MAX, stride << 1);
 	}
 
 	/*