Message ID | 20240111183321.19984-2-ryncsn@gmail.com (mailing list archive)
---|---
State | New
Series | mm, lru_gen: batch update pages when aging
On Fri, Jan 12, 2024 at 2:33 AM Kairui Song <ryncsn@gmail.com> wrote:
>
> From: Kairui Song <kasong@tencent.com>
>
> When lru_gen is aging, it will update mm counters page by page, which
> causes higher overhead if aging happens frequently or there are a lot
> of pages in one generation getting moved. Optimize this by doing the
> counter updates in batch.
>
> Although most __mod_*_state calls have their own caches, the overhead
> is still observable.
>
> Tested in a 4G memcg on an EPYC 7K62 with:
>
>   memcached -u nobody -m 16384 -s /tmp/memcached.socket \
>     -a 0766 -t 16 -B binary &
>
>   memtier_benchmark -S /tmp/memcached.socket \
>     -P memcache_binary -n allkeys \
>     --key-minimum=1 --key-maximum=16000000 -d 1024 \
>     --ratio=1:0 --key-pattern=P:P -c 2 -t 16 --pipeline 8 -x 6
>
> Average result of 18 test runs:
>
> Before: 44017.78 Ops/sec
> After: 44687.08 Ops/sec (+1.5%)
>
> Signed-off-by: Kairui Song <kasong@tencent.com>
> ---
>  mm/vmscan.c | 64 +++++++++++++++++++++++++++++++++++++++++++++--------
>  1 file changed, 55 insertions(+), 9 deletions(-)
>

My apologies for being careless again here... the title here should be:

mm, lru_gen: batch update counters on aging
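For readers skimming the thread, the batching idea in the commit message above can be shown in miniature as a standalone userspace toy. The helper names inc_gen() and flush_batch() are hypothetical stand-ins, not the kernel functions; the point is only that per-folio generation moves are folded into a local delta array and the shared counters are touched once per scanned list rather than once per folio.

#include <stdio.h>

#define MAX_NR_GENS 4

/* stands in for lrugen->nr_pages[gen][type][zone] in the patch */
static long nr_pages[MAX_NR_GENS] = { 1000, 0, 0, 0 };

struct gen_update_batch {
        int delta[MAX_NR_GENS];
};

/* roughly what folio_inc_gen() contributes per folio after the patch */
static void inc_gen(struct gen_update_batch *batch, int old_gen, int new_gen, int nr)
{
        batch->delta[old_gen] -= nr;
        batch->delta[new_gen] += nr;
}

/* roughly what lru_gen_update_batch() does: one pass over the shared counters */
static void flush_batch(struct gen_update_batch *batch)
{
        for (int gen = 0; gen < MAX_NR_GENS; gen++)
                nr_pages[gen] += batch->delta[gen];
}

int main(void)
{
        struct gen_update_batch batch = { 0 };

        /* "age" 1000 single-page folios from gen 0 to gen 1 */
        for (int i = 0; i < 1000; i++)
                inc_gen(&batch, 0, 1, 1);
        flush_batch(&batch);    /* shared counters updated once, not 1000 times */

        printf("gen0=%ld gen1=%ld\n", nr_pages[0], nr_pages[1]);
        return 0;
}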
On Thu, Jan 11, 2024 at 10:33 AM Kairui Song <ryncsn@gmail.com> wrote:
>
> From: Kairui Song <kasong@tencent.com>
>
> When lru_gen is aging, it will update mm counters page by page, which
> causes higher overhead if aging happens frequently or there are a lot
> of pages in one generation getting moved. Optimize this by doing the
> counter updates in batch.
>
> Although most __mod_*_state calls have their own caches, the overhead
> is still observable.
>
> Tested in a 4G memcg on an EPYC 7K62 with:
>
>   memcached -u nobody -m 16384 -s /tmp/memcached.socket \
>     -a 0766 -t 16 -B binary &
>
>   memtier_benchmark -S /tmp/memcached.socket \
>     -P memcache_binary -n allkeys \
>     --key-minimum=1 --key-maximum=16000000 -d 1024 \
>     --ratio=1:0 --key-pattern=P:P -c 2 -t 16 --pipeline 8 -x 6
>
> Average result of 18 test runs:
>
> Before: 44017.78 Ops/sec
> After: 44687.08 Ops/sec (+1.5%)

I see the same performance numbers quoted in all 3 patches. How much
performance improvement does this particular patch provide (and the
same question for the other 2 patches)? If, as the cover letter says,
most of the performance benefit comes from patch 3 (prefetching), can
we just take that patch alone and avoid the extra complexity?

> Signed-off-by: Kairui Song <kasong@tencent.com>
> ---
>  mm/vmscan.c | 64 +++++++++++++++++++++++++++++++++++++++++++++--------
>  1 file changed, 55 insertions(+), 9 deletions(-)
>
[...]
> @@ -3672,6 +3711,7 @@ static bool inc_min_seq(struct lruvec *lruvec, int type, bool can_swap)
>  {
>          int zone;
>          int remaining = MAX_LRU_BATCH;
> +        struct gen_update_batch batch = { };

Can this batch variable be moved away from the stack? We (Google) use
a much larger value for MAX_NR_GENS internally. This large stack
allocation from "struct gen_update_batch batch" can significantly
increase the risk of stack overflow for our use cases.

[...]
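To make the size concern concrete, a quick standalone arithmetic check; the 64-generation figure below is a made-up stand-in for a "much larger" configuration, not Google's actual number, while mainline currently defines MAX_NR_GENS as 4:

#include <stdio.h>

/* mainline: #define MAX_NR_GENS 4U */
struct batch_mainline { int delta[4]; };
/* hypothetical larger configuration, purely for illustration */
struct batch_large { int delta[64]; };

int main(void)
{
        /* 16 bytes vs 256 bytes of stack per on-stack instance */
        printf("mainline: %zu bytes\n", sizeof(struct batch_mainline));
        printf("larger:   %zu bytes\n", sizeof(struct batch_large));
        return 0;
}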
On Sat, Jan 13, 2024 at 5:01 AM Wei Xu <weixugc@google.com> wrote:
>
> On Thu, Jan 11, 2024 at 10:33 AM Kairui Song <ryncsn@gmail.com> wrote:
> >
> > From: Kairui Song <kasong@tencent.com>
> >
> > [...]
> >
> > Average result of 18 test runs:
> >
> > Before: 44017.78 Ops/sec
> > After: 44687.08 Ops/sec (+1.5%)
>
> I see the same performance numbers quoted in all 3 patches. How much
> performance improvement does this particular patch provide (and the
> same question for the other 2 patches)? If, as the cover letter says,
> most of the performance benefit comes from patch 3 (prefetching), can
> we just take that patch alone and avoid the extra complexity?

Hi Wei,

Indeed these are two different optimization techniques. I can reorder
the series so that prefetch comes first, which should be more
acceptable; the other optimizations can come later. I will also add
standalone info about the improvement from the batch operations.

> > [...]
> >
> > @@ -3672,6 +3711,7 @@ static bool inc_min_seq(struct lruvec *lruvec, int type, bool can_swap)
> >  {
> >          int zone;
> >          int remaining = MAX_LRU_BATCH;
> > +        struct gen_update_batch batch = { };
>
> Can this batch variable be moved away from the stack? We (Google) use
> a much larger value for MAX_NR_GENS internally. This large stack
> allocation from "struct gen_update_batch batch" can significantly
> increase the risk of stack overflow for our use cases.
>

Thanks for the info.

Do you have any suggestions about where we should put the batch info?
I thought about merging it with lru_gen_mm_walk, but that depends on
kzalloc and isn't usable in the slow allocation path, so the overhead
could be larger than the benefit in many cases.

Not sure if we can use something like a preallocated per-CPU cache
here to avoid all these issues.
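One possible shape of the "preallocated per-CPU cache" idea floated above, purely as a sketch: the helper names lru_gen_get_batch()/lru_gen_put_batch() are hypothetical, and a real version would still have to sort out reentrancy (for example nested reclaim) before it could replace the on-stack batch.

static DEFINE_PER_CPU(struct gen_update_batch, lru_gen_batch);

static struct gen_update_batch *lru_gen_get_batch(void)
{
        /* get_cpu_ptr() disables preemption until the matching put_cpu_ptr() */
        struct gen_update_batch *batch = get_cpu_ptr(&lru_gen_batch);

        memset(batch, 0, sizeof(*batch));
        return batch;
}

static void lru_gen_put_batch(struct lruvec *lruvec, int type, int zone,
                              struct gen_update_batch *batch)
{
        /* flush the accumulated deltas, then release the per-CPU slot */
        lru_gen_update_batch(lruvec, type, zone, batch);
        put_cpu_ptr(&lru_gen_batch);
}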
On Mon, Jan 15, 2024 at 1:42 AM Kairui Song <ryncsn@gmail.com> wrote:
>
> On Sat, Jan 13, 2024 at 5:01 AM Wei Xu <weixugc@google.com> wrote:
> >
> > On Thu, Jan 11, 2024 at 10:33 AM Kairui Song <ryncsn@gmail.com> wrote:
> > >
> > > From: Kairui Song <kasong@tencent.com>
> > >
[...]
> > > @@ -3672,6 +3711,7 @@ static bool inc_min_seq(struct lruvec *lruvec, int type, bool can_swap)
> > >  {
> > >          int zone;
> > >          int remaining = MAX_LRU_BATCH;
> > > +        struct gen_update_batch batch = { };
> >
> > Can this batch variable be moved away from the stack? We (Google) use
> > a much larger value for MAX_NR_GENS internally. This large stack
> > allocation from "struct gen_update_batch batch" can significantly
> > increase the risk of stack overflow for our use cases.
> >
> Thanks for the info.
>
> Do you have any suggestions about where we should put the batch info?
> I thought about merging it with lru_gen_mm_walk, but that depends on
> kzalloc and isn't usable in the slow allocation path, so the overhead
> could be larger than the benefit in many cases.
>
> Not sure if we can use something like a preallocated per-CPU cache
> here to avoid all these issues.

Hi Wei,

On second thought: the batch is mostly used together with
folio_inc_gen, which means most pages are only being moved between two
gens (being protected or marked unreclaimable), so I think only one
int counter is needed in the batch. I'll update this patch and do some
tests based on this.
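For what it's worth, a rough sketch of how the single-counter variant described above could look, under the assumption that one flush only ever covers pages moved from the scanned (old) generation to a single target generation; the signature change and names are illustrative, not the actual follow-up version of the patch.

struct gen_update_batch {
        int delta;      /* pages moved from old_gen to new_gen since the last flush */
};

/* folio_inc_gen() would then only do: batch->delta += folio_nr_pages(folio); */

static void lru_gen_update_batch(struct lruvec *lruvec, int type, int zone,
                                 int old_gen, int new_gen,
                                 struct gen_update_batch *batch)
{
        int delta = batch->delta;
        struct lru_gen_folio *lrugen = &lruvec->lrugen;
        enum lru_list lru = type ? LRU_INACTIVE_FILE : LRU_INACTIVE_ANON;

        if (!delta)
                return;

        WRITE_ONCE(lrugen->nr_pages[old_gen][type][zone],
                   lrugen->nr_pages[old_gen][type][zone] - delta);
        WRITE_ONCE(lrugen->nr_pages[new_gen][type][zone],
                   lrugen->nr_pages[new_gen][type][zone] + delta);

        /* same active/inactive accounting as the posted patch, net of old_gen */
        if (lru_gen_is_active(lruvec, new_gen) && !lru_gen_is_active(lruvec, old_gen)) {
                __update_lru_size(lruvec, lru, zone, -delta);
                __update_lru_size(lruvec, lru + LRU_ACTIVE, zone, delta);
        }

        batch->delta = 0;
}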
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 4f9c854ce6cc..185d53607c7e 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -3113,9 +3113,47 @@ static int folio_update_gen(struct folio *folio, int gen)
         return ((old_flags & LRU_GEN_MASK) >> LRU_GEN_PGOFF) - 1;
 }
 
+/*
+ * Update LRU gen in batch for each lru_gen LRU list. The batch is limited to
+ * each gen / type / zone level LRU. Batch is applied after finished or aborted
+ * scanning one LRU list.
+ */
+struct gen_update_batch {
+        int delta[MAX_NR_GENS];
+};
+
+static void lru_gen_update_batch(struct lruvec *lruvec, int type, int zone,
+                                 struct gen_update_batch *batch)
+{
+        int gen;
+        int promoted = 0;
+        struct lru_gen_folio *lrugen = &lruvec->lrugen;
+        enum lru_list lru = type ? LRU_INACTIVE_FILE : LRU_INACTIVE_ANON;
+
+        for (gen = 0; gen < MAX_NR_GENS; gen++) {
+                int delta = batch->delta[gen];
+
+                if (!delta)
+                        continue;
+
+                WRITE_ONCE(lrugen->nr_pages[gen][type][zone],
+                           lrugen->nr_pages[gen][type][zone] + delta);
+
+                if (lru_gen_is_active(lruvec, gen))
+                        promoted += delta;
+        }
+
+        if (promoted) {
+                __update_lru_size(lruvec, lru, zone, -promoted);
+                __update_lru_size(lruvec, lru + LRU_ACTIVE, zone, promoted);
+        }
+}
+
 /* protect pages accessed multiple times through file descriptors */
-static int folio_inc_gen(struct lruvec *lruvec, struct folio *folio, bool reclaiming)
+static int folio_inc_gen(struct lruvec *lruvec, struct folio *folio,
+                         bool reclaiming, struct gen_update_batch *batch)
 {
+        int delta = folio_nr_pages(folio);
         int type = folio_is_file_lru(folio);
         struct lru_gen_folio *lrugen = &lruvec->lrugen;
         int new_gen, old_gen = lru_gen_from_seq(lrugen->min_seq[type]);
@@ -3138,7 +3176,8 @@ static int folio_inc_gen(struct lruvec *lruvec, struct folio *folio, bool reclai
                 new_flags |= BIT(PG_reclaim);
         } while (!try_cmpxchg(&folio->flags, &old_flags, new_flags));
 
-        lru_gen_update_size(lruvec, folio, old_gen, new_gen);
+        batch->delta[old_gen] -= delta;
+        batch->delta[new_gen] += delta;
 
         return new_gen;
 }
@@ -3672,6 +3711,7 @@ static bool inc_min_seq(struct lruvec *lruvec, int type, bool can_swap)
 {
         int zone;
         int remaining = MAX_LRU_BATCH;
+        struct gen_update_batch batch = { };
         struct lru_gen_folio *lrugen = &lruvec->lrugen;
         int new_gen, old_gen = lru_gen_from_seq(lrugen->min_seq[type]);
 
@@ -3690,12 +3730,15 @@ static bool inc_min_seq(struct lruvec *lruvec, int type, bool can_swap)
                         VM_WARN_ON_ONCE_FOLIO(folio_is_file_lru(folio) != type, folio);
                         VM_WARN_ON_ONCE_FOLIO(folio_zonenum(folio) != zone, folio);
 
-                        new_gen = folio_inc_gen(lruvec, folio, false);
+                        new_gen = folio_inc_gen(lruvec, folio, false, &batch);
                         list_move_tail(&folio->lru, &lrugen->folios[new_gen][type][zone]);
 
-                        if (!--remaining)
+                        if (!--remaining) {
+                                lru_gen_update_batch(lruvec, type, zone, &batch);
                                 return false;
+                        }
                 }
+                lru_gen_update_batch(lruvec, type, zone, &batch);
         }
 done:
         reset_ctrl_pos(lruvec, type, true);
@@ -4215,7 +4258,7 @@ void lru_gen_soft_reclaim(struct mem_cgroup *memcg, int nid)
 ******************************************************************************/
 
 static bool sort_folio(struct lruvec *lruvec, struct folio *folio, struct scan_control *sc,
-                       int tier_idx)
+                       int tier_idx, struct gen_update_batch *batch)
 {
         bool success;
         int gen = folio_lru_gen(folio);
@@ -4257,7 +4300,7 @@ static bool sort_folio(struct lruvec *lruvec, struct folio *folio, struct scan_c
         if (tier > tier_idx || refs == BIT(LRU_REFS_WIDTH)) {
                 int hist = lru_hist_from_seq(lrugen->min_seq[type]);
 
-                gen = folio_inc_gen(lruvec, folio, false);
+                gen = folio_inc_gen(lruvec, folio, false, batch);
                 list_move_tail(&folio->lru, &lrugen->folios[gen][type][zone]);
 
                 WRITE_ONCE(lrugen->protected[hist][type][tier - 1],
@@ -4267,7 +4310,7 @@ static bool sort_folio(struct lruvec *lruvec, struct folio *folio, struct scan_c
 
         /* ineligible */
         if (zone > sc->reclaim_idx || skip_cma(folio, sc)) {
-                gen = folio_inc_gen(lruvec, folio, false);
+                gen = folio_inc_gen(lruvec, folio, false, batch);
                 list_move_tail(&folio->lru, &lrugen->folios[gen][type][zone]);
                 return true;
         }
@@ -4275,7 +4318,7 @@ static bool sort_folio(struct lruvec *lruvec, struct folio *folio, struct scan_c
         /* waiting for writeback */
         if (folio_test_locked(folio) || folio_test_writeback(folio) ||
             (type == LRU_GEN_FILE && folio_test_dirty(folio))) {
-                gen = folio_inc_gen(lruvec, folio, true);
+                gen = folio_inc_gen(lruvec, folio, true, batch);
                 list_move(&folio->lru, &lrugen->folios[gen][type][zone]);
                 return true;
         }
@@ -4341,6 +4384,7 @@ static int scan_folios(struct lruvec *lruvec, struct scan_control *sc,
         for (i = MAX_NR_ZONES; i > 0; i--) {
                 LIST_HEAD(moved);
                 int skipped_zone = 0;
+                struct gen_update_batch batch = { };
                 int zone = (sc->reclaim_idx + i) % MAX_NR_ZONES;
                 struct list_head *head = &lrugen->folios[gen][type][zone];
 
@@ -4355,7 +4399,7 @@ static int scan_folios(struct lruvec *lruvec, struct scan_control *sc,
 
                         scanned += delta;
 
-                        if (sort_folio(lruvec, folio, sc, tier))
+                        if (sort_folio(lruvec, folio, sc, tier, &batch))
                                 sorted += delta;
                         else if (isolate_folio(lruvec, folio, sc)) {
                                 list_add(&folio->lru, list);
@@ -4375,6 +4419,8 @@ static int scan_folios(struct lruvec *lruvec, struct scan_control *sc,
                         skipped += skipped_zone;
                 }
 
+                lru_gen_update_batch(lruvec, type, zone, &batch);
+
                 if (!remaining || isolated >= MIN_LRU_BATCH)
                         break;
         }