diff mbox series

[RFC] mm: mglru: provide a separate list for lazyfree anon folios

Message ID 20240914063746.46290-1-21cnbao@gmail.com (mailing list archive)
State New
Headers show
Series [RFC] mm: mglru: provide a separate list for lazyfree anon folios | expand

Commit Message

Barry Song Sept. 14, 2024, 6:37 a.m. UTC
From: Barry Song <v-songbaohua@oppo.com>

This follows up on the discussion regarding Gaoxu's work[1]. It's
unclear if there's still interest in implementing a separate LRU
list for lazyfree folios, but I decided to explore it out of
curiosity.

According to Lokesh, MADV_FREE'd anon folios are expected to be 
released earlier than file folios. One option, as implemented 
by Gao Xu, is to place lazyfree anon folios at the tail of the 
file's `min_seq` generation. However, this approach results in 
lazyfree folios being released in a LIFO manner, which conflicts 
with LRU behavior, as noted by Michal.

To address this, this patch proposes maintaining a separate list 
for lazyfree anon folios while keeping them classified under the 
"file" LRU type to minimize code changes. These lazyfree anon 
folios will still be counted as file folios and share the same 
generation with regular files. In the eviction path, the lazyfree 
list will be prioritized for scanning before the actual file 
LRU list.

[1] https://lore.kernel.org/linux-mm/f29f64e29c08427b95e3df30a5770056@honor.com/

Signed-off-by: Barry Song <v-songbaohua@oppo.com>
---
 include/linux/mm_inline.h |  5 +-
 include/linux/mmzone.h    |  2 +-
 mm/vmscan.c               | 97 +++++++++++++++++++++++----------------
 3 files changed, 61 insertions(+), 43 deletions(-)

Comments

wang wei Sept. 15, 2024, 12:58 a.m. UTC | #1
> -----Original Message-----
> From: linux-kernel+bounces-329184-a929244872=163.com@vger.kernel.org
> <linux-kernel+bounces-329184-a929244872=163.com@vger.kernel.org> On
> Behalf Of Barry Song
> Sent: Saturday, September 14, 2024 2:38 PM
> To: akpm@linux-foundation.org; linux-mm@kvack.org
> Cc: mhocko@suse.com; fengbaopeng@honor.com; gaoxu2@honor.com;
> hailong.liu@oppo.com; kaleshsingh@google.com; linux-
> kernel@vger.kernel.org; lokeshgidra@google.com; ngeoffray@google.com;
> shli@fb.com; surenb@google.com; yipengxiang@honor.com;
> david@redhat.com; yuzhao@google.com; minchan@kernel.org; Barry Song
> <v-songbaohua@oppo.com>
> Subject: [PATCH RFC] mm: mglru: provide a separate list for lazyfree anon
> folios
> 
> From: Barry Song <v-songbaohua@oppo.com>
> 
> This follows up on the discussion regarding Gaoxu's work[1]. It's unclear
if
> there's still interest in implementing a separate LRU list for lazyfree
folios, but I
> decided to explore it out of curiosity.
> 
> According to Lokesh, MADV_FREE'd anon folios are expected to be released
> earlier than file folios. One option, as implemented by Gao Xu, is to
place
> lazyfree anon folios at the tail of the file's `min_seq` generation.
However, this
> approach results in lazyfree folios being released in a LIFO manner, which
> conflicts with LRU behavior, as noted by Michal.
> 
> To address this, this patch proposes maintaining a separate list for
lazyfree
> anon folios while keeping them classified under the "file" LRU type to
minimize
> code changes. These lazyfree anon folios will still be counted as file
folios and
> share the same generation with regular files. In the eviction path, the
lazyfree
> list will be prioritized for scanning before the actual file LRU list.
> 
> [1] https://lore.kernel.org/linux-
> mm/f29f64e29c08427b95e3df30a5770056@honor.com/
> 
> Signed-off-by: Barry Song <v-songbaohua@oppo.com>
> ---
>  include/linux/mm_inline.h |  5 +-
>  include/linux/mmzone.h    |  2 +-
>  mm/vmscan.c               | 97 +++++++++++++++++++++++----------------
>  3 files changed, 61 insertions(+), 43 deletions(-)
> 
> diff --git a/include/linux/mm_inline.h b/include/linux/mm_inline.h index
> f4fe593c1400..118d70ed3120 100644
> --- a/include/linux/mm_inline.h
> +++ b/include/linux/mm_inline.h
> @@ -225,6 +225,7 @@ static inline bool lru_gen_add_folio(struct lruvec
> *lruvec, struct folio *folio,
>  	int gen = folio_lru_gen(folio);
>  	int type = folio_is_file_lru(folio);
>  	int zone = folio_zonenum(folio);
> +	int lazyfree = type ? folio_test_anon(folio) : 0;
>  	struct lru_gen_folio *lrugen = &lruvec->lrugen;
> 
>  	VM_WARN_ON_ONCE_FOLIO(gen != -1, folio); @@ -262,9 +263,9
> @@ static inline bool lru_gen_add_folio(struct lruvec *lruvec, struct
folio
> *folio,
>  	lru_gen_update_size(lruvec, folio, -1, gen);
>  	/* for folio_rotate_reclaimable() */
>  	if (reclaiming)
> -		list_add_tail(&folio->lru,
&lrugen->folios[gen][type][zone]);
> +		list_add_tail(&folio->lru, &lrugen->folios[gen][type +
> +lazyfree][zone]);
>  	else
> -		list_add(&folio->lru, &lrugen->folios[gen][type][zone]);
> +		list_add(&folio->lru, &lrugen->folios[gen][type +
> lazyfree][zone]);
> 
>  	return true;
>  }
> diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index
> 17506e4a2835..5d2331778528 100644
> --- a/include/linux/mmzone.h
> +++ b/include/linux/mmzone.h
> @@ -434,7 +434,7 @@ struct lru_gen_folio {
>  	/* the birth time of each generation in jiffies */
>  	unsigned long timestamps[MAX_NR_GENS];
>  	/* the multi-gen LRU lists, lazily sorted on eviction */
> -	struct list_head
> folios[MAX_NR_GENS][ANON_AND_FILE][MAX_NR_ZONES];
> +	struct list_head folios[MAX_NR_GENS][ANON_AND_FILE +
> 1][MAX_NR_ZONES];
This also divides lazyfree folios into MAX_NR_GENS generations.
The gen of a lazyfree folio depends on its gen in the anon list before
it is marked as lazyfree. Could it happen that lazyfree folios are
released in an order that is not consistent with the order in which
they were marked?

>  	/* the multi-gen LRU sizes, eventually consistent */
>  	long nr_pages[MAX_NR_GENS][ANON_AND_FILE][MAX_NR_ZONES];
>  	/* the exponential moving average of refaulted */ diff --git
> a/mm/vmscan.c b/mm/vmscan.c index 96abf4a52382..9dc665dc6ba9
> 100644
> --- a/mm/vmscan.c
> +++ b/mm/vmscan.c
> @@ -3725,21 +3725,25 @@ static bool inc_min_seq(struct lruvec *lruvec, int
> type, bool can_swap)
> 
>  	/* prevent cold/hot inversion if force_scan is true */
>  	for (zone = 0; zone < MAX_NR_ZONES; zone++) {
> -		struct list_head *head = &lrugen-
> >folios[old_gen][type][zone];
> +		int list_num = type ? 2 : 1;
> +		struct list_head *head;
> 
> -		while (!list_empty(head)) {
> -			struct folio *folio = lru_to_folio(head);
> +		for (int i = list_num - 1; i >= 0; i--) {
> +			head = &lrugen->folios[old_gen][type + i][zone];
> +			while (!list_empty(head)) {
> +				struct folio *folio = lru_to_folio(head);
> 
> -
> 	VM_WARN_ON_ONCE_FOLIO(folio_test_unevictable(folio), folio);
> -			VM_WARN_ON_ONCE_FOLIO(folio_test_active(folio),
> folio);
> -
> 	VM_WARN_ON_ONCE_FOLIO(folio_is_file_lru(folio) != type, folio);
> -
> 	VM_WARN_ON_ONCE_FOLIO(folio_zonenum(folio) != zone, folio);
> +
> 	VM_WARN_ON_ONCE_FOLIO(folio_test_unevictable(folio), folio);
> +
> 	VM_WARN_ON_ONCE_FOLIO(folio_test_active(folio), folio);
> +
> 	VM_WARN_ON_ONCE_FOLIO(folio_is_file_lru(folio) != type, folio);
> +
> 	VM_WARN_ON_ONCE_FOLIO(folio_zonenum(folio) != zone, folio);
> 
> -			new_gen = folio_inc_gen(lruvec, folio, false);
> -			list_move_tail(&folio->lru, &lrugen-
> >folios[new_gen][type][zone]);
> +				new_gen = folio_inc_gen(lruvec, folio,
false);
> +				list_move_tail(&folio->lru, &lrugen-
> >folios[new_gen][type +
> +i][zone]);
> 
> -			if (!--remaining)
> -				return false;
> +				if (!--remaining)
> +					return false;
> +			}
>  		}
>  	}
>  done:
> @@ -4291,6 +4295,7 @@ static bool sort_folio(struct lruvec *lruvec, struct
> folio *folio, struct scan_c
>  	int refs = folio_lru_refs(folio);
>  	int tier = lru_tier_from_refs(refs);
>  	struct lru_gen_folio *lrugen = &lruvec->lrugen;
> +	int lazyfree = type ? folio_test_anon(folio) : 0;
> 
>  	VM_WARN_ON_ONCE_FOLIO(gen >= MAX_NR_GENS, folio);
> 
> @@ -4306,7 +4311,7 @@ static bool sort_folio(struct lruvec *lruvec, struct
> folio *folio, struct scan_c
> 
>  	/* promoted */
>  	if (gen != lru_gen_from_seq(lrugen->min_seq[type])) {
> -		list_move(&folio->lru, &lrugen->folios[gen][type][zone]);
> +		list_move(&folio->lru, &lrugen->folios[gen][type +
> lazyfree][zone]);
>  		return true;
>  	}
> 
> @@ -4315,7 +4320,7 @@ static bool sort_folio(struct lruvec *lruvec, struct
> folio *folio, struct scan_c
>  		int hist = lru_hist_from_seq(lrugen->min_seq[type]);
> 
>  		gen = folio_inc_gen(lruvec, folio, false);
> -		list_move_tail(&folio->lru,
&lrugen->folios[gen][type][zone]);
> +		list_move_tail(&folio->lru, &lrugen->folios[gen][type +
> +lazyfree][zone]);
> 
>  		WRITE_ONCE(lrugen->protected[hist][type][tier - 1],
>  			   lrugen->protected[hist][type][tier - 1] + delta);
@@ -
> 4325,7 +4330,7 @@ static bool sort_folio(struct lruvec *lruvec, struct
folio
> *folio, struct scan_c
>  	/* ineligible */
>  	if (!folio_test_lru(folio) || zone > sc->reclaim_idx) {
>  		gen = folio_inc_gen(lruvec, folio, false);
> -		list_move_tail(&folio->lru,
&lrugen->folios[gen][type][zone]);
> +		list_move_tail(&folio->lru, &lrugen->folios[gen][type +
> +lazyfree][zone]);
>  		return true;
>  	}
> 
> @@ -4333,7 +4338,7 @@ static bool sort_folio(struct lruvec *lruvec, struct
> folio *folio, struct scan_c
>  	if (folio_test_locked(folio) || folio_test_writeback(folio) ||
>  	    (type == LRU_GEN_FILE && folio_test_dirty(folio))) {
>  		gen = folio_inc_gen(lruvec, folio, true);
> -		list_move(&folio->lru, &lrugen->folios[gen][type][zone]);
> +		list_move(&folio->lru, &lrugen->folios[gen][type +
> lazyfree][zone]);
>  		return true;
>  	}
> 
> @@ -4377,7 +4382,7 @@ static bool isolate_folio(struct lruvec *lruvec,
struct
> folio *folio, struct sca  static int scan_folios(struct lruvec *lruvec,
struct
> scan_control *sc,
>  		       int type, int tier, struct list_head *list)  {
> -	int i;
> +	int i, j;
>  	int gen;
>  	enum vm_event_item item;
>  	int sorted = 0;
> @@ -4399,33 +4404,38 @@ static int scan_folios(struct lruvec *lruvec,
struct
> scan_control *sc,
>  		LIST_HEAD(moved);
>  		int skipped_zone = 0;
>  		int zone = (sc->reclaim_idx + i) % MAX_NR_ZONES;
> -		struct list_head *head = &lrugen->folios[gen][type][zone];
> -
> -		while (!list_empty(head)) {
> -			struct folio *folio = lru_to_folio(head);
> -			int delta = folio_nr_pages(folio);
> -
> -
> 	VM_WARN_ON_ONCE_FOLIO(folio_test_unevictable(folio), folio);
> -			VM_WARN_ON_ONCE_FOLIO(folio_test_active(folio),
> folio);
> -
> 	VM_WARN_ON_ONCE_FOLIO(folio_is_file_lru(folio) != type, folio);
> -
> 	VM_WARN_ON_ONCE_FOLIO(folio_zonenum(folio) != zone, folio);
> -
> -			scanned += delta;
> +		int list_num = type ? 2 : 1;
> +		struct list_head *head;
In addition, scan_folios will also age the lazyfree list. Is this necessary?
> +
> +		for (j = list_num - 1; j >= 0; j--) {
> +			head = &lrugen->folios[gen][type + j][zone];
> +			while (!list_empty(head)) {
> +				struct folio *folio = lru_to_folio(head);
> +				int delta = folio_nr_pages(folio);
> +
> +
> 	VM_WARN_ON_ONCE_FOLIO(folio_test_unevictable(folio), folio);
> +
> 	VM_WARN_ON_ONCE_FOLIO(folio_test_active(folio), folio);
> +
> 	VM_WARN_ON_ONCE_FOLIO(folio_is_file_lru(folio) != type, folio);
> +
> 	VM_WARN_ON_ONCE_FOLIO(folio_zonenum(folio) != zone, folio);
> +
> +				scanned += delta;
> +
> +				if (sort_folio(lruvec, folio, sc, tier))
> +					sorted += delta;
> +				else if (isolate_folio(lruvec, folio, sc)) {
> +					list_add(&folio->lru, list);
> +					isolated += delta;
> +				} else {
> +					list_move(&folio->lru, &moved);
> +					skipped_zone += delta;
> +				}
> 
> -			if (sort_folio(lruvec, folio, sc, tier))
> -				sorted += delta;
> -			else if (isolate_folio(lruvec, folio, sc)) {
> -				list_add(&folio->lru, list);
> -				isolated += delta;
> -			} else {
> -				list_move(&folio->lru, &moved);
> -				skipped_zone += delta;
> +				if (!--remaining || max(isolated,
> skipped_zone) >= MIN_LRU_BATCH)
> +					goto isolate_done;
>  			}
> -
> -			if (!--remaining || max(isolated, skipped_zone) >=
> MIN_LRU_BATCH)
> -				break;
>  		}
> 
> +isolate_done:
>  		if (skipped_zone) {
>  			list_splice(&moved, head);
>  			__count_zid_vm_events(PGSCAN_SKIP, zone,
> skipped_zone); @@ -5586,8 +5596,15 @@ void lru_gen_init_lruvec(struct
> lruvec *lruvec)
>  	for (i = 0; i <= MIN_NR_GENS + 1; i++)
>  		lrugen->timestamps[i] = jiffies;
> 
> -	for_each_gen_type_zone(gen, type, zone)
> +	for_each_gen_type_zone(gen, type, zone) {
>  		INIT_LIST_HEAD(&lrugen->folios[gen][type][zone]);
> +		/*
> +		 * lazyfree anon folios have a separate list while using
> +		 * file as type
> +		 */
> +		if (type)
> +			INIT_LIST_HEAD(&lrugen->folios[gen][type +
> 1][zone]);
> +	}
> 
>  	if (mm_state)
>  		mm_state->seq = MIN_NR_GENS;
> --
> 2.39.3 (Apple Git-146)
David Hildenbrand Sept. 17, 2024, 12:02 p.m. UTC | #2
On 14.09.24 08:37, Barry Song wrote:
> From: Barry Song <v-songbaohua@oppo.com>
> 
> This follows up on the discussion regarding Gaoxu's work[1]. It's
> unclear if there's still interest in implementing a separate LRU
> list for lazyfree folios, but I decided to explore it out of
> curiosity.
> 
> According to Lokesh, MADV_FREE'd anon folios are expected to be
> released earlier than file folios. One option, as implemented
> by Gao Xu, is to place lazyfree anon folios at the tail of the
> file's `min_seq` generation. However, this approach results in
> lazyfree folios being released in a LIFO manner, which conflicts
> with LRU behavior, as noted by Michal.
> 
> To address this, this patch proposes maintaining a separate list
> for lazyfree anon folios while keeping them classified under the
> "file" LRU type to minimize code changes. These lazyfree anon
> folios will still be counted as file folios and share the same
> generation with regular files. In the eviction path, the lazyfree
> list will be prioritized for scanning before the actual file
> LRU list.
> 

What's the downside of another LRU list? Do we have any experience on that?
gaoxu Sept. 18, 2024, 6:19 a.m. UTC | #3
> 
> From: Barry Song <v-songbaohua@oppo.com>
> 
> This follows up on the discussion regarding Gaoxu's work[1]. It's
> unclear if there's still interest in implementing a separate LRU
> list for lazyfree folios, but I decided to explore it out of
> curiosity.
> 
> According to Lokesh, MADV_FREE'd anon folios are expected to be
> released earlier than file folios. One option, as implemented
> by Gao Xu, is to place lazyfree anon folios at the tail of the
> file's `min_seq` generation. However, this approach results in
> lazyfree folios being released in a LIFO manner, which conflicts
> with LRU behavior, as noted by Michal.
> 
> To address this, this patch proposes maintaining a separate list
> for lazyfree anon folios while keeping them classified under the
> "file" LRU type to minimize code changes. These lazyfree anon
> folios will still be counted as file folios and share the same
> generation with regular files. In the eviction path, the lazyfree
> list will be prioritized for scanning before the actual file
> LRU list.
It seems like a very feasible solution. I will conduct comparative tests
based on this patch and share the test results (it will take some time).
Thanks to Barry for providing the patch!
> 
> [1]
> https://lore.kernel.org/linux-mm/f29f64e29c08427b95e3df30a5770056@honor
> .com/
> 
> Signed-off-by: Barry Song <v-songbaohua@oppo.com>
> ---
>  include/linux/mm_inline.h |  5 +-
>  include/linux/mmzone.h    |  2 +-
>  mm/vmscan.c               | 97 +++++++++++++++++++++++----------------
>  3 files changed, 61 insertions(+), 43 deletions(-)
> 
> diff --git a/include/linux/mm_inline.h b/include/linux/mm_inline.h
> index f4fe593c1400..118d70ed3120 100644
> --- a/include/linux/mm_inline.h
> +++ b/include/linux/mm_inline.h
> @@ -225,6 +225,7 @@ static inline bool lru_gen_add_folio(struct lruvec
> *lruvec, struct folio *folio,
>  	int gen = folio_lru_gen(folio);
>  	int type = folio_is_file_lru(folio);
>  	int zone = folio_zonenum(folio);
> +	int lazyfree = type ? folio_test_anon(folio) : 0;
>  	struct lru_gen_folio *lrugen = &lruvec->lrugen;
> 
>  	VM_WARN_ON_ONCE_FOLIO(gen != -1, folio);
> @@ -262,9 +263,9 @@ static inline bool lru_gen_add_folio(struct lruvec
> *lruvec, struct folio *folio,
>  	lru_gen_update_size(lruvec, folio, -1, gen);
>  	/* for folio_rotate_reclaimable() */
>  	if (reclaiming)
> -		list_add_tail(&folio->lru, &lrugen->folios[gen][type][zone]);
> +		list_add_tail(&folio->lru, &lrugen->folios[gen][type + lazyfree][zone]);
>  	else
> -		list_add(&folio->lru, &lrugen->folios[gen][type][zone]);
> +		list_add(&folio->lru, &lrugen->folios[gen][type + lazyfree][zone]);
> 
>  	return true;
>  }
> diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
> index 17506e4a2835..5d2331778528 100644
> --- a/include/linux/mmzone.h
> +++ b/include/linux/mmzone.h
> @@ -434,7 +434,7 @@ struct lru_gen_folio {
>  	/* the birth time of each generation in jiffies */
>  	unsigned long timestamps[MAX_NR_GENS];
>  	/* the multi-gen LRU lists, lazily sorted on eviction */
> -	struct list_head
> folios[MAX_NR_GENS][ANON_AND_FILE][MAX_NR_ZONES];
> +	struct list_head folios[MAX_NR_GENS][ANON_AND_FILE +
> 1][MAX_NR_ZONES];
For better understanding and future scalability, we could use enum types
instead of bare numbers. Create a new type, such as: enum folio_type.
>  	/* the multi-gen LRU sizes, eventually consistent */
>  	long nr_pages[MAX_NR_GENS][ANON_AND_FILE][MAX_NR_ZONES];
>  	/* the exponential moving average of refaulted */
> diff --git a/mm/vmscan.c b/mm/vmscan.c
> index 96abf4a52382..9dc665dc6ba9 100644
> --- a/mm/vmscan.c
> +++ b/mm/vmscan.c
> @@ -3725,21 +3725,25 @@ static bool inc_min_seq(struct lruvec *lruvec, int
> type, bool can_swap)
> 
>  	/* prevent cold/hot inversion if force_scan is true */
>  	for (zone = 0; zone < MAX_NR_ZONES; zone++) {
> -		struct list_head *head = &lrugen->folios[old_gen][type][zone];
> +		int list_num = type ? 2 : 1;
> +		struct list_head *head;
> 
> -		while (!list_empty(head)) {
> -			struct folio *folio = lru_to_folio(head);
> +		for (int i = list_num - 1; i >= 0; i--) {
> +			head = &lrugen->folios[old_gen][type + i][zone];
> +			while (!list_empty(head)) {
> +				struct folio *folio = lru_to_folio(head);
> 
> -			VM_WARN_ON_ONCE_FOLIO(folio_test_unevictable(folio),
> folio);
> -			VM_WARN_ON_ONCE_FOLIO(folio_test_active(folio), folio);
> -			VM_WARN_ON_ONCE_FOLIO(folio_is_file_lru(folio) != type,
> folio);
> -			VM_WARN_ON_ONCE_FOLIO(folio_zonenum(folio) != zone,
> folio);
> +				VM_WARN_ON_ONCE_FOLIO(folio_test_unevictable(folio),
> folio);
> +				VM_WARN_ON_ONCE_FOLIO(folio_test_active(folio), folio);
> +				VM_WARN_ON_ONCE_FOLIO(folio_is_file_lru(folio) != type,
> folio);
> +				VM_WARN_ON_ONCE_FOLIO(folio_zonenum(folio) != zone,
> folio);
> 
> -			new_gen = folio_inc_gen(lruvec, folio, false);
> -			list_move_tail(&folio->lru,
> &lrugen->folios[new_gen][type][zone]);
> +				new_gen = folio_inc_gen(lruvec, folio, false);
> +				list_move_tail(&folio->lru, &lrugen->folios[new_gen][type +
> i][zone]);
> 
> -			if (!--remaining)
> -				return false;
> +				if (!--remaining)
> +					return false;
> +			}
>  		}
>  	}
>  done:
> @@ -4291,6 +4295,7 @@ static bool sort_folio(struct lruvec *lruvec, struct folio
> *folio, struct scan_c
>  	int refs = folio_lru_refs(folio);
>  	int tier = lru_tier_from_refs(refs);
>  	struct lru_gen_folio *lrugen = &lruvec->lrugen;
> +	int lazyfree = type ? folio_test_anon(folio) : 0;
> 
>  	VM_WARN_ON_ONCE_FOLIO(gen >= MAX_NR_GENS, folio);
> 
> @@ -4306,7 +4311,7 @@ static bool sort_folio(struct lruvec *lruvec, struct folio
> *folio, struct scan_c
> 
>  	/* promoted */
>  	if (gen != lru_gen_from_seq(lrugen->min_seq[type])) {
> -		list_move(&folio->lru, &lrugen->folios[gen][type][zone]);
> +		list_move(&folio->lru, &lrugen->folios[gen][type + lazyfree][zone]);
>  		return true;
>  	}
> 
> @@ -4315,7 +4320,7 @@ static bool sort_folio(struct lruvec *lruvec, struct folio
> *folio, struct scan_c
>  		int hist = lru_hist_from_seq(lrugen->min_seq[type]);
> 
>  		gen = folio_inc_gen(lruvec, folio, false);
> -		list_move_tail(&folio->lru, &lrugen->folios[gen][type][zone]);
> +		list_move_tail(&folio->lru, &lrugen->folios[gen][type +
> lazyfree][zone]);
> 
>  		WRITE_ONCE(lrugen->protected[hist][type][tier - 1],
>  			   lrugen->protected[hist][type][tier - 1] + delta);
> @@ -4325,7 +4330,7 @@ static bool sort_folio(struct lruvec *lruvec, struct folio
> *folio, struct scan_c
>  	/* ineligible */
>  	if (!folio_test_lru(folio) || zone > sc->reclaim_idx) {
>  		gen = folio_inc_gen(lruvec, folio, false);
> -		list_move_tail(&folio->lru, &lrugen->folios[gen][type][zone]);
> +		list_move_tail(&folio->lru, &lrugen->folios[gen][type +
> lazyfree][zone]);
>  		return true;
>  	}
> 
> @@ -4333,7 +4338,7 @@ static bool sort_folio(struct lruvec *lruvec, struct folio
> *folio, struct scan_c
>  	if (folio_test_locked(folio) || folio_test_writeback(folio) ||
>  	    (type == LRU_GEN_FILE && folio_test_dirty(folio))) {
>  		gen = folio_inc_gen(lruvec, folio, true);
> -		list_move(&folio->lru, &lrugen->folios[gen][type][zone]);
> +		list_move(&folio->lru, &lrugen->folios[gen][type + lazyfree][zone]);
>  		return true;
>  	}
> 
> @@ -4377,7 +4382,7 @@ static bool isolate_folio(struct lruvec *lruvec, struct
> folio *folio, struct sca
>  static int scan_folios(struct lruvec *lruvec, struct scan_control *sc,
>  		       int type, int tier, struct list_head *list)
>  {
> -	int i;
> +	int i, j;
>  	int gen;
>  	enum vm_event_item item;
>  	int sorted = 0;
> @@ -4399,33 +4404,38 @@ static int scan_folios(struct lruvec *lruvec, struct
> scan_control *sc,
>  		LIST_HEAD(moved);
>  		int skipped_zone = 0;
>  		int zone = (sc->reclaim_idx + i) % MAX_NR_ZONES;
> -		struct list_head *head = &lrugen->folios[gen][type][zone];
> -
> -		while (!list_empty(head)) {
> -			struct folio *folio = lru_to_folio(head);
> -			int delta = folio_nr_pages(folio);
> -
> -			VM_WARN_ON_ONCE_FOLIO(folio_test_unevictable(folio),
> folio);
> -			VM_WARN_ON_ONCE_FOLIO(folio_test_active(folio), folio);
> -			VM_WARN_ON_ONCE_FOLIO(folio_is_file_lru(folio) != type,
> folio);
> -			VM_WARN_ON_ONCE_FOLIO(folio_zonenum(folio) != zone,
> folio);
> -
> -			scanned += delta;
> +		int list_num = type ? 2 : 1;
> +		struct list_head *head;
> +
> +		for (j = list_num - 1; j >= 0; j--) {
> +			head = &lrugen->folios[gen][type + j][zone];
> +			while (!list_empty(head)) {
> +				struct folio *folio = lru_to_folio(head);
> +				int delta = folio_nr_pages(folio);
> +
> +				VM_WARN_ON_ONCE_FOLIO(folio_test_unevictable(folio),
> folio);
> +				VM_WARN_ON_ONCE_FOLIO(folio_test_active(folio), folio);
> +				VM_WARN_ON_ONCE_FOLIO(folio_is_file_lru(folio) != type,
> folio);
> +				VM_WARN_ON_ONCE_FOLIO(folio_zonenum(folio) != zone,
> folio);
> +
> +				scanned += delta;
> +
> +				if (sort_folio(lruvec, folio, sc, tier))
> +					sorted += delta;
> +				else if (isolate_folio(lruvec, folio, sc)) {
> +					list_add(&folio->lru, list);
> +					isolated += delta;
> +				} else {
> +					list_move(&folio->lru, &moved);
> +					skipped_zone += delta;
> +				}
> 
> -			if (sort_folio(lruvec, folio, sc, tier))
> -				sorted += delta;
> -			else if (isolate_folio(lruvec, folio, sc)) {
> -				list_add(&folio->lru, list);
> -				isolated += delta;
> -			} else {
> -				list_move(&folio->lru, &moved);
> -				skipped_zone += delta;
> +				if (!--remaining || max(isolated, skipped_zone) >=
> MIN_LRU_BATCH)
> +					goto isolate_done;
>  			}
> -
> -			if (!--remaining || max(isolated, skipped_zone) >=
> MIN_LRU_BATCH)
> -				break;
>  		}
> 
> +isolate_done:
>  		if (skipped_zone) {
>  			list_splice(&moved, head);
>  			__count_zid_vm_events(PGSCAN_SKIP, zone, skipped_zone);
> @@ -5586,8 +5596,15 @@ void lru_gen_init_lruvec(struct lruvec *lruvec)
>  	for (i = 0; i <= MIN_NR_GENS + 1; i++)
>  		lrugen->timestamps[i] = jiffies;
> 
> -	for_each_gen_type_zone(gen, type, zone)
> +	for_each_gen_type_zone(gen, type, zone) {
>  		INIT_LIST_HEAD(&lrugen->folios[gen][type][zone]);
> +		/*
> +		 * lazyfree anon folios have a separate list while using
> +		 * file as type
> +		 */
> +		if (type)
> +			INIT_LIST_HEAD(&lrugen->folios[gen][type + 1][zone]);
> +	}
> 
>  	if (mm_state)
>  		mm_state->seq = MIN_NR_GENS;
> --
> 2.39.3 (Apple Git-146)
Barry Song Sept. 20, 2024, 1:17 a.m. UTC | #4
On Wed, Sep 18, 2024 at 6:19 PM gaoxu <gaoxu2@honor.com> wrote:
>
> >
> > From: Barry Song <v-songbaohua@oppo.com>
> >
> > This follows up on the discussion regarding Gaoxu's work[1]. It's
> > unclear if there's still interest in implementing a separate LRU
> > list for lazyfree folios, but I decided to explore it out of
> > curiosity.
> >
> > According to Lokesh, MADV_FREE'd anon folios are expected to be
> > released earlier than file folios. One option, as implemented
> > by Gao Xu, is to place lazyfree anon folios at the tail of the
> > file's `min_seq` generation. However, this approach results in
> > lazyfree folios being released in a LIFO manner, which conflicts
> > with LRU behavior, as noted by Michal.
> >
> > To address this, this patch proposes maintaining a separate list
> > for lazyfree anon folios while keeping them classified under the
> > "file" LRU type to minimize code changes. These lazyfree anon
> > folios will still be counted as file folios and share the same
> > generation with regular files. In the eviction path, the lazyfree
> > list will be prioritized for scanning before the actual file
> > LRU list.
> It seems like a very feasible solution. I will conduct comparative tests
> based on this patch and share the test results (it will take some time).
> Thanks to Barry for providing the patch!

Thank you, I will await your test results.

> >
> > [1]
> > https://lore.kernel.org/linux-mm/f29f64e29c08427b95e3df30a5770056@honor
> > .com/
> >
> > Signed-off-by: Barry Song <v-songbaohua@oppo.com>
> > ---
> >  include/linux/mm_inline.h |  5 +-
> >  include/linux/mmzone.h    |  2 +-
> >  mm/vmscan.c               | 97 +++++++++++++++++++++++----------------
> >  3 files changed, 61 insertions(+), 43 deletions(-)
> >
> > diff --git a/include/linux/mm_inline.h b/include/linux/mm_inline.h
> > index f4fe593c1400..118d70ed3120 100644
> > --- a/include/linux/mm_inline.h
> > +++ b/include/linux/mm_inline.h
> > @@ -225,6 +225,7 @@ static inline bool lru_gen_add_folio(struct lruvec
> > *lruvec, struct folio *folio,
> >       int gen = folio_lru_gen(folio);
> >       int type = folio_is_file_lru(folio);
> >       int zone = folio_zonenum(folio);
> > +     int lazyfree = type ? folio_test_anon(folio) : 0;
> >       struct lru_gen_folio *lrugen = &lruvec->lrugen;
> >
> >       VM_WARN_ON_ONCE_FOLIO(gen != -1, folio);
> > @@ -262,9 +263,9 @@ static inline bool lru_gen_add_folio(struct lruvec
> > *lruvec, struct folio *folio,
> >       lru_gen_update_size(lruvec, folio, -1, gen);
> >       /* for folio_rotate_reclaimable() */
> >       if (reclaiming)
> > -             list_add_tail(&folio->lru, &lrugen->folios[gen][type][zone]);
> > +             list_add_tail(&folio->lru, &lrugen->folios[gen][type + lazyfree][zone]);
> >       else
> > -             list_add(&folio->lru, &lrugen->folios[gen][type][zone]);
> > +             list_add(&folio->lru, &lrugen->folios[gen][type + lazyfree][zone]);
> >
> >       return true;
> >  }
> > diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
> > index 17506e4a2835..5d2331778528 100644
> > --- a/include/linux/mmzone.h
> > +++ b/include/linux/mmzone.h
> > @@ -434,7 +434,7 @@ struct lru_gen_folio {
> >       /* the birth time of each generation in jiffies */
> >       unsigned long timestamps[MAX_NR_GENS];
> >       /* the multi-gen LRU lists, lazily sorted on eviction */
> > -     struct list_head
> > folios[MAX_NR_GENS][ANON_AND_FILE][MAX_NR_ZONES];
> > +     struct list_head folios[MAX_NR_GENS][ANON_AND_FILE +
> > 1][MAX_NR_ZONES];
> For better understanding and future scalability, could use enum types
> instead of numbers, Create a new type, such as: enum folio_type.

I'd rather follow the "trick" that Yu Zhao has been using, such as:
int type = folio_is_file_lru(folio);
while I agree that providing two macros as below
#define LRU_TYPE_ANON 0
#define LRU_TYPE_FILE   1

might improve readability with things like:

int list_num = (type  == LRU_TYPE_FILE) ? 2 : 1;

However, considering the code in a larger context, since type =
folio_is_file_lru(folio),
doesn't that imply that type is already set to file? Therefore, the comparison
type == LRU_TYPE_FILE seems redundant.

So, if we want to continue using this approach, it seems that there’s nothing
worth changing?

> >       /* the multi-gen LRU sizes, eventually consistent */
> >       long nr_pages[MAX_NR_GENS][ANON_AND_FILE][MAX_NR_ZONES];
> >       /* the exponential moving average of refaulted */
> > diff --git a/mm/vmscan.c b/mm/vmscan.c
> > index 96abf4a52382..9dc665dc6ba9 100644
> > --- a/mm/vmscan.c
> > +++ b/mm/vmscan.c
> > @@ -3725,21 +3725,25 @@ static bool inc_min_seq(struct lruvec *lruvec, int
> > type, bool can_swap)
> >
> >       /* prevent cold/hot inversion if force_scan is true */
> >       for (zone = 0; zone < MAX_NR_ZONES; zone++) {
> > -             struct list_head *head = &lrugen->folios[old_gen][type][zone];
> > +             int list_num = type ? 2 : 1;
> > +             struct list_head *head;
> >
> > -             while (!list_empty(head)) {
> > -                     struct folio *folio = lru_to_folio(head);
> > +             for (int i = list_num - 1; i >= 0; i--) {
> > +                     head = &lrugen->folios[old_gen][type + i][zone];
> > +                     while (!list_empty(head)) {
> > +                             struct folio *folio = lru_to_folio(head);
> >
> > -                     VM_WARN_ON_ONCE_FOLIO(folio_test_unevictable(folio),
> > folio);
> > -                     VM_WARN_ON_ONCE_FOLIO(folio_test_active(folio), folio);
> > -                     VM_WARN_ON_ONCE_FOLIO(folio_is_file_lru(folio) != type,
> > folio);
> > -                     VM_WARN_ON_ONCE_FOLIO(folio_zonenum(folio) != zone,
> > folio);
> > +                             VM_WARN_ON_ONCE_FOLIO(folio_test_unevictable(folio),
> > folio);
> > +                             VM_WARN_ON_ONCE_FOLIO(folio_test_active(folio), folio);
> > +                             VM_WARN_ON_ONCE_FOLIO(folio_is_file_lru(folio) != type,
> > folio);
> > +                             VM_WARN_ON_ONCE_FOLIO(folio_zonenum(folio) != zone,
> > folio);
> >
> > -                     new_gen = folio_inc_gen(lruvec, folio, false);
> > -                     list_move_tail(&folio->lru,
> > &lrugen->folios[new_gen][type][zone]);
> > +                             new_gen = folio_inc_gen(lruvec, folio, false);
> > +                             list_move_tail(&folio->lru, &lrugen->folios[new_gen][type +
> > i][zone]);
> >
> > -                     if (!--remaining)
> > -                             return false;
> > +                             if (!--remaining)
> > +                                     return false;
> > +                     }
> >               }
> >       }
> >  done:
> > @@ -4291,6 +4295,7 @@ static bool sort_folio(struct lruvec *lruvec, struct folio
> > *folio, struct scan_c
> >       int refs = folio_lru_refs(folio);
> >       int tier = lru_tier_from_refs(refs);
> >       struct lru_gen_folio *lrugen = &lruvec->lrugen;
> > +     int lazyfree = type ? folio_test_anon(folio) : 0;
> >
> >       VM_WARN_ON_ONCE_FOLIO(gen >= MAX_NR_GENS, folio);
> >
> > @@ -4306,7 +4311,7 @@ static bool sort_folio(struct lruvec *lruvec, struct folio
> > *folio, struct scan_c
> >
> >       /* promoted */
> >       if (gen != lru_gen_from_seq(lrugen->min_seq[type])) {
> > -             list_move(&folio->lru, &lrugen->folios[gen][type][zone]);
> > +             list_move(&folio->lru, &lrugen->folios[gen][type + lazyfree][zone]);
> >               return true;
> >       }
> >
> > @@ -4315,7 +4320,7 @@ static bool sort_folio(struct lruvec *lruvec, struct folio
> > *folio, struct scan_c
> >               int hist = lru_hist_from_seq(lrugen->min_seq[type]);
> >
> >               gen = folio_inc_gen(lruvec, folio, false);
> > -             list_move_tail(&folio->lru, &lrugen->folios[gen][type][zone]);
> > +             list_move_tail(&folio->lru, &lrugen->folios[gen][type +
> > lazyfree][zone]);
> >
> >               WRITE_ONCE(lrugen->protected[hist][type][tier - 1],
> >                          lrugen->protected[hist][type][tier - 1] + delta);
> > @@ -4325,7 +4330,7 @@ static bool sort_folio(struct lruvec *lruvec, struct folio
> > *folio, struct scan_c
> >       /* ineligible */
> >       if (!folio_test_lru(folio) || zone > sc->reclaim_idx) {
> >               gen = folio_inc_gen(lruvec, folio, false);
> > -             list_move_tail(&folio->lru, &lrugen->folios[gen][type][zone]);
> > +             list_move_tail(&folio->lru, &lrugen->folios[gen][type +
> > lazyfree][zone]);
> >               return true;
> >       }
> >
> > @@ -4333,7 +4338,7 @@ static bool sort_folio(struct lruvec *lruvec, struct folio
> > *folio, struct scan_c
> >       if (folio_test_locked(folio) || folio_test_writeback(folio) ||
> >           (type == LRU_GEN_FILE && folio_test_dirty(folio))) {
> >               gen = folio_inc_gen(lruvec, folio, true);
> > -             list_move(&folio->lru, &lrugen->folios[gen][type][zone]);
> > +             list_move(&folio->lru, &lrugen->folios[gen][type + lazyfree][zone]);
> >               return true;
> >       }
> >
> > @@ -4377,7 +4382,7 @@ static bool isolate_folio(struct lruvec *lruvec, struct
> > folio *folio, struct sca
> >  static int scan_folios(struct lruvec *lruvec, struct scan_control *sc,
> >                      int type, int tier, struct list_head *list)
> >  {
> > -     int i;
> > +     int i, j;
> >       int gen;
> >       enum vm_event_item item;
> >       int sorted = 0;
> > @@ -4399,33 +4404,38 @@ static int scan_folios(struct lruvec *lruvec, struct
> > scan_control *sc,
> >               LIST_HEAD(moved);
> >               int skipped_zone = 0;
> >               int zone = (sc->reclaim_idx + i) % MAX_NR_ZONES;
> > -             struct list_head *head = &lrugen->folios[gen][type][zone];
> > -
> > -             while (!list_empty(head)) {
> > -                     struct folio *folio = lru_to_folio(head);
> > -                     int delta = folio_nr_pages(folio);
> > -
> > -                     VM_WARN_ON_ONCE_FOLIO(folio_test_unevictable(folio),
> > folio);
> > -                     VM_WARN_ON_ONCE_FOLIO(folio_test_active(folio), folio);
> > -                     VM_WARN_ON_ONCE_FOLIO(folio_is_file_lru(folio) != type,
> > folio);
> > -                     VM_WARN_ON_ONCE_FOLIO(folio_zonenum(folio) != zone,
> > folio);
> > -
> > -                     scanned += delta;
> > +             int list_num = type ? 2 : 1;
> > +             struct list_head *head;
> > +
> > +             for (j = list_num - 1; j >= 0; j--) {
> > +                     head = &lrugen->folios[gen][type + j][zone];
> > +                     while (!list_empty(head)) {
> > +                             struct folio *folio = lru_to_folio(head);
> > +                             int delta = folio_nr_pages(folio);
> > +
> > +                             VM_WARN_ON_ONCE_FOLIO(folio_test_unevictable(folio),
> > folio);
> > +                             VM_WARN_ON_ONCE_FOLIO(folio_test_active(folio), folio);
> > +                             VM_WARN_ON_ONCE_FOLIO(folio_is_file_lru(folio) != type,
> > folio);
> > +                             VM_WARN_ON_ONCE_FOLIO(folio_zonenum(folio) != zone,
> > folio);
> > +
> > +                             scanned += delta;
> > +
> > +                             if (sort_folio(lruvec, folio, sc, tier))
> > +                                     sorted += delta;
> > +                             else if (isolate_folio(lruvec, folio, sc)) {
> > +                                     list_add(&folio->lru, list);
> > +                                     isolated += delta;
> > +                             } else {
> > +                                     list_move(&folio->lru, &moved);
> > +                                     skipped_zone += delta;
> > +                             }
> >
> > -                     if (sort_folio(lruvec, folio, sc, tier))
> > -                             sorted += delta;
> > -                     else if (isolate_folio(lruvec, folio, sc)) {
> > -                             list_add(&folio->lru, list);
> > -                             isolated += delta;
> > -                     } else {
> > -                             list_move(&folio->lru, &moved);
> > -                             skipped_zone += delta;
> > +                             if (!--remaining || max(isolated, skipped_zone) >=
> > MIN_LRU_BATCH)
> > +                                     goto isolate_done;
> >                       }
> > -
> > -                     if (!--remaining || max(isolated, skipped_zone) >=
> > MIN_LRU_BATCH)
> > -                             break;
> >               }
> >
> > +isolate_done:
> >               if (skipped_zone) {
> >                       list_splice(&moved, head);
> >                       __count_zid_vm_events(PGSCAN_SKIP, zone, skipped_zone);
> > @@ -5586,8 +5596,15 @@ void lru_gen_init_lruvec(struct lruvec *lruvec)
> >       for (i = 0; i <= MIN_NR_GENS + 1; i++)
> >               lrugen->timestamps[i] = jiffies;
> >
> > -     for_each_gen_type_zone(gen, type, zone)
> > +     for_each_gen_type_zone(gen, type, zone) {
> >               INIT_LIST_HEAD(&lrugen->folios[gen][type][zone]);
> > +             /*
> > +              * lazyfree anon folios have a separate list while using
> > +              * file as type
> > +              */
> > +             if (type)
> > +                     INIT_LIST_HEAD(&lrugen->folios[gen][type + 1][zone]);
> > +     }
> >
> >       if (mm_state)
> >               mm_state->seq = MIN_NR_GENS;
> > --
> > 2.39.3 (Apple Git-146)
>

Thanks
Barry
Barry Song Sept. 20, 2024, 1:23 a.m. UTC | #5
On Wed, Sep 18, 2024 at 12:02 AM David Hildenbrand <david@redhat.com> wrote:
>
> On 14.09.24 08:37, Barry Song wrote:
> > From: Barry Song <v-songbaohua@oppo.com>
> >
> > This follows up on the discussion regarding Gaoxu's work[1]. It's
> > unclear if there's still interest in implementing a separate LRU
> > list for lazyfree folios, but I decided to explore it out of
> > curiosity.
> >
> > According to Lokesh, MADV_FREE'd anon folios are expected to be
> > released earlier than file folios. One option, as implemented
> > by Gao Xu, is to place lazyfree anon folios at the tail of the
> > file's `min_seq` generation. However, this approach results in
> > lazyfree folios being released in a LIFO manner, which conflicts
> > with LRU behavior, as noted by Michal.
> >
> > To address this, this patch proposes maintaining a separate list
> > for lazyfree anon folios while keeping them classified under the
> > "file" LRU type to minimize code changes. These lazyfree anon
> > folios will still be counted as file folios and share the same
> > generation with regular files. In the eviction path, the lazyfree
> > list will be prioritized for scanning before the actual file
> > LRU list.
> >
>
> What's the downside of another LRU list? Do we have any experience on that?

Essentially, the goal is to address the downsides of using a single LRU list for
files and lazyfree anonymous pages - significantly more file re-faults.

I'm not entirely clear on the downsides of having an additional LRU
list. While it
does increase complexity, it doesn't seem to be significant.

Let's wait for Gaoxu's test results before deciding on the next steps.
I was just
curious about how difficult it would be to add a separate list, so I
took two hours
to explore it :-)

>
> --
> Cheers,
>
> David / dhildenb
>

Thanks
Barry
Minchan Kim Sept. 23, 2024, 10:19 p.m. UTC | #6
On Fri, Sep 20, 2024 at 01:23:57PM +1200, Barry Song wrote:
> On Wed, Sep 18, 2024 at 12:02 AM David Hildenbrand <david@redhat.com> wrote:
> >
> > On 14.09.24 08:37, Barry Song wrote:
> > > From: Barry Song <v-songbaohua@oppo.com>
> > >
> > > This follows up on the discussion regarding Gaoxu's work[1]. It's
> > > unclear if there's still interest in implementing a separate LRU
> > > list for lazyfree folios, but I decided to explore it out of
> > > curiosity.
> > >
> > > According to Lokesh, MADV_FREE'd anon folios are expected to be
> > > released earlier than file folios. One option, as implemented
> > > by Gao Xu, is to place lazyfree anon folios at the tail of the
> > > file's `min_seq` generation. However, this approach results in
> > > lazyfree folios being released in a LIFO manner, which conflicts
> > > with LRU behavior, as noted by Michal.
> > >
> > > To address this, this patch proposes maintaining a separate list
> > > for lazyfree anon folios while keeping them classified under the
> > > "file" LRU type to minimize code changes. These lazyfree anon
> > > folios will still be counted as file folios and share the same
> > > generation with regular files. In the eviction path, the lazyfree
> > > list will be prioritized for scanning before the actual file
> > > LRU list.
> > >
> >
> > What's the downside of another LRU list? Do we have any experience on that?
> 
> Essentially, the goal is to address the downsides of using a single LRU list for
> files and lazyfree anonymous pages - seriously more files re-faults.
> 
> I'm not entirely clear on the downsides of having an additional LRU
> list. While it
> does increase complexity, it doesn't seem to be significant.

It's not catastrophic[1]. I prefer the idea of an additional LRU
because it offers flexibility for various potential use cases[2].

orthogonal topic (but may be of interest to someone)

My main interest in a new LRU list is to enable the system to maintain a
quickly reclaimable memory pool and expose the size to the admin with 
a knob to decide how many memory pool they want.

This pool would consist of clean, unmapped pages from both the page cache
and/or the swap cache. This would allow the system to reclaim memory quickly
when free memory is low, at the cost of minor fault overhead.

[1] https://lore.kernel.org/linux-kernel//1448006568-16031-15-git-send-email-minchan@kernel.org/
[2] https://lkml.org/lkml/2012/6/19/24
Barry Song Sept. 23, 2024, 10:38 p.m. UTC | #7
On Tue, Sep 24, 2024 at 10:19 AM Minchan Kim <minchan@kernel.org> wrote:
>
> On Fri, Sep 20, 2024 at 01:23:57PM +1200, Barry Song wrote:
> > On Wed, Sep 18, 2024 at 12:02 AM David Hildenbrand <david@redhat.com> wrote:
> > >
> > > On 14.09.24 08:37, Barry Song wrote:
> > > > From: Barry Song <v-songbaohua@oppo.com>
> > > >
> > > > This follows up on the discussion regarding Gaoxu's work[1]. It's
> > > > unclear if there's still interest in implementing a separate LRU
> > > > list for lazyfree folios, but I decided to explore it out of
> > > > curiosity.
> > > >
> > > > According to Lokesh, MADV_FREE'd anon folios are expected to be
> > > > released earlier than file folios. One option, as implemented
> > > > by Gao Xu, is to place lazyfree anon folios at the tail of the
> > > > file's `min_seq` generation. However, this approach results in
> > > > lazyfree folios being released in a LIFO manner, which conflicts
> > > > with LRU behavior, as noted by Michal.
> > > >
> > > > To address this, this patch proposes maintaining a separate list
> > > > for lazyfree anon folios while keeping them classified under the
> > > > "file" LRU type to minimize code changes. These lazyfree anon
> > > > folios will still be counted as file folios and share the same
> > > > generation with regular files. In the eviction path, the lazyfree
> > > > list will be prioritized for scanning before the actual file
> > > > LRU list.
> > > >
> > >
> > > What's the downside of another LRU list? Do we have any experience on that?
> >
> > Essentially, the goal is to address the downsides of using a single LRU list for
> > files and lazyfree anonymous pages - seriously more files re-faults.
> >
> > I'm not entirely clear on the downsides of having an additional LRU
> > list. While it
> > does increase complexity, it doesn't seem to be significant.
>
> It's not a catastrophic[1]. I prefer the idea of an additional LRU
> because it offers flexibility for various potential use cases[2].
>
> orthgonal topic(but may be interest for someone)
>
> My main interest in a new LRU list is to enable the system to maintain a
> quickly reclaimable memory pool and expose the size to the admin with
> a knob to decide how many memory pool they want.
>
> This pool would consist of clean, unmapped pages from both the page cache
> and/or the swap cache. This would allow the system to reclaim memory quickly
> when free memory is low, at the cost of minor fault overhead.

My current implementation only handles the MADV_FREE anonymous case. If they
are placed in a single LRU, they should be able to be reclaimed very
quickly, simply
discarded without needing to be swapped out.

I've been thinking about the issue of unmapped pagecache recently.
These unmapped
pagecaches can be reclaimed much faster than mapped ones, especially
when the latter
have a high mapcount and incur significant rmap costs. However, many
pagecaches are
inherently unmapped (e.g., from syscall read). If they are placed in a
single LRU, the
challenge would be comparing the age of unmapped pagecache with mapped ones.
Currently, with the mglru tier mechanism, frequently accessed unmapped
pagecaches
have a chance to be placed in a spot where they are harder to reclaim.

personally I am quite interested in putting unmapped pagecache
together as right now
reclamation could be like this:

lru list:
unmapped pagecache(A) - mapped pagecached(B) - unmapped pagecache(C) - mapped
pagecached with huge mapcount(D)

A and C can be reclaimed with zero cost but they have to wait for D and B.

But the question is that if make two lists:

list1: A - C
list2: B - D

How can we ensure that A and C won't experience many refaults, even though
reclaiming them would be cost-free? Or that B and D might actually be
colder than
A and C?

If this isn't an issue, I'd be very interested in implementing it. Any thoughts?

>
> [1] https://lore.kernel.org/linux-kernel//1448006568-16031-15-git-send-email-minchan@kernel.org/
> [2] https://lkml.org/lkml/2012/6/19/24

Thanks
Barry
Minchan Kim Sept. 24, 2024, 8:12 p.m. UTC | #8
On Tue, Sep 24, 2024 at 10:38:37AM +1200, Barry Song wrote:
> On Tue, Sep 24, 2024 at 10:19 AM Minchan Kim <minchan@kernel.org> wrote:
> >
> > On Fri, Sep 20, 2024 at 01:23:57PM +1200, Barry Song wrote:
> > > On Wed, Sep 18, 2024 at 12:02 AM David Hildenbrand <david@redhat.com> wrote:
> > > >
> > > > On 14.09.24 08:37, Barry Song wrote:
> > > > > From: Barry Song <v-songbaohua@oppo.com>
> > > > >
> > > > > This follows up on the discussion regarding Gaoxu's work[1]. It's
> > > > > unclear if there's still interest in implementing a separate LRU
> > > > > list for lazyfree folios, but I decided to explore it out of
> > > > > curiosity.
> > > > >
> > > > > According to Lokesh, MADV_FREE'd anon folios are expected to be
> > > > > released earlier than file folios. One option, as implemented
> > > > > by Gao Xu, is to place lazyfree anon folios at the tail of the
> > > > > file's `min_seq` generation. However, this approach results in
> > > > > lazyfree folios being released in a LIFO manner, which conflicts
> > > > > with LRU behavior, as noted by Michal.
> > > > >
> > > > > To address this, this patch proposes maintaining a separate list
> > > > > for lazyfree anon folios while keeping them classified under the
> > > > > "file" LRU type to minimize code changes. These lazyfree anon
> > > > > folios will still be counted as file folios and share the same
> > > > > generation with regular files. In the eviction path, the lazyfree
> > > > > list will be prioritized for scanning before the actual file
> > > > > LRU list.
> > > > >
> > > >
> > > > What's the downside of another LRU list? Do we have any experience on that?
> > >
> > > Essentially, the goal is to address the downsides of using a single LRU list for
> > > files and lazyfree anonymous pages - seriously more files re-faults.
> > >
> > > I'm not entirely clear on the downsides of having an additional LRU
> > > list. While it
> > > does increase complexity, it doesn't seem to be significant.
> >
> > It's not a catastrophic[1]. I prefer the idea of an additional LRU
> > because it offers flexibility for various potential use cases[2].
> >
> > orthgonal topic(but may be interest for someone)
> >
> > My main interest in a new LRU list is to enable the system to maintain a
> > quickly reclaimable memory pool and expose the size to the admin with
> > a knob to decide how many memory pool they want.
> >
> > This pool would consist of clean, unmapped pages from both the page cache
> > and/or the swap cache. This would allow the system to reclaim memory quickly
> > when free memory is low, at the cost of minor fault overhead.
> 
> My current implementation only handles the MADV_FREE anonymous case. If they
> are placed in a single LRU, they should be able to be reclaimed very
> quickly, simply
> discarded without needing to be swapped out.
> 
> I've been thinking about the issue of unmapped pagecache recently.
> These unmapped
> pagecaches can be reclaimed much faster than mapped ones, especially
> when the latter
> have a high mapcount and incur significant rmap costs. However, many
> pagecaches are
> inherently unmapped (e.g., from syscall read). If they are placed in a
> single LRU, the
> challenge would be comparing the age of unmapped pagecache with mapped ones.
> Currently, with the mglru tier mechanism, frequently accessed unmapped
> pagecaches
> have a chance to be placed in a spot where they are harder to reclaim.
> 
> personally I am quite interested in putting unmapped pagecache
> together as right now
> reclamation could be like this:
> 
> lru list:
> unmapped pagecache(A) - mapped pagecached(B) - unmapped pagecache(C) - mapped
> pagecached with huge mapcount(D)
> 
> A and C can be reclaimed with zero cost but they have to wait for D and B.
> 
> But the question is that if make two lists:
> 
> list1: A - C
> list2: B - D
> 
> How can we ensure that A and C won't experience many refaults, even though
> reclaiming them would be cost-free? Or that B and D might actually be
> colder than
> A and C?
> 
> If this isn't an issue, I'd be very interested in implementing it. Any thoughts?

My proposal involves the following:

1. Introduce an "easily reclaimable" LRU list. This list would hold pages
   that can be quickly freed without significant overhead.

2. Implement a parameter to control the size of this list. This allows for
   system tuning based on available memory and performance requirements.

3. Modify kswapd behavior to utilize this list. When kswapd is awakened due
   to memory pressure, it should attempt to drop those pages first to refill
   free pages up to the high watermark by first reclaiming.

4. Before kswapd goes to sleep, it should scan the tail of the LRU list and
   move cold pages to the easily reclaimable list, unmapping them from the
   page table.

5. Whenever page cache hit, move the page into evictable LRU.

This approach allows the system to maintain a pool of readily available
memory, mitigating the "aging" problem. The trade-off is the potential for
minor page faults and LRU movement overheads if these pages in ez_reclaimable
LRU are accessed again.

Furthermore, we could put some asynchronous writeback pages (e.g., swap
out or writeback the fs pages) into the list, too.
Currently, what we are doing is rotate those pages back to head of LRU
and once writeback is done, move the page to the tail of LRU again.
We can simply put the page into ez_reclaimable LRU without rotating
back and forth.
gaoxu Oct. 15, 2024, 10:03 a.m. UTC | #9
> 
> On Wed, Sep 18, 2024 at 12:02 AM David Hildenbrand <david@redhat.com>
> wrote:
> >
> > On 14.09.24 08:37, Barry Song wrote:
> > > From: Barry Song <v-songbaohua@oppo.com>
> > >
> > > This follows up on the discussion regarding Gaoxu's work[1]. It's
> > > unclear if there's still interest in implementing a separate LRU
> > > list for lazyfree folios, but I decided to explore it out of
> > > curiosity.
> > >
> > > According to Lokesh, MADV_FREE'd anon folios are expected to be
> > > released earlier than file folios. One option, as implemented by Gao
> > > Xu, is to place lazyfree anon folios at the tail of the file's
> > > `min_seq` generation. However, this approach results in lazyfree
> > > folios being released in a LIFO manner, which conflicts with LRU
> > > behavior, as noted by Michal.
> > >
> > > To address this, this patch proposes maintaining a separate list for
> > > lazyfree anon folios while keeping them classified under the "file"
> > > LRU type to minimize code changes. These lazyfree anon folios will
> > > still be counted as file folios and share the same generation with
> > > regular files. In the eviction path, the lazyfree list will be
> > > prioritized for scanning before the actual file LRU list.
> > >
> >
> > What's the downside of another LRU list? Do we have any experience on that?
> 
> Essentially, the goal is to address the downsides of using a single LRU list for files
> and lazyfree anonymous pages - seriously more files re-faults.
> 
> I'm not entirely clear on the downsides of having an additional LRU list. While it
> does increase complexity, it doesn't seem to be significant.
> 
> Let's wait for Gaoxu's test results before deciding on the next steps.
> I was just
> curious about how difficult it would be to add a separate list, so I took two hours
> to explore it :-)
Hi song,
I'm very sorry, various reasons combined have caused the delay in the results.

Basic version:android V (enable Android ART use MADV_FREE)
Test cases: 60 apps repeatedly restarted, tested for 8 hours;
The test results are as follows:
        workingset_refault_anon   workingset_refault_file
base        42016805                92010542
patch       19834873                49383572
% diff       -52.79%                  -46.33%

Additionally, a comparative test was conducted on
add-lazyfree-folio-to-lru-tail.patch[1], and the results are as follows:
               workingset_refault_anon   workingset_refault_file
lazyfree-tail        20313395                 52203061
patch             19834873                 49383572
% diff              -2.36%                    -5.40%

From the results, it can be seen that this patch is very beneficial and
better than the results in [1]; it can solve the performance issue of high
IO caused by extensive use of MADV_FREE on the Android platform.

Test case notes: There is a discrepancy between the test results mentioned in
[1] and the current test results because the test cases are different. The test
case used in [1] involves actions such as clicking and swiping within the app
after it starts; For the sake of convenience and result stability, the current
test case only involves app startup without clicking and swiping, and the number
of apps has been increased (30->60).

1. https://lore.kernel.org/all/f29f64e29c08427b95e3df30a5770056@honor.com/T/#u
> 
> >
> > --
> > Cheers,
> >
> > David / dhildenb
> >
> 
> Thanks
> Barry
Barry Song Oct. 15, 2024, 8:10 p.m. UTC | #10
On Tue, Oct 15, 2024 at 11:03 PM gaoxu <gaoxu2@honor.com> wrote:
>
> >
> > On Wed, Sep 18, 2024 at 12:02 AM David Hildenbrand <david@redhat.com>
> > wrote:
> > >
> > > On 14.09.24 08:37, Barry Song wrote:
> > > > From: Barry Song <v-songbaohua@oppo.com>
> > > >
> > > > This follows up on the discussion regarding Gaoxu's work[1]. It's
> > > > unclear if there's still interest in implementing a separate LRU
> > > > list for lazyfree folios, but I decided to explore it out of
> > > > curiosity.
> > > >
> > > > According to Lokesh, MADV_FREE'd anon folios are expected to be
> > > > released earlier than file folios. One option, as implemented by Gao
> > > > Xu, is to place lazyfree anon folios at the tail of the file's
> > > > `min_seq` generation. However, this approach results in lazyfree
> > > > folios being released in a LIFO manner, which conflicts with LRU
> > > > behavior, as noted by Michal.
> > > >
> > > > To address this, this patch proposes maintaining a separate list for
> > > > lazyfree anon folios while keeping them classified under the "file"
> > > > LRU type to minimize code changes. These lazyfree anon folios will
> > > > still be counted as file folios and share the same generation with
> > > > regular files. In the eviction path, the lazyfree list will be
> > > > prioritized for scanning before the actual file LRU list.
> > > >
> > >
> > > What's the downside of another LRU list? Do we have any experience on that?
> >
> > Essentially, the goal is to address the downsides of using a single LRU list for files
> > and lazyfree anonymous pages - seriously more files re-faults.
> >
> > I'm not entirely clear on the downsides of having an additional LRU list. While it
> > does increase complexity, it doesn't seem to be significant.
> >
> > Let's wait for Gaoxu's test results before deciding on the next steps.
> > I was just
> > curious about how difficult it would be to add a separate list, so I took two hours
> > to explore it :-)
> Hi song,
> I'm very sorry, various reasons combined have caused the delay in the results.
>
> Basic version:android V (enable Android ART use MADV_FREE)
> Test cases: 60 apps repeatedly restarted, tested for 8 hours;
> The test results are as follows:
>         workingset_refault_anon   workingset_refault_file
> base        42016805                92010542
> patch       19834873                49383572
> % diff       -52.79%                  -46.33%
>
> Additionally, a comparative test was conducted on
> add-lazyfree-folio-to-lru-tail.patch[1], and the results are as follows:
>                workingset_refault_anon   workingset_refault_file
> lazyfree-tail        20313395                 52203061
> patch             19834873                 49383572
> % diff              -2.36%                    -5.40%
>
> From the results, it can be seen that this patch is very beneficial and
> better than the results in [1]; it can solve the performance issue of high
> IO caused by extensive use of MADV_FREE on the Android platform.
>

Thank you for the testing and data. The results look promising. Would you
mind if I send a v2 with the test data and your tag included in the changelog?
I mean:

Tested-by: Gao Xu <gaoxu2@hihonor.com>

> Test case notes: There is a discrepancy between the test results mentioned in
> [1] and the current test results because the test cases are different. The test
> case used in [1] involves actions such as clicking and swiping within the app
> after it starts; For the sake of convenience and result stability, the current
> test case only involves app startup without clicking and swiping, and the number
> of apps has been increased (30->60).
>
> 1. https://lore.kernel.org/all/f29f64e29c08427b95e3df30a5770056@honor.com/T/#u
> >
> > >
> > > --
> > > Cheers,
> > >
> > > David / dhildenb
> > >
> >

Thanks
Barry
gaoxu Oct. 16, 2024, 1:25 a.m. UTC | #11
> -----邮件原件-----
> 发件人: Barry Song <21cnbao@gmail.com>
> 发送时间: 2024年10月16日 4:10
> 收件人: gaoxu <gaoxu2@honor.com>
> 抄送: David Hildenbrand <david@redhat.com>; akpm@linux-foundation.org;
> linux-mm@kvack.org; mhocko@suse.com; hailong.liu@oppo.com;
> kaleshsingh@google.com; linux-kernel@vger.kernel.org;
> lokeshgidra@google.com; ngeoffray@google.com; shli@fb.com;
> surenb@google.com; yuzhao@google.com; minchan@kernel.org; Barry Song
> <v-songbaohua@oppo.com>
> 主题: Re: [PATCH RFC] mm: mglru: provide a separate list for lazyfree anon folios
> 
> On Tue, Oct 15, 2024 at 11:03 PM gaoxu <gaoxu2@honor.com> wrote:
> >
> > >
> > > On Wed, Sep 18, 2024 at 12:02 AM David Hildenbrand
> > > <david@redhat.com>
> > > wrote:
> > > >
> > > > On 14.09.24 08:37, Barry Song wrote:
> > > > > From: Barry Song <v-songbaohua@oppo.com>
> > > > >
> > > > > This follows up on the discussion regarding Gaoxu's work[1].
> > > > > It's unclear if there's still interest in implementing a
> > > > > separate LRU list for lazyfree folios, but I decided to explore
> > > > > it out of curiosity.
> > > > >
> > > > > According to Lokesh, MADV_FREE'd anon folios are expected to be
> > > > > released earlier than file folios. One option, as implemented by
> > > > > Gao Xu, is to place lazyfree anon folios at the tail of the
> > > > > file's `min_seq` generation. However, this approach results in
> > > > > lazyfree folios being released in a LIFO manner, which conflicts
> > > > > with LRU behavior, as noted by Michal.
> > > > >
> > > > > To address this, this patch proposes maintaining a separate list
> > > > > for lazyfree anon folios while keeping them classified under the "file"
> > > > > LRU type to minimize code changes. These lazyfree anon folios
> > > > > will still be counted as file folios and share the same
> > > > > generation with regular files. In the eviction path, the
> > > > > lazyfree list will be prioritized for scanning before the actual file LRU list.
> > > > >
> > > >
> > > > What's the downside of another LRU list? Do we have any experience on
> that?
> > >
> > > Essentially, the goal is to address the downsides of using a single
> > > LRU list for files and lazyfree anonymous pages - seriously more files re-faults.
> > >
> > > I'm not entirely clear on the downsides of having an additional LRU
> > > list. While it does increase complexity, it doesn't seem to be significant.
> > >
> > > Let's wait for Gaoxu's test results before deciding on the next steps.
> > > I was just
> > > curious about how difficult it would be to add a separate list, so I
> > > took two hours to explore it :-)
> > Hi Song,
> > I'm very sorry; various factors combined to cause the delay in the results.
> >
> > Basic version:android V (enable Android ART use MADV_FREE) Test cases:
> > 60 apps repeatedly restarted, tested for 8 hours; The test results are
> > as follows:
> >         workingset_refault_anon   workingset_refault_file
> > base        42016805                92010542
> > patch       19834873                49383572
> > % diff       -52.79%                  -46.33%
> >
> > Additionally, a comparative test was conducted on
> > add-lazyfree-folio-to-lru-tail.patch[1], and the results are as follows:
> >                workingset_refault_anon   workingset_refault_file
> > lazyfree-tail        20313395                 52203061
> > patch             19834873                 49383572
> > % diff              -2.36%                    -5.40%
> >
> > From the results, it can be seen that this patch is very beneficial
> > and better than the results in [1]; it can solve the performance issue
> > of high I/O caused by the extensive use of MADV_FREE on the Android platform.
> >
> 
> Thank you for the testing and data. The results look promising. Would you mind
> if I send a v2 with the test data and your tag included in the changelog?
> I mean:
> 
> Tested-by: Gao Xu <gaoxu2@hihonor.com>
Of course not, I'd be happy to.

Due to IO performance issues, Android has reverted the ART use of MADV_FREE;
it is expected that once the issue is resolved, Android ART will re-enable MADV_FREE 
and promote the use of MADV_FREE in more modules.
> 
> > Test case notes: There is a discrepancy between the test results
> > mentioned in [1] and the current test results because the test cases
> > are different. The test case used in [1] involves actions such as
> > clicking and swiping within the app after it starts; For the sake of
> > convenience and result stability, the current test case only involves
> > app startup without clicking and swiping, and the number of apps has been
> increased (30->60).
> >
> > 1.
> > https://lore.kernel.org/all/f29f64e29c08427b95e3df30a5770056@honor.com
> > /T/#u
> > >
> > > >
> > > > --
> > > > Cheers,
> > > >
> > > > David / dhildenb
> > > >
> > >
> 
> Thanks
> Barry
Barry Song Oct. 16, 2024, 2:54 a.m. UTC | #12
>> +++ b/include/linux/mmzone.h
>> @@ -434,7 +434,7 @@ struct lru_gen_folio {
>>  	/* the birth time of each generation in jiffies */
>>  	unsigned long timestamps[MAX_NR_GENS];
>>  	/* the multi-gen LRU lists, lazily sorted on eviction */
>> -	struct list_head
>> folios[MAX_NR_GENS][ANON_AND_FILE][MAX_NR_ZONES];
>> +	struct list_head folios[MAX_NR_GENS][ANON_AND_FILE +
>> 1][MAX_NR_ZONES];
> This also divides lazyfree folios into MAX_NR_GENS generations.
> The gen of a lazyfree folio depends on its gen in the anon list before
> it is marked as lazyfree. Could it happen that lazyfree folios are
> released in an order that is not consistent with the order in which
> they were marked?

No, this separate list ensures that lazyfree folios are released in the
same order in which they were marked as lazyfree. Note that any newly
marked lazyfree folio is always placed in the generation most likely to
be reclaimed, regardless of the list.

/*
 * Convert an anon folio to lazyfree state and requeue it on the LRU.
 *
 * Only acts on folios that are anonymous, swap-backed, not in the swap
 * cache, and evictable; anything else is left untouched.
 *
 * The folio is deleted from its current LRU list, stripped of the
 * active/referenced state, and re-added after swapbacked is cleared.
 * NOTE(review): re-adding after folio_clear_swapbacked() presumably
 * requeues it on the file side of the LRU (folio_is_file_lru() appears
 * to key off the swapbacked flag) — confirm against lruvec_add_folio().
 */
static void lru_lazyfree(struct lruvec *lruvec, struct folio *folio)
{
        /* snapshot the size up front; used below for event accounting */
        long nr_pages = folio_nr_pages(folio);

        if (!folio_test_anon(folio) || !folio_test_swapbacked(folio) ||
            folio_test_swapcache(folio) || folio_test_unevictable(folio))
                return;

        lruvec_del_folio(lruvec, folio);
        folio_clear_active(folio);
        folio_clear_referenced(folio);
        /*
         * Lazyfree folios are clean anonymous folios.  They have
         * the swapbacked flag cleared, to distinguish them from normal
         * anonymous folios
         */
        folio_clear_swapbacked(folio);
        lruvec_add_folio(lruvec, folio);

        /* account PGLAZYFREE both globally and for the owning memcg */
        __count_vm_events(PGLAZYFREE, nr_pages);
        __count_memcg_events(lruvec_memcg(lruvec), PGLAZYFREE, nr_pages);
}

Thanks
Barry
diff mbox series

Patch

diff --git a/include/linux/mm_inline.h b/include/linux/mm_inline.h
index f4fe593c1400..118d70ed3120 100644
--- a/include/linux/mm_inline.h
+++ b/include/linux/mm_inline.h
@@ -225,6 +225,7 @@  static inline bool lru_gen_add_folio(struct lruvec *lruvec, struct folio *folio,
 	int gen = folio_lru_gen(folio);
 	int type = folio_is_file_lru(folio);
 	int zone = folio_zonenum(folio);
+	int lazyfree = type ? folio_test_anon(folio) : 0;
 	struct lru_gen_folio *lrugen = &lruvec->lrugen;
 
 	VM_WARN_ON_ONCE_FOLIO(gen != -1, folio);
@@ -262,9 +263,9 @@  static inline bool lru_gen_add_folio(struct lruvec *lruvec, struct folio *folio,
 	lru_gen_update_size(lruvec, folio, -1, gen);
 	/* for folio_rotate_reclaimable() */
 	if (reclaiming)
-		list_add_tail(&folio->lru, &lrugen->folios[gen][type][zone]);
+		list_add_tail(&folio->lru, &lrugen->folios[gen][type + lazyfree][zone]);
 	else
-		list_add(&folio->lru, &lrugen->folios[gen][type][zone]);
+		list_add(&folio->lru, &lrugen->folios[gen][type + lazyfree][zone]);
 
 	return true;
 }
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 17506e4a2835..5d2331778528 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -434,7 +434,7 @@  struct lru_gen_folio {
 	/* the birth time of each generation in jiffies */
 	unsigned long timestamps[MAX_NR_GENS];
 	/* the multi-gen LRU lists, lazily sorted on eviction */
-	struct list_head folios[MAX_NR_GENS][ANON_AND_FILE][MAX_NR_ZONES];
+	struct list_head folios[MAX_NR_GENS][ANON_AND_FILE + 1][MAX_NR_ZONES];
 	/* the multi-gen LRU sizes, eventually consistent */
 	long nr_pages[MAX_NR_GENS][ANON_AND_FILE][MAX_NR_ZONES];
 	/* the exponential moving average of refaulted */
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 96abf4a52382..9dc665dc6ba9 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -3725,21 +3725,25 @@  static bool inc_min_seq(struct lruvec *lruvec, int type, bool can_swap)
 
 	/* prevent cold/hot inversion if force_scan is true */
 	for (zone = 0; zone < MAX_NR_ZONES; zone++) {
-		struct list_head *head = &lrugen->folios[old_gen][type][zone];
+		int list_num = type ? 2 : 1;
+		struct list_head *head;
 
-		while (!list_empty(head)) {
-			struct folio *folio = lru_to_folio(head);
+		for (int i = list_num - 1; i >= 0; i--) {
+			head = &lrugen->folios[old_gen][type + i][zone];
+			while (!list_empty(head)) {
+				struct folio *folio = lru_to_folio(head);
 
-			VM_WARN_ON_ONCE_FOLIO(folio_test_unevictable(folio), folio);
-			VM_WARN_ON_ONCE_FOLIO(folio_test_active(folio), folio);
-			VM_WARN_ON_ONCE_FOLIO(folio_is_file_lru(folio) != type, folio);
-			VM_WARN_ON_ONCE_FOLIO(folio_zonenum(folio) != zone, folio);
+				VM_WARN_ON_ONCE_FOLIO(folio_test_unevictable(folio), folio);
+				VM_WARN_ON_ONCE_FOLIO(folio_test_active(folio), folio);
+				VM_WARN_ON_ONCE_FOLIO(folio_is_file_lru(folio) != type, folio);
+				VM_WARN_ON_ONCE_FOLIO(folio_zonenum(folio) != zone, folio);
 
-			new_gen = folio_inc_gen(lruvec, folio, false);
-			list_move_tail(&folio->lru, &lrugen->folios[new_gen][type][zone]);
+				new_gen = folio_inc_gen(lruvec, folio, false);
+				list_move_tail(&folio->lru, &lrugen->folios[new_gen][type + i][zone]);
 
-			if (!--remaining)
-				return false;
+				if (!--remaining)
+					return false;
+			}
 		}
 	}
 done:
@@ -4291,6 +4295,7 @@  static bool sort_folio(struct lruvec *lruvec, struct folio *folio, struct scan_c
 	int refs = folio_lru_refs(folio);
 	int tier = lru_tier_from_refs(refs);
 	struct lru_gen_folio *lrugen = &lruvec->lrugen;
+	int lazyfree = type ? folio_test_anon(folio) : 0;
 
 	VM_WARN_ON_ONCE_FOLIO(gen >= MAX_NR_GENS, folio);
 
@@ -4306,7 +4311,7 @@  static bool sort_folio(struct lruvec *lruvec, struct folio *folio, struct scan_c
 
 	/* promoted */
 	if (gen != lru_gen_from_seq(lrugen->min_seq[type])) {
-		list_move(&folio->lru, &lrugen->folios[gen][type][zone]);
+		list_move(&folio->lru, &lrugen->folios[gen][type + lazyfree][zone]);
 		return true;
 	}
 
@@ -4315,7 +4320,7 @@  static bool sort_folio(struct lruvec *lruvec, struct folio *folio, struct scan_c
 		int hist = lru_hist_from_seq(lrugen->min_seq[type]);
 
 		gen = folio_inc_gen(lruvec, folio, false);
-		list_move_tail(&folio->lru, &lrugen->folios[gen][type][zone]);
+		list_move_tail(&folio->lru, &lrugen->folios[gen][type + lazyfree][zone]);
 
 		WRITE_ONCE(lrugen->protected[hist][type][tier - 1],
 			   lrugen->protected[hist][type][tier - 1] + delta);
@@ -4325,7 +4330,7 @@  static bool sort_folio(struct lruvec *lruvec, struct folio *folio, struct scan_c
 	/* ineligible */
 	if (!folio_test_lru(folio) || zone > sc->reclaim_idx) {
 		gen = folio_inc_gen(lruvec, folio, false);
-		list_move_tail(&folio->lru, &lrugen->folios[gen][type][zone]);
+		list_move_tail(&folio->lru, &lrugen->folios[gen][type + lazyfree][zone]);
 		return true;
 	}
 
@@ -4333,7 +4338,7 @@  static bool sort_folio(struct lruvec *lruvec, struct folio *folio, struct scan_c
 	if (folio_test_locked(folio) || folio_test_writeback(folio) ||
 	    (type == LRU_GEN_FILE && folio_test_dirty(folio))) {
 		gen = folio_inc_gen(lruvec, folio, true);
-		list_move(&folio->lru, &lrugen->folios[gen][type][zone]);
+		list_move(&folio->lru, &lrugen->folios[gen][type + lazyfree][zone]);
 		return true;
 	}
 
@@ -4377,7 +4382,7 @@  static bool isolate_folio(struct lruvec *lruvec, struct folio *folio, struct sca
 static int scan_folios(struct lruvec *lruvec, struct scan_control *sc,
 		       int type, int tier, struct list_head *list)
 {
-	int i;
+	int i, j;
 	int gen;
 	enum vm_event_item item;
 	int sorted = 0;
@@ -4399,33 +4404,38 @@  static int scan_folios(struct lruvec *lruvec, struct scan_control *sc,
 		LIST_HEAD(moved);
 		int skipped_zone = 0;
 		int zone = (sc->reclaim_idx + i) % MAX_NR_ZONES;
-		struct list_head *head = &lrugen->folios[gen][type][zone];
-
-		while (!list_empty(head)) {
-			struct folio *folio = lru_to_folio(head);
-			int delta = folio_nr_pages(folio);
-
-			VM_WARN_ON_ONCE_FOLIO(folio_test_unevictable(folio), folio);
-			VM_WARN_ON_ONCE_FOLIO(folio_test_active(folio), folio);
-			VM_WARN_ON_ONCE_FOLIO(folio_is_file_lru(folio) != type, folio);
-			VM_WARN_ON_ONCE_FOLIO(folio_zonenum(folio) != zone, folio);
-
-			scanned += delta;
+		int list_num = type ? 2 : 1;
+		struct list_head *head;
+
+		for (j = list_num - 1; j >= 0; j--) {
+			head = &lrugen->folios[gen][type + j][zone];
+			while (!list_empty(head)) {
+				struct folio *folio = lru_to_folio(head);
+				int delta = folio_nr_pages(folio);
+
+				VM_WARN_ON_ONCE_FOLIO(folio_test_unevictable(folio), folio);
+				VM_WARN_ON_ONCE_FOLIO(folio_test_active(folio), folio);
+				VM_WARN_ON_ONCE_FOLIO(folio_is_file_lru(folio) != type, folio);
+				VM_WARN_ON_ONCE_FOLIO(folio_zonenum(folio) != zone, folio);
+
+				scanned += delta;
+
+				if (sort_folio(lruvec, folio, sc, tier))
+					sorted += delta;
+				else if (isolate_folio(lruvec, folio, sc)) {
+					list_add(&folio->lru, list);
+					isolated += delta;
+				} else {
+					list_move(&folio->lru, &moved);
+					skipped_zone += delta;
+				}
 
-			if (sort_folio(lruvec, folio, sc, tier))
-				sorted += delta;
-			else if (isolate_folio(lruvec, folio, sc)) {
-				list_add(&folio->lru, list);
-				isolated += delta;
-			} else {
-				list_move(&folio->lru, &moved);
-				skipped_zone += delta;
+				if (!--remaining || max(isolated, skipped_zone) >= MIN_LRU_BATCH)
+					goto isolate_done;
 			}
-
-			if (!--remaining || max(isolated, skipped_zone) >= MIN_LRU_BATCH)
-				break;
 		}
 
+isolate_done:
 		if (skipped_zone) {
 			list_splice(&moved, head);
 			__count_zid_vm_events(PGSCAN_SKIP, zone, skipped_zone);
@@ -5586,8 +5596,15 @@  void lru_gen_init_lruvec(struct lruvec *lruvec)
 	for (i = 0; i <= MIN_NR_GENS + 1; i++)
 		lrugen->timestamps[i] = jiffies;
 
-	for_each_gen_type_zone(gen, type, zone)
+	for_each_gen_type_zone(gen, type, zone) {
 		INIT_LIST_HEAD(&lrugen->folios[gen][type][zone]);
+		/*
+		 * lazyfree anon folios have a separate list while using
+		 * file as type
+		 */
+		if (type)
+			INIT_LIST_HEAD(&lrugen->folios[gen][type + 1][zone]);
+	}
 
 	if (mm_state)
 		mm_state->seq = MIN_NR_GENS;