diff mbox series

[v2] vmscan: add trace events for lru_gen

Message ID 20230921062206.14429-1-jaewon31.kim@samsung.com (mailing list archive)
State New
Headers show
Series [v2] vmscan: add trace events for lru_gen | expand

Commit Message

Jaewon Kim Sept. 21, 2023, 6:22 a.m. UTC
Like the legacy LRU, lru_gen needs some trace events for
debugging.

This commit introduces 2 trace events.
  trace_mm_vmscan_lru_gen_scan
  trace_mm_vmscan_lru_gen_evict

Each event is similar to the following legacy events.
  trace_mm_vmscan_lru_isolate,
  trace_mm_vmscan_lru_shrink_[in]active

Here's an example
  mm_vmscan_lru_gen_scan: isolate_mode=0 classzone=1 order=9 nr_requested=4096 nr_scanned=431 nr_skipped=0 nr_taken=55 lru=anon
  mm_vmscan_lru_gen_evict: nid=0 nr_reclaimed=42 nr_dirty=0 nr_writeback=0 nr_congested=0 nr_immediate=0 nr_activate_anon=13 nr_activate_file=0 nr_ref_keep=0 nr_unmap_fail=0 priority=2 flags=RECLAIM_WB_ANON|RECLAIM_WB_ASYNC
  mm_vmscan_lru_gen_scan: isolate_mode=0 classzone=1 order=9 nr_requested=4096 nr_scanned=66 nr_skipped=0 nr_taken=64 lru=file
  mm_vmscan_lru_gen_evict: nid=0 nr_reclaimed=62 nr_dirty=0 nr_writeback=0 nr_congested=0 nr_immediate=0 nr_activate_anon=0 nr_activate_file=2 nr_ref_keep=0 nr_unmap_fail=0 priority=2 flags=RECLAIM_WB_FILE|RECLAIM_WB_ASYNC

Signed-off-by: Jaewon Kim <jaewon31.kim@samsung.com>
---
v2: use condition and make it aligned
v1: introduce trace events
---
 include/trace/events/mmflags.h |  5 ++
 include/trace/events/vmscan.h  | 98 ++++++++++++++++++++++++++++++++++
 mm/vmscan.c                    | 17 ++++--
 3 files changed, 115 insertions(+), 5 deletions(-)

Comments

T.J. Mercier Sept. 21, 2023, 4:12 p.m. UTC | #1
On Wed, Sep 20, 2023 at 11:19 PM Jaewon Kim <jaewon31.kim@samsung.com> wrote:
>
> As the legacy lru provides, the lru_gen needs some trace events for
> debugging.
>
Hi Jaewon, thanks for adding this.

> This commit introduces 2 trace events.
>   trace_mm_vmscan_lru_gen_scan
>   trace_mm_vmscan_lru_gen_evict
>
> Each event is similar to the following legacy events.
>   trace_mm_vmscan_lru_isolate,
>   trace_mm_vmscan_lru_shrink_[in]active
>
> Here's an example
>   mm_vmscan_lru_gen_scan: isolate_mode=0 classzone=1 order=9 nr_requested=4096 nr_scanned=431 nr_skipped=0 nr_taken=55 lru=anon
>   mm_vmscan_lru_gen_evict: nid=0 nr_reclaimed=42 nr_dirty=0 nr_writeback=0 nr_congested=0 nr_immediate=0 nr_activate_anon=13 nr_activate_file=0 nr_ref_keep=0 nr_unmap_fail=0 priority=2 flags=RECLAIM_WB_ANON|RECLAIM_WB_ASYNC
>   mm_vmscan_lru_gen_scan: isolate_mode=0 classzone=1 order=9 nr_requested=4096 nr_scanned=66 nr_skipped=0 nr_taken=64 lru=file
>   mm_vmscan_lru_gen_evict: nid=0 nr_reclaimed=62 nr_dirty=0 nr_writeback=0 nr_congested=0 nr_immediate=0 nr_activate_anon=0 nr_activate_file=2 nr_ref_keep=0 nr_unmap_fail=0 priority=2 flags=RECLAIM_WB_FILE|RECLAIM_WB_ASYNC
>
> Signed-off-by: Jaewon Kim <jaewon31.kim@samsung.com>
> ---
> v2: use condition and make it aligned
> v1: introduce trace events
> ---
>  include/trace/events/mmflags.h |  5 ++
>  include/trace/events/vmscan.h  | 98 ++++++++++++++++++++++++++++++++++
>  mm/vmscan.c                    | 17 ++++--
>  3 files changed, 115 insertions(+), 5 deletions(-)
>
> diff --git a/include/trace/events/mmflags.h b/include/trace/events/mmflags.h
> index 1478b9dd05fa..44e9b38f83e7 100644
> --- a/include/trace/events/mmflags.h
> +++ b/include/trace/events/mmflags.h
> @@ -274,6 +274,10 @@ IF_HAVE_VM_SOFTDIRTY(VM_SOFTDIRTY, "softdirty"     )               \
>                 EM (LRU_ACTIVE_FILE, "active_file") \
>                 EMe(LRU_UNEVICTABLE, "unevictable")
>
> +#define LRU_GEN_NAMES          \
> +               EM (LRU_GEN_ANON, "anon") \
> +               EMe(LRU_GEN_FILE, "file")
> +
>  /*
>   * First define the enums in the above macros to be exported to userspace
>   * via TRACE_DEFINE_ENUM().
> @@ -288,6 +292,7 @@ COMPACTION_PRIORITY
>  /* COMPACTION_FEEDBACK are defines not enums. Not needed here. */
>  ZONE_TYPE
>  LRU_NAMES
> +LRU_GEN_NAMES
>
>  /*
>   * Now redefine the EM() and EMe() macros to map the enums to the strings
> diff --git a/include/trace/events/vmscan.h b/include/trace/events/vmscan.h
> index d2123dd960d5..f0c3a4bd72db 100644
> --- a/include/trace/events/vmscan.h
> +++ b/include/trace/events/vmscan.h
> @@ -327,6 +327,57 @@ TRACE_EVENT(mm_vmscan_lru_isolate,
>                 __print_symbolic(__entry->lru, LRU_NAMES))
>  );
>
> +TRACE_EVENT_CONDITION(mm_vmscan_lru_gen_scan,
> +       TP_PROTO(int highest_zoneidx,
> +               int order,
> +               unsigned long nr_requested,
> +               unsigned long nr_scanned,
> +               unsigned long nr_skipped,
> +               unsigned long nr_taken,
> +               isolate_mode_t isolate_mode,
> +               int lru),
> +
> +       TP_ARGS(highest_zoneidx, order, nr_requested, nr_scanned, nr_skipped, nr_taken, isolate_mode, lru),
> +
> +       TP_CONDITION(nr_scanned),
> +
> +       TP_STRUCT__entry(
> +               __field(int, highest_zoneidx)
> +               __field(int, order)
> +               __field(unsigned long, nr_requested)
> +               __field(unsigned long, nr_scanned)
> +               __field(unsigned long, nr_skipped)
> +               __field(unsigned long, nr_taken)
> +               __field(unsigned int, isolate_mode)
> +               __field(int, lru)
> +       ),
> +
> +       TP_fast_assign(
> +               __entry->highest_zoneidx = highest_zoneidx;
> +               __entry->order = order;
> +               __entry->nr_requested = nr_requested;
> +               __entry->nr_scanned = nr_scanned;
> +               __entry->nr_skipped = nr_skipped;
> +               __entry->nr_taken = nr_taken;
> +               __entry->isolate_mode = (__force unsigned int)isolate_mode;
> +               __entry->lru = lru;
> +       ),
> +
> +       /*
> +        * classzone is previous name of the highest_zoneidx.
> +        * Reason not to change it is the ABI requirement of the tracepoint.
> +        */
> +       TP_printk("isolate_mode=%d classzone=%d order=%d nr_requested=%lu nr_scanned=%lu nr_skipped=%lu nr_taken=%lu lru=%s",
> +               __entry->isolate_mode,
> +               __entry->highest_zoneidx,
> +               __entry->order,
> +               __entry->nr_requested,
> +               __entry->nr_scanned,
> +               __entry->nr_skipped,
> +               __entry->nr_taken,
> +               __print_symbolic(__entry->lru, LRU_GEN_NAMES))
> +);
> +
>  TRACE_EVENT(mm_vmscan_write_folio,
>
>         TP_PROTO(struct folio *folio),
> @@ -437,6 +488,53 @@ TRACE_EVENT(mm_vmscan_lru_shrink_active,
>                 show_reclaim_flags(__entry->reclaim_flags))
>  );
>
> +TRACE_EVENT(mm_vmscan_lru_gen_evict,
> +
> +       TP_PROTO(int nid, unsigned long nr_reclaimed,
> +               struct reclaim_stat *stat, int priority, int file),
> +
> +       TP_ARGS(nid, nr_reclaimed, stat, priority, file),
> +
> +       TP_STRUCT__entry(
> +               __field(unsigned long, nr_reclaimed)
> +               __field(unsigned long, nr_dirty)
> +               __field(unsigned long, nr_writeback)
> +               __field(unsigned long, nr_congested)
> +               __field(unsigned long, nr_immediate)
> +               __field(unsigned int, nr_activate0)
> +               __field(unsigned int, nr_activate1)
> +               __field(unsigned long, nr_ref_keep)
> +               __field(unsigned long, nr_unmap_fail)
> +               __field(int, nid)
> +               __field(int, priority)
> +               __field(int, reclaim_flags)
> +       ),
> +
> +       TP_fast_assign(
> +               __entry->nid = nid;
> +               __entry->nr_reclaimed = nr_reclaimed;
> +               __entry->nr_dirty = stat->nr_dirty;
> +               __entry->nr_writeback = stat->nr_writeback;
> +               __entry->nr_congested = stat->nr_congested;
> +               __entry->nr_immediate = stat->nr_immediate;
> +               __entry->nr_activate0 = stat->nr_activate[0];
> +               __entry->nr_activate1 = stat->nr_activate[1];
> +               __entry->nr_ref_keep = stat->nr_ref_keep;
> +               __entry->nr_unmap_fail = stat->nr_unmap_fail;
> +               __entry->priority = priority;
> +               __entry->reclaim_flags = trace_reclaim_flags(file);
> +       ),
> +
> +       TP_printk("nid=%d nr_reclaimed=%ld nr_dirty=%ld nr_writeback=%ld nr_congested=%ld nr_immediate=%ld nr_activate_anon=%d nr_activate_file=%d nr_ref_keep=%ld nr_unmap_fail=%ld priority=%d flags=%s",

Many of these values are unsigned so I think many of these format
specifiers should be %lu instead of %ld.

> +               __entry->nid, __entry->nr_reclaimed,
> +               __entry->nr_dirty, __entry->nr_writeback,
> +               __entry->nr_congested, __entry->nr_immediate,
> +               __entry->nr_activate0, __entry->nr_activate1,
> +               __entry->nr_ref_keep, __entry->nr_unmap_fail,
> +               __entry->priority,
> +               show_reclaim_flags(__entry->reclaim_flags))
> +);
> +
>  TRACE_EVENT(mm_vmscan_node_reclaim_begin,
>
>         TP_PROTO(int nid, int order, gfp_t gfp_flags),
> diff --git a/mm/vmscan.c b/mm/vmscan.c
> index 6f13394b112e..f453a0f8ceef 100644
> --- a/mm/vmscan.c
> +++ b/mm/vmscan.c
> @@ -5005,6 +5005,7 @@ static int scan_folios(struct lruvec *lruvec, struct scan_control *sc,
>         int sorted = 0;
>         int scanned = 0;
>         int isolated = 0;
> +       int skipped = 0;
>         int remaining = MAX_LRU_BATCH;
>         struct lru_gen_folio *lrugen = &lruvec->lrugen;
>         struct mem_cgroup *memcg = lruvec_memcg(lruvec);
> @@ -5018,7 +5019,7 @@ static int scan_folios(struct lruvec *lruvec, struct scan_control *sc,
>
>         for (i = MAX_NR_ZONES; i > 0; i--) {
>                 LIST_HEAD(moved);
> -               int skipped = 0;
> +               int skipped_zone = 0;
>                 int zone = (sc->reclaim_idx + i) % MAX_NR_ZONES;
>                 struct list_head *head = &lrugen->folios[gen][type][zone];
>
> @@ -5040,16 +5041,17 @@ static int scan_folios(struct lruvec *lruvec, struct scan_control *sc,
>                                 isolated += delta;
>                         } else {
>                                 list_move(&folio->lru, &moved);
> -                               skipped += delta;
> +                               skipped_zone += delta;
>                         }
>
> -                       if (!--remaining || max(isolated, skipped) >= MIN_LRU_BATCH)
> +                       if (!--remaining || max(isolated, skipped_zone) >= MIN_LRU_BATCH)
>                                 break;
>                 }
>
> -               if (skipped) {
> +               if (skipped_zone) {
>                         list_splice(&moved, head);
> -                       __count_zid_vm_events(PGSCAN_SKIP, zone, skipped);
> +                       __count_zid_vm_events(PGSCAN_SKIP, zone, skipped_zone);
> +                       skipped += skipped_zone;
>                 }
>
>                 if (!remaining || isolated >= MIN_LRU_BATCH)
> @@ -5065,6 +5067,9 @@ static int scan_folios(struct lruvec *lruvec, struct scan_control *sc,
>         __count_memcg_events(memcg, PGREFILL, sorted);
>         __count_vm_events(PGSCAN_ANON + type, isolated);
>
> +       trace_mm_vmscan_lru_gen_scan(sc->reclaim_idx, sc->order, MAX_LRU_BATCH,
> +                       scanned, skipped, isolated,
> +                       sc->may_unmap ? 0 : ISOLATE_UNMAPPED, type);
>         /*
>          * There might not be eligible folios due to reclaim_idx. Check the
>          * remaining to prevent livelock if it's not making progress.
> @@ -5194,6 +5199,8 @@ static int evict_folios(struct lruvec *lruvec, struct scan_control *sc, int swap
>  retry:
>         reclaimed = shrink_folio_list(&list, pgdat, sc, &stat, false);
>         sc->nr_reclaimed += reclaimed;
> +       trace_mm_vmscan_lru_gen_evict(pgdat->node_id, reclaimed, &stat,
> +                                     sc->priority, type);
>
>         list_for_each_entry_safe_reverse(folio, next, &list, lru) {
>                 if (!folio_evictable(folio)) {
> --
> 2.17.1
>
Steven Rostedt Sept. 21, 2023, 4:22 p.m. UTC | #2
On Thu, 21 Sep 2023 09:12:30 -0700
"T.J. Mercier" <tjmercier@google.com> wrote:

> > +       TP_fast_assign(
> > +               __entry->nid = nid;
> > +               __entry->nr_reclaimed = nr_reclaimed;
> > +               __entry->nr_dirty = stat->nr_dirty;
> > +               __entry->nr_writeback = stat->nr_writeback;
> > +               __entry->nr_congested = stat->nr_congested;
> > +               __entry->nr_immediate = stat->nr_immediate;
> > +               __entry->nr_activate0 = stat->nr_activate[0];
> > +               __entry->nr_activate1 = stat->nr_activate[1];
> > +               __entry->nr_ref_keep = stat->nr_ref_keep;
> > +               __entry->nr_unmap_fail = stat->nr_unmap_fail;
> > +               __entry->priority = priority;
> > +               __entry->reclaim_flags = trace_reclaim_flags(file);
> > +       ),
> > +
> > +       TP_printk("nid=%d nr_reclaimed=%ld nr_dirty=%ld nr_writeback=%ld nr_congested=%ld nr_immediate=%ld nr_activate_anon=%d nr_activate_file=%d nr_ref_keep=%ld nr_unmap_fail=%ld priority=%d flags=%s",  
> 
> Many of these values are unsigned so I think many of these format
> specifiers should be %lu instead of %ld.

Other than this, from the tracing POV:

Reviewed-by: Steven Rostedt (Google) <rostedt@goodmis.org>

-- Steve
Jaewon Kim Sept. 22, 2023, 2:27 a.m. UTC | #3
>On Thu, 21 Sep 2023 09:12:30 -0700                                                                                                                                                                               
>"T.J. Mercier" <tjmercier@google.com> wrote:                                                                                                                                                                     
>                                                                                                                                                                                                                 
>> > +       TP_fast_assign(                                                                                                                                                                                      
>> > +               __entry->nid = nid;                                                                                                                                                                          
>> > +               __entry->nr_reclaimed = nr_reclaimed;                                                                                                                                                        
>> > +               __entry->nr_dirty = stat->nr_dirty;                                                                                                                                                          
>> > +               __entry->nr_writeback = stat->nr_writeback;                                                                                                                                                  
>> > +               __entry->nr_congested = stat->nr_congested;                                                                                                                                                  
>> > +               __entry->nr_immediate = stat->nr_immediate;                                                                                                                                                  
>> > +               __entry->nr_activate0 = stat->nr_activate[0];                                                                                                                                                
>> > +               __entry->nr_activate1 = stat->nr_activate[1];                                                                                                                                                
>> > +               __entry->nr_ref_keep = stat->nr_ref_keep;                                                                                                                                                    
>> > +               __entry->nr_unmap_fail = stat->nr_unmap_fail;                                                                                                                                                
>> > +               __entry->priority = priority;                                                                                                                                                                
>> > +               __entry->reclaim_flags = trace_reclaim_flags(file);                                                                                                                                          
>> > +       ),                                                                                                                                                                                                   
>> > +                                                                                                                                                                                                            
>> > +       TP_printk("nid=%d nr_reclaimed=%ld nr_dirty=%ld nr_writeback=%ld nr_congested=%ld nr_immediate=%ld nr_activate_anon=%d nr_activate_file=%d nr_ref_keep=%ld nr_unmap_fail=%ld priority=%d flags=%s",  
>>                                                                                                                                                                                                                
>> Many of these values are unsigned so I think many of these format                                                                                                                                              
>> specifiers should be %lu instead of %ld.                                                                                                                                                                       

Hello T.J.,
Thank you for your comment.
As you expected, I took this from the legacy LRU trace.
I've changed it as you recommended.
I also changed the isolate_mode format specifier from %d to %u. Please let me know if that change is not actually needed.

--- a/include/trace/events/vmscan.h
+++ b/include/trace/events/vmscan.h
@@ -367,7 +367,7 @@ TRACE_EVENT_CONDITION(mm_vmscan_lru_gen_scan,
         * classzone is previous name of the highest_zoneidx.
         * Reason not to change it is the ABI requirement of the tracepoint.
         */
-       TP_printk("isolate_mode=%d classzone=%d order=%d nr_requested=%lu nr_scanned=%lu nr_skipped=%lu nr_taken=%lu lru=%s",
+       TP_printk("isolate_mode=%u classzone=%d order=%d nr_requested=%lu nr_scanned=%lu nr_skipped=%lu nr_taken=%lu lru=%s",
                __entry->isolate_mode,
                __entry->highest_zoneidx,
                __entry->order,
@@ -525,7 +525,7 @@ TRACE_EVENT(mm_vmscan_lru_gen_evict,
                __entry->reclaim_flags = trace_reclaim_flags(file);
        ),
 
-       TP_printk("nid=%d nr_reclaimed=%ld nr_dirty=%ld nr_writeback=%ld nr_congested=%ld nr_immediate=%ld nr_activate_anon=%d nr_activate_file=%d nr_ref_keep=%ld nr_unmap_fail=%ld priority=%d flags=%s",
+       TP_printk("nid=%d nr_reclaimed=%lu nr_dirty=%lu nr_writeback=%lu nr_congested=%lu nr_immediate=%lu nr_activate_anon=%u nr_activate_file=%u nr_ref_keep=%lu nr_unmap_fail=%lu priority=%d flags=%s",
                __entry->nid, __entry->nr_reclaimed,
                __entry->nr_dirty, __entry->nr_writeback,
                __entry->nr_congested, __entry->nr_immediate,

>                                                                                                                                                                                                                 
>Other than this, from the tracing POV:                                                                                                                                                                           
>                                                                                                                                                                                                                 
>Reviewed-by: Steven Rostedt (Google) <rostedt@goodmis.org>                                                                                                                                                       


Hello Steven,
Thank you for your Reviewed-by; I will include it in my v3 patch.

>                                                                                                                                                                                                                 
>-- Steve
T.J. Mercier Sept. 23, 2023, 3:26 p.m. UTC | #4
On Thu, Sep 21, 2023 at 7:27 PM 김재원 <jaewon31.kim@samsung.com> wrote:
>
> >On Thu, 21 Sep 2023 09:12:30 -0700
> >"T.J. Mercier" <tjmercier@google.com> wrote:
> >
> >> > +       TP_fast_assign(
> >> > +               __entry->nid = nid;
> >> > +               __entry->nr_reclaimed = nr_reclaimed;
> >> > +               __entry->nr_dirty = stat->nr_dirty;
> >> > +               __entry->nr_writeback = stat->nr_writeback;
> >> > +               __entry->nr_congested = stat->nr_congested;
> >> > +               __entry->nr_immediate = stat->nr_immediate;
> >> > +               __entry->nr_activate0 = stat->nr_activate[0];
> >> > +               __entry->nr_activate1 = stat->nr_activate[1];
> >> > +               __entry->nr_ref_keep = stat->nr_ref_keep;
> >> > +               __entry->nr_unmap_fail = stat->nr_unmap_fail;
> >> > +               __entry->priority = priority;
> >> > +               __entry->reclaim_flags = trace_reclaim_flags(file);
> >> > +       ),
> >> > +
> >> > +       TP_printk("nid=%d nr_reclaimed=%ld nr_dirty=%ld nr_writeback=%ld nr_congested=%ld nr_immediate=%ld nr_activate_anon=%d nr_activate_file=%d nr_ref_keep=%ld nr_unmap_fail=%ld priority=%d flags=%s",
> >>
> >> Many of these values are unsigned so I think many of these format
> >> specifiers should be %lu instead of %ld.
>
> Hello T.J.
> Thank you for your comment
> As you expected I got this from the legacy lru trace.
> I've changed as you recommended.
> Actually I changed isolate_mode, too. Please let me know if this is not actually needed.
>
Great, looks good to me.
Reviewed-by: T.J. Mercier <tjmercier@google.com>

> --- a/include/trace/events/vmscan.h
> +++ b/include/trace/events/vmscan.h
> @@ -367,7 +367,7 @@ TRACE_EVENT_CONDITION(mm_vmscan_lru_gen_scan,
>          * classzone is previous name of the highest_zoneidx.
>          * Reason not to change it is the ABI requirement of the tracepoint.
>          */
> -       TP_printk("isolate_mode=%d classzone=%d order=%d nr_requested=%lu nr_scanned=%lu nr_skipped=%lu nr_taken=%lu lru=%s",
> +       TP_printk("isolate_mode=%u classzone=%d order=%d nr_requested=%lu nr_scanned=%lu nr_skipped=%lu nr_taken=%lu lru=%s",
>                 __entry->isolate_mode,
>                 __entry->highest_zoneidx,
>                 __entry->order,
> @@ -525,7 +525,7 @@ TRACE_EVENT(mm_vmscan_lru_gen_evict,
>                 __entry->reclaim_flags = trace_reclaim_flags(file);
>         ),
>
> -       TP_printk("nid=%d nr_reclaimed=%ld nr_dirty=%ld nr_writeback=%ld nr_congested=%ld nr_immediate=%ld nr_activate_anon=%d nr_activate_file=%d nr_ref_keep=%ld nr_unmap_fail=%ld priority=%d flags=%s",
> +       TP_printk("nid=%d nr_reclaimed=%lu nr_dirty=%lu nr_writeback=%lu nr_congested=%lu nr_immediate=%lu nr_activate_anon=%u nr_activate_file=%u nr_ref_keep=%lu nr_unmap_fail=%lu priority=%d flags=%s",
>                 __entry->nid, __entry->nr_reclaimed,
>                 __entry->nr_dirty, __entry->nr_writeback,
>                 __entry->nr_congested, __entry->nr_immediate,
>
> >
> >Other than this, from the tracing POV:
> >
> >Reviewed-by: Steven Rostedt (Google) <rostedt@goodmis.org>
>
>
> Hello Steven
> I've appreciated your Reviewed-by, let me take this on my next v3 patch.
>
> >
> >-- Steve
diff mbox series

Patch

diff --git a/include/trace/events/mmflags.h b/include/trace/events/mmflags.h
index 1478b9dd05fa..44e9b38f83e7 100644
--- a/include/trace/events/mmflags.h
+++ b/include/trace/events/mmflags.h
@@ -274,6 +274,10 @@  IF_HAVE_VM_SOFTDIRTY(VM_SOFTDIRTY,	"softdirty"	)		\
 		EM (LRU_ACTIVE_FILE, "active_file") \
 		EMe(LRU_UNEVICTABLE, "unevictable")
 
+#define LRU_GEN_NAMES		\
+		EM (LRU_GEN_ANON, "anon") \
+		EMe(LRU_GEN_FILE, "file")
+
 /*
  * First define the enums in the above macros to be exported to userspace
  * via TRACE_DEFINE_ENUM().
@@ -288,6 +292,7 @@  COMPACTION_PRIORITY
 /* COMPACTION_FEEDBACK are defines not enums. Not needed here. */
 ZONE_TYPE
 LRU_NAMES
+LRU_GEN_NAMES
 
 /*
  * Now redefine the EM() and EMe() macros to map the enums to the strings
diff --git a/include/trace/events/vmscan.h b/include/trace/events/vmscan.h
index d2123dd960d5..f0c3a4bd72db 100644
--- a/include/trace/events/vmscan.h
+++ b/include/trace/events/vmscan.h
@@ -327,6 +327,57 @@  TRACE_EVENT(mm_vmscan_lru_isolate,
 		__print_symbolic(__entry->lru, LRU_NAMES))
 );
 
+TRACE_EVENT_CONDITION(mm_vmscan_lru_gen_scan,
+	TP_PROTO(int highest_zoneidx,
+		int order,
+		unsigned long nr_requested,
+		unsigned long nr_scanned,
+		unsigned long nr_skipped,
+		unsigned long nr_taken,
+		isolate_mode_t isolate_mode,
+		int lru),
+
+	TP_ARGS(highest_zoneidx, order, nr_requested, nr_scanned, nr_skipped, nr_taken, isolate_mode, lru),
+
+	TP_CONDITION(nr_scanned),
+
+	TP_STRUCT__entry(
+		__field(int, highest_zoneidx)
+		__field(int, order)
+		__field(unsigned long, nr_requested)
+		__field(unsigned long, nr_scanned)
+		__field(unsigned long, nr_skipped)
+		__field(unsigned long, nr_taken)
+		__field(unsigned int, isolate_mode)
+		__field(int, lru)
+	),
+
+	TP_fast_assign(
+		__entry->highest_zoneidx = highest_zoneidx;
+		__entry->order = order;
+		__entry->nr_requested = nr_requested;
+		__entry->nr_scanned = nr_scanned;
+		__entry->nr_skipped = nr_skipped;
+		__entry->nr_taken = nr_taken;
+		__entry->isolate_mode = (__force unsigned int)isolate_mode;
+		__entry->lru = lru;
+	),
+
+	/*
+	 * classzone is previous name of the highest_zoneidx.
+	 * Reason not to change it is the ABI requirement of the tracepoint.
+	 */
+	TP_printk("isolate_mode=%d classzone=%d order=%d nr_requested=%lu nr_scanned=%lu nr_skipped=%lu nr_taken=%lu lru=%s",
+		__entry->isolate_mode,
+		__entry->highest_zoneidx,
+		__entry->order,
+		__entry->nr_requested,
+		__entry->nr_scanned,
+		__entry->nr_skipped,
+		__entry->nr_taken,
+		__print_symbolic(__entry->lru, LRU_GEN_NAMES))
+);
+
 TRACE_EVENT(mm_vmscan_write_folio,
 
 	TP_PROTO(struct folio *folio),
@@ -437,6 +488,53 @@  TRACE_EVENT(mm_vmscan_lru_shrink_active,
 		show_reclaim_flags(__entry->reclaim_flags))
 );
 
+TRACE_EVENT(mm_vmscan_lru_gen_evict,
+
+	TP_PROTO(int nid, unsigned long nr_reclaimed,
+		struct reclaim_stat *stat, int priority, int file),
+
+	TP_ARGS(nid, nr_reclaimed, stat, priority, file),
+
+	TP_STRUCT__entry(
+		__field(unsigned long, nr_reclaimed)
+		__field(unsigned long, nr_dirty)
+		__field(unsigned long, nr_writeback)
+		__field(unsigned long, nr_congested)
+		__field(unsigned long, nr_immediate)
+		__field(unsigned int, nr_activate0)
+		__field(unsigned int, nr_activate1)
+		__field(unsigned long, nr_ref_keep)
+		__field(unsigned long, nr_unmap_fail)
+		__field(int, nid)
+		__field(int, priority)
+		__field(int, reclaim_flags)
+	),
+
+	TP_fast_assign(
+		__entry->nid = nid;
+		__entry->nr_reclaimed = nr_reclaimed;
+		__entry->nr_dirty = stat->nr_dirty;
+		__entry->nr_writeback = stat->nr_writeback;
+		__entry->nr_congested = stat->nr_congested;
+		__entry->nr_immediate = stat->nr_immediate;
+		__entry->nr_activate0 = stat->nr_activate[0];
+		__entry->nr_activate1 = stat->nr_activate[1];
+		__entry->nr_ref_keep = stat->nr_ref_keep;
+		__entry->nr_unmap_fail = stat->nr_unmap_fail;
+		__entry->priority = priority;
+		__entry->reclaim_flags = trace_reclaim_flags(file);
+	),
+
+	TP_printk("nid=%d nr_reclaimed=%ld nr_dirty=%ld nr_writeback=%ld nr_congested=%ld nr_immediate=%ld nr_activate_anon=%d nr_activate_file=%d nr_ref_keep=%ld nr_unmap_fail=%ld priority=%d flags=%s",
+		__entry->nid, __entry->nr_reclaimed,
+		__entry->nr_dirty, __entry->nr_writeback,
+		__entry->nr_congested, __entry->nr_immediate,
+		__entry->nr_activate0, __entry->nr_activate1,
+		__entry->nr_ref_keep, __entry->nr_unmap_fail,
+		__entry->priority,
+		show_reclaim_flags(__entry->reclaim_flags))
+);
+
 TRACE_EVENT(mm_vmscan_node_reclaim_begin,
 
 	TP_PROTO(int nid, int order, gfp_t gfp_flags),
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 6f13394b112e..f453a0f8ceef 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -5005,6 +5005,7 @@  static int scan_folios(struct lruvec *lruvec, struct scan_control *sc,
 	int sorted = 0;
 	int scanned = 0;
 	int isolated = 0;
+	int skipped = 0;
 	int remaining = MAX_LRU_BATCH;
 	struct lru_gen_folio *lrugen = &lruvec->lrugen;
 	struct mem_cgroup *memcg = lruvec_memcg(lruvec);
@@ -5018,7 +5019,7 @@  static int scan_folios(struct lruvec *lruvec, struct scan_control *sc,
 
 	for (i = MAX_NR_ZONES; i > 0; i--) {
 		LIST_HEAD(moved);
-		int skipped = 0;
+		int skipped_zone = 0;
 		int zone = (sc->reclaim_idx + i) % MAX_NR_ZONES;
 		struct list_head *head = &lrugen->folios[gen][type][zone];
 
@@ -5040,16 +5041,17 @@  static int scan_folios(struct lruvec *lruvec, struct scan_control *sc,
 				isolated += delta;
 			} else {
 				list_move(&folio->lru, &moved);
-				skipped += delta;
+				skipped_zone += delta;
 			}
 
-			if (!--remaining || max(isolated, skipped) >= MIN_LRU_BATCH)
+			if (!--remaining || max(isolated, skipped_zone) >= MIN_LRU_BATCH)
 				break;
 		}
 
-		if (skipped) {
+		if (skipped_zone) {
 			list_splice(&moved, head);
-			__count_zid_vm_events(PGSCAN_SKIP, zone, skipped);
+			__count_zid_vm_events(PGSCAN_SKIP, zone, skipped_zone);
+			skipped += skipped_zone;
 		}
 
 		if (!remaining || isolated >= MIN_LRU_BATCH)
@@ -5065,6 +5067,9 @@  static int scan_folios(struct lruvec *lruvec, struct scan_control *sc,
 	__count_memcg_events(memcg, PGREFILL, sorted);
 	__count_vm_events(PGSCAN_ANON + type, isolated);
 
+	trace_mm_vmscan_lru_gen_scan(sc->reclaim_idx, sc->order, MAX_LRU_BATCH,
+			scanned, skipped, isolated,
+			sc->may_unmap ? 0 : ISOLATE_UNMAPPED, type);
 	/*
 	 * There might not be eligible folios due to reclaim_idx. Check the
 	 * remaining to prevent livelock if it's not making progress.
@@ -5194,6 +5199,8 @@  static int evict_folios(struct lruvec *lruvec, struct scan_control *sc, int swap
 retry:
 	reclaimed = shrink_folio_list(&list, pgdat, sc, &stat, false);
 	sc->nr_reclaimed += reclaimed;
+	trace_mm_vmscan_lru_gen_evict(pgdat->node_id, reclaimed, &stat,
+				      sc->priority, type);
 
 	list_for_each_entry_safe_reverse(folio, next, &list, lru) {
 		if (!folio_evictable(folio)) {