diff mbox series

[v4] vmscan: add trace events for lru_gen

Message ID 20230926042250.6028-1-jaewon31.kim@samsung.com (mailing list archive)
State New
Headers show
Series [v4] vmscan: add trace events for lru_gen | expand

Commit Message

Jaewon Kim Sept. 26, 2023, 4:22 a.m. UTC
As the legacy lru provides, the lru_gen needs some trace events for
debugging.

This commit introduces 2 trace events.
  trace_mm_vmscan_lru_gen_scan
  trace_mm_vmscan_lru_gen_evict

Each event is similar to the following legacy events.
  trace_mm_vmscan_lru_isolate,
  trace_mm_vmscan_lru_shrink_[in]active

Here's an example
  mm_vmscan_lru_gen_scan: classzone=2 order=0 nr_requested=4096 nr_scanned=64 nr_skipped=0 nr_taken=64 lru=anon
  mm_vmscan_lru_gen_evict: nid=0 nr_reclaimed=64 nr_dirty=0 nr_writeback=0 nr_congested=0 nr_immediate=0 nr_activate_anon=0 nr_activate_file=0 nr_ref_keep=0 nr_unmap_fail=0 priority=2 flags=RECLAIM_WB_ANON|RECLAIM_WB_ASYNC
  mm_vmscan_lru_gen_scan: classzone=1 order=0 nr_requested=4096 nr_scanned=64 nr_skipped=0 nr_taken=64 lru=file
  mm_vmscan_lru_gen_evict: nid=0 nr_reclaimed=64 nr_dirty=0 nr_writeback=0 nr_congested=0 nr_immediate=0 nr_activate_anon=0 nr_activate_file=0 nr_ref_keep=0 nr_unmap_fail=0 priority=12 flags=RECLAIM_WB_FILE|RECLAIM_WB_ASYNC

Signed-off-by: Jaewon Kim <jaewon31.kim@samsung.com>
---
v4: wrap with #ifdef CONFIG_LRU_GEN
v3: change printk format
v2: use condition and make it aligned
v1: introduce trace events
---
 include/trace/events/mmflags.h |  9 ++++
 include/trace/events/vmscan.h  | 96 ++++++++++++++++++++++++++++++++++
 mm/vmscan.c                    | 20 +++++--
 3 files changed, 120 insertions(+), 5 deletions(-)

Comments

Yu Zhao Sept. 26, 2023, 4:42 a.m. UTC | #1
On Mon, Sep 25, 2023 at 10:20 PM Jaewon Kim <jaewon31.kim@samsung.com> wrote:
>
> As the legacy lru provides, the lru_gen needs some trace events for
> debugging.
>
> This commit introduces 2 trace events.
>   trace_mm_vmscan_lru_gen_scan
>   trace_mm_vmscan_lru_gen_evict
>
> Each event is similar to the following legacy events.
>   trace_mm_vmscan_lru_isolate,
>   trace_mm_vmscan_lru_shrink_[in]active

We should just reuse trace_mm_vmscan_lru_isolate and
trace_mm_vmscan_lru_shrink_inactive instead of adding new tracepoints.

To reuse trace_mm_vmscan_lru_isolate, we'd just need to append two new
names to LRU_NAMES.

The naming of trace_mm_vmscan_lru_shrink_inactive might seem confusing
but it's how MGLRU maintains the compatibility, e.g., the existing
active/inactive counters in /proc/vmstat.

> Here's an example
>   mm_vmscan_lru_gen_scan: classzone=2 order=0 nr_requested=4096 nr_scanned=64 nr_skipped=0 nr_taken=64 lru=anon
>   mm_vmscan_lru_gen_evict: nid=0 nr_reclaimed=64 nr_dirty=0 nr_writeback=0 nr_congested=0 nr_immediate=0 nr_activate_anon=0 nr_activate_file=0 nr_ref_keep=0 nr_unmap_fail=0 priority=2 flags=RECLAIM_WB_ANON|RECLAIM_WB_ASYNC
>   mm_vmscan_lru_gen_scan: classzone=1 order=0 nr_requested=4096 nr_scanned=64 nr_skipped=0 nr_taken=64 lru=file
>   mm_vmscan_lru_gen_evict: nid=0 nr_reclaimed=64 nr_dirty=0 nr_writeback=0 nr_congested=0 nr_immediate=0 nr_activate_anon=0 nr_activate_file=0 nr_ref_keep=0 nr_unmap_fail=0 priority=12 flags=RECLAIM_WB_FILE|RECLAIM_WB_ASYNC
>
> Signed-off-by: Jaewon Kim <jaewon31.kim@samsung.com>
> ---
> v4: wrap with #ifdef CONFIG_LRU_GEN
> v3: change printk format
> v2: use condition and make it aligned
> v1: introduce trace events
> ---
>  include/trace/events/mmflags.h |  9 ++++
>  include/trace/events/vmscan.h  | 96 ++++++++++++++++++++++++++++++++++
>  mm/vmscan.c                    | 20 +++++--
>  3 files changed, 120 insertions(+), 5 deletions(-)
>
> diff --git a/include/trace/events/mmflags.h b/include/trace/events/mmflags.h
> index 1478b9dd05fa..6dfe85bd4e81 100644
> --- a/include/trace/events/mmflags.h
> +++ b/include/trace/events/mmflags.h
> @@ -274,6 +274,12 @@ IF_HAVE_VM_SOFTDIRTY(VM_SOFTDIRTY, "softdirty"     )               \
>                 EM (LRU_ACTIVE_FILE, "active_file") \
>                 EMe(LRU_UNEVICTABLE, "unevictable")
>
> +#ifdef CONFIG_LRU_GEN
> +#define LRU_GEN_NAMES          \
> +               EM (LRU_GEN_ANON, "anon") \
> +               EMe(LRU_GEN_FILE, "file")
> +#endif
> +
>  /*
>   * First define the enums in the above macros to be exported to userspace
>   * via TRACE_DEFINE_ENUM().
> @@ -288,6 +294,9 @@ COMPACTION_PRIORITY
>  /* COMPACTION_FEEDBACK are defines not enums. Not needed here. */
>  ZONE_TYPE
>  LRU_NAMES
> +#ifdef CONFIG_LRU_GEN
> +LRU_GEN_NAMES
> +#endif
>
>  /*
>   * Now redefine the EM() and EMe() macros to map the enums to the strings
> diff --git a/include/trace/events/vmscan.h b/include/trace/events/vmscan.h
> index d2123dd960d5..2080ef742f89 100644
> --- a/include/trace/events/vmscan.h
> +++ b/include/trace/events/vmscan.h
> @@ -327,6 +327,102 @@ TRACE_EVENT(mm_vmscan_lru_isolate,
>                 __print_symbolic(__entry->lru, LRU_NAMES))
>  );
>
> +#ifdef CONFIG_LRU_GEN
> +TRACE_EVENT_CONDITION(mm_vmscan_lru_gen_scan,
> +       TP_PROTO(int highest_zoneidx,
> +               int order,
> +               unsigned long nr_requested,
> +               unsigned long nr_scanned,
> +               unsigned long nr_skipped,
> +               unsigned long nr_taken,
> +               int lru),
> +
> +       TP_ARGS(highest_zoneidx, order, nr_requested, nr_scanned, nr_skipped, nr_taken, lru),
> +
> +       TP_CONDITION(nr_scanned),
> +
> +       TP_STRUCT__entry(
> +               __field(int, highest_zoneidx)
> +               __field(int, order)
> +               __field(unsigned long, nr_requested)
> +               __field(unsigned long, nr_scanned)
> +               __field(unsigned long, nr_skipped)
> +               __field(unsigned long, nr_taken)
> +               __field(int, lru)
> +       ),
> +
> +       TP_fast_assign(
> +               __entry->highest_zoneidx = highest_zoneidx;
> +               __entry->order = order;
> +               __entry->nr_requested = nr_requested;
> +               __entry->nr_scanned = nr_scanned;
> +               __entry->nr_skipped = nr_skipped;
> +               __entry->nr_taken = nr_taken;
> +               __entry->lru = lru;
> +       ),
> +
> +       /*
> +        * classzone is previous name of the highest_zoneidx.
> +        * Reason not to change it is the ABI requirement of the tracepoint.
> +        */
> +       TP_printk("classzone=%d order=%d nr_requested=%lu nr_scanned=%lu nr_skipped=%lu nr_taken=%lu lru=%s",
> +               __entry->highest_zoneidx,
> +               __entry->order,
> +               __entry->nr_requested,
> +               __entry->nr_scanned,
> +               __entry->nr_skipped,
> +               __entry->nr_taken,
> +               __print_symbolic(__entry->lru, LRU_GEN_NAMES))
> +);
> +
> +TRACE_EVENT(mm_vmscan_lru_gen_evict,
> +
> +       TP_PROTO(int nid, unsigned long nr_reclaimed,
> +               struct reclaim_stat *stat, int priority, int file),
> +
> +       TP_ARGS(nid, nr_reclaimed, stat, priority, file),
> +
> +       TP_STRUCT__entry(
> +               __field(unsigned long, nr_reclaimed)
> +               __field(unsigned long, nr_dirty)
> +               __field(unsigned long, nr_writeback)
> +               __field(unsigned long, nr_congested)
> +               __field(unsigned long, nr_immediate)
> +               __field(unsigned int, nr_activate0)
> +               __field(unsigned int, nr_activate1)
> +               __field(unsigned long, nr_ref_keep)
> +               __field(unsigned long, nr_unmap_fail)
> +               __field(int, nid)
> +               __field(int, priority)
> +               __field(int, reclaim_flags)
> +       ),
> +
> +       TP_fast_assign(
> +               __entry->nid = nid;
> +               __entry->nr_reclaimed = nr_reclaimed;
> +               __entry->nr_dirty = stat->nr_dirty;
> +               __entry->nr_writeback = stat->nr_writeback;
> +               __entry->nr_congested = stat->nr_congested;
> +               __entry->nr_immediate = stat->nr_immediate;
> +               __entry->nr_activate0 = stat->nr_activate[0];
> +               __entry->nr_activate1 = stat->nr_activate[1];
> +               __entry->nr_ref_keep = stat->nr_ref_keep;
> +               __entry->nr_unmap_fail = stat->nr_unmap_fail;
> +               __entry->priority = priority;
> +               __entry->reclaim_flags = trace_reclaim_flags(file);
> +       ),
> +
> +       TP_printk("nid=%d nr_reclaimed=%lu nr_dirty=%lu nr_writeback=%lu nr_congested=%lu nr_immediate=%lu nr_activate_anon=%u nr_activate_file=%u nr_ref_keep=%lu nr_unmap_fail=%lu priority=%d flags=%s",
> +               __entry->nid, __entry->nr_reclaimed,
> +               __entry->nr_dirty, __entry->nr_writeback,
> +               __entry->nr_congested, __entry->nr_immediate,
> +               __entry->nr_activate0, __entry->nr_activate1,
> +               __entry->nr_ref_keep, __entry->nr_unmap_fail,
> +               __entry->priority,
> +               show_reclaim_flags(__entry->reclaim_flags))
> +);
> +#endif
> +
>  TRACE_EVENT(mm_vmscan_write_folio,
>
>         TP_PROTO(struct folio *folio),
> diff --git a/mm/vmscan.c b/mm/vmscan.c
> index 6f13394b112e..0c8b48bcb461 100644
> --- a/mm/vmscan.c
> +++ b/mm/vmscan.c
> @@ -5005,6 +5005,7 @@ static int scan_folios(struct lruvec *lruvec, struct scan_control *sc,
>         int sorted = 0;
>         int scanned = 0;
>         int isolated = 0;
> +       int skipped = 0;
>         int remaining = MAX_LRU_BATCH;
>         struct lru_gen_folio *lrugen = &lruvec->lrugen;
>         struct mem_cgroup *memcg = lruvec_memcg(lruvec);
> @@ -5018,7 +5019,7 @@ static int scan_folios(struct lruvec *lruvec, struct scan_control *sc,
>
>         for (i = MAX_NR_ZONES; i > 0; i--) {
>                 LIST_HEAD(moved);
> -               int skipped = 0;
> +               int skipped_zone = 0;
>                 int zone = (sc->reclaim_idx + i) % MAX_NR_ZONES;
>                 struct list_head *head = &lrugen->folios[gen][type][zone];
>
> @@ -5040,16 +5041,17 @@ static int scan_folios(struct lruvec *lruvec, struct scan_control *sc,
>                                 isolated += delta;
>                         } else {
>                                 list_move(&folio->lru, &moved);
> -                               skipped += delta;
> +                               skipped_zone += delta;
>                         }
>
> -                       if (!--remaining || max(isolated, skipped) >= MIN_LRU_BATCH)
> +                       if (!--remaining || max(isolated, skipped_zone) >= MIN_LRU_BATCH)
>                                 break;
>                 }
>
> -               if (skipped) {
> +               if (skipped_zone) {
>                         list_splice(&moved, head);
> -                       __count_zid_vm_events(PGSCAN_SKIP, zone, skipped);
> +                       __count_zid_vm_events(PGSCAN_SKIP, zone, skipped_zone);
> +                       skipped += skipped_zone;
>                 }
>
>                 if (!remaining || isolated >= MIN_LRU_BATCH)
> @@ -5065,6 +5067,10 @@ static int scan_folios(struct lruvec *lruvec, struct scan_control *sc,
>         __count_memcg_events(memcg, PGREFILL, sorted);
>         __count_vm_events(PGSCAN_ANON + type, isolated);
>
> +#ifdef CONFIG_LRU_GEN
> +       trace_mm_vmscan_lru_gen_scan(sc->reclaim_idx, sc->order, MAX_LRU_BATCH,
> +                       scanned, skipped, isolated, type);
> +#endif

These functions are already within CONFIG_LRU_GEN.

>         /*
>          * There might not be eligible folios due to reclaim_idx. Check the
>          * remaining to prevent livelock if it's not making progress.
> @@ -5194,6 +5200,10 @@ static int evict_folios(struct lruvec *lruvec, struct scan_control *sc, int swap
>  retry:
>         reclaimed = shrink_folio_list(&list, pgdat, sc, &stat, false);
>         sc->nr_reclaimed += reclaimed;
> +#ifdef CONFIG_LRU_GEN
> +       trace_mm_vmscan_lru_gen_evict(pgdat->node_id, reclaimed, &stat,
> +                                     sc->priority, type);
> +#endif

Ditto.
Jaewon Kim Sept. 26, 2023, 5:10 a.m. UTC | #2
>On Mon, Sep 25, 2023 at 10:20 PM Jaewon Kim <jaewon31.kim@samsung.com> wrote:
>>
>> As the legacy lru provides, the lru_gen needs some trace events for
>> debugging.
>>
>> This commit introduces 2 trace events.
>>   trace_mm_vmscan_lru_gen_scan
>>   trace_mm_vmscan_lru_gen_evict
>>
>> Each event is similar to the following legacy events.
>>   trace_mm_vmscan_lru_isolate,
>>   trace_mm_vmscan_lru_shrink_[in]active
>
>We should just reuse trace_mm_vmscan_lru_isolate and
>trace_mm_vmscan_lru_shrink_inactive instead of adding new tracepoints.
>
>To reuse trace_mm_vmscan_lru_isolate, we'd just need to append two new
>names to LRU_NAMES.
>
>The naming of trace_mm_vmscan_lru_shrink_inactive might seem confusing
>but it's how MGLRU maintains the compatibility, e.g., the existing
>active/inactive counters in /proc/vmstat.


Hello

Actually I had tried to reuse them, but some values were not that compatible.
Let me try that way again.

>
>> Here's an example
>>   mm_vmscan_lru_gen_scan: classzone=2 order=0 nr_requested=4096 nr_scanned=64 nr_skipped=0 nr_taken=64 lru=anon
>>   mm_vmscan_lru_gen_evict: nid=0 nr_reclaimed=64 nr_dirty=0 nr_writeback=0 nr_congested=0 nr_immediate=0 nr_activate_anon=0 nr_activate_file=0 nr_ref_keep=0 nr_unmap_fail=0 priority=2 flags=RECLAIM_WB_ANON|RECLAIM_WB_ASYNC
>>   mm_vmscan_lru_gen_scan: classzone=1 order=0 nr_requested=4096 nr_scanned=64 nr_skipped=0 nr_taken=64 lru=file
>>   mm_vmscan_lru_gen_evict: nid=0 nr_reclaimed=64 nr_dirty=0 nr_writeback=0 nr_congested=0 nr_immediate=0 nr_activate_anon=0 nr_activate_file=0 nr_ref_keep=0 nr_unmap_fail=0 priority=12 flags=RECLAIM_WB_FILE|RECLAIM_WB_ASYNC
>>
>> Signed-off-by: Jaewon Kim <jaewon31.kim@samsung.com>
>> ---
>> v4: wrap with #ifdef CONFIG_LRU_GEN
>> v3: change printk format
>> v2: use condition and make it aligned
>> v1: introduce trace events
>> ---
>>  include/trace/events/mmflags.h |  9 ++++
>>  include/trace/events/vmscan.h  | 96 ++++++++++++++++++++++++++++++++++
>>  mm/vmscan.c                    | 20 +++++--
>>  3 files changed, 120 insertions(+), 5 deletions(-)
>>
>> diff --git a/include/trace/events/mmflags.h b/include/trace/events/mmflags.h
>> index 1478b9dd05fa..6dfe85bd4e81 100644
>> --- a/include/trace/events/mmflags.h
>> +++ b/include/trace/events/mmflags.h
>> @@ -274,6 +274,12 @@ IF_HAVE_VM_SOFTDIRTY(VM_SOFTDIRTY, "softdirty"     )               \
>>                 EM (LRU_ACTIVE_FILE, "active_file") \
>>                 EMe(LRU_UNEVICTABLE, "unevictable")
>>
>> +#ifdef CONFIG_LRU_GEN
>> +#define LRU_GEN_NAMES          \
>> +               EM (LRU_GEN_ANON, "anon") \
>> +               EMe(LRU_GEN_FILE, "file")
>> +#endif
>> +
>>  /*
>>   * First define the enums in the above macros to be exported to userspace
>>   * via TRACE_DEFINE_ENUM().
>> @@ -288,6 +294,9 @@ COMPACTION_PRIORITY
>>  /* COMPACTION_FEEDBACK are defines not enums. Not needed here. */
>>  ZONE_TYPE
>>  LRU_NAMES
>> +#ifdef CONFIG_LRU_GEN
>> +LRU_GEN_NAMES
>> +#endif
>>
>>  /*
>>   * Now redefine the EM() and EMe() macros to map the enums to the strings
>> diff --git a/include/trace/events/vmscan.h b/include/trace/events/vmscan.h
>> index d2123dd960d5..2080ef742f89 100644
>> --- a/include/trace/events/vmscan.h
>> +++ b/include/trace/events/vmscan.h
>> @@ -327,6 +327,102 @@ TRACE_EVENT(mm_vmscan_lru_isolate,
>>                 __print_symbolic(__entry->lru, LRU_NAMES))
>>  );
>>
>> +#ifdef CONFIG_LRU_GEN
>> +TRACE_EVENT_CONDITION(mm_vmscan_lru_gen_scan,
>> +       TP_PROTO(int highest_zoneidx,
>> +               int order,
>> +               unsigned long nr_requested,
>> +               unsigned long nr_scanned,
>> +               unsigned long nr_skipped,
>> +               unsigned long nr_taken,
>> +               int lru),
>> +
>> +       TP_ARGS(highest_zoneidx, order, nr_requested, nr_scanned, nr_skipped, nr_taken, lru),
>> +
>> +       TP_CONDITION(nr_scanned),
>> +
>> +       TP_STRUCT__entry(
>> +               __field(int, highest_zoneidx)
>> +               __field(int, order)
>> +               __field(unsigned long, nr_requested)
>> +               __field(unsigned long, nr_scanned)
>> +               __field(unsigned long, nr_skipped)
>> +               __field(unsigned long, nr_taken)
>> +               __field(int, lru)
>> +       ),
>> +
>> +       TP_fast_assign(
>> +               __entry->highest_zoneidx = highest_zoneidx;
>> +               __entry->order = order;
>> +               __entry->nr_requested = nr_requested;
>> +               __entry->nr_scanned = nr_scanned;
>> +               __entry->nr_skipped = nr_skipped;
>> +               __entry->nr_taken = nr_taken;
>> +               __entry->lru = lru;
>> +       ),
>> +
>> +       /*
>> +        * classzone is previous name of the highest_zoneidx.
>> +        * Reason not to change it is the ABI requirement of the tracepoint.
>> +        */
>> +       TP_printk("classzone=%d order=%d nr_requested=%lu nr_scanned=%lu nr_skipped=%lu nr_taken=%lu lru=%s",
>> +               __entry->highest_zoneidx,
>> +               __entry->order,
>> +               __entry->nr_requested,
>> +               __entry->nr_scanned,
>> +               __entry->nr_skipped,
>> +               __entry->nr_taken,
>> +               __print_symbolic(__entry->lru, LRU_GEN_NAMES))
>> +);
>> +
>> +TRACE_EVENT(mm_vmscan_lru_gen_evict,
>> +
>> +       TP_PROTO(int nid, unsigned long nr_reclaimed,
>> +               struct reclaim_stat *stat, int priority, int file),
>> +
>> +       TP_ARGS(nid, nr_reclaimed, stat, priority, file),
>> +
>> +       TP_STRUCT__entry(
>> +               __field(unsigned long, nr_reclaimed)
>> +               __field(unsigned long, nr_dirty)
>> +               __field(unsigned long, nr_writeback)
>> +               __field(unsigned long, nr_congested)
>> +               __field(unsigned long, nr_immediate)
>> +               __field(unsigned int, nr_activate0)
>> +               __field(unsigned int, nr_activate1)
>> +               __field(unsigned long, nr_ref_keep)
>> +               __field(unsigned long, nr_unmap_fail)
>> +               __field(int, nid)
>> +               __field(int, priority)
>> +               __field(int, reclaim_flags)
>> +       ),
>> +
>> +       TP_fast_assign(
>> +               __entry->nid = nid;
>> +               __entry->nr_reclaimed = nr_reclaimed;
>> +               __entry->nr_dirty = stat->nr_dirty;
>> +               __entry->nr_writeback = stat->nr_writeback;
>> +               __entry->nr_congested = stat->nr_congested;
>> +               __entry->nr_immediate = stat->nr_immediate;
>> +               __entry->nr_activate0 = stat->nr_activate[0];
>> +               __entry->nr_activate1 = stat->nr_activate[1];
>> +               __entry->nr_ref_keep = stat->nr_ref_keep;
>> +               __entry->nr_unmap_fail = stat->nr_unmap_fail;
>> +               __entry->priority = priority;
>> +               __entry->reclaim_flags = trace_reclaim_flags(file);
>> +       ),
>> +
>> +       TP_printk("nid=%d nr_reclaimed=%lu nr_dirty=%lu nr_writeback=%lu nr_congested=%lu nr_immediate=%lu nr_activate_anon=%u nr_activate_file=%u nr_ref_keep=%lu nr_unmap_fail=%lu priority=%d flags=%s",
>> +               __entry->nid, __entry->nr_reclaimed,
>> +               __entry->nr_dirty, __entry->nr_writeback,
>> +               __entry->nr_congested, __entry->nr_immediate,
>> +               __entry->nr_activate0, __entry->nr_activate1,
>> +               __entry->nr_ref_keep, __entry->nr_unmap_fail,
>> +               __entry->priority,
>> +               show_reclaim_flags(__entry->reclaim_flags))
>> +);
>> +#endif
>> +
>>  TRACE_EVENT(mm_vmscan_write_folio,
>>
>>         TP_PROTO(struct folio *folio),
>> diff --git a/mm/vmscan.c b/mm/vmscan.c
>> index 6f13394b112e..0c8b48bcb461 100644
>> --- a/mm/vmscan.c
>> +++ b/mm/vmscan.c
>> @@ -5005,6 +5005,7 @@ static int scan_folios(struct lruvec *lruvec, struct scan_control *sc,
>>         int sorted = 0;
>>         int scanned = 0;
>>         int isolated = 0;
>> +       int skipped = 0;
>>         int remaining = MAX_LRU_BATCH;
>>         struct lru_gen_folio *lrugen = &lruvec->lrugen;
>>         struct mem_cgroup *memcg = lruvec_memcg(lruvec);
>> @@ -5018,7 +5019,7 @@ static int scan_folios(struct lruvec *lruvec, struct scan_control *sc,
>>
>>         for (i = MAX_NR_ZONES; i > 0; i--) {
>>                 LIST_HEAD(moved);
>> -               int skipped = 0;
>> +               int skipped_zone = 0;
>>                 int zone = (sc->reclaim_idx + i) % MAX_NR_ZONES;
>>                 struct list_head *head = &lrugen->folios[gen][type][zone];
>>
>> @@ -5040,16 +5041,17 @@ static int scan_folios(struct lruvec *lruvec, struct scan_control *sc,
>>                                 isolated += delta;
>>                         } else {
>>                                 list_move(&folio->lru, &moved);
>> -                               skipped += delta;
>> +                               skipped_zone += delta;
>>                         }
>>
>> -                       if (!--remaining || max(isolated, skipped) >= MIN_LRU_BATCH)
>> +                       if (!--remaining || max(isolated, skipped_zone) >= MIN_LRU_BATCH)
>>                                 break;
>>                 }
>>
>> -               if (skipped) {
>> +               if (skipped_zone) {
>>                         list_splice(&moved, head);
>> -                       __count_zid_vm_events(PGSCAN_SKIP, zone, skipped);
>> +                       __count_zid_vm_events(PGSCAN_SKIP, zone, skipped_zone);
>> +                       skipped += skipped_zone;
>>                 }
>>
>>                 if (!remaining || isolated >= MIN_LRU_BATCH)
>> @@ -5065,6 +5067,10 @@ static int scan_folios(struct lruvec *lruvec, struct scan_control *sc,
>>         __count_memcg_events(memcg, PGREFILL, sorted);
>>         __count_vm_events(PGSCAN_ANON + type, isolated);
>>
>> +#ifdef CONFIG_LRU_GEN
>> +       trace_mm_vmscan_lru_gen_scan(sc->reclaim_idx, sc->order, MAX_LRU_BATCH,
>> +                       scanned, skipped, isolated, type);
>> +#endif
>
>These functions are already within CONFIG_LRU_GEN.
>
>>         /*
>>          * There might not be eligible folios due to reclaim_idx. Check the
>>          * remaining to prevent livelock if it's not making progress.
>> @@ -5194,6 +5200,10 @@ static int evict_folios(struct lruvec *lruvec, struct scan_control *sc, int swap
>>  retry:
>>         reclaimed = shrink_folio_list(&list, pgdat, sc, &stat, false);
>>         sc->nr_reclaimed += reclaimed;
>> +#ifdef CONFIG_LRU_GEN
>> +       trace_mm_vmscan_lru_gen_evict(pgdat->node_id, reclaimed, &stat,
>> +                                     sc->priority, type);
>> +#endif
>
>Ditto.
Jaewon Kim Sept. 26, 2023, 7:33 a.m. UTC | #3
>>On Mon, Sep 25, 2023 at 10:20 PM Jaewon Kim <jaewon31.kim@samsung.com> wrote:
>>>
>>> As the legacy lru provides, the lru_gen needs some trace events for
>>> debugging.
>>>
>>> This commit introduces 2 trace events.
>>>   trace_mm_vmscan_lru_gen_scan
>>>   trace_mm_vmscan_lru_gen_evict
>>>
>>> Each event is similar to the following legacy events.
>>>   trace_mm_vmscan_lru_isolate,
>>>   trace_mm_vmscan_lru_shrink_[in]active
>>
>>We should just reuse trace_mm_vmscan_lru_isolate and
>>trace_mm_vmscan_lru_shrink_inactive instead of adding new tracepoints.
>>
>>To reuse trace_mm_vmscan_lru_isolate, we'd just need to append two new
>>names to LRU_NAMES.
>>
>>The naming of trace_mm_vmscan_lru_shrink_inactive might seem confusing
>>but it's how MGLRU maintains the compatibility, e.g., the existing
>>active/inactive counters in /proc/vmstat.
>
>
>Hello
>
>Actually I had tried to reuse them, but some values were not that compatible.
>Let me try that way again.
>
>>

Hello Yu Zhao

Could you look into what I tried below? I reused the legacy trace events as you recommended.

For the nr_scanned for trace_mm_vmscan_lru_shrink_inactive, I just used the scanned returned from isolate_folios.
I thought this is right as scan_folios also uses its isolated.
  __count_vm_events(PGSCAN_ANON + type, isolated);
But I guess the scanned in scan_folios is actually the one used in shrink_inactive_list

I tested this on both 0 and 7 of /sys/kernel/mm/lru_gen/enabled


diff --git a/mm/vmscan.c b/mm/vmscan.c
index a4e44f1c97c1..b61a0156559c 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -4328,6 +4328,7 @@ static int scan_folios(struct lruvec *lruvec, struct scan_control *sc,
        int sorted = 0;
        int scanned = 0;
        int isolated = 0;
+       int skipped = 0;
        int remaining = MAX_LRU_BATCH;
        struct lru_gen_folio *lrugen = &lruvec->lrugen;
        struct mem_cgroup *memcg = lruvec_memcg(lruvec);
@@ -4341,7 +4342,7 @@ static int scan_folios(struct lruvec *lruvec, struct scan_control *sc,
 
        for (i = MAX_NR_ZONES; i > 0; i--) {
                LIST_HEAD(moved);
-               int skipped = 0;
+               int skipped_zone = 0;
                int zone = (sc->reclaim_idx + i) % MAX_NR_ZONES;
                struct list_head *head = &lrugen->folios[gen][type][zone];
 
@@ -4363,16 +4364,17 @@ static int scan_folios(struct lruvec *lruvec, struct scan_control *sc,
                                isolated += delta;
                        } else {
                                list_move(&folio->lru, &moved);
-                               skipped += delta;
+                               skipped_zone += delta;
                        }
 
-                       if (!--remaining || max(isolated, skipped) >= MIN_LRU_BATCH)
+                       if (!--remaining || max(isolated, skipped_zone) >= MIN_LRU_BATCH)
                                break;
                }
 
-               if (skipped) {
+               if (skipped_zone) {
                        list_splice(&moved, head);
-                       __count_zid_vm_events(PGSCAN_SKIP, zone, skipped);
+                       __count_zid_vm_events(PGSCAN_SKIP, zone, skipped_zone);
+                       skipped += skipped_zone;
                }
 
                if (!remaining || isolated >= MIN_LRU_BATCH)
@@ -4387,6 +4389,9 @@ static int scan_folios(struct lruvec *lruvec, struct scan_control *sc,
        __count_memcg_events(memcg, item, isolated);
        __count_memcg_events(memcg, PGREFILL, sorted);
        __count_vm_events(PGSCAN_ANON + type, isolated);
+       trace_mm_vmscan_lru_isolate(sc->reclaim_idx, sc->order, MAX_LRU_BATCH,
+                                   scanned, skipped, isolated,
+                                   type ? LRU_INACTIVE_FILE : LRU_INACTIVE_ANON);
 
        /*
         * There might not be eligible folios due to reclaim_idx. Check the
@@ -4517,6 +4522,9 @@ static int evict_folios(struct lruvec *lruvec, struct scan_control *sc, int swap
 retry:
        reclaimed = shrink_folio_list(&list, pgdat, sc, &stat, false);
        sc->nr_reclaimed += reclaimed;
+       trace_mm_vmscan_lru_shrink_inactive(pgdat->node_id,
+                       scanned, reclaimed, &stat, sc->priority,
+                       type ? LRU_INACTIVE_FILE : LRU_INACTIVE_ANON);
 
        list_for_each_entry_safe_reverse(folio, next, &list, lru) {
                if (!folio_evictable(folio)) {



>>> Here's an example
>>>   mm_vmscan_lru_gen_scan: classzone=2 order=0 nr_requested=4096 nr_scanned=64 nr_skipped=0 nr_taken=64 lru=anon
>>>   mm_vmscan_lru_gen_evict: nid=0 nr_reclaimed=64 nr_dirty=0 nr_writeback=0 nr_congested=0 nr_immediate=0 nr_activate_anon=0 nr_activate_file=0 nr_ref_keep=0 nr_unmap_fail=0 priority=2 flags=RECLAIM_WB_ANON|RECLAIM_WB_ASYNC
>>>   mm_vmscan_lru_gen_scan: classzone=1 order=0 nr_requested=4096 nr_scanned=64 nr_skipped=0 nr_taken=64 lru=file
>>>   mm_vmscan_lru_gen_evict: nid=0 nr_reclaimed=64 nr_dirty=0 nr_writeback=0 nr_congested=0 nr_immediate=0 nr_activate_anon=0 nr_activate_file=0 nr_ref_keep=0 nr_unmap_fail=0 priority=12 flags=RECLAIM_WB_FILE|RECLAIM_WB_ASYNC
>>>
>>> Signed-off-by: Jaewon Kim <jaewon31.kim@samsung.com>
>>> ---
>>> v4: wrap with #ifdef CONFIG_LRU_GEN
>>> v3: change printk format
>>> v2: use condition and make it aligned
>>> v1: introduce trace events
>>> ---
>>>  include/trace/events/mmflags.h |  9 ++++
>>>  include/trace/events/vmscan.h  | 96 ++++++++++++++++++++++++++++++++++
>>>  mm/vmscan.c                    | 20 +++++--
>>>  3 files changed, 120 insertions(+), 5 deletions(-)
>>>
>>> diff --git a/include/trace/events/mmflags.h b/include/trace/events/mmflags.h
>>> index 1478b9dd05fa..6dfe85bd4e81 100644
>>> --- a/include/trace/events/mmflags.h
>>> +++ b/include/trace/events/mmflags.h
>>> @@ -274,6 +274,12 @@ IF_HAVE_VM_SOFTDIRTY(VM_SOFTDIRTY, "softdirty"     )               \
>>>                 EM (LRU_ACTIVE_FILE, "active_file") \
>>>                 EMe(LRU_UNEVICTABLE, "unevictable")
>>>
>>> +#ifdef CONFIG_LRU_GEN
>>> +#define LRU_GEN_NAMES          \
>>> +               EM (LRU_GEN_ANON, "anon") \
>>> +               EMe(LRU_GEN_FILE, "file")
>>> +#endif
>>> +
>>>  /*
>>>   * First define the enums in the above macros to be exported to userspace
>>>   * via TRACE_DEFINE_ENUM().
>>> @@ -288,6 +294,9 @@ COMPACTION_PRIORITY
>>>  /* COMPACTION_FEEDBACK are defines not enums. Not needed here. */
>>>  ZONE_TYPE
>>>  LRU_NAMES
>>> +#ifdef CONFIG_LRU_GEN
>>> +LRU_GEN_NAMES
>>> +#endif
>>>
>>>  /*
>>>   * Now redefine the EM() and EMe() macros to map the enums to the strings
>>> diff --git a/include/trace/events/vmscan.h b/include/trace/events/vmscan.h
>>> index d2123dd960d5..2080ef742f89 100644
>>> --- a/include/trace/events/vmscan.h
>>> +++ b/include/trace/events/vmscan.h
>>> @@ -327,6 +327,102 @@ TRACE_EVENT(mm_vmscan_lru_isolate,
>>>                 __print_symbolic(__entry->lru, LRU_NAMES))
>>>  );
>>>
>>> +#ifdef CONFIG_LRU_GEN
>>> +TRACE_EVENT_CONDITION(mm_vmscan_lru_gen_scan,
>>> +       TP_PROTO(int highest_zoneidx,
>>> +               int order,
>>> +               unsigned long nr_requested,
>>> +               unsigned long nr_scanned,
>>> +               unsigned long nr_skipped,
>>> +               unsigned long nr_taken,
>>> +               int lru),
>>> +
>>> +       TP_ARGS(highest_zoneidx, order, nr_requested, nr_scanned, nr_skipped, nr_taken, lru),
>>> +
>>> +       TP_CONDITION(nr_scanned),
>>> +
>>> +       TP_STRUCT__entry(
>>> +               __field(int, highest_zoneidx)
>>> +               __field(int, order)
>>> +               __field(unsigned long, nr_requested)
>>> +               __field(unsigned long, nr_scanned)
>>> +               __field(unsigned long, nr_skipped)
>>> +               __field(unsigned long, nr_taken)
>>> +               __field(int, lru)
>>> +       ),
>>> +
>>> +       TP_fast_assign(
>>> +               __entry->highest_zoneidx = highest_zoneidx;
>>> +               __entry->order = order;
>>> +               __entry->nr_requested = nr_requested;
>>> +               __entry->nr_scanned = nr_scanned;
>>> +               __entry->nr_skipped = nr_skipped;
>>> +               __entry->nr_taken = nr_taken;
>>> +               __entry->lru = lru;
>>> +       ),
>>> +
>>> +       /*
>>> +        * classzone is previous name of the highest_zoneidx.
>>> +        * Reason not to change it is the ABI requirement of the tracepoint.
>>> +        */
>>> +       TP_printk("classzone=%d order=%d nr_requested=%lu nr_scanned=%lu nr_skipped=%lu nr_taken=%lu lru=%s",
>>> +               __entry->highest_zoneidx,
>>> +               __entry->order,
>>> +               __entry->nr_requested,
>>> +               __entry->nr_scanned,
>>> +               __entry->nr_skipped,
>>> +               __entry->nr_taken,
>>> +               __print_symbolic(__entry->lru, LRU_GEN_NAMES))
>>> +);
>>> +
>>> +TRACE_EVENT(mm_vmscan_lru_gen_evict,
>>> +
>>> +       TP_PROTO(int nid, unsigned long nr_reclaimed,
>>> +               struct reclaim_stat *stat, int priority, int file),
>>> +
>>> +       TP_ARGS(nid, nr_reclaimed, stat, priority, file),
>>> +
>>> +       TP_STRUCT__entry(
>>> +               __field(unsigned long, nr_reclaimed)
>>> +               __field(unsigned long, nr_dirty)
>>> +               __field(unsigned long, nr_writeback)
>>> +               __field(unsigned long, nr_congested)
>>> +               __field(unsigned long, nr_immediate)
>>> +               __field(unsigned int, nr_activate0)
>>> +               __field(unsigned int, nr_activate1)
>>> +               __field(unsigned long, nr_ref_keep)
>>> +               __field(unsigned long, nr_unmap_fail)
>>> +               __field(int, nid)
>>> +               __field(int, priority)
>>> +               __field(int, reclaim_flags)
>>> +       ),
>>> +
>>> +       TP_fast_assign(
>>> +               __entry->nid = nid;
>>> +               __entry->nr_reclaimed = nr_reclaimed;
>>> +               __entry->nr_dirty = stat->nr_dirty;
>>> +               __entry->nr_writeback = stat->nr_writeback;
>>> +               __entry->nr_congested = stat->nr_congested;
>>> +               __entry->nr_immediate = stat->nr_immediate;
>>> +               __entry->nr_activate0 = stat->nr_activate[0];
>>> +               __entry->nr_activate1 = stat->nr_activate[1];
>>> +               __entry->nr_ref_keep = stat->nr_ref_keep;
>>> +               __entry->nr_unmap_fail = stat->nr_unmap_fail;
>>> +               __entry->priority = priority;
>>> +               __entry->reclaim_flags = trace_reclaim_flags(file);
>>> +       ),
>>> +
>>> +       TP_printk("nid=%d nr_reclaimed=%lu nr_dirty=%lu nr_writeback=%lu nr_congested=%lu nr_immediate=%lu nr_activate_anon=%u nr_activate_file=%u nr_ref_keep=%lu nr_unmap_fail=%lu priority=%d flags=%s",
>>> +               __entry->nid, __entry->nr_reclaimed,
>>> +               __entry->nr_dirty, __entry->nr_writeback,
>>> +               __entry->nr_congested, __entry->nr_immediate,
>>> +               __entry->nr_activate0, __entry->nr_activate1,
>>> +               __entry->nr_ref_keep, __entry->nr_unmap_fail,
>>> +               __entry->priority,
>>> +               show_reclaim_flags(__entry->reclaim_flags))
>>> +);
>>> +#endif
>>> +
>>>  TRACE_EVENT(mm_vmscan_write_folio,
>>>
>>>         TP_PROTO(struct folio *folio),
>>> diff --git a/mm/vmscan.c b/mm/vmscan.c
>>> index 6f13394b112e..0c8b48bcb461 100644
>>> --- a/mm/vmscan.c
>>> +++ b/mm/vmscan.c
>>> @@ -5005,6 +5005,7 @@ static int scan_folios(struct lruvec *lruvec, struct scan_control *sc,
>>>         int sorted = 0;
>>>         int scanned = 0;
>>>         int isolated = 0;
>>> +       int skipped = 0;
>>>         int remaining = MAX_LRU_BATCH;
>>>         struct lru_gen_folio *lrugen = &lruvec->lrugen;
>>>         struct mem_cgroup *memcg = lruvec_memcg(lruvec);
>>> @@ -5018,7 +5019,7 @@ static int scan_folios(struct lruvec *lruvec, struct scan_control *sc,
>>>
>>>         for (i = MAX_NR_ZONES; i > 0; i--) {
>>>                 LIST_HEAD(moved);
>>> -               int skipped = 0;
>>> +               int skipped_zone = 0;
>>>                 int zone = (sc->reclaim_idx + i) % MAX_NR_ZONES;
>>>                 struct list_head *head = &lrugen->folios[gen][type][zone];
>>>
>>> @@ -5040,16 +5041,17 @@ static int scan_folios(struct lruvec *lruvec, struct scan_control *sc,
>>>                                 isolated += delta;
>>>                         } else {
>>>                                 list_move(&folio->lru, &moved);
>>> -                               skipped += delta;
>>> +                               skipped_zone += delta;
>>>                         }
>>>
>>> -                       if (!--remaining || max(isolated, skipped) >= MIN_LRU_BATCH)
>>> +                       if (!--remaining || max(isolated, skipped_zone) >= MIN_LRU_BATCH)
>>>                                 break;
>>>                 }
>>>
>>> -               if (skipped) {
>>> +               if (skipped_zone) {
>>>                         list_splice(&moved, head);
>>> -                       __count_zid_vm_events(PGSCAN_SKIP, zone, skipped);
>>> +                       __count_zid_vm_events(PGSCAN_SKIP, zone, skipped_zone);
>>> +                       skipped += skipped_zone;
>>>                 }
>>>
>>>                 if (!remaining || isolated >= MIN_LRU_BATCH)
>>> @@ -5065,6 +5067,10 @@ static int scan_folios(struct lruvec *lruvec, struct scan_control *sc,
>>>         __count_memcg_events(memcg, PGREFILL, sorted);
>>>         __count_vm_events(PGSCAN_ANON + type, isolated);
>>>
>>> +#ifdef CONFIG_LRU_GEN
>>> +       trace_mm_vmscan_lru_gen_scan(sc->reclaim_idx, sc->order, MAX_LRU_BATCH,
>>> +                       scanned, skipped, isolated, type);
>>> +#endif
>>
>>These functions are already within CONFIG_LRU_GEN.
>>
>>>         /*
>>>          * There might not be eligible folios due to reclaim_idx. Check the
>>>          * remaining to prevent livelock if it's not making progress.
>>> @@ -5194,6 +5200,10 @@ static int evict_folios(struct lruvec *lruvec, struct scan_control *sc, int swap
>>>  retry:
>>>         reclaimed = shrink_folio_list(&list, pgdat, sc, &stat, false);
>>>         sc->nr_reclaimed += reclaimed;
>>> +#ifdef CONFIG_LRU_GEN
>>> +       trace_mm_vmscan_lru_gen_evict(pgdat->node_id, reclaimed, &stat,
>>> +                                     sc->priority, type);
>>> +#endif
>>
>>Ditto.
Jaewon Kim Sept. 26, 2023, 2:15 p.m. UTC | #4
>>>On Mon, Sep 25, 2023 at 10:20 PM Jaewon Kim <jaewon31.kim@samsung.com> wrote:
>>>>
>>>> As the legacy lru provides, the lru_gen needs some trace events for
>>>> debugging.
>>>>
>>>> This commit introduces 2 trace events.
>>>>   trace_mm_vmscan_lru_gen_scan
>>>>   trace_mm_vmscan_lru_gen_evict
>>>>
>>>> Each event is similar to the following legacy events.
>>>>   trace_mm_vmscan_lru_isolate,
>>>>   trace_mm_vmscan_lru_shrink_[in]active
>>>
>>>We should just reuse trace_mm_vmscan_lru_isolate and
>>>trace_mm_vmscan_lru_shrink_inactive instead of adding new tracepoints.
>>>
>>>To reuse trace_mm_vmscan_lru_isolate, we'd just need to append two new
>>>names to LRU_NAMES.
>>>
>>>The naming of trace_mm_vmscan_lru_shrink_inactive might seem confusing
>>>but it's how MGLRU maintains the compatibility, e.g., the existing
>>>active/inactive counters in /proc/vmstat.
>>
>>
>>Hello
>>
>>Actually I had tried to reuse them. But some values were not that compatible.
>>Let me try that way again.
>>
>>>
>
>Hello Yu Zhao
>
>Could you look into what I tried below? I reused the legacy trace events as you recommended.
>
>For the nr_scanned for trace_mm_vmscan_lru_shrink_inactive, I just used the scanned returned from isolate_folios.
>I thought this is right as scan_folios also uses its isolated.
>  __count_vm_events(PGSCAN_ANON + type, isolated);
>But I guess the scanned in scan_folios is actually the one used in shrink_inactive_list.

please ignore nr_scanned thing above I just misread the code.

This is an example, I think it works well.

 mm_vmscan_lru_isolate: isolate_mode=0 classzone=2 order=0 nr_requested=4096 nr_scanned=64 nr_skipped=0 nr_taken=64 lru=inactive_file
 mm_vmscan_lru_shrink_inactive: nid=0 nr_scanned=64 nr_reclaimed=63 nr_dirty=0 nr_writeback=0 nr_congested=0 nr_immediate=0 nr_activate_anon=0 nr_activate_file=1 nr_ref_keep=0 nr_unmap_fail=0 priority=2 flags=RECLAIM_WB_FILE|RECLAIM_WB_ASYNC

>
>I tested this on both 0 and 7 of /sys/kernel/mm/lru_gen/enabled
>
>
>diff --git a/mm/vmscan.c b/mm/vmscan.c
>index a4e44f1c97c1..b61a0156559c 100644
>--- a/mm/vmscan.c
>+++ b/mm/vmscan.c
>@@ -4328,6 +4328,7 @@ static int scan_folios(struct lruvec *lruvec, struct scan_control *sc,
>        int sorted = 0;
>        int scanned = 0;
>        int isolated = 0;
>+       int skipped = 0;
>        int remaining = MAX_LRU_BATCH;
>        struct lru_gen_folio *lrugen = &lruvec->lrugen;
>        struct mem_cgroup *memcg = lruvec_memcg(lruvec);
>@@ -4341,7 +4342,7 @@ static int scan_folios(struct lruvec *lruvec, struct scan_control *sc,
> 
>        for (i = MAX_NR_ZONES; i > 0; i--) {
>                LIST_HEAD(moved);
>-               int skipped = 0;
>+               int skipped_zone = 0;
>                int zone = (sc->reclaim_idx + i) % MAX_NR_ZONES;
>                struct list_head *head = &lrugen->folios[gen][type][zone];
> 
>@@ -4363,16 +4364,17 @@ static int scan_folios(struct lruvec *lruvec, struct scan_control *sc,
>                                isolated += delta;
>                        } else {
>                                list_move(&folio->lru, &moved);
>-                               skipped += delta;
>+                               skipped_zone += delta;
>                        }
> 
>-                       if (!--remaining || max(isolated, skipped) >= MIN_LRU_BATCH)
>+                       if (!--remaining || max(isolated, skipped_zone) >= MIN_LRU_BATCH)
>                                break;
>                }
> 
>-               if (skipped) {
>+               if (skipped_zone) {
>                        list_splice(&moved, head);
>-                       __count_zid_vm_events(PGSCAN_SKIP, zone, skipped);
>+                       __count_zid_vm_events(PGSCAN_SKIP, zone, skipped_zone);
>+                       skipped += skipped_zone;
>                }
> 
>                if (!remaining || isolated >= MIN_LRU_BATCH)
>@@ -4387,6 +4389,9 @@ static int scan_folios(struct lruvec *lruvec, struct scan_control *sc,
>        __count_memcg_events(memcg, item, isolated);
>        __count_memcg_events(memcg, PGREFILL, sorted);
>        __count_vm_events(PGSCAN_ANON + type, isolated);
>+       trace_mm_vmscan_lru_isolate(sc->reclaim_idx, sc->order, MAX_LRU_BATCH,
>+                                   scanned, skipped, isolated,
>+                                   type ? LRU_INACTIVE_FILE : LRU_INACTIVE_ANON);
> 
>        /*
>         * There might not be eligible folios due to reclaim_idx. Check the
>@@ -4517,6 +4522,9 @@ static int evict_folios(struct lruvec *lruvec, struct scan_control *sc, int swap
> retry:
>        reclaimed = shrink_folio_list(&list, pgdat, sc, &stat, false);
>        sc->nr_reclaimed += reclaimed;
>+       trace_mm_vmscan_lru_shrink_inactive(pgdat->node_id,
>+                       scanned, reclaimed, &stat, sc->priority,
>+                       type ? LRU_INACTIVE_FILE : LRU_INACTIVE_ANON);
> 
>        list_for_each_entry_safe_reverse(folio, next, &list, lru) {
>                if (!folio_evictable(folio)) {
>
Jaewon Kim Oct. 1, 2023, 11:41 p.m. UTC | #5
Hello Yu Zhao

Could you give me your comment? I am waiting for your opinion on the
reuse method. I'm planning to resend it as a complete patch with it.
Thank you.


On Tue, Sep 26, 2023 at 11:15 PM 김재원 <jaewon31.kim@samsung.com> wrote:
>
> >>>On Mon, Sep 25, 2023 at 10:20 PM Jaewon Kim <jaewon31.kim@samsung.com> wrote:
> >>>>
> >>>> As the legacy lru provides, the lru_gen needs some trace events for
> >>>> debugging.
> >>>>
> >>>> This commit introduces 2 trace events.
> >>>>   trace_mm_vmscan_lru_gen_scan
> >>>>   trace_mm_vmscan_lru_gen_evict
> >>>>
> >>>> Each event is similar to the following legacy events.
> >>>>   trace_mm_vmscan_lru_isolate,
> >>>>   trace_mm_vmscan_lru_shrink_[in]active
> >>>
> >>>We should just reuse trace_mm_vmscan_lru_isolate and
> >>>trace_mm_vmscan_lru_shrink_inactive instead of adding new tracepoints.
> >>>
> >>>To reuse trace_mm_vmscan_lru_isolate, we'd just need to append two new
> >>>names to LRU_NAMES.
> >>>
> >>>The naming of trace_mm_vmscan_lru_shrink_inactive might seem confusing
> >>>but it's how MGLRU maintains the compatibility, e.g., the existing
> >>>active/inactive counters in /proc/vmstat.
> >>
> >>
> >>Hello
> >>
> >>Actually I had tried to reuse them. But some values were not that compatible.
> >>Let me try that way again.
> >>
> >>>
> >
> >Hello Yu Zhao
> >
> >Could you look into what I tried below? I reused the legacy trace events as you recommended.
> >
> >For the nr_scanned for trace_mm_vmscan_lru_shrink_inactive, I just used the scanned returned from isolate_folios.
> >I thought this is right as scan_folios also uses its isolated.
> >  __count_vm_events(PGSCAN_ANON + type, isolated);
> >But I guess the scanned in scan_folios is actually the one used in shrink_inactive_list.
>
> please ignore nr_scanned thing above I just misread the code.
>
> This is an example, I think it works well.
>
>  mm_vmscan_lru_isolate: isolate_mode=0 classzone=2 order=0 nr_requested=4096 nr_scanned=64 nr_skipped=0 nr_taken=64 lru=inactive_file
>  mm_vmscan_lru_shrink_inactive: nid=0 nr_scanned=64 nr_reclaimed=63 nr_dirty=0 nr_writeback=0 nr_congested=0 nr_immediate=0 nr_activate_anon=0 nr_activate_file=1 nr_ref_keep=0 nr_unmap_fail=0 priority=2 flags=RECLAIM_WB_FILE|RECLAIM_WB_ASYNC
>
> >
> >I tested this on both 0 and 7 of /sys/kernel/mm/lru_gen/enabled
> >
> >
> >diff --git a/mm/vmscan.c b/mm/vmscan.c
> >index a4e44f1c97c1..b61a0156559c 100644
> >--- a/mm/vmscan.c
> >+++ b/mm/vmscan.c
> >@@ -4328,6 +4328,7 @@ static int scan_folios(struct lruvec *lruvec, struct scan_control *sc,
> >        int sorted = 0;
> >        int scanned = 0;
> >        int isolated = 0;
> >+       int skipped = 0;
> >        int remaining = MAX_LRU_BATCH;
> >        struct lru_gen_folio *lrugen = &lruvec->lrugen;
> >        struct mem_cgroup *memcg = lruvec_memcg(lruvec);
> >@@ -4341,7 +4342,7 @@ static int scan_folios(struct lruvec *lruvec, struct scan_control *sc,
> >
> >        for (i = MAX_NR_ZONES; i > 0; i--) {
> >                LIST_HEAD(moved);
> >-               int skipped = 0;
> >+               int skipped_zone = 0;
> >                int zone = (sc->reclaim_idx + i) % MAX_NR_ZONES;
> >                struct list_head *head = &lrugen->folios[gen][type][zone];
> >
> >@@ -4363,16 +4364,17 @@ static int scan_folios(struct lruvec *lruvec, struct scan_control *sc,
> >                                isolated += delta;
> >                        } else {
> >                                list_move(&folio->lru, &moved);
> >-                               skipped += delta;
> >+                               skipped_zone += delta;
> >                        }
> >
> >-                       if (!--remaining || max(isolated, skipped) >= MIN_LRU_BATCH)
> >+                       if (!--remaining || max(isolated, skipped_zone) >= MIN_LRU_BATCH)
> >                                break;
> >                }
> >
> >-               if (skipped) {
> >+               if (skipped_zone) {
> >                        list_splice(&moved, head);
> >-                       __count_zid_vm_events(PGSCAN_SKIP, zone, skipped);
> >+                       __count_zid_vm_events(PGSCAN_SKIP, zone, skipped_zone);
> >+                       skipped += skipped_zone;
> >                }
> >
> >                if (!remaining || isolated >= MIN_LRU_BATCH)
> >@@ -4387,6 +4389,9 @@ static int scan_folios(struct lruvec *lruvec, struct scan_control *sc,
> >        __count_memcg_events(memcg, item, isolated);
> >        __count_memcg_events(memcg, PGREFILL, sorted);
> >        __count_vm_events(PGSCAN_ANON + type, isolated);
> >+       trace_mm_vmscan_lru_isolate(sc->reclaim_idx, sc->order, MAX_LRU_BATCH,
> >+                                   scanned, skipped, isolated,
> >+                                   type ? LRU_INACTIVE_FILE : LRU_INACTIVE_ANON);
> >
> >        /*
> >         * There might not be eligible folios due to reclaim_idx. Check the
> >@@ -4517,6 +4522,9 @@ static int evict_folios(struct lruvec *lruvec, struct scan_control *sc, int swap
> > retry:
> >        reclaimed = shrink_folio_list(&list, pgdat, sc, &stat, false);
> >        sc->nr_reclaimed += reclaimed;
> >+       trace_mm_vmscan_lru_shrink_inactive(pgdat->node_id,
> >+                       scanned, reclaimed, &stat, sc->priority,
> >+                       type ? LRU_INACTIVE_FILE : LRU_INACTIVE_ANON);
> >
> >        list_for_each_entry_safe_reverse(folio, next, &list, lru) {
> >                if (!folio_evictable(folio)) {
> >
Yu Zhao Oct. 2, 2023, 3:26 a.m. UTC | #6
On Sun, Oct 1, 2023 at 5:41 PM Jaewon Kim <jaewon31.kim@gmail.com> wrote:
>
> Hello Yu Zhao
>
> Could you give me your comment? I am waiting for your opinion on the
> reuse method. I'm planning to resend it as a complete patch with it.
> Thank you.

Acked-by: Yu Zhao <yuzhao@google.com>
diff mbox series

Patch

diff --git a/include/trace/events/mmflags.h b/include/trace/events/mmflags.h
index 1478b9dd05fa..6dfe85bd4e81 100644
--- a/include/trace/events/mmflags.h
+++ b/include/trace/events/mmflags.h
@@ -274,6 +274,12 @@  IF_HAVE_VM_SOFTDIRTY(VM_SOFTDIRTY,	"softdirty"	)		\
 		EM (LRU_ACTIVE_FILE, "active_file") \
 		EMe(LRU_UNEVICTABLE, "unevictable")
 
+#ifdef CONFIG_LRU_GEN
+#define LRU_GEN_NAMES		\
+		EM (LRU_GEN_ANON, "anon") \
+		EMe(LRU_GEN_FILE, "file")
+#endif
+
 /*
  * First define the enums in the above macros to be exported to userspace
  * via TRACE_DEFINE_ENUM().
@@ -288,6 +294,9 @@  COMPACTION_PRIORITY
 /* COMPACTION_FEEDBACK are defines not enums. Not needed here. */
 ZONE_TYPE
 LRU_NAMES
+#ifdef CONFIG_LRU_GEN
+LRU_GEN_NAMES
+#endif
 
 /*
  * Now redefine the EM() and EMe() macros to map the enums to the strings
diff --git a/include/trace/events/vmscan.h b/include/trace/events/vmscan.h
index d2123dd960d5..2080ef742f89 100644
--- a/include/trace/events/vmscan.h
+++ b/include/trace/events/vmscan.h
@@ -327,6 +327,102 @@  TRACE_EVENT(mm_vmscan_lru_isolate,
 		__print_symbolic(__entry->lru, LRU_NAMES))
 );
 
+#ifdef CONFIG_LRU_GEN
+TRACE_EVENT_CONDITION(mm_vmscan_lru_gen_scan,
+	TP_PROTO(int highest_zoneidx,
+		int order,
+		unsigned long nr_requested,
+		unsigned long nr_scanned,
+		unsigned long nr_skipped,
+		unsigned long nr_taken,
+		int lru),
+
+	TP_ARGS(highest_zoneidx, order, nr_requested, nr_scanned, nr_skipped, nr_taken, lru),
+
+	TP_CONDITION(nr_scanned),
+
+	TP_STRUCT__entry(
+		__field(int, highest_zoneidx)
+		__field(int, order)
+		__field(unsigned long, nr_requested)
+		__field(unsigned long, nr_scanned)
+		__field(unsigned long, nr_skipped)
+		__field(unsigned long, nr_taken)
+		__field(int, lru)
+	),
+
+	TP_fast_assign(
+		__entry->highest_zoneidx = highest_zoneidx;
+		__entry->order = order;
+		__entry->nr_requested = nr_requested;
+		__entry->nr_scanned = nr_scanned;
+		__entry->nr_skipped = nr_skipped;
+		__entry->nr_taken = nr_taken;
+		__entry->lru = lru;
+	),
+
+	/*
+	 * classzone is previous name of the highest_zoneidx.
+	 * Reason not to change it is the ABI requirement of the tracepoint.
+	 */
+	TP_printk("classzone=%d order=%d nr_requested=%lu nr_scanned=%lu nr_skipped=%lu nr_taken=%lu lru=%s",
+		__entry->highest_zoneidx,
+		__entry->order,
+		__entry->nr_requested,
+		__entry->nr_scanned,
+		__entry->nr_skipped,
+		__entry->nr_taken,
+		__print_symbolic(__entry->lru, LRU_GEN_NAMES))
+);
+
+TRACE_EVENT(mm_vmscan_lru_gen_evict,
+
+	TP_PROTO(int nid, unsigned long nr_reclaimed,
+		struct reclaim_stat *stat, int priority, int file),
+
+	TP_ARGS(nid, nr_reclaimed, stat, priority, file),
+
+	TP_STRUCT__entry(
+		__field(unsigned long, nr_reclaimed)
+		__field(unsigned long, nr_dirty)
+		__field(unsigned long, nr_writeback)
+		__field(unsigned long, nr_congested)
+		__field(unsigned long, nr_immediate)
+		__field(unsigned int, nr_activate0)
+		__field(unsigned int, nr_activate1)
+		__field(unsigned long, nr_ref_keep)
+		__field(unsigned long, nr_unmap_fail)
+		__field(int, nid)
+		__field(int, priority)
+		__field(int, reclaim_flags)
+	),
+
+	TP_fast_assign(
+		__entry->nid = nid;
+		__entry->nr_reclaimed = nr_reclaimed;
+		__entry->nr_dirty = stat->nr_dirty;
+		__entry->nr_writeback = stat->nr_writeback;
+		__entry->nr_congested = stat->nr_congested;
+		__entry->nr_immediate = stat->nr_immediate;
+		__entry->nr_activate0 = stat->nr_activate[0];
+		__entry->nr_activate1 = stat->nr_activate[1];
+		__entry->nr_ref_keep = stat->nr_ref_keep;
+		__entry->nr_unmap_fail = stat->nr_unmap_fail;
+		__entry->priority = priority;
+		__entry->reclaim_flags = trace_reclaim_flags(file);
+	),
+
+	TP_printk("nid=%d nr_reclaimed=%lu nr_dirty=%lu nr_writeback=%lu nr_congested=%lu nr_immediate=%lu nr_activate_anon=%u nr_activate_file=%u nr_ref_keep=%lu nr_unmap_fail=%lu priority=%d flags=%s",
+		__entry->nid, __entry->nr_reclaimed,
+		__entry->nr_dirty, __entry->nr_writeback,
+		__entry->nr_congested, __entry->nr_immediate,
+		__entry->nr_activate0, __entry->nr_activate1,
+		__entry->nr_ref_keep, __entry->nr_unmap_fail,
+		__entry->priority,
+		show_reclaim_flags(__entry->reclaim_flags))
+);
+#endif
+
 TRACE_EVENT(mm_vmscan_write_folio,
 
 	TP_PROTO(struct folio *folio),
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 6f13394b112e..0c8b48bcb461 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -5005,6 +5005,7 @@  static int scan_folios(struct lruvec *lruvec, struct scan_control *sc,
 	int sorted = 0;
 	int scanned = 0;
 	int isolated = 0;
+	int skipped = 0;
 	int remaining = MAX_LRU_BATCH;
 	struct lru_gen_folio *lrugen = &lruvec->lrugen;
 	struct mem_cgroup *memcg = lruvec_memcg(lruvec);
@@ -5018,7 +5019,7 @@  static int scan_folios(struct lruvec *lruvec, struct scan_control *sc,
 
 	for (i = MAX_NR_ZONES; i > 0; i--) {
 		LIST_HEAD(moved);
-		int skipped = 0;
+		int skipped_zone = 0;
 		int zone = (sc->reclaim_idx + i) % MAX_NR_ZONES;
 		struct list_head *head = &lrugen->folios[gen][type][zone];
 
@@ -5040,16 +5041,17 @@  static int scan_folios(struct lruvec *lruvec, struct scan_control *sc,
 				isolated += delta;
 			} else {
 				list_move(&folio->lru, &moved);
-				skipped += delta;
+				skipped_zone += delta;
 			}
 
-			if (!--remaining || max(isolated, skipped) >= MIN_LRU_BATCH)
+			if (!--remaining || max(isolated, skipped_zone) >= MIN_LRU_BATCH)
 				break;
 		}
 
-		if (skipped) {
+		if (skipped_zone) {
 			list_splice(&moved, head);
-			__count_zid_vm_events(PGSCAN_SKIP, zone, skipped);
+			__count_zid_vm_events(PGSCAN_SKIP, zone, skipped_zone);
+			skipped += skipped_zone;
 		}
 
 		if (!remaining || isolated >= MIN_LRU_BATCH)
@@ -5065,6 +5067,10 @@  static int scan_folios(struct lruvec *lruvec, struct scan_control *sc,
 	__count_memcg_events(memcg, PGREFILL, sorted);
 	__count_vm_events(PGSCAN_ANON + type, isolated);
 
+#ifdef CONFIG_LRU_GEN
+	trace_mm_vmscan_lru_gen_scan(sc->reclaim_idx, sc->order, MAX_LRU_BATCH,
+			scanned, skipped, isolated, type);
+#endif
 	/*
 	 * There might not be eligible folios due to reclaim_idx. Check the
 	 * remaining to prevent livelock if it's not making progress.
@@ -5194,6 +5200,10 @@  static int evict_folios(struct lruvec *lruvec, struct scan_control *sc, int swap
 retry:
 	reclaimed = shrink_folio_list(&list, pgdat, sc, &stat, false);
 	sc->nr_reclaimed += reclaimed;
+#ifdef CONFIG_LRU_GEN
+	trace_mm_vmscan_lru_gen_evict(pgdat->node_id, reclaimed, &stat,
+				      sc->priority, type);
+#endif
 
 	list_for_each_entry_safe_reverse(folio, next, &list, lru) {
 		if (!folio_evictable(folio)) {