diff mbox series

[v2] mm: vmscan: add tracepoints for node reclaim

Message ID 1551421452-5385-1-git-send-email-laoar.shao@gmail.com (mailing list archive)
State New, archived
Headers show
Series [v2] mm: vmscan: add tracepoints for node reclaim | expand

Commit Message

Yafang Shao March 1, 2019, 6:24 a.m. UTC
The page allocation fast path may perform node reclaim, which can cause
latency spikes.
Add tracepoints for this event so that the latency it causes can be
measured.

So the following two tracepoints are introduced:
	mm_vmscan_node_reclaim_begin
	mm_vmscan_node_reclaim_end

Signed-off-by: Yafang Shao <laoar.shao@gmail.com>
---
 include/trace/events/vmscan.h | 32 ++++++++++++++++++++++++++++++++
 mm/vmscan.c                   |  6 ++++++
 2 files changed, 38 insertions(+)

Comments

Souptick Joarder March 1, 2019, 8:38 a.m. UTC | #1
On Fri, Mar 1, 2019 at 11:54 AM Yafang Shao <laoar.shao@gmail.com> wrote:
>
> In the page alloc fast path, it may do node reclaim, which may cause
> latency spike.
> We should add tracepoint for this event, and also measure the latency
> it causes.
>
> So bellow two tracepoints are introduced,
>         mm_vmscan_node_reclaim_begin
>         mm_vmscan_node_reclaim_end
>
> Signed-off-by: Yafang Shao <laoar.shao@gmail.com>

Acked-by: Souptick Joarder <jrdr.linux@gmail.com>
(for the comment on v1).

> ---
>  include/trace/events/vmscan.h | 32 ++++++++++++++++++++++++++++++++
>  mm/vmscan.c                   |  6 ++++++
>  2 files changed, 38 insertions(+)
>
> diff --git a/include/trace/events/vmscan.h b/include/trace/events/vmscan.h
> index a1cb913..c1ddf28 100644
> --- a/include/trace/events/vmscan.h
> +++ b/include/trace/events/vmscan.h
> @@ -465,6 +465,38 @@
>                 __entry->ratio,
>                 show_reclaim_flags(__entry->reclaim_flags))
>  );
> +
> +TRACE_EVENT(mm_vmscan_node_reclaim_begin,
> +
> +       TP_PROTO(int nid, int order, gfp_t gfp_flags),
> +
> +       TP_ARGS(nid, order, gfp_flags),
> +
> +       TP_STRUCT__entry(
> +               __field(int, nid)
> +               __field(int, order)
> +               __field(gfp_t, gfp_flags)
> +       ),
> +
> +       TP_fast_assign(
> +               __entry->nid = nid;
> +               __entry->order = order;
> +               __entry->gfp_flags = gfp_flags;
> +       ),
> +
> +       TP_printk("nid=%d order=%d gfp_flags=%s",
> +               __entry->nid,
> +               __entry->order,
> +               show_gfp_flags(__entry->gfp_flags))
> +);
> +
> +DEFINE_EVENT(mm_vmscan_direct_reclaim_end_template, mm_vmscan_node_reclaim_end,
> +
> +       TP_PROTO(unsigned long nr_reclaimed),
> +
> +       TP_ARGS(nr_reclaimed)
> +);
> +
>  #endif /* _TRACE_VMSCAN_H */
>
>  /* This part must be outside protection */
> diff --git a/mm/vmscan.c b/mm/vmscan.c
> index ac4806f..2bee5d1 100644
> --- a/mm/vmscan.c
> +++ b/mm/vmscan.c
> @@ -4241,6 +4241,9 @@ static int __node_reclaim(struct pglist_data *pgdat, gfp_t gfp_mask, unsigned in
>                 .reclaim_idx = gfp_zone(gfp_mask),
>         };
>
> +       trace_mm_vmscan_node_reclaim_begin(pgdat->node_id, order,
> +                                          sc.gfp_mask);
> +
>         cond_resched();
>         fs_reclaim_acquire(sc.gfp_mask);
>         /*
> @@ -4267,6 +4270,9 @@ static int __node_reclaim(struct pglist_data *pgdat, gfp_t gfp_mask, unsigned in
>         current->flags &= ~PF_SWAPWRITE;
>         memalloc_noreclaim_restore(noreclaim_flag);
>         fs_reclaim_release(sc.gfp_mask);
> +
> +       trace_mm_vmscan_node_reclaim_end(sc.nr_reclaimed);
> +
>         return sc.nr_reclaimed >= nr_pages;
>  }
>
> --
> 1.8.3.1
>
Yafang Shao March 14, 2019, 9:43 a.m. UTC | #2
On Fri, Mar 1, 2019 at 2:24 PM Yafang Shao <laoar.shao@gmail.com> wrote:
>
> There are three tracepoints using this template, which are
> mm_vmscan_direct_reclaim_begin,
> mm_vmscan_memcg_reclaim_begin,
> mm_vmscan_memcg_softlimit_reclaim_begin.
>
> Regarding mm_vmscan_direct_reclaim_begin,
> sc.may_writepage is !laptop_mode, which is a static setting, and
> reclaim_idx is derived from gfp_mask, which is already shown in this
> tracepoint.
>
> Regarding mm_vmscan_memcg_reclaim_begin,
> may_writepage is !laptop_mode too, and reclaim_idx is (MAX_NR_ZONES-1),
> both of which are static values.
>
> mm_vmscan_memcg_softlimit_reclaim_begin is the same as
> mm_vmscan_memcg_reclaim_begin.
>
> So we can drop them all.
>
> Signed-off-by: Yafang Shao <laoar.shao@gmail.com>
> ---
>  include/trace/events/vmscan.h | 26 ++++++++++----------------
>  mm/vmscan.c                   | 14 +++-----------
>  2 files changed, 13 insertions(+), 27 deletions(-)
>
> diff --git a/include/trace/events/vmscan.h b/include/trace/events/vmscan.h
> index a1cb913..153d90c 100644
> --- a/include/trace/events/vmscan.h
> +++ b/include/trace/events/vmscan.h
> @@ -105,51 +105,45 @@
>
>  DECLARE_EVENT_CLASS(mm_vmscan_direct_reclaim_begin_template,
>
> -       TP_PROTO(int order, int may_writepage, gfp_t gfp_flags, int classzone_idx),
> +       TP_PROTO(int order, gfp_t gfp_flags),
>
> -       TP_ARGS(order, may_writepage, gfp_flags, classzone_idx),
> +       TP_ARGS(order, gfp_flags),
>
>         TP_STRUCT__entry(
>                 __field(        int,    order           )
> -               __field(        int,    may_writepage   )
>                 __field(        gfp_t,  gfp_flags       )
> -               __field(        int,    classzone_idx   )
>         ),
>
>         TP_fast_assign(
>                 __entry->order          = order;
> -               __entry->may_writepage  = may_writepage;
>                 __entry->gfp_flags      = gfp_flags;
> -               __entry->classzone_idx  = classzone_idx;
>         ),
>
> -       TP_printk("order=%d may_writepage=%d gfp_flags=%s classzone_idx=%d",
> +       TP_printk("order=%d gfp_flags=%s",
>                 __entry->order,
> -               __entry->may_writepage,
> -               show_gfp_flags(__entry->gfp_flags),
> -               __entry->classzone_idx)
> +               show_gfp_flags(__entry->gfp_flags))
>  );
>
>  DEFINE_EVENT(mm_vmscan_direct_reclaim_begin_template, mm_vmscan_direct_reclaim_begin,
>
> -       TP_PROTO(int order, int may_writepage, gfp_t gfp_flags, int classzone_idx),
> +       TP_PROTO(int order, gfp_t gfp_flags),
>
> -       TP_ARGS(order, may_writepage, gfp_flags, classzone_idx)
> +       TP_ARGS(order, gfp_flags)
>  );
>
>  #ifdef CONFIG_MEMCG
>  DEFINE_EVENT(mm_vmscan_direct_reclaim_begin_template, mm_vmscan_memcg_reclaim_begin,
>
> -       TP_PROTO(int order, int may_writepage, gfp_t gfp_flags, int classzone_idx),
> +       TP_PROTO(int order, gfp_t gfp_flags),
>
> -       TP_ARGS(order, may_writepage, gfp_flags, classzone_idx)
> +       TP_ARGS(order, gfp_flags)
>  );
>
>  DEFINE_EVENT(mm_vmscan_direct_reclaim_begin_template, mm_vmscan_memcg_softlimit_reclaim_begin,
>
> -       TP_PROTO(int order, int may_writepage, gfp_t gfp_flags, int classzone_idx),
> +       TP_PROTO(int order, gfp_t gfp_flags),
>
> -       TP_ARGS(order, may_writepage, gfp_flags, classzone_idx)
> +       TP_ARGS(order, gfp_flags)
>  );
>  #endif /* CONFIG_MEMCG */
>
> diff --git a/mm/vmscan.c b/mm/vmscan.c
> index ac4806f..cdc0305 100644
> --- a/mm/vmscan.c
> +++ b/mm/vmscan.c
> @@ -3304,10 +3304,7 @@ unsigned long try_to_free_pages(struct zonelist *zonelist, int order,
>         if (throttle_direct_reclaim(sc.gfp_mask, zonelist, nodemask))
>                 return 1;
>
> -       trace_mm_vmscan_direct_reclaim_begin(order,
> -                               sc.may_writepage,
> -                               sc.gfp_mask,
> -                               sc.reclaim_idx);
> +       trace_mm_vmscan_direct_reclaim_begin(order, sc.gfp_mask);
>
>         nr_reclaimed = do_try_to_free_pages(zonelist, &sc);
>
> @@ -3338,9 +3335,7 @@ unsigned long mem_cgroup_shrink_node(struct mem_cgroup *memcg,
>                         (GFP_HIGHUSER_MOVABLE & ~GFP_RECLAIM_MASK);
>
>         trace_mm_vmscan_memcg_softlimit_reclaim_begin(sc.order,
> -                                                     sc.may_writepage,
> -                                                     sc.gfp_mask,
> -                                                     sc.reclaim_idx);
> +                                                     sc.gfp_mask);
>
>         /*
>          * NOTE: Although we can get the priority field, using it
> @@ -3389,10 +3384,7 @@ unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *memcg,
>
>         zonelist = &NODE_DATA(nid)->node_zonelists[ZONELIST_FALLBACK];
>
> -       trace_mm_vmscan_memcg_reclaim_begin(0,
> -                                           sc.may_writepage,
> -                                           sc.gfp_mask,
> -                                           sc.reclaim_idx);
> +       trace_mm_vmscan_memcg_reclaim_begin(0, sc.gfp_mask);
>
>         psi_memstall_enter(&pflags);
>         noreclaim_flag = memalloc_noreclaim_save();
> --
> 1.8.3.1
>

Hi Vlastimil, Michal,

Any comments on this patch?

Thanks
Yafang
Yafang Shao March 14, 2019, 10:43 a.m. UTC | #3
On Thu, Mar 14, 2019 at 6:19 PM Michal Hocko <mhocko@kernel.org> wrote:
>
> On Fri 01-03-19 14:24:12, Yafang Shao wrote:
> > There are three tracepoints using this template, which are
> > mm_vmscan_direct_reclaim_begin,
> > mm_vmscan_memcg_reclaim_begin,
> > mm_vmscan_memcg_softlimit_reclaim_begin.
> >
> > Regarding mm_vmscan_direct_reclaim_begin,
> > sc.may_writepage is !laptop_mode, that's a static setting, and
> > reclaim_idx is derived from gfp_mask which is already show in this
> > tracepoint.
> >
> > Regarding mm_vmscan_memcg_reclaim_begin,
> > may_writepage is !laptop_mode too, and reclaim_idx is (MAX_NR_ZONES-1),
> > which are both static value.
> >
> > mm_vmscan_memcg_softlimit_reclaim_begin is the same with
> > mm_vmscan_memcg_reclaim_begin.
> >
> > So we can drop them all.
>
> I agree. Although classzone_idx is PITA to calculate nothing really
> prevents us to have a tool to do that. may_writepage is not all that
> useful anymore.
>
> > Signed-off-by: Yafang Shao <laoar.shao@gmail.com>
>
> From a quick glance this looks ok. I haven't really checked deeply or
> tried to compile it but the change makes sense.
>

Thanks for your quick response!
This patch works fine, I have verified it.

> Acked-by: Michal Hocko <mhocko@suse.com>
> > ---
> >  include/trace/events/vmscan.h | 26 ++++++++++----------------
> >  mm/vmscan.c                   | 14 +++-----------
> >  2 files changed, 13 insertions(+), 27 deletions(-)
> >
> > diff --git a/include/trace/events/vmscan.h b/include/trace/events/vmscan.h
> > index a1cb913..153d90c 100644
> > --- a/include/trace/events/vmscan.h
> > +++ b/include/trace/events/vmscan.h
> > @@ -105,51 +105,45 @@
> >
> >  DECLARE_EVENT_CLASS(mm_vmscan_direct_reclaim_begin_template,
> >
> > -     TP_PROTO(int order, int may_writepage, gfp_t gfp_flags, int classzone_idx),
> > +     TP_PROTO(int order, gfp_t gfp_flags),
> >
> > -     TP_ARGS(order, may_writepage, gfp_flags, classzone_idx),
> > +     TP_ARGS(order, gfp_flags),
> >
> >       TP_STRUCT__entry(
> >               __field(        int,    order           )
> > -             __field(        int,    may_writepage   )
> >               __field(        gfp_t,  gfp_flags       )
> > -             __field(        int,    classzone_idx   )
> >       ),
> >
> >       TP_fast_assign(
> >               __entry->order          = order;
> > -             __entry->may_writepage  = may_writepage;
> >               __entry->gfp_flags      = gfp_flags;
> > -             __entry->classzone_idx  = classzone_idx;
> >       ),
> >
> > -     TP_printk("order=%d may_writepage=%d gfp_flags=%s classzone_idx=%d",
> > +     TP_printk("order=%d gfp_flags=%s",
> >               __entry->order,
> > -             __entry->may_writepage,
> > -             show_gfp_flags(__entry->gfp_flags),
> > -             __entry->classzone_idx)
> > +             show_gfp_flags(__entry->gfp_flags))
> >  );
> >
> >  DEFINE_EVENT(mm_vmscan_direct_reclaim_begin_template, mm_vmscan_direct_reclaim_begin,
> >
> > -     TP_PROTO(int order, int may_writepage, gfp_t gfp_flags, int classzone_idx),
> > +     TP_PROTO(int order, gfp_t gfp_flags),
> >
> > -     TP_ARGS(order, may_writepage, gfp_flags, classzone_idx)
> > +     TP_ARGS(order, gfp_flags)
> >  );
> >
> >  #ifdef CONFIG_MEMCG
> >  DEFINE_EVENT(mm_vmscan_direct_reclaim_begin_template, mm_vmscan_memcg_reclaim_begin,
> >
> > -     TP_PROTO(int order, int may_writepage, gfp_t gfp_flags, int classzone_idx),
> > +     TP_PROTO(int order, gfp_t gfp_flags),
> >
> > -     TP_ARGS(order, may_writepage, gfp_flags, classzone_idx)
> > +     TP_ARGS(order, gfp_flags)
> >  );
> >
> >  DEFINE_EVENT(mm_vmscan_direct_reclaim_begin_template, mm_vmscan_memcg_softlimit_reclaim_begin,
> >
> > -     TP_PROTO(int order, int may_writepage, gfp_t gfp_flags, int classzone_idx),
> > +     TP_PROTO(int order, gfp_t gfp_flags),
> >
> > -     TP_ARGS(order, may_writepage, gfp_flags, classzone_idx)
> > +     TP_ARGS(order, gfp_flags)
> >  );
> >  #endif /* CONFIG_MEMCG */
> >
> > diff --git a/mm/vmscan.c b/mm/vmscan.c
> > index ac4806f..cdc0305 100644
> > --- a/mm/vmscan.c
> > +++ b/mm/vmscan.c
> > @@ -3304,10 +3304,7 @@ unsigned long try_to_free_pages(struct zonelist *zonelist, int order,
> >       if (throttle_direct_reclaim(sc.gfp_mask, zonelist, nodemask))
> >               return 1;
> >
> > -     trace_mm_vmscan_direct_reclaim_begin(order,
> > -                             sc.may_writepage,
> > -                             sc.gfp_mask,
> > -                             sc.reclaim_idx);
> > +     trace_mm_vmscan_direct_reclaim_begin(order, sc.gfp_mask);
> >
> >       nr_reclaimed = do_try_to_free_pages(zonelist, &sc);
> >
> > @@ -3338,9 +3335,7 @@ unsigned long mem_cgroup_shrink_node(struct mem_cgroup *memcg,
> >                       (GFP_HIGHUSER_MOVABLE & ~GFP_RECLAIM_MASK);
> >
> >       trace_mm_vmscan_memcg_softlimit_reclaim_begin(sc.order,
> > -                                                   sc.may_writepage,
> > -                                                   sc.gfp_mask,
> > -                                                   sc.reclaim_idx);
> > +                                                   sc.gfp_mask);
> >
> >       /*
> >        * NOTE: Although we can get the priority field, using it
> > @@ -3389,10 +3384,7 @@ unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *memcg,
> >
> >       zonelist = &NODE_DATA(nid)->node_zonelists[ZONELIST_FALLBACK];
> >
> > -     trace_mm_vmscan_memcg_reclaim_begin(0,
> > -                                         sc.may_writepage,
> > -                                         sc.gfp_mask,
> > -                                         sc.reclaim_idx);
> > +     trace_mm_vmscan_memcg_reclaim_begin(0, sc.gfp_mask);
> >
> >       psi_memstall_enter(&pflags);
> >       noreclaim_flag = memalloc_noreclaim_save();
> > --
> > 1.8.3.1
> >
>
> --
> Michal Hocko
> SUSE Labs
diff mbox series

Patch

diff --git a/include/trace/events/vmscan.h b/include/trace/events/vmscan.h
index a1cb913..c1ddf28 100644
--- a/include/trace/events/vmscan.h
+++ b/include/trace/events/vmscan.h
@@ -465,6 +465,38 @@ 
 		__entry->ratio,
 		show_reclaim_flags(__entry->reclaim_flags))
 );
+
+TRACE_EVENT(mm_vmscan_node_reclaim_begin,
+
+	TP_PROTO(int nid, int order, gfp_t gfp_flags),
+
+	TP_ARGS(nid, order, gfp_flags),
+
+	TP_STRUCT__entry(
+		__field(int, nid)
+		__field(int, order)
+		__field(gfp_t, gfp_flags)
+	),
+
+	TP_fast_assign(
+		__entry->nid = nid;
+		__entry->order = order;
+		__entry->gfp_flags = gfp_flags;
+	),
+
+	TP_printk("nid=%d order=%d gfp_flags=%s",
+		__entry->nid,
+		__entry->order,
+		show_gfp_flags(__entry->gfp_flags))
+);
+
+DEFINE_EVENT(mm_vmscan_direct_reclaim_end_template, mm_vmscan_node_reclaim_end,
+
+	TP_PROTO(unsigned long nr_reclaimed),
+
+	TP_ARGS(nr_reclaimed)
+);
+
 #endif /* _TRACE_VMSCAN_H */
 
 /* This part must be outside protection */
diff --git a/mm/vmscan.c b/mm/vmscan.c
index ac4806f..2bee5d1 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -4241,6 +4241,9 @@  static int __node_reclaim(struct pglist_data *pgdat, gfp_t gfp_mask, unsigned in
 		.reclaim_idx = gfp_zone(gfp_mask),
 	};
 
+	trace_mm_vmscan_node_reclaim_begin(pgdat->node_id, order,
+					   sc.gfp_mask);
+
 	cond_resched();
 	fs_reclaim_acquire(sc.gfp_mask);
 	/*
@@ -4267,6 +4270,9 @@  static int __node_reclaim(struct pglist_data *pgdat, gfp_t gfp_mask, unsigned in
 	current->flags &= ~PF_SWAPWRITE;
 	memalloc_noreclaim_restore(noreclaim_flag);
 	fs_reclaim_release(sc.gfp_mask);
+
+	trace_mm_vmscan_node_reclaim_end(sc.nr_reclaimed);
+
 	return sc.nr_reclaimed >= nr_pages;
 }