Message ID | 1551421452-5385-1-git-send-email-laoar.shao@gmail.com (mailing list archive)
---|---
State | New, archived
Series | [v2] mm: vmscan: add tracepoints for node reclaim
On Fri, Mar 1, 2019 at 11:54 AM Yafang Shao <laoar.shao@gmail.com> wrote:
>
> In the page alloc fast path, it may do node reclaim, which may cause a
> latency spike.
> We should add tracepoints for this event, and also measure the latency
> it causes.
>
> So the below two tracepoints are introduced:
> mm_vmscan_node_reclaim_begin
> mm_vmscan_node_reclaim_end
>
> Signed-off-by: Yafang Shao <laoar.shao@gmail.com>

Acked-by: Souptick Joarder <jrdr.linux@gmail.com>
(for the comment on v1).

> ---
>  include/trace/events/vmscan.h | 32 ++++++++++++++++++++++++++++++++
>  mm/vmscan.c                   |  6 ++++++
>  2 files changed, 38 insertions(+)
>
> diff --git a/include/trace/events/vmscan.h b/include/trace/events/vmscan.h
> index a1cb913..c1ddf28 100644
> --- a/include/trace/events/vmscan.h
> +++ b/include/trace/events/vmscan.h
> @@ -465,6 +465,38 @@
>                 __entry->ratio,
>                 show_reclaim_flags(__entry->reclaim_flags))
>  );
> +
> +TRACE_EVENT(mm_vmscan_node_reclaim_begin,
> +
> +       TP_PROTO(int nid, int order, gfp_t gfp_flags),
> +
> +       TP_ARGS(nid, order, gfp_flags),
> +
> +       TP_STRUCT__entry(
> +               __field(int, nid)
> +               __field(int, order)
> +               __field(gfp_t, gfp_flags)
> +       ),
> +
> +       TP_fast_assign(
> +               __entry->nid = nid;
> +               __entry->order = order;
> +               __entry->gfp_flags = gfp_flags;
> +       ),
> +
> +       TP_printk("nid=%d order=%d gfp_flags=%s",
> +               __entry->nid,
> +               __entry->order,
> +               show_gfp_flags(__entry->gfp_flags))
> +);
> +
> +DEFINE_EVENT(mm_vmscan_direct_reclaim_end_template, mm_vmscan_node_reclaim_end,
> +
> +       TP_PROTO(unsigned long nr_reclaimed),
> +
> +       TP_ARGS(nr_reclaimed)
> +);
> +
>  #endif /* _TRACE_VMSCAN_H */
>
>  /* This part must be outside protection */
> diff --git a/mm/vmscan.c b/mm/vmscan.c
> index ac4806f..2bee5d1 100644
> --- a/mm/vmscan.c
> +++ b/mm/vmscan.c
> @@ -4241,6 +4241,9 @@ static int __node_reclaim(struct pglist_data *pgdat, gfp_t gfp_mask, unsigned in
>                 .reclaim_idx = gfp_zone(gfp_mask),
>         };
>
> +       trace_mm_vmscan_node_reclaim_begin(pgdat->node_id, order,
> +                                          sc.gfp_mask);
> +
>         cond_resched();
>         fs_reclaim_acquire(sc.gfp_mask);
>         /*
> @@ -4267,6 +4270,9 @@ static int __node_reclaim(struct pglist_data *pgdat, gfp_t gfp_mask, unsigned in
>         current->flags &= ~PF_SWAPWRITE;
>         memalloc_noreclaim_restore(noreclaim_flag);
>         fs_reclaim_release(sc.gfp_mask);
> +
> +       trace_mm_vmscan_node_reclaim_end(sc.nr_reclaimed);
> +
>         return sc.nr_reclaimed >= nr_pages;
>  }
>
> --
> 1.8.3.1
>
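For readers who want to try the new events, a minimal userspace sketch along the following lines can enable them through tracefs. It is not part of the patch: the /sys/kernel/tracing mount point and root privileges are assumptions (older systems expose the same files under /sys/kernel/debug/tracing), and the event directories only exist on a kernel carrying this change.

    /* Sketch only: enable the two new node reclaim tracepoints via tracefs. */
    #include <stdio.h>

    static int enable_event(const char *path)
    {
            FILE *f = fopen(path, "w");

            if (!f) {
                    perror(path);
                    return -1;
            }
            fputs("1\n", f);        /* writing "1" turns the event on */
            fclose(f);
            return 0;
    }

    int main(void)
    {
            const char *base = "/sys/kernel/tracing/events/vmscan";
            char path[256];

            snprintf(path, sizeof(path), "%s/mm_vmscan_node_reclaim_begin/enable", base);
            enable_event(path);
            snprintf(path, sizeof(path), "%s/mm_vmscan_node_reclaim_end/enable", base);
            enable_event(path);
            return 0;
    }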
On Fri, Mar 1, 2019 at 2:24 PM Yafang Shao <laoar.shao@gmail.com> wrote:
>
> There are three tracepoints using this template, which are
> mm_vmscan_direct_reclaim_begin,
> mm_vmscan_memcg_reclaim_begin,
> mm_vmscan_memcg_softlimit_reclaim_begin.
>
> Regarding mm_vmscan_direct_reclaim_begin,
> sc.may_writepage is !laptop_mode, that's a static setting, and
> reclaim_idx is derived from gfp_mask, which is already shown in this
> tracepoint.
>
> Regarding mm_vmscan_memcg_reclaim_begin,
> may_writepage is !laptop_mode too, and reclaim_idx is (MAX_NR_ZONES-1),
> which are both static values.
>
> mm_vmscan_memcg_softlimit_reclaim_begin is the same as
> mm_vmscan_memcg_reclaim_begin.
>
> So we can drop them all.
>
> Signed-off-by: Yafang Shao <laoar.shao@gmail.com>
> ---
>  include/trace/events/vmscan.h | 26 ++++++++++----------------
>  mm/vmscan.c                   | 14 +++-----------
>  2 files changed, 13 insertions(+), 27 deletions(-)
>
> diff --git a/include/trace/events/vmscan.h b/include/trace/events/vmscan.h
> index a1cb913..153d90c 100644
> --- a/include/trace/events/vmscan.h
> +++ b/include/trace/events/vmscan.h
> @@ -105,51 +105,45 @@
>
>  DECLARE_EVENT_CLASS(mm_vmscan_direct_reclaim_begin_template,
>
> -       TP_PROTO(int order, int may_writepage, gfp_t gfp_flags, int classzone_idx),
> +       TP_PROTO(int order, gfp_t gfp_flags),
>
> -       TP_ARGS(order, may_writepage, gfp_flags, classzone_idx),
> +       TP_ARGS(order, gfp_flags),
>
>         TP_STRUCT__entry(
>                 __field( int, order )
> -               __field( int, may_writepage )
>                 __field( gfp_t, gfp_flags )
> -               __field( int, classzone_idx )
>         ),
>
>         TP_fast_assign(
>                 __entry->order = order;
> -               __entry->may_writepage = may_writepage;
>                 __entry->gfp_flags = gfp_flags;
> -               __entry->classzone_idx = classzone_idx;
>         ),
>
> -       TP_printk("order=%d may_writepage=%d gfp_flags=%s classzone_idx=%d",
> +       TP_printk("order=%d gfp_flags=%s",
>                 __entry->order,
> -               __entry->may_writepage,
> -               show_gfp_flags(__entry->gfp_flags),
> -               __entry->classzone_idx)
> +               show_gfp_flags(__entry->gfp_flags))
>  );
>
>  DEFINE_EVENT(mm_vmscan_direct_reclaim_begin_template, mm_vmscan_direct_reclaim_begin,
>
> -       TP_PROTO(int order, int may_writepage, gfp_t gfp_flags, int classzone_idx),
> +       TP_PROTO(int order, gfp_t gfp_flags),
>
> -       TP_ARGS(order, may_writepage, gfp_flags, classzone_idx)
> +       TP_ARGS(order, gfp_flags)
>  );
>
>  #ifdef CONFIG_MEMCG
>  DEFINE_EVENT(mm_vmscan_direct_reclaim_begin_template, mm_vmscan_memcg_reclaim_begin,
>
> -       TP_PROTO(int order, int may_writepage, gfp_t gfp_flags, int classzone_idx),
> +       TP_PROTO(int order, gfp_t gfp_flags),
>
> -       TP_ARGS(order, may_writepage, gfp_flags, classzone_idx)
> +       TP_ARGS(order, gfp_flags)
>  );
>
>  DEFINE_EVENT(mm_vmscan_direct_reclaim_begin_template, mm_vmscan_memcg_softlimit_reclaim_begin,
>
> -       TP_PROTO(int order, int may_writepage, gfp_t gfp_flags, int classzone_idx),
> +       TP_PROTO(int order, gfp_t gfp_flags),
>
> -       TP_ARGS(order, may_writepage, gfp_flags, classzone_idx)
> +       TP_ARGS(order, gfp_flags)
>  );
>  #endif /* CONFIG_MEMCG */
>
> diff --git a/mm/vmscan.c b/mm/vmscan.c
> index ac4806f..cdc0305 100644
> --- a/mm/vmscan.c
> +++ b/mm/vmscan.c
> @@ -3304,10 +3304,7 @@ unsigned long try_to_free_pages(struct zonelist *zonelist, int order,
>         if (throttle_direct_reclaim(sc.gfp_mask, zonelist, nodemask))
>                 return 1;
>
> -       trace_mm_vmscan_direct_reclaim_begin(order,
> -                               sc.may_writepage,
> -                               sc.gfp_mask,
> -                               sc.reclaim_idx);
> +       trace_mm_vmscan_direct_reclaim_begin(order, sc.gfp_mask);
>
>         nr_reclaimed = do_try_to_free_pages(zonelist, &sc);
>
> @@ -3338,9 +3335,7 @@ unsigned long mem_cgroup_shrink_node(struct mem_cgroup *memcg,
>                         (GFP_HIGHUSER_MOVABLE & ~GFP_RECLAIM_MASK);
>
>         trace_mm_vmscan_memcg_softlimit_reclaim_begin(sc.order,
> -                                                     sc.may_writepage,
> -                                                     sc.gfp_mask,
> -                                                     sc.reclaim_idx);
> +                                                     sc.gfp_mask);
>
>         /*
>          * NOTE: Although we can get the priority field, using it
> @@ -3389,10 +3384,7 @@ unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *memcg,
>
>         zonelist = &NODE_DATA(nid)->node_zonelists[ZONELIST_FALLBACK];
>
> -       trace_mm_vmscan_memcg_reclaim_begin(0,
> -                               sc.may_writepage,
> -                               sc.gfp_mask,
> -                               sc.reclaim_idx);
> +       trace_mm_vmscan_memcg_reclaim_begin(0, sc.gfp_mask);
>
>         psi_memstall_enter(&pflags);
>         noreclaim_flag = memalloc_noreclaim_save();
> --
> 1.8.3.1
>

Hi Vlastimil, Michal,

Any comments on this patch?

Thanks
Yafang
On Thu, Mar 14, 2019 at 6:19 PM Michal Hocko <mhocko@kernel.org> wrote:
>
> On Fri 01-03-19 14:24:12, Yafang Shao wrote:
> > There are three tracepoints using this template, which are
> > mm_vmscan_direct_reclaim_begin,
> > mm_vmscan_memcg_reclaim_begin,
> > mm_vmscan_memcg_softlimit_reclaim_begin.
> >
> > Regarding mm_vmscan_direct_reclaim_begin,
> > sc.may_writepage is !laptop_mode, that's a static setting, and
> > reclaim_idx is derived from gfp_mask, which is already shown in this
> > tracepoint.
> >
> > Regarding mm_vmscan_memcg_reclaim_begin,
> > may_writepage is !laptop_mode too, and reclaim_idx is (MAX_NR_ZONES-1),
> > which are both static values.
> >
> > mm_vmscan_memcg_softlimit_reclaim_begin is the same as
> > mm_vmscan_memcg_reclaim_begin.
> >
> > So we can drop them all.
>
> I agree. Although classzone_idx is a PITA to calculate, nothing really
> prevents us from having a tool to do that. may_writepage is not all that
> useful anymore.
>
> > Signed-off-by: Yafang Shao <laoar.shao@gmail.com>
>
> From a quick glance this looks ok. I haven't really checked deeply or
> tried to compile it but the change makes sense.
>

Thanks for your quick response!
This patch works fine, I have verified it.

> Acked-by: Michal Hocko <mhocko@suse.com>
>
> > ---
> >  include/trace/events/vmscan.h | 26 ++++++++++----------------
> >  mm/vmscan.c                   | 14 +++-----------
> >  2 files changed, 13 insertions(+), 27 deletions(-)
> >
> > diff --git a/include/trace/events/vmscan.h b/include/trace/events/vmscan.h
> > index a1cb913..153d90c 100644
> > --- a/include/trace/events/vmscan.h
> > +++ b/include/trace/events/vmscan.h
> > @@ -105,51 +105,45 @@
> >
> >  DECLARE_EVENT_CLASS(mm_vmscan_direct_reclaim_begin_template,
> >
> > -       TP_PROTO(int order, int may_writepage, gfp_t gfp_flags, int classzone_idx),
> > +       TP_PROTO(int order, gfp_t gfp_flags),
> >
> > -       TP_ARGS(order, may_writepage, gfp_flags, classzone_idx),
> > +       TP_ARGS(order, gfp_flags),
> >
> >         TP_STRUCT__entry(
> >                 __field( int, order )
> > -               __field( int, may_writepage )
> >                 __field( gfp_t, gfp_flags )
> > -               __field( int, classzone_idx )
> >         ),
> >
> >         TP_fast_assign(
> >                 __entry->order = order;
> > -               __entry->may_writepage = may_writepage;
> >                 __entry->gfp_flags = gfp_flags;
> > -               __entry->classzone_idx = classzone_idx;
> >         ),
> >
> > -       TP_printk("order=%d may_writepage=%d gfp_flags=%s classzone_idx=%d",
> > +       TP_printk("order=%d gfp_flags=%s",
> >                 __entry->order,
> > -               __entry->may_writepage,
> > -               show_gfp_flags(__entry->gfp_flags),
> > -               __entry->classzone_idx)
> > +               show_gfp_flags(__entry->gfp_flags))
> >  );
> >
> >  DEFINE_EVENT(mm_vmscan_direct_reclaim_begin_template, mm_vmscan_direct_reclaim_begin,
> >
> > -       TP_PROTO(int order, int may_writepage, gfp_t gfp_flags, int classzone_idx),
> > +       TP_PROTO(int order, gfp_t gfp_flags),
> >
> > -       TP_ARGS(order, may_writepage, gfp_flags, classzone_idx)
> > +       TP_ARGS(order, gfp_flags)
> >  );
> >
> >  #ifdef CONFIG_MEMCG
> >  DEFINE_EVENT(mm_vmscan_direct_reclaim_begin_template, mm_vmscan_memcg_reclaim_begin,
> >
> > -       TP_PROTO(int order, int may_writepage, gfp_t gfp_flags, int classzone_idx),
> > +       TP_PROTO(int order, gfp_t gfp_flags),
> >
> > -       TP_ARGS(order, may_writepage, gfp_flags, classzone_idx)
> > +       TP_ARGS(order, gfp_flags)
> >  );
> >
> >  DEFINE_EVENT(mm_vmscan_direct_reclaim_begin_template, mm_vmscan_memcg_softlimit_reclaim_begin,
> >
> > -       TP_PROTO(int order, int may_writepage, gfp_t gfp_flags, int classzone_idx),
> > +       TP_PROTO(int order, gfp_t gfp_flags),
> >
> > -       TP_ARGS(order, may_writepage, gfp_flags, classzone_idx)
> > +       TP_ARGS(order, gfp_flags)
> >  );
> >  #endif /* CONFIG_MEMCG */
> >
> > diff --git a/mm/vmscan.c b/mm/vmscan.c
> > index ac4806f..cdc0305 100644
> > --- a/mm/vmscan.c
> > +++ b/mm/vmscan.c
> > @@ -3304,10 +3304,7 @@ unsigned long try_to_free_pages(struct zonelist *zonelist, int order,
> >         if (throttle_direct_reclaim(sc.gfp_mask, zonelist, nodemask))
> >                 return 1;
> >
> > -       trace_mm_vmscan_direct_reclaim_begin(order,
> > -                               sc.may_writepage,
> > -                               sc.gfp_mask,
> > -                               sc.reclaim_idx);
> > +       trace_mm_vmscan_direct_reclaim_begin(order, sc.gfp_mask);
> >
> >         nr_reclaimed = do_try_to_free_pages(zonelist, &sc);
> >
> > @@ -3338,9 +3335,7 @@ unsigned long mem_cgroup_shrink_node(struct mem_cgroup *memcg,
> >                         (GFP_HIGHUSER_MOVABLE & ~GFP_RECLAIM_MASK);
> >
> >         trace_mm_vmscan_memcg_softlimit_reclaim_begin(sc.order,
> > -                                                     sc.may_writepage,
> > -                                                     sc.gfp_mask,
> > -                                                     sc.reclaim_idx);
> > +                                                     sc.gfp_mask);
> >
> >         /*
> >          * NOTE: Although we can get the priority field, using it
> > @@ -3389,10 +3384,7 @@ unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *memcg,
> >
> >         zonelist = &NODE_DATA(nid)->node_zonelists[ZONELIST_FALLBACK];
> >
> > -       trace_mm_vmscan_memcg_reclaim_begin(0,
> > -                               sc.may_writepage,
> > -                               sc.gfp_mask,
> > -                               sc.reclaim_idx);
> > +       trace_mm_vmscan_memcg_reclaim_begin(0, sc.gfp_mask);
> >
> >         psi_memstall_enter(&pflags);
> >         noreclaim_flag = memalloc_noreclaim_save();
> > --
> > 1.8.3.1
> >
>
> --
> Michal Hocko
> SUSE Labs
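Michal's point that a tool could recompute classzone_idx from the gfp_flags the event already carries can be sketched roughly as below. This is only an approximation of the kernel's gfp_zone() mapping, it assumes a configuration with all of the optional zones, and the sample flag string is hypothetical; a complete tool would also expand compound aliases such as GFP_HIGHUSER_MOVABLE before matching.

    #include <stdio.h>
    #include <string.h>

    /*
     * Rough approximation of gfp_zone(): recover a zone name from the flag
     * names that show_gfp_flags() prints.
     */
    static const char *zone_from_gfp_string(const char *flags)
    {
            if (strstr(flags, "__GFP_DMA32"))   /* check before __GFP_DMA, which is a prefix of it */
                    return "ZONE_DMA32";
            if (strstr(flags, "__GFP_DMA"))
                    return "ZONE_DMA";
            if (strstr(flags, "__GFP_HIGHMEM") && strstr(flags, "__GFP_MOVABLE"))
                    return "ZONE_MOVABLE";
            if (strstr(flags, "__GFP_HIGHMEM"))
                    return "ZONE_HIGHMEM";
            return "ZONE_NORMAL";
    }

    int main(void)
    {
            /* hypothetical flag string as it could appear in a trace line */
            const char *sample = "__GFP_HIGHMEM|__GFP_MOVABLE|__GFP_NOWARN";

            printf("%s -> %s\n", sample, zone_from_gfp_string(sample));
            return 0;
    }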
diff --git a/include/trace/events/vmscan.h b/include/trace/events/vmscan.h
index a1cb913..c1ddf28 100644
--- a/include/trace/events/vmscan.h
+++ b/include/trace/events/vmscan.h
@@ -465,6 +465,38 @@
                __entry->ratio,
                show_reclaim_flags(__entry->reclaim_flags))
 );
+
+TRACE_EVENT(mm_vmscan_node_reclaim_begin,
+
+       TP_PROTO(int nid, int order, gfp_t gfp_flags),
+
+       TP_ARGS(nid, order, gfp_flags),
+
+       TP_STRUCT__entry(
+               __field(int, nid)
+               __field(int, order)
+               __field(gfp_t, gfp_flags)
+       ),
+
+       TP_fast_assign(
+               __entry->nid = nid;
+               __entry->order = order;
+               __entry->gfp_flags = gfp_flags;
+       ),
+
+       TP_printk("nid=%d order=%d gfp_flags=%s",
+               __entry->nid,
+               __entry->order,
+               show_gfp_flags(__entry->gfp_flags))
+);
+
+DEFINE_EVENT(mm_vmscan_direct_reclaim_end_template, mm_vmscan_node_reclaim_end,
+
+       TP_PROTO(unsigned long nr_reclaimed),
+
+       TP_ARGS(nr_reclaimed)
+);
+
 #endif /* _TRACE_VMSCAN_H */

 /* This part must be outside protection */
diff --git a/mm/vmscan.c b/mm/vmscan.c
index ac4806f..2bee5d1 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -4241,6 +4241,9 @@ static int __node_reclaim(struct pglist_data *pgdat, gfp_t gfp_mask, unsigned in
                .reclaim_idx = gfp_zone(gfp_mask),
        };

+       trace_mm_vmscan_node_reclaim_begin(pgdat->node_id, order,
+                                          sc.gfp_mask);
+
        cond_resched();
        fs_reclaim_acquire(sc.gfp_mask);
        /*
@@ -4267,6 +4270,9 @@ static int __node_reclaim(struct pglist_data *pgdat, gfp_t gfp_mask, unsigned in
        current->flags &= ~PF_SWAPWRITE;
        memalloc_noreclaim_restore(noreclaim_flag);
        fs_reclaim_release(sc.gfp_mask);
+
+       trace_mm_vmscan_node_reclaim_end(sc.nr_reclaimed);
+
        return sc.nr_reclaimed >= nr_pages;
 }
In the page alloc fast path, it may do node reclaim, which may cause a
latency spike.
We should add tracepoints for this event, and also measure the latency
it causes.

So the below two tracepoints are introduced:
mm_vmscan_node_reclaim_begin
mm_vmscan_node_reclaim_end

Signed-off-by: Yafang Shao <laoar.shao@gmail.com>
---
 include/trace/events/vmscan.h | 32 ++++++++++++++++++++++++++++++++
 mm/vmscan.c                   |  6 ++++++
 2 files changed, 38 insertions(+)
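To measure the latency the commit message refers to, a rough userspace sketch such as the one below can pair consecutive begin/end lines read from trace_pipe. It is not part of the patch; the tracefs path is an assumption, both events must already be enabled, and a real tool would key the pairing on the CPU and PID columns rather than assuming strictly alternating events.

    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>

    int main(void)
    {
            FILE *tp = fopen("/sys/kernel/tracing/trace_pipe", "r");
            char line[1024];
            double begin_ts = -1.0;

            if (!tp) {
                    perror("trace_pipe");
                    return 1;
            }

            while (fgets(line, sizeof(line), tp)) {
                    char *evt = strstr(line, "mm_vmscan_node_reclaim_");

                    if (!evt || evt - line < 2)
                            continue;

                    /*
                     * The timestamp (secs.usecs) sits just before the event name,
                     * e.g. "... 1234.567890: mm_vmscan_node_reclaim_begin: ..."
                     */
                    char *p = evt - 2;              /* step over the ": " separator */
                    while (p > line && p[-1] != ' ')
                            p--;
                    double ts = atof(p);

                    if (strncmp(evt, "mm_vmscan_node_reclaim_begin:", 29) == 0) {
                            begin_ts = ts;
                    } else if (begin_ts >= 0.0) {
                            /* end event: print the latency and echo the line */
                            printf("node reclaim took %.6f s: %s", ts - begin_ts, evt);
                            begin_ts = -1.0;
                    }
            }
            fclose(tp);
            return 0;
    }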