Message ID | 20210729092853.38242-1-ligang.bdlg@bytedance.com (mailing list archive) |
---|---|
State | New |
Headers | show |
Series | mmap: mmap_lock: some improvments | expand |
Not a strong objection, but I think this can be achieved already using either: - The "stacktrace" feature which histogram triggers support (https://www.kernel.org/doc/html/latest/trace/histogram.html) - bpftrace's kstack/ustack feature (https://github.com/iovisor/bpftrace/blob/master/docs/tutorial_one_liners.md#lesson-9-profile-on-cpu-kernel-stacks) I haven't tried it out myself, but I suspect you could construct a synthetic event (https://www.kernel.org/doc/html/latest/trace/histogram.html#synthetic-events) which adds in the stack trace, then it ought to function a lot like it would with this patch. Then again, it's not like this change is huge by any means. So, if you find this more convenient than those alternatives, you can take: Reviewed-by: Axel Rasmussen <axelrasmussen@google.com> It's possible Steven or Tom have a more strong opinion on this though. ;) On Thu, Jul 29, 2021 at 2:29 AM Gang Li <ligang.bdlg@bytedance.com> wrote: > > The mmap_lock is acquired on most (all?) mmap / munmap / page fault > operations, so a multi-threaded process which does a lot of these > can experience significant contention. Sometimes we want to know > where the lock is hold. And it's hard to locate without collecting ip. > > Here's an example: TP_printk("ip=%pS",ip) > Log looks like this: "ip=do_user_addr_fault+0x274/0x640" > > We can find out who cause the contention amd make some improvements > for it. > > Signed-off-by: Gang Li <ligang.bdlg@bytedance.com> > --- > include/trace/events/mmap_lock.h | 27 +++++++++++++++++---------- > mm/mmap_lock.c | 6 +++--- > 2 files changed, 20 insertions(+), 13 deletions(-) > > diff --git a/include/trace/events/mmap_lock.h b/include/trace/events/mmap_lock.h > index b9dd66f9c226..8913a9f85894 100644 > --- a/include/trace/events/mmap_lock.h > +++ b/include/trace/events/mmap_lock.h > @@ -15,35 +15,39 @@ extern void trace_mmap_lock_unreg(void); > > DECLARE_EVENT_CLASS(mmap_lock, > > - TP_PROTO(struct mm_struct *mm, const char *memcg_path, bool write), > + TP_PROTO(struct mm_struct *mm, const char *memcg_path, bool write, > + unsigned long ip), > > - TP_ARGS(mm, memcg_path, write), > + TP_ARGS(mm, memcg_path, write, ip), > > TP_STRUCT__entry( > __field(struct mm_struct *, mm) > __string(memcg_path, memcg_path) > __field(bool, write) > + __field(void *, ip) > ), > > TP_fast_assign( > __entry->mm = mm; > __assign_str(memcg_path, memcg_path); > __entry->write = write; > + __entry->ip = (void *)ip; > ), > > TP_printk( > - "mm=%p memcg_path=%s write=%s", > + "mm=%p memcg_path=%s write=%s ip=%pS", > __entry->mm, > __get_str(memcg_path), > - __entry->write ? "true" : "false" > - ) > + __entry->write ? "true" : "false", > + __entry->ip > + ) > ); > > #define DEFINE_MMAP_LOCK_EVENT(name) \ > DEFINE_EVENT_FN(mmap_lock, name, \ > TP_PROTO(struct mm_struct *mm, const char *memcg_path, \ > - bool write), \ > - TP_ARGS(mm, memcg_path, write), \ > + bool write, unsigned long ip), \ > + TP_ARGS(mm, memcg_path, write, ip), \ > trace_mmap_lock_reg, trace_mmap_lock_unreg) > > DEFINE_MMAP_LOCK_EVENT(mmap_lock_start_locking); > @@ -52,14 +56,15 @@ DEFINE_MMAP_LOCK_EVENT(mmap_lock_released); > TRACE_EVENT_FN(mmap_lock_acquire_returned, > > TP_PROTO(struct mm_struct *mm, const char *memcg_path, bool write, > - bool success), > + unsigned long ip, bool success), > > - TP_ARGS(mm, memcg_path, write, success), > + TP_ARGS(mm, memcg_path, write, ip, success), > > TP_STRUCT__entry( > __field(struct mm_struct *, mm) > __string(memcg_path, memcg_path) > __field(bool, write) > + __field(void *, ip) > __field(bool, success) > ), > > @@ -67,14 +72,16 @@ TRACE_EVENT_FN(mmap_lock_acquire_returned, > __entry->mm = mm; > __assign_str(memcg_path, memcg_path); > __entry->write = write; > + __entry->ip = (void *)ip; > __entry->success = success; > ), > > TP_printk( > - "mm=%p memcg_path=%s write=%s success=%s", > + "mm=%p memcg_path=%s write=%s ip=%pS success=%s", > __entry->mm, > __get_str(memcg_path), > __entry->write ? "true" : "false", > + __entry->ip, > __entry->success ? "true" : "false" > ), > > diff --git a/mm/mmap_lock.c b/mm/mmap_lock.c > index 1854850b4b89..f1100eae6f2f 100644 > --- a/mm/mmap_lock.c > +++ b/mm/mmap_lock.c > @@ -227,20 +227,20 @@ static const char *get_mm_memcg_path(struct mm_struct *mm) > > void __mmap_lock_do_trace_start_locking(struct mm_struct *mm, bool write) > { > - TRACE_MMAP_LOCK_EVENT(start_locking, mm, write); > + TRACE_MMAP_LOCK_EVENT(start_locking, mm, write, _RET_IP_); > } > EXPORT_SYMBOL(__mmap_lock_do_trace_start_locking); > > void __mmap_lock_do_trace_acquire_returned(struct mm_struct *mm, bool write, > bool success) > { > - TRACE_MMAP_LOCK_EVENT(acquire_returned, mm, write, success); > + TRACE_MMAP_LOCK_EVENT(acquire_returned, mm, write, _RET_IP_, success); > } > EXPORT_SYMBOL(__mmap_lock_do_trace_acquire_returned); > > void __mmap_lock_do_trace_released(struct mm_struct *mm, bool write) > { > - TRACE_MMAP_LOCK_EVENT(released, mm, write); > + TRACE_MMAP_LOCK_EVENT(released, mm, write, _RET_IP_); > } > EXPORT_SYMBOL(__mmap_lock_do_trace_released); > #endif /* CONFIG_TRACING */ > -- > 2.20.1 >
Thanks! I have tried your suggestion. They are great, especially synthetic-events. If don't print ip per event, we can only guess which one cause the contention by "hitcount". > (https://www.kernel.org/doc/html/latest/trace/histogram.html#synthetic-events) But it seems that they only support histogram, can I print the synthetic-events args per event in /sys/kernel/debug/tracing/trace like other events? I haven't found that in kernel doc. On 7/30/21 1:33 AM, Axel Rasmussen wrote: > Not a strong objection, but I think this can be achieved already using either: > > - The "stacktrace" feature which histogram triggers support > (https://www.kernel.org/doc/html/latest/trace/histogram.html) > - bpftrace's kstack/ustack feature > (https://github.com/iovisor/bpftrace/blob/master/docs/tutorial_one_liners.md#lesson-9-profile-on-cpu-kernel-stacks) > > I haven't tried it out myself, but I suspect you could construct a > synthetic event > (https://www.kernel.org/doc/html/latest/trace/histogram.html#synthetic-events) > which adds in the stack trace, then it ought to function a lot like it > would with this patch. > > Then again, it's not like this change is huge by any means. So, if you > find this more convenient than those alternatives, you can take: > > Reviewed-by: Axel Rasmussen <axelrasmussen@google.com> > > It's possible Steven or Tom have a more strong opinion on this though. ;) > > On Thu, Jul 29, 2021 at 2:29 AM Gang Li <ligang.bdlg@bytedance.com> wrote: >> >> The mmap_lock is acquired on most (all?) mmap / munmap / page fault >> operations, so a multi-threaded process which does a lot of these >> can experience significant contention. Sometimes we want to know >> where the lock is hold. And it's hard to locate without collecting ip. >> >> Here's an example: TP_printk("ip=%pS",ip) >> Log looks like this: "ip=do_user_addr_fault+0x274/0x640" >> >> We can find out who cause the contention amd make some improvements >> for it. >> >> Signed-off-by: Gang Li <ligang.bdlg@bytedance.com>
On 7/29/21 7:33 PM, Axel Rasmussen wrote: > Not a strong objection, but I think this can be achieved already using either: > > - The "stacktrace" feature which histogram triggers support > (https://www.kernel.org/doc/html/latest/trace/histogram.html) > - bpftrace's kstack/ustack feature > (https://github.com/iovisor/bpftrace/blob/master/docs/tutorial_one_liners.md#lesson-9-profile-on-cpu-kernel-stacks) > > I haven't tried it out myself, but I suspect you could construct a > synthetic event > (https://www.kernel.org/doc/html/latest/trace/histogram.html#synthetic-events) > which adds in the stack trace, then it ought to function a lot like it > would with this patch. > > Then again, it's not like this change is huge by any means. So, if you > find this more convenient than those alternatives, you can take: > > Reviewed-by: Axel Rasmussen <axelrasmussen@google.com> > > It's possible Steven or Tom have a more strong opinion on this though. ;) I generally dislike tracepoints with an ip. Often you then find out it's not enough to distinguish what you need (due to some commonly shared wrapper doing the call) and you need more of the backtrace anyway.
On Fri, 30 Jul 2021 13:32:12 +0800 Gang Li <ligang.bdlg@bytedance.com> wrote: > Thanks! I have tried your suggestion. They are great, especially > synthetic-events. > > If don't print ip per event, we can only guess which one cause the > contention by "hitcount". > > > > (https://www.kernel.org/doc/html/latest/trace/histogram.html#synthetic-events) > > But it seems that they only support histogram, can I print the > synthetic-events args per event in /sys/kernel/debug/tracing/trace > like other events? I haven't found that in kernel doc. Yes, synthetic events are just like normal events, and have triggers, stack traces, and do pretty much anything that another event can do. I'm just finishing up a libtracfs called tracefs_sql() (hopefully posting it today), that allows you to create a synthetic event via an SQL statement. But I don't think this is what you are looking for. What about using function tracing? Because the tracepoint is called from __mmap_lock* helper functions that function tracer can see, you can just do the following: # trace-cmd start -e mmap_lock -p function -l '__mmap_lock_*' # trace-cmd show [..] trace-cmd-1840 [006] .... 194.576801: __mmap_lock_do_trace_start_locking <-do_user_addr_fault trace-cmd-1840 [006] ...1 194.576805: mmap_lock_start_locking: mm=000000006515cb1f memcg_path=/user.slice/user-0.slice/session-2.scope write=false trace-cmd-1840 [006] .... 194.576806: __mmap_lock_do_trace_acquire_returned <-do_user_addr_fault trace-cmd-1840 [006] ...1 194.576807: mmap_lock_acquire_returned: mm=000000006515cb1f memcg_path=/user.slice/user-0.slice/session-2.scope write=false success=true trace-cmd-1840 [006] .... 194.576811: __mmap_lock_do_trace_released <-do_user_addr_fault trace-cmd-1840 [006] ...1 194.576812: mmap_lock_released: mm=000000006515cb1f memcg_path=/user.slice/user-0.slice/session-2.scope write=false trace-cmd-1840 [006] .... 194.576815: __mmap_lock_do_trace_start_locking <-do_user_addr_fault trace-cmd-1840 [006] ...1 194.576816: mmap_lock_start_locking: mm=000000006515cb1f memcg_path=/user.slice/user-0.slice/session-2.scope write=false trace-cmd-1840 [006] .... 194.576816: __mmap_lock_do_trace_acquire_returned <-do_user_addr_fault trace-cmd-1840 [006] ...1 194.576817: mmap_lock_acquire_returned: mm=000000006515cb1f memcg_path=/user.slice/user-0.slice/session-2.scope write=false success=true trace-cmd-1840 [006] .... 194.576820: __mmap_lock_do_trace_released <-do_user_addr_fault trace-cmd-1840 [006] ...1 194.576821: mmap_lock_released: mm=000000006515cb1f memcg_path=/user.slice/user-0.slice/session-2.scope write=false This looks exactly like the robots you are looking for. -- Steve
On 7/31/21 4:03 AM, Steven Rostedt wrote: > Yes, synthetic events are just like normal events, and have triggers, > stack traces, and do pretty much anything that another event can do. > > I'm just finishing up a libtracfs called tracefs_sql() (hopefully > posting it today), that allows you to create a synthetic event via an > SQL statement. But I don't think this is what you are looking for. > > What about using function tracing? Because the tracepoint is called > from __mmap_lock* helper functions that function tracer can see, you > can just do the following: > > # trace-cmd start -e mmap_lock -p function -l '__mmap_lock_*' > # trace-cmd show > [..] > trace-cmd-1840 [006] .... 194.576801: __mmap_lock_do_trace_start_locking <-do_user_addr_fault > trace-cmd-1840 [006] ...1 194.576805: mmap_lock_start_locking: mm=000000006515cb1f memcg_path=/user.slice/user-0.slice/session-2.scope write=false > > trace-cmd-1840 [006] .... 194.576806: __mmap_lock_do_trace_acquire_returned <-do_user_addr_fault > trace-cmd-1840 [006] ...1 194.576807: mmap_lock_acquire_returned: mm=000000006515cb1f memcg_path=/user.slice/user-0.slice/session-2.scope write=false success=true > > trace-cmd-1840 [006] .... 194.576811: __mmap_lock_do_trace_released <-do_user_addr_fault > trace-cmd-1840 [006] ...1 194.576812: mmap_lock_released: mm=000000006515cb1f memcg_path=/user.slice/user-0.slice/session-2.scope write=false > > trace-cmd-1840 [006] .... 194.576815: __mmap_lock_do_trace_start_locking <-do_user_addr_fault > trace-cmd-1840 [006] ...1 194.576816: mmap_lock_start_locking: mm=000000006515cb1f memcg_path=/user.slice/user-0.slice/session-2.scope write=false > > trace-cmd-1840 [006] .... 194.576816: __mmap_lock_do_trace_acquire_returned <-do_user_addr_fault > trace-cmd-1840 [006] ...1 194.576817: mmap_lock_acquire_returned: mm=000000006515cb1f memcg_path=/user.slice/user-0.slice/session-2.scope write=false success=true > > trace-cmd-1840 [006] .... 194.576820: __mmap_lock_do_trace_released <-do_user_addr_fault > trace-cmd-1840 [006] ...1 194.576821: mmap_lock_released: mm=000000006515cb1f memcg_path=/user.slice/user-0.slice/session-2.scope write=false > > > This looks exactly like the robots you are looking for. > > -- Steve > I think this is exactly what I am looking for. Thanks!
On 8/2/21 10:44 AM, Gang Li wrote: > On 7/31/21 4:03 AM, Steven Rostedt wrote: >> Yes, synthetic events are just like normal events, and have triggers, >> stack traces, and do pretty much anything that another event can do. >> >> I'm just finishing up a libtracfs called tracefs_sql() (hopefully >> posting it today), that allows you to create a synthetic event via an >> SQL statement. But I don't think this is what you are looking for. >> >> What about using function tracing? Because the tracepoint is called >> from __mmap_lock* helper functions that function tracer can see, you >> can just do the following: >> >> # trace-cmd start -e mmap_lock -p function -l '__mmap_lock_*' >> # trace-cmd show >> [..] >> trace-cmd-1840 [006] .... 194.576801: >> __mmap_lock_do_trace_start_locking <-do_user_addr_fault >> trace-cmd-1840 [006] ...1 194.576805: >> mmap_lock_start_locking: mm=000000006515cb1f >> memcg_path=/user.slice/user-0.slice/session-2.scope write=false >> Hi! I find that sometimes the output data is out of order, which leads to inaccurate time stamps and make it hard to analyse. node-953 [001] .... 203.823297: __mmap_lock_do_trace_start_locking <-do_user_addr_fault node-955 [002] .... 203.823297: __mmap_lock_do_trace_start_locking <-do_user_addr_fault node-956 [003] .... 203.823297: __mmap_lock_do_trace_start_locking <-do_user_addr_fault node-953 [001] .... 203.823297: mmap_lock_start_locking: mm=000000004395a005 memcg_path=/user.slice/user-0.slice/session-1.scope write=false ip=do_user_addr_fault+0x270/0x4d0 node-953 [001] .... 203.823298: __mmap_lock_do_trace_acquire_returned <-do_user_addr_fault Is there any other way to collect ip in each event? It seems that adding "ip" fields is the only way to do this accurately and effectively.
Hi Steven! Sorry to bother you ;) (I resend this email because the last one was not cc to mailing list.) It has been ten days since my last email. What's your opinion about my patch "[PATCH 3/3] mm: mmap_lock: add ip to mmap_lock tracepoints"? Briefly, lock events are so frequent that ip collection and lock event collection cannot be separated, otherwise it will cause the wrong order of data. I am developing a tool to analyze mmap_lock contend using this feature. Adding ip to mmap_lock tracepoints is quite convenient. Sorry to bother you again. Hoping for your reply. Thanks! -- Gang On 8/20/21 2:18 AM, Gang Li wrote: > On 8/2/21 10:44 AM, Gang Li wrote: >> On 7/31/21 4:03 AM, Steven Rostedt wrote: >>> Yes, synthetic events are just like normal events, and have triggers, >>> stack traces, and do pretty much anything that another event can do. >>> > Hi! > > I find that sometimes the output data is out of order, which leads to inaccurate time stamps and make it hard to analyze.
diff --git a/include/trace/events/mmap_lock.h b/include/trace/events/mmap_lock.h index b9dd66f9c226..8913a9f85894 100644 --- a/include/trace/events/mmap_lock.h +++ b/include/trace/events/mmap_lock.h @@ -15,35 +15,39 @@ extern void trace_mmap_lock_unreg(void); DECLARE_EVENT_CLASS(mmap_lock, - TP_PROTO(struct mm_struct *mm, const char *memcg_path, bool write), + TP_PROTO(struct mm_struct *mm, const char *memcg_path, bool write, + unsigned long ip), - TP_ARGS(mm, memcg_path, write), + TP_ARGS(mm, memcg_path, write, ip), TP_STRUCT__entry( __field(struct mm_struct *, mm) __string(memcg_path, memcg_path) __field(bool, write) + __field(void *, ip) ), TP_fast_assign( __entry->mm = mm; __assign_str(memcg_path, memcg_path); __entry->write = write; + __entry->ip = (void *)ip; ), TP_printk( - "mm=%p memcg_path=%s write=%s", + "mm=%p memcg_path=%s write=%s ip=%pS", __entry->mm, __get_str(memcg_path), - __entry->write ? "true" : "false" - ) + __entry->write ? "true" : "false", + __entry->ip + ) ); #define DEFINE_MMAP_LOCK_EVENT(name) \ DEFINE_EVENT_FN(mmap_lock, name, \ TP_PROTO(struct mm_struct *mm, const char *memcg_path, \ - bool write), \ - TP_ARGS(mm, memcg_path, write), \ + bool write, unsigned long ip), \ + TP_ARGS(mm, memcg_path, write, ip), \ trace_mmap_lock_reg, trace_mmap_lock_unreg) DEFINE_MMAP_LOCK_EVENT(mmap_lock_start_locking); @@ -52,14 +56,15 @@ DEFINE_MMAP_LOCK_EVENT(mmap_lock_released); TRACE_EVENT_FN(mmap_lock_acquire_returned, TP_PROTO(struct mm_struct *mm, const char *memcg_path, bool write, - bool success), + unsigned long ip, bool success), - TP_ARGS(mm, memcg_path, write, success), + TP_ARGS(mm, memcg_path, write, ip, success), TP_STRUCT__entry( __field(struct mm_struct *, mm) __string(memcg_path, memcg_path) __field(bool, write) + __field(void *, ip) __field(bool, success) ), @@ -67,14 +72,16 @@ TRACE_EVENT_FN(mmap_lock_acquire_returned, __entry->mm = mm; __assign_str(memcg_path, memcg_path); __entry->write = write; + __entry->ip = (void *)ip; __entry->success = success; ), TP_printk( - "mm=%p memcg_path=%s write=%s success=%s", + "mm=%p memcg_path=%s write=%s ip=%pS success=%s", __entry->mm, __get_str(memcg_path), __entry->write ? "true" : "false", + __entry->ip, __entry->success ? "true" : "false" ), diff --git a/mm/mmap_lock.c b/mm/mmap_lock.c index 1854850b4b89..f1100eae6f2f 100644 --- a/mm/mmap_lock.c +++ b/mm/mmap_lock.c @@ -227,20 +227,20 @@ static const char *get_mm_memcg_path(struct mm_struct *mm) void __mmap_lock_do_trace_start_locking(struct mm_struct *mm, bool write) { - TRACE_MMAP_LOCK_EVENT(start_locking, mm, write); + TRACE_MMAP_LOCK_EVENT(start_locking, mm, write, _RET_IP_); } EXPORT_SYMBOL(__mmap_lock_do_trace_start_locking); void __mmap_lock_do_trace_acquire_returned(struct mm_struct *mm, bool write, bool success) { - TRACE_MMAP_LOCK_EVENT(acquire_returned, mm, write, success); + TRACE_MMAP_LOCK_EVENT(acquire_returned, mm, write, _RET_IP_, success); } EXPORT_SYMBOL(__mmap_lock_do_trace_acquire_returned); void __mmap_lock_do_trace_released(struct mm_struct *mm, bool write) { - TRACE_MMAP_LOCK_EVENT(released, mm, write); + TRACE_MMAP_LOCK_EVENT(released, mm, write, _RET_IP_); } EXPORT_SYMBOL(__mmap_lock_do_trace_released); #endif /* CONFIG_TRACING */
The mmap_lock is acquired on most (all?) mmap / munmap / page fault operations, so a multi-threaded process which does a lot of these can experience significant contention. Sometimes we want to know where the lock is hold. And it's hard to locate without collecting ip. Here's an example: TP_printk("ip=%pS",ip) Log looks like this: "ip=do_user_addr_fault+0x274/0x640" We can find out who cause the contention amd make some improvements for it. Signed-off-by: Gang Li <ligang.bdlg@bytedance.com> --- include/trace/events/mmap_lock.h | 27 +++++++++++++++++---------- mm/mmap_lock.c | 6 +++--- 2 files changed, 20 insertions(+), 13 deletions(-)