Message ID | 20190417130959.10064-2-kaslevs@vmware.com (mailing list archive) |
---|---|
State | Accepted |
Headers | show |
Series | Optimize pid filters | expand |
On Wed, Apr 17, 2019 at 04:09:58PM +0300 Slavomir Kaslev wrote: > Express pid filters as allowed/disallowed filter ranges > > (pid>=100&&pid<=103) > > instead of specifying them per pid > > (pid==100||pid==101||pid==102||pid==103) > > This makes the size of the resulting filter smaller (and faster) and avoids > overflowing the filter size limit of one page which we can hit on bigger > machines (say >160 CPUs). This one works as well :) I finally hit a case where my trace-cmd pids were non-contiguous and this split the range up correctly. FILTER write /sys/kernel/debug/tracing/events/sched/sched_kthread_stop/filter (len 74) value "(common_pid<21420||common_pid>21425)&&(common_pid<21265||common_pid>21418)" FILTER write /sys/kernel/debug/tracing/events/sched/sched_kthread_stop_ret/filter (len 74) value "(common_pid<21420||common_pid>21425)&&(common_pid<21265||common_pid>21418)" ... FILTER write /sys/kernel/debug/tracing/events/sched/sched_switch/filter (len 142) value "(common_pid<21420||common_pid>21425)&&(common_pid<21265||common_pid>21418)||(next_pid<21420||next_pid>21425)&&(next_pid<21265||next_pid>21418)" The latter is correct given precendce of && before || but I wonder if () don't make sense? I always have to look that one up :) If I were writing that in code I'd probably put in the extra ()s, but since it's generated and no one actually sees it, probably okay and simpler as is. Having seen that and having tried it on a few other machines I'd be more willing to have a Tested-by: Phil Auld <pauld@redhat.com> on it, if you want it. Cheers, Phil > > Signed-off-by: Slavomir Kaslev <kaslevs@vmware.com> > Reported-by: Phil Auld <pauld@redhat.com> > Suggested-by: Steven Rostedt (VMware) <rostedt@goodmis.org> > --- > tracecmd/trace-record.c | 117 +++++++++++++++++++++++++++------------- > 1 file changed, 81 insertions(+), 36 deletions(-) > > diff --git a/tracecmd/trace-record.c b/tracecmd/trace-record.c > index a3a34f1..4523128 100644 > --- a/tracecmd/trace-record.c > +++ b/tracecmd/trace-record.c > @@ -951,10 +951,63 @@ static void update_ftrace_pids(int reset) > static void update_event_filters(struct buffer_instance *instance); > static void update_pid_event_filters(struct buffer_instance *instance); > > +static void append_filter_pid_range(char **filter, int *curr_len, > + const char *field, > + int start_pid, int end_pid, bool exclude) > +{ > + const char *op = "", *op1, *op2, *op3; > + int len; > + > + if (*filter && **filter) > + op = exclude ? "&&" : "||"; > + > + /* Handle thus case explicitly so that we get `pid==3` instead of > + * `pid>=3&&pid<=3` for singleton ranges > + */ > + if (start_pid == end_pid) { > +#define FMT "%s(%s%s%d)" > + len = snprintf(NULL, 0, FMT, op, > + field, exclude ? "!=" : "==", start_pid); > + *filter = realloc(*filter, *curr_len + len + 1); > + if (!*filter) > + die("realloc"); > + > + len = snprintf(*filter + *curr_len, len + 1, FMT, op, > + field, exclude ? "!=" : "==", start_pid); > + *curr_len += len; > + > + return; > +#undef FMT > + } > + > + if (exclude) { > + op1 = "<"; > + op2 = "||"; > + op3 = ">"; > + } else { > + op1 = ">="; > + op2 = "&&"; > + op3 = "<="; > + } > + > +#define FMT "%s(%s%s%d%s%s%s%d)" > + len = snprintf(NULL, 0, FMT, op, > + field, op1, start_pid, op2, > + field, op3, end_pid); > + *filter = realloc(*filter, *curr_len + len + 1); > + if (!*filter) > + die("realloc"); > + > + len = snprintf(*filter + *curr_len, len + 1, FMT, op, > + field, op1, start_pid, op2, > + field, op3, end_pid); > + *curr_len += len; > +} > + > /** > * make_pid_filter - create a filter string to all pids against @field > * @curr_filter: Append to a previous filter (may realloc). Can be NULL > - * @field: The fild to compare the pids against > + * @field: The field to compare the pids against > * > * Creates a new string or appends to an existing one if @curr_filter > * is not NULL. The new string will contain a filter with all pids > @@ -964,54 +1017,46 @@ static void update_pid_event_filters(struct buffer_instance *instance); > */ > static char *make_pid_filter(char *curr_filter, const char *field) > { > + int start_pid = -1, last_pid = -1; > + int last_exclude = -1; > struct filter_pids *p; > - char *filter; > - char *orit; > - char *match; > - char *str; > + char *filter = NULL; > int curr_len = 0; > - int len; > > /* Use the new method if possible */ > if (have_set_event_pid) > return NULL; > > - len = len_filter_pids + (strlen(field) + strlen("(==)||")) * nr_filter_pids; > - > - if (curr_filter) { > - curr_len = strlen(curr_filter); > - filter = realloc(curr_filter, curr_len + len + strlen("(&&())")); > - if (!filter) > - die("realloc"); > - memmove(filter+1, curr_filter, curr_len); > - filter[0] = '('; > - strcat(filter, ")&&("); > - curr_len = strlen(filter); > - } else > - filter = malloc(len); > - if (!filter) > - die("Failed to allocate pid filter"); > - > - /* Last '||' that is not used will cover the \0 */ > - str = filter + curr_len; > + if (!filter_pids) > + return curr_filter; > > for (p = filter_pids; p; p = p->next) { > - if (p->exclude) { > - match = "!="; > - orit = "&&"; > - } else { > - match = "=="; > - orit = "||"; > + /* > + * PIDs are inserted in `filter_pids` from the front and that's > + * why we expect them in descending order here. > + */ > + if (p->pid == last_pid - 1 && p->exclude == last_exclude) { > + last_pid = p->pid; > + continue; > } > - if (p == filter_pids) > - orit = ""; > > - len = sprintf(str, "%s(%s%s%d)", orit, field, match, p->pid); > - str += len; > + if (start_pid != -1) > + append_filter_pid_range(&filter, &curr_len, field, > + last_pid, start_pid, > + last_exclude); > + > + start_pid = last_pid = p->pid; > + last_exclude = p->exclude; > + > } > + append_filter_pid_range(&filter, &curr_len, field, > + last_pid, start_pid, last_exclude); > > - if (curr_len) > - sprintf(str, ")"); > + if (curr_filter) { > + char *save = filter; > + asprintf(&filter, "(%s)&&(%s)", curr_filter, filter); > + free(save); > + } > > return filter; > } > -- > 2.19.1 > --
On Wed, 17 Apr 2019, Phil Auld wrote: > On Wed, Apr 17, 2019 at 04:09:58PM +0300 Slavomir Kaslev wrote: > > Express pid filters as allowed/disallowed filter ranges > > > > (pid>=100&&pid<=103) > > > > instead of specifying them per pid > > > > (pid==100||pid==101||pid==102||pid==103) > > > > This makes the size of the resulting filter smaller (and faster) and avoids > > overflowing the filter size limit of one page which we can hit on bigger > > machines (say >160 CPUs). > > This one works as well :) > > I finally hit a case where my trace-cmd pids were non-contiguous and > this split the range up correctly. > > > FILTER write /sys/kernel/debug/tracing/events/sched/sched_kthread_stop/filter (len 74) value "(common_pid<21420||common_pid>21425)&&(common_pid<21265||common_pid>21418)" > FILTER write /sys/kernel/debug/tracing/events/sched/sched_kthread_stop_ret/filter (len 74) value "(common_pid<21420||common_pid>21425)&&(common_pid<21265||common_pid>21418)" > ... > FILTER write /sys/kernel/debug/tracing/events/sched/sched_switch/filter (len 142) value "(common_pid<21420||common_pid>21425)&&(common_pid<21265||common_pid>21418)||(next_pid<21420||next_pid>21425)&&(next_pid<21265||next_pid>21418)" It seems crazy that we write "common_pid", instead of "pid" or "cpid", or something like that. > > > The latter is correct given precendce of && before || but I wonder if () don't make sense? I always have to look > that one up :) > > If I were writing that in code I'd probably put in the extra ()s, but since it's generated and no > one actually sees it, probably okay and simpler as is. > > > Having seen that and having tried it on a few other machines I'd be more willing to have a > > Tested-by: Phil Auld <pauld@redhat.com> > > on it, if you want it. > > Cheers, > Phil > > > > > > Signed-off-by: Slavomir Kaslev <kaslevs@vmware.com> > > Reported-by: Phil Auld <pauld@redhat.com> > > Suggested-by: Steven Rostedt (VMware) <rostedt@goodmis.org> > > --- > > tracecmd/trace-record.c | 117 +++++++++++++++++++++++++++------------- > > 1 file changed, 81 insertions(+), 36 deletions(-) > > > > diff --git a/tracecmd/trace-record.c b/tracecmd/trace-record.c > > index a3a34f1..4523128 100644 > > --- a/tracecmd/trace-record.c > > +++ b/tracecmd/trace-record.c > > @@ -951,10 +951,63 @@ static void update_ftrace_pids(int reset) > > static void update_event_filters(struct buffer_instance *instance); > > static void update_pid_event_filters(struct buffer_instance *instance); > > > > +static void append_filter_pid_range(char **filter, int *curr_len, > > + const char *field, > > + int start_pid, int end_pid, bool exclude) > > +{ > > + const char *op = "", *op1, *op2, *op3; > > + int len; > > + > > + if (*filter && **filter) > > + op = exclude ? "&&" : "||"; > > + > > + /* Handle thus case explicitly so that we get `pid==3` instead of > > + * `pid>=3&&pid<=3` for singleton ranges > > + */ > > + if (start_pid == end_pid) { > > +#define FMT "%s(%s%s%d)" > > + len = snprintf(NULL, 0, FMT, op, > > + field, exclude ? "!=" : "==", start_pid); > > + *filter = realloc(*filter, *curr_len + len + 1); > > + if (!*filter) > > + die("realloc"); > > + > > + len = snprintf(*filter + *curr_len, len + 1, FMT, op, > > + field, exclude ? "!=" : "==", start_pid); > > + *curr_len += len; > > + > > + return; > > +#undef FMT > > + } > > + > > + if (exclude) { > > + op1 = "<"; > > + op2 = "||"; > > + op3 = ">"; > > + } else { > > + op1 = ">="; > > + op2 = "&&"; > > + op3 = "<="; > > + } > > + > > +#define FMT "%s(%s%s%d%s%s%s%d)" > > + len = snprintf(NULL, 0, FMT, op, > > + field, op1, start_pid, op2, > > + field, op3, end_pid); > > + *filter = realloc(*filter, *curr_len + len + 1); > > + if (!*filter) > > + die("realloc"); > > + > > + len = snprintf(*filter + *curr_len, len + 1, FMT, op, > > + field, op1, start_pid, op2, > > + field, op3, end_pid); > > + *curr_len += len; > > +} > > + > > /** > > * make_pid_filter - create a filter string to all pids against @field > > * @curr_filter: Append to a previous filter (may realloc). Can be NULL > > - * @field: The fild to compare the pids against > > + * @field: The field to compare the pids against > > * > > * Creates a new string or appends to an existing one if @curr_filter > > * is not NULL. The new string will contain a filter with all pids > > @@ -964,54 +1017,46 @@ static void update_pid_event_filters(struct buffer_instance *instance); > > */ > > static char *make_pid_filter(char *curr_filter, const char *field) > > { > > + int start_pid = -1, last_pid = -1; > > + int last_exclude = -1; > > struct filter_pids *p; > > - char *filter; > > - char *orit; > > - char *match; > > - char *str; > > + char *filter = NULL; > > int curr_len = 0; > > - int len; > > > > /* Use the new method if possible */ > > if (have_set_event_pid) > > return NULL; > > > > - len = len_filter_pids + (strlen(field) + strlen("(==)||")) * nr_filter_pids; > > - > > - if (curr_filter) { > > - curr_len = strlen(curr_filter); > > - filter = realloc(curr_filter, curr_len + len + strlen("(&&())")); > > - if (!filter) > > - die("realloc"); > > - memmove(filter+1, curr_filter, curr_len); > > - filter[0] = '('; > > - strcat(filter, ")&&("); > > - curr_len = strlen(filter); > > - } else > > - filter = malloc(len); > > - if (!filter) > > - die("Failed to allocate pid filter"); > > - > > - /* Last '||' that is not used will cover the \0 */ > > - str = filter + curr_len; > > + if (!filter_pids) > > + return curr_filter; > > > > for (p = filter_pids; p; p = p->next) { > > - if (p->exclude) { > > - match = "!="; > > - orit = "&&"; > > - } else { > > - match = "=="; > > - orit = "||"; > > + /* > > + * PIDs are inserted in `filter_pids` from the front and that's > > + * why we expect them in descending order here. > > + */ > > + if (p->pid == last_pid - 1 && p->exclude == last_exclude) { > > + last_pid = p->pid; > > + continue; > > } > > - if (p == filter_pids) > > - orit = ""; > > > > - len = sprintf(str, "%s(%s%s%d)", orit, field, match, p->pid); > > - str += len; > > + if (start_pid != -1) > > + append_filter_pid_range(&filter, &curr_len, field, > > + last_pid, start_pid, > > + last_exclude); > > + > > + start_pid = last_pid = p->pid; > > + last_exclude = p->exclude; > > + > > } > > + append_filter_pid_range(&filter, &curr_len, field, > > + last_pid, start_pid, last_exclude); > > > > - if (curr_len) > > - sprintf(str, ")"); > > + if (curr_filter) { > > + char *save = filter; > > + asprintf(&filter, "(%s)&&(%s)", curr_filter, filter); > > + free(save); > > + } > > > > return filter; > > } > > -- > > 2.19.1 > > > > -- >
On Wed, Apr 17, 2019 at 04:18:50PM +0200 John Kacur wrote: > > > On Wed, 17 Apr 2019, Phil Auld wrote: > > > On Wed, Apr 17, 2019 at 04:09:58PM +0300 Slavomir Kaslev wrote: > > > Express pid filters as allowed/disallowed filter ranges > > > > > > (pid>=100&&pid<=103) > > > > > > instead of specifying them per pid > > > > > > (pid==100||pid==101||pid==102||pid==103) > > > > > > This makes the size of the resulting filter smaller (and faster) and avoids > > > overflowing the filter size limit of one page which we can hit on bigger > > > machines (say >160 CPUs). > > > > This one works as well :) > > > > I finally hit a case where my trace-cmd pids were non-contiguous and > > this split the range up correctly. > > > > > > FILTER write /sys/kernel/debug/tracing/events/sched/sched_kthread_stop/filter (len 74) value "(common_pid<21420||common_pid>21425)&&(common_pid<21265||common_pid>21418)" > > FILTER write /sys/kernel/debug/tracing/events/sched/sched_kthread_stop_ret/filter (len 74) value "(common_pid<21420||common_pid>21425)&&(common_pid<21265||common_pid>21418)" > > ... > > FILTER write /sys/kernel/debug/tracing/events/sched/sched_switch/filter (len 142) value "(common_pid<21420||common_pid>21425)&&(common_pid<21265||common_pid>21418)||(next_pid<21420||next_pid>21425)&&(next_pid<21265||next_pid>21418)" > > It seems crazy that we write "common_pid", instead of "pid" or "cpid", or > something like that. > > > I assume those have to match fields in the trace event mechanism, but I don't know a lot about it. "pid" is used in the wakeup event filters: FILTER write /sys/kernel/debug/tracing/events/sched/sched_wakeup_new/filter (len 122) value "(common_pid<21420||common_pid>21425)&&(common_pid<21265||common_pid>21418)||(pid<21420||pid>21425)&&(pid<21265||pid>21418)" Cheers, Phil
On Wed, 17 Apr 2019 10:25:15 -0400 Phil Auld <pauld@redhat.com> wrote: > > > FILTER write /sys/kernel/debug/tracing/events/sched/sched_kthread_stop/filter (len 74) value "(common_pid<21420||common_pid>21425)&&(common_pid<21265||common_pid>21418)" > > > FILTER write /sys/kernel/debug/tracing/events/sched/sched_kthread_stop_ret/filter (len 74) value "(common_pid<21420||common_pid>21425)&&(common_pid<21265||common_pid>21418)" > > > ... > > > FILTER write /sys/kernel/debug/tracing/events/sched/sched_switch/filter (len 142) value "(common_pid<21420||common_pid>21425)&&(common_pid<21265||common_pid>21418)||(next_pid<21420||next_pid>21425)&&(next_pid<21265||next_pid>21418)" > > > > It seems crazy that we write "common_pid", instead of "pid" or "cpid", or > > something like that. > > > > > > > > I assume those have to match fields in the trace event mechanism, but I don't know a lot > about it. > > "pid" is used in the wakeup event filters: > > FILTER write /sys/kernel/debug/tracing/events/sched/sched_wakeup_new/filter (len 122) value "(common_pid<21420||common_pid>21425)&&(common_pid<21265||common_pid>21418)||(pid<21420||pid>21425)&&(pid<21265||pid>21418)" > Correct, the names have to match the fields listed in the format file. The "common_*" was added way back in the beginning so that they differ from the field names in the events. -- Steve
On Wed, 17 Apr 2019 09:58:58 -0400 Phil Auld <pauld@redhat.com> wrote: > Having seen that and having tried it on a few other machines I'd be more willing to have a > > Tested-by: Phil Auld <pauld@redhat.com> > > on it, if you want it. Thanks! I will add it. -- Steve
On Wed, 17 Apr 2019 09:58:58 -0400 Phil Auld <pauld@redhat.com> wrote: > FILTER write /sys/kernel/debug/tracing/events/sched/sched_switch/filter (len 142) value "(common_pid<21420||common_pid>21425)&&(common_pid<21265||common_pid>21418)||(next_pid<21420||next_pid>21425)&&(next_pid<21265||next_pid>21418)" > > > The latter is correct given precendce of && before || but I wonder if () don't make sense? I always have to look > that one up :) Yes, as the one who wrote the parsing code (and was lectured by Al Viro about it), I know for a fact that && has a higher precedence than the || and it should work. I was about to comment about that, but then remembered my "lesson" :-) -- Steve > > If I were writing that in code I'd probably put in the extra ()s, but since it's generated and no > one actually sees it, probably okay and simpler as is.
diff --git a/tracecmd/trace-record.c b/tracecmd/trace-record.c index a3a34f1..4523128 100644 --- a/tracecmd/trace-record.c +++ b/tracecmd/trace-record.c @@ -951,10 +951,63 @@ static void update_ftrace_pids(int reset) static void update_event_filters(struct buffer_instance *instance); static void update_pid_event_filters(struct buffer_instance *instance); +static void append_filter_pid_range(char **filter, int *curr_len, + const char *field, + int start_pid, int end_pid, bool exclude) +{ + const char *op = "", *op1, *op2, *op3; + int len; + + if (*filter && **filter) + op = exclude ? "&&" : "||"; + + /* Handle thus case explicitly so that we get `pid==3` instead of + * `pid>=3&&pid<=3` for singleton ranges + */ + if (start_pid == end_pid) { +#define FMT "%s(%s%s%d)" + len = snprintf(NULL, 0, FMT, op, + field, exclude ? "!=" : "==", start_pid); + *filter = realloc(*filter, *curr_len + len + 1); + if (!*filter) + die("realloc"); + + len = snprintf(*filter + *curr_len, len + 1, FMT, op, + field, exclude ? "!=" : "==", start_pid); + *curr_len += len; + + return; +#undef FMT + } + + if (exclude) { + op1 = "<"; + op2 = "||"; + op3 = ">"; + } else { + op1 = ">="; + op2 = "&&"; + op3 = "<="; + } + +#define FMT "%s(%s%s%d%s%s%s%d)" + len = snprintf(NULL, 0, FMT, op, + field, op1, start_pid, op2, + field, op3, end_pid); + *filter = realloc(*filter, *curr_len + len + 1); + if (!*filter) + die("realloc"); + + len = snprintf(*filter + *curr_len, len + 1, FMT, op, + field, op1, start_pid, op2, + field, op3, end_pid); + *curr_len += len; +} + /** * make_pid_filter - create a filter string to all pids against @field * @curr_filter: Append to a previous filter (may realloc). Can be NULL - * @field: The fild to compare the pids against + * @field: The field to compare the pids against * * Creates a new string or appends to an existing one if @curr_filter * is not NULL. The new string will contain a filter with all pids @@ -964,54 +1017,46 @@ static void update_pid_event_filters(struct buffer_instance *instance); */ static char *make_pid_filter(char *curr_filter, const char *field) { + int start_pid = -1, last_pid = -1; + int last_exclude = -1; struct filter_pids *p; - char *filter; - char *orit; - char *match; - char *str; + char *filter = NULL; int curr_len = 0; - int len; /* Use the new method if possible */ if (have_set_event_pid) return NULL; - len = len_filter_pids + (strlen(field) + strlen("(==)||")) * nr_filter_pids; - - if (curr_filter) { - curr_len = strlen(curr_filter); - filter = realloc(curr_filter, curr_len + len + strlen("(&&())")); - if (!filter) - die("realloc"); - memmove(filter+1, curr_filter, curr_len); - filter[0] = '('; - strcat(filter, ")&&("); - curr_len = strlen(filter); - } else - filter = malloc(len); - if (!filter) - die("Failed to allocate pid filter"); - - /* Last '||' that is not used will cover the \0 */ - str = filter + curr_len; + if (!filter_pids) + return curr_filter; for (p = filter_pids; p; p = p->next) { - if (p->exclude) { - match = "!="; - orit = "&&"; - } else { - match = "=="; - orit = "||"; + /* + * PIDs are inserted in `filter_pids` from the front and that's + * why we expect them in descending order here. + */ + if (p->pid == last_pid - 1 && p->exclude == last_exclude) { + last_pid = p->pid; + continue; } - if (p == filter_pids) - orit = ""; - len = sprintf(str, "%s(%s%s%d)", orit, field, match, p->pid); - str += len; + if (start_pid != -1) + append_filter_pid_range(&filter, &curr_len, field, + last_pid, start_pid, + last_exclude); + + start_pid = last_pid = p->pid; + last_exclude = p->exclude; + } + append_filter_pid_range(&filter, &curr_len, field, + last_pid, start_pid, last_exclude); - if (curr_len) - sprintf(str, ")"); + if (curr_filter) { + char *save = filter; + asprintf(&filter, "(%s)&&(%s)", curr_filter, filter); + free(save); + } return filter; }
Express pid filters as allowed/disallowed filter ranges (pid>=100&&pid<=103) instead of specifying them per pid (pid==100||pid==101||pid==102||pid==103) This makes the size of the resulting filter smaller (and faster) and avoids overflowing the filter size limit of one page which we can hit on bigger machines (say >160 CPUs). Signed-off-by: Slavomir Kaslev <kaslevs@vmware.com> Reported-by: Phil Auld <pauld@redhat.com> Suggested-by: Steven Rostedt (VMware) <rostedt@goodmis.org> --- tracecmd/trace-record.c | 117 +++++++++++++++++++++++++++------------- 1 file changed, 81 insertions(+), 36 deletions(-)