@@ -597,7 +597,7 @@ static void handle_notify(struct blk_io_trace *bit)
void *payload = (caddr_t) bit + sizeof(*bit);
__u32 two32[2];
- switch (bit->action) {
+ switch (bit->action & ~__BLK_TN_CGROUP) {
case BLK_TN_PROCESS:
add_ppm_hash(bit->pid, payload);
break;
@@ -623,16 +623,27 @@ static void handle_notify(struct blk_io_trace *bit)
case BLK_TN_MESSAGE:
if (bit->pdu_len > 0) {
char msg[bit->pdu_len+1];
+ int len = bit->pdu_len;
+ char cgidstr[24];
- memcpy(msg, (char *)payload, bit->pdu_len);
- msg[bit->pdu_len] = '\0';
+ cgidstr[0] = 0;
+ if (bit->action & __BLK_TN_CGROUP) {
+ struct blk_io_cgroup_payload *cgid = payload;
+
+ sprintf(cgidstr, "%x,%x ", cgid->ino,
+ cgid->gen);
+ payload += sizeof(struct blk_io_cgroup_payload);
+ len -= sizeof(struct blk_io_cgroup_payload);
+ }
+ memcpy(msg, (char *)payload, len);
+ msg[len] = '\0';
fprintf(ofp,
- "%3d,%-3d %2d %8s %5d.%09lu %5u %2s %3s %s\n",
+ "%3d,%-3d %2d %8s %5d.%09lu %5u %s%2s %3s %s\n",
MAJOR(bit->device), MINOR(bit->device),
- bit->cpu, "0", (int) SECONDS(bit->time),
- (unsigned long) NANO_SECONDS(bit->time),
- 0, "m", "N", msg);
+ bit->cpu, "0", (int)SECONDS(bit->time),
+ (unsigned long)NANO_SECONDS(bit->time),
+ 0, cgidstr, "m", "N", msg);
}
break;
@@ -1511,7 +1522,7 @@ static void dump_trace_pc(struct blk_io_trace *t, struct per_dev_info *pdi,
struct per_cpu_info *pci)
{
int w = (t->action & BLK_TC_ACT(BLK_TC_WRITE)) != 0;
- int act = t->action & 0xffff;
+ int act = (t->action & 0xffff) & ~__BLK_TA_CGROUP;
switch (act) {
case __BLK_TA_QUEUE:
@@ -1560,7 +1571,7 @@ static void dump_trace_fs(struct blk_io_trace *t, struct per_dev_info *pdi,
struct per_cpu_info *pci)
{
int w = (t->action & BLK_TC_ACT(BLK_TC_WRITE)) != 0;
- int act = t->action & 0xffff;
+ int act = (t->action & 0xffff) & ~__BLK_TA_CGROUP;
switch (act) {
case __BLK_TA_QUEUE:
@@ -1643,7 +1654,7 @@ static void dump_trace(struct blk_io_trace *t, struct per_cpu_info *pci,
struct per_dev_info *pdi)
{
if (text_output) {
- if (t->action == BLK_TN_MESSAGE)
+ if ((t->action & ~__BLK_TN_CGROUP) == BLK_TN_MESSAGE)
handle_notify(t);
else if (t->action & BLK_TC_ACT(BLK_TC_PC))
dump_trace_pc(t, pdi, pci);
@@ -1658,7 +1669,7 @@ static void dump_trace(struct blk_io_trace *t, struct per_cpu_info *pci,
if (bin_output_msgs ||
!(t->action & BLK_TC_ACT(BLK_TC_NOTIFY) &&
- t->action == BLK_TN_MESSAGE))
+ (t->action & ~__BLK_TN_CGROUP) == BLK_TN_MESSAGE))
output_binary(t, sizeof(*t) + t->pdu_len);
}
@@ -2234,7 +2245,7 @@ static void show_entries_rb(int force)
break;
}
- if (!(bit->action == BLK_TN_MESSAGE) &&
+ if (!((bit->action & ~__BLK_TN_CGROUP) == BLK_TN_MESSAGE) &&
check_sequence(pdi, t, force))
break;
@@ -2246,7 +2257,7 @@ static void show_entries_rb(int force)
if (!pci || pci->cpu != bit->cpu)
pci = get_cpu_info(pdi, bit->cpu);
- if (!(bit->action == BLK_TN_MESSAGE))
+ if (!((bit->action & ~__BLK_TN_CGROUP) == BLK_TN_MESSAGE))
pci->last_sequence = bit->sequence;
pci->nelems++;
@@ -2380,7 +2391,7 @@ static int read_events(int fd, int always_block, int *fdblock)
/*
* not a real trace, so grab and handle it here
*/
- if (bit->action & BLK_TC_ACT(BLK_TC_NOTIFY) && bit->action != BLK_TN_MESSAGE) {
+ if (bit->action & BLK_TC_ACT(BLK_TC_NOTIFY) && (bit->action & ~__BLK_TN_CGROUP) != BLK_TN_MESSAGE) {
handle_notify(bit);
output_binary(bit, sizeof(*bit) + bit->pdu_len);
continue;
@@ -2529,7 +2540,7 @@ static int ms_prime(struct ms_stream *msp)
continue;
}
- if (bit->action & BLK_TC_ACT(BLK_TC_NOTIFY) && bit->action != BLK_TN_MESSAGE) {
+ if (bit->action & BLK_TC_ACT(BLK_TC_NOTIFY) && (bit->action & ~__BLK_TN_CGROUP) != BLK_TN_MESSAGE) {
handle_notify(bit);
output_binary(bit, sizeof(*bit) + bit->pdu_len);
bit_free(bit);
@@ -205,6 +205,21 @@ static void print_field(char *act, struct per_cpu_info *pci,
case 'e':
fprintf(ofp, strcat(format, "d"), t->error);
break;
+ case 'g': {
+ char cgidstr[24];
+ u32 ino = 0, gen = 0;
+
+ if (t->action & __BLK_TA_CGROUP) {
+ struct blk_io_cgroup_payload *cgid =
+ (struct blk_io_cgroup_payload *)pdu_buf;
+
+ ino = cgid->ino;
+ gen = cgid->gen;
+ }
+ sprintf(cgidstr, "%x,%x", ino, gen);
+ fprintf(ofp, strcat(format, "s"), cgidstr);
+ break;
+ }
case 'M':
fprintf(ofp, strcat(format, "d"), MAJOR(t->device));
break;
@@ -51,6 +51,7 @@ enum {
__BLK_TA_REMAP, /* bio was remapped */
__BLK_TA_ABORT, /* request aborted */
__BLK_TA_DRV_DATA, /* binary driver data */
+ __BLK_TA_CGROUP = 1 << 8,
};
/*
@@ -60,6 +61,7 @@ enum blktrace_notify {
__BLK_TN_PROCESS = 0, /* establish pid/name mapping */
__BLK_TN_TIMESTAMP, /* include system clock */
__BLK_TN_MESSAGE, /* Character string message */
+ __BLK_TN_CGROUP = __BLK_TA_CGROUP,
};
/*
@@ -116,6 +118,14 @@ struct blk_io_trace_remap {
__u64 sector_from;
};
+/*
+ * Payload with originating cgroup info; present when __BLK_TA_CGROUP is set
+ */
+struct blk_io_cgroup_payload {
+ __u32 ino;
+ __u32 gen;
+};
+
/*
* User setup structure passed with BLKSTARTTRACE
*/
@@ -332,6 +332,10 @@ the event's device (separated by a comma).
.IP \fBe\fR 4
Error value
+.IP \fBg\fR 4
+Cgroup identifier of the cgroup that generated the IO. Note that this requires
+appropriate kernel support (kernel version at least 4.14).
+
.IP \fBm\fR 4
Minor number of event's device.
@@ -601,6 +601,9 @@ Specifier & \\ \hline\hline
the event's device \\
& (separated by a comma). \\ \hline
\emph{e} & Error value \\ \hline
+\emph{g} & Cgroup identifier of the cgroup that generated the IO. \\
+ & Note that this requires appropriate kernel support \\
+ & (kernel version at least 4.14). \\ \hline
\emph{m} & Minor number of event's device. \\ \hline
\emph{M} & Major number of event's device. \\ \hline
\emph{n} & Number of blocks \\ \hline
Since Linux kernel commit 35fe6d763229 ("block: use standard blktrace API to output cgroup info for debug notes"), the kernel can set the __BLK_TA_CGROUP flag in the action field of generated events. blkparse does not account for this flag, so it gets confused by such events and either ignores or misreports them. Teach blkparse how to properly process events with the __BLK_TA_CGROUP flag. Signed-off-by: Jan Kara <jack@suse.cz> --- blkparse.c | 41 ++++++++++++++++++++++++++--------------- blkparse_fmt.c | 15 +++++++++++++++ blktrace_api.h | 10 ++++++++++ doc/blkparse.1 | 4 ++++ doc/blktrace.tex | 3 +++ 5 files changed, 58 insertions(+), 15 deletions(-)