Message ID | 20210225070231.21136-8-chaitanya.kulkarni@wdc.com
State | New, archived
Series | [RFC,01/39] blktrace_api: add new trace definitions
On 2021/02/25 16:03, Chaitanya Kulkarni wrote:
> Signed-off-by: Chaitanya Kulkarni <chaitanya.kulkarni@wdc.com>

No commit message. Please add one.

> ---
>  kernel/trace/blktrace.c | 130 ++++++++++++++++++++++++++++++++++++++++
>  1 file changed, 130 insertions(+)
> 
> diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c
> index feb823b917ec..1aef55fdefa9 100644
> --- a/kernel/trace/blktrace.c
> +++ b/kernel/trace/blktrace.c
> @@ -462,6 +462,136 @@ static void __blk_add_trace(struct blk_trace *bt, sector_t sector, int bytes,
>  	local_irq_restore(flags);
>  }
>  
> +/*
> + * Data direction bit lookup
> + */
> +static const u64 ddir_act_ext[2] = { BLK_TC_ACT_EXT(BLK_TC_READ),
> +				     BLK_TC_ACT_EXT(BLK_TC_WRITE) };
> +
> +/* The ilog2() calls fall out because they're constant */
> +#define MASK_TC_BIT_EXT(rw, __name) ((rw & REQ_ ## __name) << \
> +	(ilog2(BLK_TC_ ## __name) + BLK_TC_SHIFT_EXT - __REQ_ ## __name))
> +
> +/*
> + * The worker for the various blk_add_trace*() types. Fills out a
> + * blk_io_trace structure and places it in a per-cpu subbuffer.
> + */

The comment is wrong. You are filling a blk_io_trace_ext structure. But I do
not see why that structure is needed in the first place. So the function below
may not be needed either. Modifying the existing one seems like a simpler
approach to me.

> +static void __blk_add_trace_ext(struct blk_trace_ext *bt, sector_t sector, int bytes,
> +				int op, int op_flags, u64 what, int error, int pdu_len,
> +				void *pdu_data, u64 cgid, u32 ioprio)
> +{
> +	struct task_struct *tsk = current;
> +	struct ring_buffer_event *event = NULL;
> +	struct trace_buffer *buffer = NULL;
> +	struct blk_io_trace_ext *t;
> +	unsigned long flags = 0;
> +	unsigned long *sequence;
> +	pid_t pid;
> +	int cpu, pc = 0;
> +	bool blk_tracer = blk_tracer_enabled;
> +	ssize_t cgid_len = cgid ? sizeof(cgid) : 0;
> +
> +	if (unlikely(bt->trace_state != Blktrace_running && !blk_tracer))
> +		return;
> +
> +	what |= ddir_act_ext[op_is_write(op) ? WRITE : READ];
> +	what |= MASK_TC_BIT_EXT(op_flags, SYNC);
> +	what |= MASK_TC_BIT_EXT(op_flags, RAHEAD);
> +	what |= MASK_TC_BIT_EXT(op_flags, META);
> +	what |= MASK_TC_BIT_EXT(op_flags, PREFLUSH);
> +	what |= MASK_TC_BIT_EXT(op_flags, FUA);
> +	if (op == REQ_OP_ZONE_APPEND)
> +		what |= BLK_TC_ACT_EXT(BLK_TC_ZONE_APPEND);
> +	if (op == REQ_OP_DISCARD || op == REQ_OP_SECURE_ERASE)
> +		what |= BLK_TC_ACT_EXT(BLK_TC_DISCARD);
> +	if (op == REQ_OP_FLUSH)
> +		what |= BLK_TC_ACT_EXT(BLK_TC_FLUSH);
> +	if (unlikely(op == REQ_OP_WRITE_ZEROES))
> +		what |= BLK_TC_ACT_EXT(BLK_TC_WRITE_ZEROES);
> +	if (unlikely(op == REQ_OP_ZONE_RESET))
> +		what |= BLK_TC_ACT_EXT(BLK_TC_ZONE_RESET);
> +	if (unlikely(op == REQ_OP_ZONE_RESET_ALL))
> +		what |= BLK_TC_ACT_EXT(BLK_TC_ZONE_RESET_ALL);
> +	if (unlikely(op == REQ_OP_ZONE_OPEN))
> +		what |= BLK_TC_ACT_EXT(BLK_TC_ZONE_OPEN);
> +	if (unlikely(op == REQ_OP_ZONE_CLOSE))
> +		what |= BLK_TC_ACT_EXT(BLK_TC_ZONE_CLOSE);
> +	if (unlikely(op == REQ_OP_ZONE_FINISH))
> +		what |= BLK_TC_ACT_EXT(BLK_TC_ZONE_FINISH);
> +
> +	if (cgid)
> +		what |= __BLK_TA_CGROUP;
> +
> +	pid = tsk->pid;
> +	if (act_log_check_ext(bt, what, sector, pid))
> +		return;
> +	if (bt->prio_mask && !prio_log_check(bt, ioprio))
> +		return;
> +
> +	cpu = raw_smp_processor_id();
> +
> +	if (blk_tracer) {
> +		tracing_record_cmdline(current);
> +
> +		buffer = blk_tr->array_buffer.buffer;
> +		pc = preempt_count();
> +		event = trace_buffer_lock_reserve(buffer, TRACE_BLK,
> +						  sizeof(*t) + pdu_len + cgid_len,
> +						  0, pc);
> +		if (!event)
> +			return;
> +		t = ring_buffer_event_data(event);
> +		goto record_it;
> +	}
> +
> +	if (unlikely(tsk->btrace_seq != blktrace_seq))
> +		trace_note_tsk_ext(tsk, ioprio);
> +
> +	/*
> +	 * A word about the locking here - we disable interrupts to reserve
> +	 * some space in the relay per-cpu buffer, to prevent an irq
> +	 * from coming in and stepping on our toes.
> +	 */
> +	local_irq_save(flags);
> +	t = relay_reserve(bt->rchan, sizeof(*t) + pdu_len + cgid_len);
> +	if (t) {
> +		sequence = per_cpu_ptr(bt->sequence, cpu);
> +
> +		t->magic = BLK_IO_TRACE_MAGIC | BLK_IO_TRACE_VERSION_EXT;
> +		t->sequence = ++(*sequence);
> +		t->time = ktime_to_ns(ktime_get());
> +record_it:
> +		/*
> +		 * These two are not needed in ftrace as they are in the
> +		 * generic trace_entry, filled by tracing_generic_entry_update,
> +		 * but for the trace_event->bin() synthesizer benefit we do it
> +		 * here too.
> +		 */
> +		t->cpu = cpu;
> +		t->pid = pid;
> +
> +		t->sector = sector;
> +		t->bytes = bytes;
> +		t->action = what;
> +		t->ioprio = ioprio;
> +		t->device = bt->dev;
> +		t->error = error;
> +		t->pdu_len = pdu_len + cgid_len;
> +
> +		if (cgid_len)
> +			memcpy((void *)t + sizeof(*t), &cgid, cgid_len);
> +		if (pdu_len)
> +			memcpy((void *)t + sizeof(*t) + cgid_len, pdu_data, pdu_len);
> +
> +		if (blk_tracer) {
> +			trace_buffer_unlock_commit(blk_tr, buffer, event, 0, pc);
> +			return;
> +		}
> +	}
> +
> +	local_irq_restore(flags);
> +}
> +
>  static void blk_trace_free(struct blk_trace *bt)
>  {
>  	relay_close(bt->rchan);
> 
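As a rough stand-alone model of the simpler approach suggested in the review
(keep one record structure and one fill routine, and carry the new ioprio
value behind a format-version bump instead of cloning everything into *_ext
variants), consider the sketch below. All names and values in it
(demo_io_trace, demo_fill_trace(), the magic/version constants) are made up
for illustration; they are not the kernel's blktrace definitions and not part
of the posted series.

/* Toy user-space model only: one struct, one fill path, extended field rides
 * along with a version bump, as the review suggests for blk_io_trace. */
#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define DEMO_TRACE_MAGIC   0x65617400u
#define DEMO_TRACE_VERSION 0x08u          /* stand-in for a bumped format version */

struct demo_io_trace {
	uint32_t magic;    /* magic | version, the way blktrace records do it */
	uint64_t sector;
	uint32_t bytes;
	uint32_t ioprio;   /* the one new field this series wants to expose */
};

static void demo_fill_trace(struct demo_io_trace *t, uint64_t sector,
			    uint32_t bytes, uint32_t ioprio)
{
	memset(t, 0, sizeof(*t));
	t->magic = DEMO_TRACE_MAGIC | DEMO_TRACE_VERSION;
	t->sector = sector;
	t->bytes = bytes;
	t->ioprio = ioprio;   /* extension handled in the same fill path */
}

int main(void)
{
	struct demo_io_trace t;

	demo_fill_trace(&t, 2048, 4096, 5);
	printf("sector=%llu bytes=%u ioprio=%u\n",
	       (unsigned long long)t.sector, (unsigned)t.bytes, (unsigned)t.ioprio);
	return 0;
}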
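On the mechanics of the MASK_TC_BIT_EXT() macro quoted above: a request flag
is a single bit at position __REQ_<name>, so shifting it left by
ilog2(BLK_TC_<name>) + BLK_TC_SHIFT_EXT - __REQ_<name> relocates it to its
trace-category position in the 64-bit action word. The following stand-alone
sketch shows that arithmetic with made-up bit positions; the real values come
from blk_types.h and the new definitions added earlier in this series.

/* Illustration only: demo constants, not the kernel's. */
#include <stdint.h>
#include <stdio.h>

#define DEMO_REQ_SHIFT     3                       /* __REQ_SYNC-like bit position  */
#define DEMO_REQ_FLAG      (1U << DEMO_REQ_SHIFT)  /* REQ_SYNC-like request flag    */
#define DEMO_TC_BIT        1                       /* ilog2(BLK_TC_SYNC)-like value */
#define DEMO_TC_SHIFT_EXT  32                      /* assumed category shift in u64 */

static uint64_t demo_mask_tc_bit_ext(unsigned int op_flags)
{
	/* cast first so the relocation happens in 64 bits */
	return (uint64_t)(op_flags & DEMO_REQ_FLAG) <<
	       (DEMO_TC_BIT + DEMO_TC_SHIFT_EXT - DEMO_REQ_SHIFT);
}

int main(void)
{
	/* flag set: the category bit lands at position 1 + 32 = 33 (0x200000000) */
	printf("0x%llx\n", (unsigned long long)demo_mask_tc_bit_ext(DEMO_REQ_FLAG));
	/* flag clear: contributes nothing to the action word */
	printf("0x%llx\n", (unsigned long long)demo_mask_tc_bit_ext(0));
	return 0;
}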