diff --git a/include/uapi/linux/fs.h b/include/uapi/linux/fs.h
--- a/include/uapi/linux/fs.h
+++ b/include/uapi/linux/fs.h
@@ -184,6 +184,11 @@ struct fsxattr {
#define BLKSECDISCARD _IO(0x12,125)
#define BLKROTATIONAL _IO(0x12,126)
#define BLKZEROOUT _IO(0x12,127)
+#define BLKTRACESETUP_EXT _IOWR(0x12,128,struct blk_user_trace_setup_ext)
+#define BLKTRACESTART_EXT _IO(0x12,129)
+#define BLKTRACESTOP_EXT _IO(0x12,130)
+#define BLKTRACETEARDOWN_EXT _IO(0x12,131)
+
/*
* A jump here: 130-131 are reserved for zoned block devices
* (see uapi/linux/blkzoned.h)
diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c
--- a/kernel/trace/blktrace.c
+++ b/kernel/trace/blktrace.c
@@ -609,6 +609,17 @@ static void get_probe_ref(void)
mutex_unlock(&blk_probe_mutex);
}
+static void blk_trace_free_ext(struct blk_trace_ext *bt)
+{
+ debugfs_remove(bt->msg_file);
+ debugfs_remove(bt->dropped_file);
+ relay_close(bt->rchan);
+ debugfs_remove(bt->dir);
+ free_percpu(bt->sequence);
+ free_percpu(bt->msg_data);
+ kfree(bt);
+}
+
static void put_probe_ref(void)
{
mutex_lock(&blk_probe_mutex);
@@ -624,6 +635,13 @@ static void blk_trace_cleanup(struct blk_trace *bt)
put_probe_ref();
}
+static void blk_trace_cleanup_ext(struct blk_trace_ext *bt)
+{
+ synchronize_rcu();
+ blk_trace_free_ext(bt);
+ put_probe_ref();
+}
+
static int __blk_trace_remove(struct request_queue *q)
{
struct blk_trace *bt;
@@ -639,12 +657,28 @@ static int __blk_trace_remove(struct request_queue *q)
return 0;
}
+static int __blk_trace_remove_ext(struct request_queue *q)
+{
+ struct blk_trace_ext *bt;
+
+ bt = xchg(&q->blk_trace_ext, NULL);
+ if (!bt)
+ return -EINVAL;
+
+ if (bt->trace_state != Blktrace_running)
+ blk_trace_cleanup_ext(bt);
+
+ return 0;
+}
+
int blk_trace_remove(struct request_queue *q)
{
int ret;
mutex_lock(&q->debugfs_mutex);
ret = __blk_trace_remove(q);
+ if (ret)
+ ret = __blk_trace_remove_ext(q);
mutex_unlock(&q->debugfs_mutex);
return ret;
@@ -662,6 +696,17 @@ static ssize_t blk_dropped_read(struct file *filp, char __user *buffer,
return simple_read_from_buffer(buffer, count, ppos, buf, strlen(buf));
}
+static ssize_t blk_dropped_read_ext(struct file *filp, char __user *buffer,
+ size_t count, loff_t *ppos)
+{
+ struct blk_trace_ext *bt = filp->private_data;
+ char buf[16];
+
+ snprintf(buf, sizeof(buf), "%u\n", atomic_read(&bt->dropped));
+
+ return simple_read_from_buffer(buffer, count, ppos, buf, strlen(buf));
+}
+
static const struct file_operations blk_dropped_fops = {
.owner = THIS_MODULE,
.open = simple_open,
@@ -669,6 +714,13 @@ static const struct file_operations blk_dropped_fops = {
.llseek = default_llseek,
};
+static const struct file_operations blk_dropped_fops_ext = {
+ .owner = THIS_MODULE,
+ .open = simple_open,
+ .read = blk_dropped_read_ext,
+ .llseek = default_llseek,
+};
+
static ssize_t blk_msg_write(struct file *filp, const char __user *buffer,
size_t count, loff_t *ppos)
{
@@ -689,6 +741,26 @@ static ssize_t blk_msg_write(struct file *filp, const char __user *buffer,
return count;
}
+static ssize_t blk_msg_write_ext(struct file *filp, const char __user *buffer,
+ size_t count, loff_t *ppos)
+{
+ char *msg;
+ struct blk_trace_ext *bt;
+
+ if (count >= BLK_TN_MAX_MSG)
+ return -EINVAL;
+
+ msg = memdup_user_nul(buffer, count);
+ if (IS_ERR(msg))
+ return PTR_ERR(msg);
+
+ bt = filp->private_data;
+ __trace_note_message_ext(bt, NULL, "%s", msg);
+ kfree(msg);
+
+ return count;
+}
+
static const struct file_operations blk_msg_fops = {
.owner = THIS_MODULE,
.open = simple_open,
@@ -696,6 +768,13 @@ static const struct file_operations blk_msg_fops = {
.llseek = noop_llseek,
};
+static const struct file_operations blk_msg_fops_ext = {
+ .owner = THIS_MODULE,
+ .open = simple_open,
+ .write = blk_msg_write_ext,
+ .llseek = noop_llseek,
+};
+
/*
* Keep track of how many times we encountered a full subbuffer, to aid
* the user space app in telling how many lost events there were.
@@ -730,12 +809,31 @@ static struct dentry *blk_create_buf_file_callback(const char *filename,
&relay_file_operations);
}
+static int blk_subbuf_start_callback_ext(struct rchan_buf *buf, void *subbuf,
+ void *prev_subbuf, size_t prev_padding)
+{
+ struct blk_trace_ext *bt;
+
+ if (!relay_buf_full(buf))
+ return 1;
+
+ bt = buf->chan->private_data;
+ atomic_inc(&bt->dropped);
+ return 0;
+}
+
static const struct rchan_callbacks blk_relay_callbacks = {
.subbuf_start = blk_subbuf_start_callback,
.create_buf_file = blk_create_buf_file_callback,
.remove_buf_file = blk_remove_buf_file_callback,
};
+static const struct rchan_callbacks blk_relay_callbacks_ext = {
+ .subbuf_start = blk_subbuf_start_callback_ext,
+ .create_buf_file = blk_create_buf_file_callback,
+ .remove_buf_file = blk_remove_buf_file_callback,
+};
+
static void blk_trace_setup_lba(struct blk_trace *bt,
struct block_device *bdev)
{
@@ -748,6 +846,18 @@ static void blk_trace_setup_lba(struct blk_trace *bt,
}
}
+static void blk_trace_setup_lba_ext(struct blk_trace_ext *bt,
+ struct block_device *bdev)
+{
+ if (bdev) {
+ bt->start_lba = bdev->bd_start_sect;
+ bt->end_lba = bdev->bd_start_sect + bdev_nr_sectors(bdev);
+ } else {
+ bt->start_lba = 0;
+ bt->end_lba = -1ULL;
+ }
+}
+
/*
* Setup everything required to start tracing
*/
@@ -858,6 +968,105 @@ static int do_blk_trace_setup(struct request_queue *q, char *name, dev_t dev,
return ret;
}
+static int do_blk_trace_setup_ext(struct request_queue *q, char *name, dev_t dev,
+ struct block_device *bdev,
+ struct blk_user_trace_setup_ext *buts)
+{
+ struct blk_trace_ext *bt = NULL;
+ struct dentry *dir = NULL;
+ int ret;
+
+ if (q->blk_trace) {
+ pr_err("queue is already associated with legacy blktrace\n");
+ return -EINVAL;
+ }
+ if (!buts->buf_size || !buts->buf_nr)
+ return -EINVAL;
+
+ if (!blk_debugfs_root)
+ return -ENOENT;
+
+ strncpy(buts->name, name, BLKTRACE_BDEV_SIZE);
+ buts->name[BLKTRACE_BDEV_SIZE - 1] = '\0';
+
+ /*
+ * some device names have larger paths - convert the slashes
+ * to underscores for this to work as expected
+ */
+ strreplace(buts->name, '/', '_');
+
+ bt = kzalloc(sizeof(*bt), GFP_KERNEL);
+ if (!bt)
+ return -ENOMEM;
+
+ ret = -ENOMEM;
+ bt->sequence = alloc_percpu(unsigned long);
+ if (!bt->sequence)
+ goto err;
+
+ bt->msg_data = __alloc_percpu(BLK_TN_MAX_MSG, __alignof__(char));
+ if (!bt->msg_data)
+ goto err;
+
+ ret = -ENOENT;
+
+ dir = debugfs_lookup(buts->name, blk_debugfs_root);
+ if (!dir)
+ bt->dir = dir = debugfs_create_dir(buts->name, blk_debugfs_root);
+ if (!dir)
+ goto err;
+
+ bt->dev = dev;
+ atomic_set(&bt->dropped, 0);
+ INIT_LIST_HEAD(&bt->running_ext_list);
+
+ ret = -EIO;
+ bt->dropped_file = debugfs_create_file("dropped", 0444, dir, bt,
+ &blk_dropped_fops_ext);
+ if (!bt->dropped_file)
+ goto err;
+
+ bt->msg_file = debugfs_create_file("msg", 0222, dir, bt, &blk_msg_fops_ext);
+ if (!bt->msg_file)
+ goto err;
+
+ bt->rchan = relay_open("trace", dir, buts->buf_size,
+ buts->buf_nr, &blk_relay_callbacks_ext, bt);
+ if (!bt->rchan)
+ goto err;
+
+ bt->act_mask = buts->act_mask;
+ if (!bt->act_mask)
+ bt->act_mask = (u64) -1ULL;
+
+ bt->prio_mask = buts->prio_mask;
+
+ blk_trace_setup_lba_ext(bt, bdev);
+
+ /* overwrite with user settings */
+ if (buts->start_lba)
+ bt->start_lba = buts->start_lba;
+ if (buts->end_lba)
+ bt->end_lba = buts->end_lba;
+
+ bt->pid = buts->pid;
+ bt->trace_state = Blktrace_setup;
+
+ ret = -EBUSY;
+ if (cmpxchg(&q->blk_trace_ext, NULL, bt))
+ goto err;
+
+ get_probe_ref();
+
+ ret = 0;
+err:
+ if (dir && !bt->dir)
+ dput(dir);
+ if (ret)
+ blk_trace_free_ext(bt);
+ return ret;
+}
+
static int __blk_trace_setup(struct request_queue *q, char *name, dev_t dev,
struct block_device *bdev, char __user *arg)
{
@@ -879,6 +1089,27 @@ static int __blk_trace_setup(struct request_queue *q, char *name, dev_t dev,
return 0;
}
+static int __blk_trace_setup_ext(struct request_queue *q, char *name, dev_t dev,
+ struct block_device *bdev, char __user *arg)
+{
+ struct blk_user_trace_setup_ext buts;
+ int ret;
+
+ ret = copy_from_user(&buts, arg, sizeof(buts));
+ if (ret)
+ return -EFAULT;
+
+ ret = do_blk_trace_setup_ext(q, name, dev, bdev, &buts);
+ if (ret)
+ return ret;
+
+ if (copy_to_user(arg, &buts, sizeof(buts))) {
+ __blk_trace_remove_ext(q);
+ return -EFAULT;
+ }
+ return 0;
+}
+
int blk_trace_setup(struct request_queue *q, char *name, dev_t dev,
struct block_device *bdev,
char __user *arg)
@@ -969,12 +1200,57 @@ static int __blk_trace_startstop(struct request_queue *q, int start)
return ret;
}
+static int __blk_trace_startstop_ext(struct request_queue *q, int start)
+{
+ int ret;
+ struct blk_trace_ext *bt;
+
+ bt = rcu_dereference_protected(q->blk_trace_ext,
+ lockdep_is_held(&q->debugfs_mutex));
+ if (bt == NULL)
+ return -EINVAL;
+
+ /*
+ * For starting a trace, we can transition from a setup or stopped
+ * trace. For stopping a trace, the state must be running
+ */
+ ret = -EINVAL;
+ if (start) {
+ if (bt->trace_state == Blktrace_setup ||
+ bt->trace_state == Blktrace_stopped) {
+ blktrace_seq++;
+ smp_mb();
+ bt->trace_state = Blktrace_running;
+ spin_lock_irq(&running_trace_ext_lock);
+ list_add(&bt->running_ext_list,
+ &running_trace_ext_list);
+ spin_unlock_irq(&running_trace_ext_lock);
+
+ trace_note_time_ext(bt);
+ ret = 0;
+ }
+ } else {
+ if (bt->trace_state == Blktrace_running) {
+ bt->trace_state = Blktrace_stopped;
+ spin_lock_irq(&running_trace_ext_lock);
+ list_del_init(&bt->running_ext_list);
+ spin_unlock_irq(&running_trace_ext_lock);
+ relay_flush(bt->rchan);
+ ret = 0;
+ }
+ }
+
+ return ret;
+}
+
int blk_trace_startstop(struct request_queue *q, int start)
{
int ret;
mutex_lock(&q->debugfs_mutex);
ret = __blk_trace_startstop(q, start);
+ if (ret)
+ ret = __blk_trace_startstop_ext(q, start);
mutex_unlock(&q->debugfs_mutex);
return ret;
@@ -1011,6 +1287,10 @@ int blk_trace_ioctl(struct block_device *bdev, unsigned cmd, char __user *arg)
bdevname(bdev, b);
ret = __blk_trace_setup(q, b, bdev->bd_dev, bdev, arg);
break;
+ case BLKTRACESETUP_EXT:
+ bdevname(bdev, b);
+ ret = __blk_trace_setup_ext(q, b, bdev->bd_dev, bdev, arg);
+ break;
#if defined(CONFIG_COMPAT) && defined(CONFIG_X86_64)
case BLKTRACESETUP32:
bdevname(bdev, b);
@@ -1023,9 +1303,18 @@ int blk_trace_ioctl(struct block_device *bdev, unsigned cmd, char __user *arg)
case BLKTRACESTOP:
ret = __blk_trace_startstop(q, start);
break;
+ case BLKTRACESTART_EXT:
+ start = 1;
+ fallthrough;
+ case BLKTRACESTOP_EXT:
+ ret = __blk_trace_startstop_ext(q, start);
+ break;
case BLKTRACETEARDOWN:
ret = __blk_trace_remove(q);
break;
+ case BLKTRACETEARDOWN_EXT:
+ ret = __blk_trace_remove_ext(q);
+ break;
default:
ret = -ENOTTY;
break;
@@ -1049,6 +1338,11 @@ void blk_trace_shutdown(struct request_queue *q)
__blk_trace_remove(q);
}
+ if (rcu_dereference_protected(q->blk_trace_ext,
+ lockdep_is_held(&q->debugfs_mutex))) {
+ __blk_trace_startstop_ext(q, 0);
+ __blk_trace_remove_ext(q);
+ }
mutex_unlock(&q->debugfs_mutex);
}
Implement the newly introduced IOCTLs for setup/start/stop/teardown.

Signed-off-by: Chaitanya Kulkarni <chaitanya.kulkarni@wdc.com>
---
 include/uapi/linux/fs.h |   5 +
 kernel/trace/blktrace.c | 294 ++++++++++++++++++++++++++++++++++++++++
 2 files changed, 299 insertions(+)
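
For reviewers, here is a minimal user-space sketch of how the four new
ioctls are expected to be driven, following the setup/start/stop/teardown
flow handled in blk_trace_ioctl() above. The ioctl names and the
blk_user_trace_setup_ext field names (buf_size, buf_nr, act_mask, name)
come from this patch; the field types, the header that exports the
structure, the device path, and the debugfs paths in the comments are
illustrative assumptions only.

/*
 * Hypothetical usage sketch for the *_EXT blktrace ioctls.
 * Field names follow the buts-> accesses in do_blk_trace_setup_ext();
 * exact field types and the uapi header are assumed, not taken from
 * this patch.
 */
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <linux/fs.h>              /* BLKTRACE*_EXT ioctl numbers */
#include <linux/blktrace_api.h>    /* struct blk_user_trace_setup_ext (assumed) */

int main(void)
{
        struct blk_user_trace_setup_ext buts;
        int fd, ret;

        fd = open("/dev/nvme0n1", O_RDONLY | O_NONBLOCK);
        if (fd < 0)
                return 1;

        memset(&buts, 0, sizeof(buts));
        buts.buf_size = 512 * 1024;  /* relay sub-buffer size in bytes */
        buts.buf_nr = 4;             /* number of relay sub-buffers */
        buts.act_mask = 0;           /* 0 -> kernel widens to "trace everything" */

        /* Creates the relay channel plus the dropped/msg debugfs files. */
        ret = ioctl(fd, BLKTRACESETUP_EXT, &buts);
        if (ret < 0) {
                perror("BLKTRACESETUP_EXT");
                close(fd);
                return 1;
        }

        /* buts.name was filled in by the kernel on the copy back. */
        printf("tracing under debugfs block/%s\n", buts.name);

        ret = ioctl(fd, BLKTRACESTART_EXT);  /* begin logging to relay */

        /* ... read the per-cpu relay files (e.g. trace0, trace1, ...) ... */

        ioctl(fd, BLKTRACESTOP_EXT);         /* flush relay and stop */
        ioctl(fd, BLKTRACETEARDOWN_EXT);     /* free blk_trace_ext state */

        close(fd);
        return ret < 0;
}

BLKTRACESETUP_EXT is the only command that carries a payload (the _IOWR
argument is copied back with the kernel-chosen name filled in); start,
stop, and teardown are plain _IO commands with no argument, mirroring the
legacy BLKTRACESTART/BLKTRACESTOP/BLKTRACETEARDOWN flow.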