diff mbox series

[2/3,RFC] trace: Add interface for configuring trace ring buffer size

Message ID 20211117154101.38659-3-tz.stoyanov@gmail.com (mailing list archive)
State Superseded
Headers show
Series Introduce configurable ring buffer page size | expand

Commit Message

Tzvetomir Stoyanov (VMware) Nov. 17, 2021, 3:41 p.m. UTC
The trace ring buffer page size can be configured per trace instance. A
new ftrace file "buffer_page_size" is added to get and set the size of
the ring buffer page for the current trace instance. The size must be a
multiple of the system page size, which is why the new interface works
with a system page count instead of an absolute page size: 1 means the
ring buffer page is equal to one system page, and so forth. The ring
buffer page size is limited to between 1 and 100 system pages.

Signed-off-by: Tzvetomir Stoyanov (VMware) <tz.stoyanov@gmail.com>
---
 include/linux/ring_buffer.h |  3 +++
 kernel/trace/ring_buffer.c  | 51 +++++++++++++++++++++++++++++++++++++
 kernel/trace/trace.c        | 47 ++++++++++++++++++++++++++++++++++
 3 files changed, 101 insertions(+)

Comments

Steven Rostedt Nov. 17, 2021, 6:39 p.m. UTC | #1
On Wed, 17 Nov 2021 17:41:00 +0200
"Tzvetomir Stoyanov (VMware)" <tz.stoyanov@gmail.com> wrote:

> The trace ring buffer page size can be configured, per trace instance. A
> new ftrace file "buffer_page_size" is added to get and set the size of
> the ring buffer page for current trace instance. The size must be
> multiple of system page size, that's why the new interface works with
> system page count, instead of absolute page size: 1 means the ring
> buffer page is equal to one system page and so forth. The ring buffer
> page is limited between 1 and 100 system pages.

It should be an order of pages, not a size (or a multiple).

0 - 1 page
1 - 2 pages
2 - 4 pages
3 - 8 pages
4 - 16 pages
[..]


> 
> Signed-off-by: Tzvetomir Stoyanov (VMware) <tz.stoyanov@gmail.com>
> ---
>  include/linux/ring_buffer.h |  3 +++
>  kernel/trace/ring_buffer.c  | 51 +++++++++++++++++++++++++++++++++++++
>  kernel/trace/trace.c        | 47 ++++++++++++++++++++++++++++++++++
>  3 files changed, 101 insertions(+)
> 
> diff --git a/include/linux/ring_buffer.h b/include/linux/ring_buffer.h
> index d9a2e6e8fb79..53cd7a38b717 100644
> --- a/include/linux/ring_buffer.h
> +++ b/include/linux/ring_buffer.h
> @@ -202,6 +202,9 @@ struct trace_seq;
>  int ring_buffer_print_entry_header(struct trace_seq *s);
>  int ring_buffer_print_page_header(struct trace_buffer *buffer, struct trace_seq *s);
>  
> +int ring_buffer_page_size_get(struct trace_buffer *buffer);
> +int ring_buffer_page_size_set(struct trace_buffer *buffer, int psize);

  ring_buffer_subbuf_order_get/set()

> +
>  enum ring_buffer_flags {
>  	RB_FL_OVERWRITE		= 1 << 0,
>  };
> diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
> index 6bca2977ca1a..9aa245795c3d 100644
> --- a/kernel/trace/ring_buffer.c
> +++ b/kernel/trace/ring_buffer.c
> @@ -5677,6 +5677,57 @@ int ring_buffer_read_page(struct trace_buffer *buffer,
>  }
>  EXPORT_SYMBOL_GPL(ring_buffer_read_page);
>  
> +/**
> + * ring_buffer_page_size_get - get count of system pages in one buffer page.
> + * @buffer: The ring_buffer to get the system page count from
> + *
> + * By default, one ring buffer pages equals to one system page. This parameter
> + * is configurable, per ring buffer. The size of the ring buffer page can be
> + * extended, but must be multiple of system page size.
> + *
> + * Returns the size of buffer page, in system pages: 1 means the buffer size is
> + * one system page and so forth. In case of an error < 0 is returned.
> + */
> +int ring_buffer_page_size_get(struct trace_buffer *buffer)
> +{
> +	if (!buffer)
> +		return -EINVAL;
> +
> +	return (buffer->page_size + BUF_PAGE_HDR_SIZE) / PAGE_SIZE;

And save it to another field in the structure, and not calculate it.

> +}
> +EXPORT_SYMBOL_GPL(ring_buffer_page_size_get);
> +
> +/**
> + * ring_buffer_page_size_set - set the size of ring buffer page.
> + * @buffer: The ring_buffer to set the new page size.
> + * @pcount: Number of system pages.
> + *
> + * By default, one ring buffer pages equals to one system page. This API can be
> + * used to set new size of the ring buffer page. The size must be multiple of
> + * system page size, that's why the input parameter @pcount is the count of
> + * system pages that are allocated for one ring buffer page.
> + *
> + * Returns 0 on success or < 0 in case of an error.
> + */
> +int ring_buffer_page_size_set(struct trace_buffer *buffer, int pcount)
> +{
> +	int psize;
> +
> +	if (!buffer)
> +		return -EINVAL;
> +
> +	psize = pcount * PAGE_SIZE;
> +	if (psize <= BUF_PAGE_HDR_SIZE)
> +		return -EINVAL;
> +
> +	buffer->page_size = psize - BUF_PAGE_HDR_SIZE;
> +
> +	/* Todo: reset the buffer with the new page size */
> +
> +	return 0;
> +}
> +EXPORT_SYMBOL_GPL(ring_buffer_page_size_set);
> +
>  /*
>   * We only allocate new buffers, never free them if the CPU goes down.
>   * If we were to free the buffer, then the user would lose any trace that was in
> diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
> index f9139dc1262c..05fc2712fdbd 100644
> --- a/kernel/trace/trace.c
> +++ b/kernel/trace/trace.c
> @@ -9005,6 +9005,50 @@ static const struct file_operations buffer_percent_fops = {
>  	.llseek		= default_llseek,
>  };
>  
> +static ssize_t
> +buffer_psize_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
> +{
> +	struct trace_array *tr = filp->private_data;
> +	char buf[64];
> +	int r;
> +
> +	r = sprintf(buf, "%d\n", ring_buffer_page_size_get(tr->array_buffer.buffer));
> +
> +	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
> +}
> +
> +static ssize_t
> +buffer_psize_write(struct file *filp, const char __user *ubuf,
> +		   size_t cnt, loff_t *ppos)
> +{
> +	struct trace_array *tr = filp->private_data;
> +	unsigned long val;
> +	int ret;
> +
> +	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
> +	if (ret)
> +		return ret;
> +
> +	if (val < 1 || val > 100)
> +		return -EINVAL;
> +
> +	ret = ring_buffer_page_size_set(tr->array_buffer.buffer, val);
> +	if (ret)
> +		return ret;
> +
> +	(*ppos)++;
> +
> +	return cnt;
> +}
> +
> +static const struct file_operations buffer_psize_fops = {
> +	.open		= tracing_open_generic_tr,
> +	.read		= buffer_psize_read,
> +	.write		= buffer_psize_write,
> +	.release	= tracing_release_generic_tr,
> +	.llseek		= default_llseek,
> +};
> +
>  static struct dentry *trace_instance_dir;
>  
>  static void
> @@ -9458,6 +9502,9 @@ init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer)
>  	trace_create_file("buffer_percent", TRACE_MODE_READ, d_tracer,
>  			tr, &buffer_percent_fops);
>  
> +	trace_create_file("buffer_page_size", TRACE_MODE_WRITE, d_tracer,

 "buffer_subbuf_order"

-- Steve

> +			tr, &buffer_psize_fops);
> +
>  	create_trace_options_dir(tr);
>  
>  	trace_create_maxlat_file(tr, d_tracer);
diff mbox series

Patch

diff --git a/include/linux/ring_buffer.h b/include/linux/ring_buffer.h
index d9a2e6e8fb79..53cd7a38b717 100644
--- a/include/linux/ring_buffer.h
+++ b/include/linux/ring_buffer.h
@@ -202,6 +202,9 @@  struct trace_seq;
 int ring_buffer_print_entry_header(struct trace_seq *s);
 int ring_buffer_print_page_header(struct trace_buffer *buffer, struct trace_seq *s);
 
+int ring_buffer_page_size_get(struct trace_buffer *buffer);
+int ring_buffer_page_size_set(struct trace_buffer *buffer, int pcount);
+
 enum ring_buffer_flags {
 	RB_FL_OVERWRITE		= 1 << 0,
 };
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index 6bca2977ca1a..9aa245795c3d 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -5677,6 +5677,57 @@  int ring_buffer_read_page(struct trace_buffer *buffer,
 }
 EXPORT_SYMBOL_GPL(ring_buffer_read_page);
 
+/**
+ * ring_buffer_page_size_get - get count of system pages in one buffer page.
+ * @buffer: The ring_buffer to get the system page count from
+ *
+ * By default, one ring buffer page equals one system page. This parameter
+ * is configurable, per ring buffer. The size of the ring buffer page can be
+ * extended, but must be a multiple of the system page size.
+ *
+ * Returns the size of the buffer page, in system pages: 1 means the buffer
+ * page is one system page and so forth. Returns -EINVAL if @buffer is NULL.
+ */
+int ring_buffer_page_size_get(struct trace_buffer *buffer)
+{
+	if (!buffer)
+		return -EINVAL;
+
+	return (buffer->page_size + BUF_PAGE_HDR_SIZE) / PAGE_SIZE;
+}
+EXPORT_SYMBOL_GPL(ring_buffer_page_size_get);
+
+/**
+ * ring_buffer_page_size_set - set the size of ring buffer page.
+ * @buffer: The ring_buffer whose page size is being set.
+ * @pcount: Number of system pages per ring buffer page.
+ *
+ * By default, one ring buffer page equals one system page. This API sets a
+ * new ring buffer page size. The size must be a multiple of the system page
+ * size, so @pcount is the count of system pages in one ring buffer page.
+ *
+ * Returns 0 on success, or -EINVAL if @buffer is NULL or the requested size
+ * does not exceed BUF_PAGE_HDR_SIZE.
+ */
+int ring_buffer_page_size_set(struct trace_buffer *buffer, int pcount)
+{
+	int psize;
+
+	if (!buffer)
+		return -EINVAL;
+
+	psize = pcount * PAGE_SIZE;	/* NOTE(review): no upper bound on @pcount here */
+	if (psize <= BUF_PAGE_HDR_SIZE)
+		return -EINVAL;
+
+	buffer->page_size = psize - BUF_PAGE_HDR_SIZE;
+
+	/* TODO: reset the buffer with the new page size */
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(ring_buffer_page_size_set);
+
 /*
  * We only allocate new buffers, never free them if the CPU goes down.
  * If we were to free the buffer, then the user would lose any trace that was in
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index f9139dc1262c..05fc2712fdbd 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -9005,6 +9005,50 @@  static const struct file_operations buffer_percent_fops = {
 	.llseek		= default_llseek,
 };
 
+static ssize_t	/* "buffer_page_size" read: report the page count as text */
+buffer_psize_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
+{
+	struct trace_array *tr = filp->private_data;
+	char buf[64];	/* holds the formatted "%d\n" value */
+	int r;
+
+	r = sprintf(buf, "%d\n", ring_buffer_page_size_get(tr->array_buffer.buffer));
+
+	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
+}
+
+static ssize_t	/* "buffer_page_size" write: parse a page count and apply it */
+buffer_psize_write(struct file *filp, const char __user *ubuf,
+		   size_t cnt, loff_t *ppos)
+{
+	struct trace_array *tr = filp->private_data;
+	unsigned long val;
+	int ret;
+
+	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);	/* parsed as base 10 */
+	if (ret)
+		return ret;
+
+	if (val < 1 || val > 100)	/* accept 1..100 system pages only */
+		return -EINVAL;
+
+	ret = ring_buffer_page_size_set(tr->array_buffer.buffer, val);
+	if (ret)
+		return ret;
+
+	(*ppos)++;
+
+	return cnt;	/* report the whole write as consumed */
+}
+
+static const struct file_operations buffer_psize_fops = {	/* "buffer_page_size" file */
+	.open		= tracing_open_generic_tr,
+	.read		= buffer_psize_read,
+	.write		= buffer_psize_write,
+	.release	= tracing_release_generic_tr,
+	.llseek		= default_llseek,
+};
+
 static struct dentry *trace_instance_dir;
 
 static void
@@ -9458,6 +9502,9 @@  init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer)
 	trace_create_file("buffer_percent", TRACE_MODE_READ, d_tracer,
 			tr, &buffer_percent_fops);
 
+	trace_create_file("buffer_page_size", TRACE_MODE_WRITE, d_tracer,
+			tr, &buffer_psize_fops);
+
 	create_trace_options_dir(tr);
 
 	trace_create_maxlat_file(tr, d_tracer);