diff mbox series

pipe: cache 2 pages instead of 1

Message ID 20250227180407.111787-1-mjguzik@gmail.com (mailing list archive)
State New
Headers show
Series pipe: cache 2 pages instead of 1 | expand

Commit Message

Mateusz Guzik Feb. 27, 2025, 6:04 p.m. UTC
User data is kept in a circular buffer backed by pages allocated as
needed. Only having space for one spare is still prone to having to
resort to allocation / freeing.

In my testing this decreases page allocs by 60% during a -j 20 kernel
build.

Signed-off-by: Mateusz Guzik <mjguzik@gmail.com>
---
 fs/pipe.c                 | 67 +++++++++++++++++++++++++++------------
 include/linux/pipe_fs_i.h |  2 +-
 2 files changed, 48 insertions(+), 21 deletions(-)

Comments

Oleg Nesterov Feb. 27, 2025, 9:58 p.m. UTC | #1
I already had a lot of beer, can't read this patch, just one nit...

On 02/27, Mateusz Guzik wrote:
>
> User data is kept in a circular buffer backed by pages allocated as
> needed. Only having space for one spare is still prone to having to
> resort to allocation / freeing.
>
> In my testing this decreases page allocs by 60% during a -j 20 kernel
> build.

So this is performance improvement?

> +static struct page *anon_pipe_get_page(struct pipe_inode_info *pipe)
> +{
> +	struct page *page;
> +
> +	if (pipe->tmp_page[0]) {
> +		page = pipe->tmp_page[0];
> +		pipe->tmp_page[0] = NULL;
> +	} else if (pipe->tmp_page[1]) {
> +		page = pipe->tmp_page[1];
> +		pipe->tmp_page[1] = NULL;
> +	} else {
> +		page = alloc_page(GFP_HIGHUSER | __GFP_ACCOUNT);
> +	}
> +
> +	return page;
> +}

Perhaps something like

	for (i = 0; i < ARRAY_SIZE(pipe->tmp_page); i++) {
		if (pipe->tmp_page[i]) {
			struct page *page = pipe->tmp_page[i];
			pipe->tmp_page[i] = NULL;
			return page;
		}
	}

	return alloc_page(GFP_HIGHUSER | __GFP_ACCOUNT);
?

Same for anon_pipe_put_page() and free_pipe_info().

This avoids the code duplication and allows to change the size of
pipe->tmp_page[] array without other changes.

Oleg.
Mateusz Guzik Feb. 27, 2025, 10:07 p.m. UTC | #2
On Thu, Feb 27, 2025 at 10:59 PM Oleg Nesterov <oleg@redhat.com> wrote:
> > +static struct page *anon_pipe_get_page(struct pipe_inode_info *pipe)
> > +{
> > +     struct page *page;
> > +
> > +     if (pipe->tmp_page[0]) {
> > +             page = pipe->tmp_page[0];
> > +             pipe->tmp_page[0] = NULL;
> > +     } else if (pipe->tmp_page[1]) {
> > +             page = pipe->tmp_page[1];
> > +             pipe->tmp_page[1] = NULL;
> > +     } else {
> > +             page = alloc_page(GFP_HIGHUSER | __GFP_ACCOUNT);
> > +     }
> > +
> > +     return page;
> > +}
>
> Perhaps something like
>
>         for (i = 0; i < ARRAY_SIZE(pipe->tmp_page); i++) {
>                 if (pipe->tmp_page[i]) {
>                         struct page *page = pipe->tmp_page[i];
>                         pipe->tmp_page[i] = NULL;
>                         return page;
>                 }
>         }
>
>         return alloc_page(GFP_HIGHUSER | __GFP_ACCOUNT);
> ?
>
> Same for anon_pipe_put_page() and free_pipe_info().
>
> This avoids the code duplication and allows to change the size of
> pipe->tmp_page[] array without other changes.
>

I have almost no opinion one way or the other and I'm not going to
argue about this bit. I only note I don't expect there is a legit
reason to go beyond 2 pages here. As in if more is warranted, the
approach to baking the area should probably change.

I started with this being spelled out so that I have easier time
toggling the extra slot for testing.

That said, I don't know who counts as the pipe man today. I can do the
needful(tm) no problem.
Christian Brauner Feb. 28, 2025, 10:16 a.m. UTC | #3
On Thu, Feb 27, 2025 at 11:07:45PM +0100, Mateusz Guzik wrote:
> On Thu, Feb 27, 2025 at 10:59 PM Oleg Nesterov <oleg@redhat.com> wrote:
> > > +static struct page *anon_pipe_get_page(struct pipe_inode_info *pipe)
> > > +{
> > > +     struct page *page;
> > > +
> > > +     if (pipe->tmp_page[0]) {
> > > +             page = pipe->tmp_page[0];
> > > +             pipe->tmp_page[0] = NULL;
> > > +     } else if (pipe->tmp_page[1]) {
> > > +             page = pipe->tmp_page[1];
> > > +             pipe->tmp_page[1] = NULL;
> > > +     } else {
> > > +             page = alloc_page(GFP_HIGHUSER | __GFP_ACCOUNT);
> > > +     }
> > > +
> > > +     return page;
> > > +}
> >
> > Perhaps something like
> >
> >         for (i = 0; i < ARRAY_SIZE(pipe->tmp_page); i++) {
> >                 if (pipe->tmp_page[i]) {
> >                         struct page *page = pipe->tmp_page[i];
> >                         pipe->tmp_page[i] = NULL;
> >                         return page;
> >                 }
> >         }
> >
> >         return alloc_page(GFP_HIGHUSER | __GFP_ACCOUNT);
> > ?
> >
> > Same for anon_pipe_put_page() and free_pipe_info().
> >
> > This avoids the code duplication and allows to change the size of
> > pipe->tmp_page[] array without other changes.
> >
> 
> I have almost no opinion one way or the other and I'm not going to
> argue about this bit. I only note I don't expect there is a legit
> reason to go beyond 2 pages here. As in if more is warranted, the
> approach to baking the area should probably change.
> 
> I started with this being spelled out so that I have easier time
> toggling the extra slot for testing.
> 
> That said, I don't know who counts as the pipe man today. I can do the

Linus or David should have the most detailed knowledge.

> needful(tm) no problem.
diff mbox series

Patch

diff --git a/fs/pipe.c b/fs/pipe.c
index 19a7948ab234..2508d14e8812 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -112,20 +112,47 @@  void pipe_double_lock(struct pipe_inode_info *pipe1,
 	pipe_lock(pipe2);
 }
 
+static struct page *anon_pipe_get_page(struct pipe_inode_info *pipe)
+{
+	struct page *page;
+
+	if (pipe->tmp_page[0]) {
+		page = pipe->tmp_page[0];
+		pipe->tmp_page[0] = NULL;
+	} else if (pipe->tmp_page[1]) {
+		page = pipe->tmp_page[1];
+		pipe->tmp_page[1] = NULL;
+	} else {
+		page = alloc_page(GFP_HIGHUSER | __GFP_ACCOUNT);
+	}
+
+	return page;
+}
+
+static void anon_pipe_put_page(struct pipe_inode_info *pipe,
+			       struct page *page)
+{
+	if (page_count(page) == 1) {
+		if (!pipe->tmp_page[0]) {
+			pipe->tmp_page[0] = page;
+			return;
+		}
+
+		if (!pipe->tmp_page[1]) {
+			pipe->tmp_page[1] = page;
+			return;
+		}
+	}
+
+	put_page(page);
+}
+
 static void anon_pipe_buf_release(struct pipe_inode_info *pipe,
 				  struct pipe_buffer *buf)
 {
 	struct page *page = buf->page;
 
-	/*
-	 * If nobody else uses this page, and we don't already have a
-	 * temporary page, let's keep track of it as a one-deep
-	 * allocation cache. (Otherwise just release our reference to it)
-	 */
-	if (page_count(page) == 1 && !pipe->tmp_page)
-		pipe->tmp_page = page;
-	else
-		put_page(page);
+	anon_pipe_put_page(pipe, page);
 }
 
 static bool anon_pipe_buf_try_steal(struct pipe_inode_info *pipe,
@@ -493,27 +520,25 @@  anon_pipe_write(struct kiocb *iocb, struct iov_iter *from)
 		if (!pipe_full(head, pipe->tail, pipe->max_usage)) {
 			unsigned int mask = pipe->ring_size - 1;
 			struct pipe_buffer *buf;
-			struct page *page = pipe->tmp_page;
+			struct page *page;
 			int copied;
 
-			if (!page) {
-				page = alloc_page(GFP_HIGHUSER | __GFP_ACCOUNT);
-				if (unlikely(!page)) {
-					ret = ret ? : -ENOMEM;
-					break;
-				}
-				pipe->tmp_page = page;
+			page = anon_pipe_get_page(pipe);
+			if (unlikely(!page)) {
+				if (!ret)
+					ret = -ENOMEM;
+				break;
 			}
 
 			copied = copy_page_from_iter(page, 0, PAGE_SIZE, from);
 			if (unlikely(copied < PAGE_SIZE && iov_iter_count(from))) {
+				anon_pipe_put_page(pipe, page);
 				if (!ret)
 					ret = -EFAULT;
 				break;
 			}
 
 			pipe->head = head + 1;
-			pipe->tmp_page = NULL;
 			/* Insert it into the buffer array */
 			buf = &pipe->bufs[head & mask];
 			buf->page = page;
@@ -847,8 +872,10 @@  void free_pipe_info(struct pipe_inode_info *pipe)
 	if (pipe->watch_queue)
 		put_watch_queue(pipe->watch_queue);
 #endif
-	if (pipe->tmp_page)
-		__free_page(pipe->tmp_page);
+	if (pipe->tmp_page[0])
+		__free_page(pipe->tmp_page[0]);
+	if (pipe->tmp_page[1])
+		__free_page(pipe->tmp_page[1]);
 	kfree(pipe->bufs);
 	kfree(pipe);
 }
diff --git a/include/linux/pipe_fs_i.h b/include/linux/pipe_fs_i.h
index 8ff23bf5a819..eb7994a1ff93 100644
--- a/include/linux/pipe_fs_i.h
+++ b/include/linux/pipe_fs_i.h
@@ -72,7 +72,7 @@  struct pipe_inode_info {
 #ifdef CONFIG_WATCH_QUEUE
 	bool note_loss;
 #endif
-	struct page *tmp_page;
+	struct page *tmp_page[2];
 	struct fasync_struct *fasync_readers;
 	struct fasync_struct *fasync_writers;
 	struct pipe_buffer *bufs;