Message ID | 20250227180407.111787-1-mjguzik@gmail.com (mailing list archive) |
---|---|
State | New |
Headers | show |
Series | pipe: cache 2 pages instead of 1 | expand |
I already had a lot of beer, can't read this patch, just one nit... On 02/27, Mateusz Guzik wrote: > > User data is kept in a circular buffer backed by pages allocated as > needed. Only having space for one spare is still prone to having to > resort to allocation / freeing. > > In my testing this decreases page allocs by 60% during a -j 20 kernel > build. So this is performance improvement? > +static struct page *anon_pipe_get_page(struct pipe_inode_info *pipe) > +{ > + struct page *page; > + > + if (pipe->tmp_page[0]) { > + page = pipe->tmp_page[0]; > + pipe->tmp_page[0] = NULL; > + } else if (pipe->tmp_page[1]) { > + page = pipe->tmp_page[1]; > + pipe->tmp_page[1] = NULL; > + } else { > + page = alloc_page(GFP_HIGHUSER | __GFP_ACCOUNT); > + } > + > + return page; > +} Perhaps something like for (i = 0; i < ARRAY_SIZE(pipe->tmp_page); i++) { if (pipe->tmp_page[i]) { struct page *page = pipe->tmp_page[i]; pipe->tmp_page[i] = NULL; return page; } } return alloc_page(GFP_HIGHUSER | __GFP_ACCOUNT); ? Same for anon_pipe_put_page() and free_pipe_info(). This avoids the code duplication and allows to change the size of pipe->tmp_page[] array without other changes. Oleg.
On Thu, Feb 27, 2025 at 10:59 PM Oleg Nesterov <oleg@redhat.com> wrote: > > +static struct page *anon_pipe_get_page(struct pipe_inode_info *pipe) > > +{ > > + struct page *page; > > + > > + if (pipe->tmp_page[0]) { > > + page = pipe->tmp_page[0]; > > + pipe->tmp_page[0] = NULL; > > + } else if (pipe->tmp_page[1]) { > > + page = pipe->tmp_page[1]; > > + pipe->tmp_page[1] = NULL; > > + } else { > > + page = alloc_page(GFP_HIGHUSER | __GFP_ACCOUNT); > > + } > > + > > + return page; > > +} > > Perhaps something like > > for (i = 0; i < ARRAY_SIZE(pipe->tmp_page); i++) { > if (pipe->tmp_page[i]) { > struct page *page = pipe->tmp_page[i]; > pipe->tmp_page[i] = NULL; > return page; > } > } > > return alloc_page(GFP_HIGHUSER | __GFP_ACCOUNT); > ? > > Same for anon_pipe_put_page() and free_pipe_info(). > > This avoids the code duplication and allows to change the size of > pipe->tmp_page[] array without other changes. > I have almost no opinion one way or the other and I'm not going to argue about this bit. I only note I don't expect there is a legit reason to go beyond 2 pages here. As in if more is warranted, the approach to baking the area should probably change. I started with this being spelled out so that I have easier time toggling the extra slot for testing. That said, I don't know who counts as the pipe man today. I can do the needful(tm) no problem.
On Thu, Feb 27, 2025 at 11:07:45PM +0100, Mateusz Guzik wrote: > On Thu, Feb 27, 2025 at 10:59 PM Oleg Nesterov <oleg@redhat.com> wrote: > > > +static struct page *anon_pipe_get_page(struct pipe_inode_info *pipe) > > > +{ > > > + struct page *page; > > > + > > > + if (pipe->tmp_page[0]) { > > > + page = pipe->tmp_page[0]; > > > + pipe->tmp_page[0] = NULL; > > > + } else if (pipe->tmp_page[1]) { > > > + page = pipe->tmp_page[1]; > > > + pipe->tmp_page[1] = NULL; > > > + } else { > > > + page = alloc_page(GFP_HIGHUSER | __GFP_ACCOUNT); > > > + } > > > + > > > + return page; > > > +} > > > > Perhaps something like > > > > for (i = 0; i < ARRAY_SIZE(pipe->tmp_page); i++) { > > if (pipe->tmp_page[i]) { > > struct page *page = pipe->tmp_page[i]; > > pipe->tmp_page[i] = NULL; > > return page; > > } > > } > > > > return alloc_page(GFP_HIGHUSER | __GFP_ACCOUNT); > > ? > > > > Same for anon_pipe_put_page() and free_pipe_info(). > > > > This avoids the code duplication and allows to change the size of > > pipe->tmp_page[] array without other changes. > > > > I have almost no opinion one way or the other and I'm not going to > argue about this bit. I only note I don't expect there is a legit > reason to go beyond 2 pages here. As in if more is warranted, the > approach to baking the area should probably change. > > I started with this being spelled out so that I have easier time > toggling the extra slot for testing. > > That said, I don't know who counts as the pipe man today. I can do the Linus or David should have the most detailed knowledge. > needful(tm) no problem.
diff --git a/fs/pipe.c b/fs/pipe.c index 19a7948ab234..2508d14e8812 100644 --- a/fs/pipe.c +++ b/fs/pipe.c @@ -112,20 +112,47 @@ void pipe_double_lock(struct pipe_inode_info *pipe1, pipe_lock(pipe2); } +static struct page *anon_pipe_get_page(struct pipe_inode_info *pipe) +{ + struct page *page; + + if (pipe->tmp_page[0]) { + page = pipe->tmp_page[0]; + pipe->tmp_page[0] = NULL; + } else if (pipe->tmp_page[1]) { + page = pipe->tmp_page[1]; + pipe->tmp_page[1] = NULL; + } else { + page = alloc_page(GFP_HIGHUSER | __GFP_ACCOUNT); + } + + return page; +} + +static void anon_pipe_put_page(struct pipe_inode_info *pipe, + struct page *page) +{ + if (page_count(page) == 1) { + if (!pipe->tmp_page[0]) { + pipe->tmp_page[0] = page; + return; + } + + if (!pipe->tmp_page[1]) { + pipe->tmp_page[1] = page; + return; + } + } + + put_page(page); +} + static void anon_pipe_buf_release(struct pipe_inode_info *pipe, struct pipe_buffer *buf) { struct page *page = buf->page; - /* - * If nobody else uses this page, and we don't already have a - * temporary page, let's keep track of it as a one-deep - * allocation cache. (Otherwise just release our reference to it) - */ - if (page_count(page) == 1 && !pipe->tmp_page) - pipe->tmp_page = page; - else - put_page(page); + anon_pipe_put_page(pipe, page); } static bool anon_pipe_buf_try_steal(struct pipe_inode_info *pipe, @@ -493,27 +520,25 @@ anon_pipe_write(struct kiocb *iocb, struct iov_iter *from) if (!pipe_full(head, pipe->tail, pipe->max_usage)) { unsigned int mask = pipe->ring_size - 1; struct pipe_buffer *buf; - struct page *page = pipe->tmp_page; + struct page *page; int copied; - if (!page) { - page = alloc_page(GFP_HIGHUSER | __GFP_ACCOUNT); - if (unlikely(!page)) { - ret = ret ? : -ENOMEM; - break; - } - pipe->tmp_page = page; + page = anon_pipe_get_page(pipe); + if (unlikely(!page)) { + if (!ret) + ret = -ENOMEM; + break; } copied = copy_page_from_iter(page, 0, PAGE_SIZE, from); if (unlikely(copied < PAGE_SIZE && iov_iter_count(from))) { + anon_pipe_put_page(pipe, page); if (!ret) ret = -EFAULT; break; } pipe->head = head + 1; - pipe->tmp_page = NULL; /* Insert it into the buffer array */ buf = &pipe->bufs[head & mask]; buf->page = page; @@ -847,8 +872,10 @@ void free_pipe_info(struct pipe_inode_info *pipe) if (pipe->watch_queue) put_watch_queue(pipe->watch_queue); #endif - if (pipe->tmp_page) - __free_page(pipe->tmp_page); + if (pipe->tmp_page[0]) + __free_page(pipe->tmp_page[0]); + if (pipe->tmp_page[1]) + __free_page(pipe->tmp_page[1]); kfree(pipe->bufs); kfree(pipe); } diff --git a/include/linux/pipe_fs_i.h b/include/linux/pipe_fs_i.h index 8ff23bf5a819..eb7994a1ff93 100644 --- a/include/linux/pipe_fs_i.h +++ b/include/linux/pipe_fs_i.h @@ -72,7 +72,7 @@ struct pipe_inode_info { #ifdef CONFIG_WATCH_QUEUE bool note_loss; #endif - struct page *tmp_page; + struct page *tmp_page[2]; struct fasync_struct *fasync_readers; struct fasync_struct *fasync_writers; struct pipe_buffer *bufs;
User data is kept in a circular buffer backed by pages allocated as needed. Only having space for one spare is still prone to having to resort to allocation / freeing. In my testing this decreases page allocs by 60% during a -j 20 kernel build. Signed-off-by: Mateusz Guzik <mjguzik@gmail.com> --- fs/pipe.c | 67 +++++++++++++++++++++++++++------------ include/linux/pipe_fs_i.h | 2 +- 2 files changed, 48 insertions(+), 21 deletions(-)