Message ID | 20240628084411.2371-4-cliang01.li@samsung.com (mailing list archive) |
---|---|
State | New |
Headers | show |
Series | io_uring/rsrc: coalescing multi-hugepage registered buffers | expand |
On 6/28/24 09:44, Chenliang Li wrote: > Modify io_sqe_buffer_register to enable the coalescing for > multi-hugepage fixed buffers. > > Signed-off-by: Chenliang Li <cliang01.li@samsung.com> > --- > io_uring/rsrc.c | 47 ++++++++++++++++------------------------------- > 1 file changed, 16 insertions(+), 31 deletions(-) > > diff --git a/io_uring/rsrc.c b/io_uring/rsrc.c > index 3198cf854db1..790ed3c1bcc8 100644 > --- a/io_uring/rsrc.c > +++ b/io_uring/rsrc.c > @@ -945,7 +945,8 @@ static int io_sqe_buffer_register(struct io_ring_ctx *ctx, struct iovec *iov, > unsigned long off; > size_t size; > int ret, nr_pages, i; > - struct folio *folio = NULL; > + struct io_imu_folio_data data; > + bool coalesced; > > *pimu = (struct io_mapped_ubuf *)&dummy_ubuf; > if (!iov->iov_base) > @@ -960,31 +961,8 @@ static int io_sqe_buffer_register(struct io_ring_ctx *ctx, struct iovec *iov, > goto done; > } > > - /* If it's a huge page, try to coalesce them into a single bvec entry */ > - if (nr_pages > 1) { > - folio = page_folio(pages[0]); > - for (i = 1; i < nr_pages; i++) { > - /* > - * Pages must be consecutive and on the same folio for > - * this to work > - */ > - if (page_folio(pages[i]) != folio || > - pages[i] != pages[i - 1] + 1) { > - folio = NULL; > - break; > - } > - } > - if (folio) { > - /* > - * The pages are bound to the folio, it doesn't > - * actually unpin them but drops all but one reference, > - * which is usually put down by io_buffer_unmap(). > - * Note, needs a better helper. 
> - */ > - unpin_user_pages(&pages[1], nr_pages - 1); > - nr_pages = 1; > - } > - } > + /* If it's huge page(s), try to coalesce them into fewer bvec entries */ > + coalesced = io_try_coalesce_buffer(&pages, &nr_pages, &data); > > imu = kvmalloc(struct_size(imu, bvec, nr_pages), GFP_KERNEL); > if (!imu) > @@ -1004,17 +982,24 @@ static int io_sqe_buffer_register(struct io_ring_ctx *ctx, struct iovec *iov, > imu->nr_bvecs = nr_pages; > imu->folio_shift = PAGE_SHIFT; > imu->folio_mask = PAGE_MASK; > + if (coalesced) { > + imu->folio_shift = data.folio_shift; > + imu->folio_mask = ~((1UL << data.folio_shift) - 1); > + } > *pimu = imu; > ret = 0; > > - if (folio) { > - bvec_set_page(&imu->bvec[0], pages[0], size, off); > - goto done; > - } > for (i = 0; i < nr_pages; i++) { > size_t vec_len; > > - vec_len = min_t(size_t, size, PAGE_SIZE - off); > + if (coalesced) { > + size_t seg_size = i ? data.folio_size : > + PAGE_SIZE * data.nr_pages_head; When you're compacting the page array, instead of taking a middle page for the first folio, you can set it to the first page in the folio and fix up the offset. Kind of: new_array[0] = compound_head(old_array[0]); off += folio_page_idx(folio, old_array[0]) << PAGE_SHIFT; With that change you should be able to treat it in a uniform way without branching. off = (unsigned long) iov->iov_base & ~folio_mask; vec_len = min_t(size_t, size, folio_size - off); > + > + vec_len = min_t(size_t, size, seg_size - off); > + } else { > + vec_len = min_t(size_t, size, PAGE_SIZE - off); > + } > bvec_set_page(&imu->bvec[i], pages[i], vec_len, off); > off = 0; > size -= vec_len;
On 2024-07-09 13:17 UTC, Pavel Begunkov wrote: > On 6/28/24 09:44, Chenliang Li wrote: >> - if (folio) { >> - bvec_set_page(&imu->bvec[0], pages[0], size, off); >> - goto done; >> - } >> for (i = 0; i < nr_pages; i++) { >> size_t vec_len; >> >> - vec_len = min_t(size_t, size, PAGE_SIZE - off); >> + if (coalesced) { >> + size_t seg_size = i ? data.folio_size : >> + PAGE_SIZE * data.nr_pages_head; > > When you're compacting the page array, instead of taking a middle > page for the first folio, you can set it to the first page in the > folio and fix up the offset. Kind of: > > new_array[0] = compound_head(old_array[0]); > off += folio_page_idx(folio, old_array[0]) << PAGE_SHIFT; > > > With that change you should be able to treat it in a uniform way > without branching. > > off = (unsigned long) iov->iov_base & ~folio_mask; > vec_len = min_t(size_t, size, folio_size - off); That's brilliant. Will change it this way. Thanks, Chenliang Li
diff --git a/io_uring/rsrc.c b/io_uring/rsrc.c index 3198cf854db1..790ed3c1bcc8 100644 --- a/io_uring/rsrc.c +++ b/io_uring/rsrc.c @@ -945,7 +945,8 @@ static int io_sqe_buffer_register(struct io_ring_ctx *ctx, struct iovec *iov, unsigned long off; size_t size; int ret, nr_pages, i; - struct folio *folio = NULL; + struct io_imu_folio_data data; + bool coalesced; *pimu = (struct io_mapped_ubuf *)&dummy_ubuf; if (!iov->iov_base) @@ -960,31 +961,8 @@ static int io_sqe_buffer_register(struct io_ring_ctx *ctx, struct iovec *iov, goto done; } - /* If it's a huge page, try to coalesce them into a single bvec entry */ - if (nr_pages > 1) { - folio = page_folio(pages[0]); - for (i = 1; i < nr_pages; i++) { - /* - * Pages must be consecutive and on the same folio for - * this to work - */ - if (page_folio(pages[i]) != folio || - pages[i] != pages[i - 1] + 1) { - folio = NULL; - break; - } - } - if (folio) { - /* - * The pages are bound to the folio, it doesn't - * actually unpin them but drops all but one reference, - * which is usually put down by io_buffer_unmap(). - * Note, needs a better helper. - */ - unpin_user_pages(&pages[1], nr_pages - 1); - nr_pages = 1; - } - } + /* If it's huge page(s), try to coalesce them into fewer bvec entries */ + coalesced = io_try_coalesce_buffer(&pages, &nr_pages, &data); imu = kvmalloc(struct_size(imu, bvec, nr_pages), GFP_KERNEL); if (!imu) @@ -1004,17 +982,24 @@ static int io_sqe_buffer_register(struct io_ring_ctx *ctx, struct iovec *iov, imu->nr_bvecs = nr_pages; imu->folio_shift = PAGE_SHIFT; imu->folio_mask = PAGE_MASK; + if (coalesced) { + imu->folio_shift = data.folio_shift; + imu->folio_mask = ~((1UL << data.folio_shift) - 1); + } *pimu = imu; ret = 0; - if (folio) { - bvec_set_page(&imu->bvec[0], pages[0], size, off); - goto done; - } for (i = 0; i < nr_pages; i++) { size_t vec_len; - vec_len = min_t(size_t, size, PAGE_SIZE - off); + if (coalesced) { + size_t seg_size = i ? 
data.folio_size : + PAGE_SIZE * data.nr_pages_head; + + vec_len = min_t(size_t, size, seg_size - off); + } else { + vec_len = min_t(size_t, size, PAGE_SIZE - off); + } bvec_set_page(&imu->bvec[i], pages[i], vec_len, off); off = 0; size -= vec_len;
Modify io_sqe_buffer_register to enable coalescing for multi-hugepage fixed buffers. Signed-off-by: Chenliang Li <cliang01.li@samsung.com> --- io_uring/rsrc.c | 47 ++++++++++++++++------------------------------- 1 file changed, 16 insertions(+), 31 deletions(-)