[v5,3/3] io_uring/rsrc: enable multi-hugepage buffer coalescing

Message ID 20240628084411.2371-4-cliang01.li@samsung.com (mailing list archive)
State New
Series io_uring/rsrc: coalescing multi-hugepage registered buffers

Commit Message

Chenliang Li June 28, 2024, 8:44 a.m. UTC
Modify io_sqe_buffer_register() to enable coalescing of
multi-hugepage fixed buffers.

Signed-off-by: Chenliang Li <cliang01.li@samsung.com>
---
 io_uring/rsrc.c | 47 ++++++++++++++++-------------------------------
 1 file changed, 16 insertions(+), 31 deletions(-)

Comments

Pavel Begunkov July 9, 2024, 1:17 p.m. UTC | #1
On 6/28/24 09:44, Chenliang Li wrote:
> Modify io_sqe_buffer_register() to enable coalescing of
> multi-hugepage fixed buffers.
> 
> Signed-off-by: Chenliang Li <cliang01.li@samsung.com>
> ---
>   io_uring/rsrc.c | 47 ++++++++++++++++-------------------------------
>   1 file changed, 16 insertions(+), 31 deletions(-)
> 
> diff --git a/io_uring/rsrc.c b/io_uring/rsrc.c
> index 3198cf854db1..790ed3c1bcc8 100644
> --- a/io_uring/rsrc.c
> +++ b/io_uring/rsrc.c
> @@ -945,7 +945,8 @@ static int io_sqe_buffer_register(struct io_ring_ctx *ctx, struct iovec *iov,
>   	unsigned long off;
>   	size_t size;
>   	int ret, nr_pages, i;
> -	struct folio *folio = NULL;
> +	struct io_imu_folio_data data;
> +	bool coalesced;
>   
>   	*pimu = (struct io_mapped_ubuf *)&dummy_ubuf;
>   	if (!iov->iov_base)
> @@ -960,31 +961,8 @@ static int io_sqe_buffer_register(struct io_ring_ctx *ctx, struct iovec *iov,
>   		goto done;
>   	}
>   
> -	/* If it's a huge page, try to coalesce them into a single bvec entry */
> -	if (nr_pages > 1) {
> -		folio = page_folio(pages[0]);
> -		for (i = 1; i < nr_pages; i++) {
> -			/*
> -			 * Pages must be consecutive and on the same folio for
> -			 * this to work
> -			 */
> -			if (page_folio(pages[i]) != folio ||
> -			    pages[i] != pages[i - 1] + 1) {
> -				folio = NULL;
> -				break;
> -			}
> -		}
> -		if (folio) {
> -			/*
> -			 * The pages are bound to the folio, it doesn't
> -			 * actually unpin them but drops all but one reference,
> -			 * which is usually put down by io_buffer_unmap().
> -			 * Note, needs a better helper.
> -			 */
> -			unpin_user_pages(&pages[1], nr_pages - 1);
> -			nr_pages = 1;
> -		}
> -	}
> +	/* If it's huge page(s), try to coalesce them into fewer bvec entries */
> +	coalesced = io_try_coalesce_buffer(&pages, &nr_pages, &data);
>   
>   	imu = kvmalloc(struct_size(imu, bvec, nr_pages), GFP_KERNEL);
>   	if (!imu)
> @@ -1004,17 +982,24 @@ static int io_sqe_buffer_register(struct io_ring_ctx *ctx, struct iovec *iov,
>   	imu->nr_bvecs = nr_pages;
>   	imu->folio_shift = PAGE_SHIFT;
>   	imu->folio_mask = PAGE_MASK;
> +	if (coalesced) {
> +		imu->folio_shift = data.folio_shift;
> +		imu->folio_mask = ~((1UL << data.folio_shift) - 1);
> +	}
>   	*pimu = imu;
>   	ret = 0;
>   
> -	if (folio) {
> -		bvec_set_page(&imu->bvec[0], pages[0], size, off);
> -		goto done;
> -	}
>   	for (i = 0; i < nr_pages; i++) {
>   		size_t vec_len;
>   
> -		vec_len = min_t(size_t, size, PAGE_SIZE - off);
> +		if (coalesced) {
> +			size_t seg_size = i ? data.folio_size :
> +				PAGE_SIZE * data.nr_pages_head;

When you're compacting the page array, instead of taking a middle
page for the first folio, you can set it to the first page in the
folio and fix up the offset. Kind of:

new_array[0] = compound_head(old_array[0]);
off += folio_page_idx(folio, old_array[0]) << PAGE_SHIFT;


With that change you should be able to treat it in a uniform way
without branching.

off = (unsigned long) iov->iov_base & ~folio_mask;
vec_len = min_t(size_t, size, folio_size - off);


> +
> +			vec_len = min_t(size_t, size, seg_size - off);
> +		} else {
> +			vec_len = min_t(size_t, size, PAGE_SIZE - off);
> +		}
>   		bvec_set_page(&imu->bvec[i], pages[i], vec_len, off);
>   		off = 0;
>   		size -= vec_len;
Chenliang Li July 10, 2024, 2:28 a.m. UTC | #2
On 2024-07-09 13:17 UTC, Pavel Begunkov wrote:
> On 6/28/24 09:44, Chenliang Li wrote:
>> -	if (folio) {
>> -		bvec_set_page(&imu->bvec[0], pages[0], size, off);
>> -		goto done;
>> -	}
>>   	for (i = 0; i < nr_pages; i++) {
>>   		size_t vec_len;
>>   
>> -		vec_len = min_t(size_t, size, PAGE_SIZE - off);
>> +		if (coalesced) {
>> +			size_t seg_size = i ? data.folio_size :
>> +				PAGE_SIZE * data.nr_pages_head;
>
> When you're compacting the page array, instead of taking a middle
> page for the first folio, you can set it to the first page in the
> folio and fix up the offset. Kind of:
>
> new_array[0] = compound_head(old_array[0]);
> off += folio_page_idx(folio, old_array[0]) << PAGE_SHIFT;
>
>
> With that change you should be able to treat it in a uniform way
> without branching.
> 
> off = (unsigned long) iov->iov_base & ~folio_mask;
> vec_len = min_t(size_t, size, folio_size - off);

That's brilliant. Will change it this way.

Thanks,
Chenliang Li
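
A minimal, self-contained sketch of the uniform loop Pavel suggests, written
as a userspace program so the arithmetic is easy to check. It assumes the
coalescing helper has already rebased pages[0] to the head of its folio and
folded the in-folio page offset into off; the folio geometry and buffer
layout below are made-up illustrative values, not taken from the patch.

#include <stdio.h>
#include <stddef.h>

/* Illustrative values only: 2 MiB folios, a registration whose user
 * address sits 3 pages + 100 bytes into the first folio and spans 5 MiB. */
#define FOLIO_SHIFT	21
#define FOLIO_SIZE	(1UL << FOLIO_SHIFT)
#define FOLIO_MASK	(~(FOLIO_SIZE - 1))

static size_t min_size(size_t a, size_t b)
{
	return a < b ? a : b;
}

int main(void)
{
	unsigned long iov_base = 3 * 4096 + 100;	/* low bits of the user address */
	size_t size = 5UL * 1024 * 1024;		/* 5 MiB registration */
	/* With pages[0] rebased to the folio head, the first segment needs
	 * no special case: every iteration uses folio_size - off. */
	unsigned long off = iov_base & ~FOLIO_MASK;
	int i;

	for (i = 0; size != 0; i++) {
		size_t vec_len = min_size(size, FOLIO_SIZE - off);

		printf("bvec[%d]: off=%lu len=%zu\n", i, off, vec_len);
		off = 0;
		size -= vec_len;
	}
	return 0;
}

This prints three segments (one partial head, one full folio, one partial
tail), matching what the single min_t() computation in the loop would
produce once the head-folio branch is gone.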

Patch

diff --git a/io_uring/rsrc.c b/io_uring/rsrc.c
index 3198cf854db1..790ed3c1bcc8 100644
--- a/io_uring/rsrc.c
+++ b/io_uring/rsrc.c
@@ -945,7 +945,8 @@  static int io_sqe_buffer_register(struct io_ring_ctx *ctx, struct iovec *iov,
 	unsigned long off;
 	size_t size;
 	int ret, nr_pages, i;
-	struct folio *folio = NULL;
+	struct io_imu_folio_data data;
+	bool coalesced;
 
 	*pimu = (struct io_mapped_ubuf *)&dummy_ubuf;
 	if (!iov->iov_base)
@@ -960,31 +961,8 @@  static int io_sqe_buffer_register(struct io_ring_ctx *ctx, struct iovec *iov,
 		goto done;
 	}
 
-	/* If it's a huge page, try to coalesce them into a single bvec entry */
-	if (nr_pages > 1) {
-		folio = page_folio(pages[0]);
-		for (i = 1; i < nr_pages; i++) {
-			/*
-			 * Pages must be consecutive and on the same folio for
-			 * this to work
-			 */
-			if (page_folio(pages[i]) != folio ||
-			    pages[i] != pages[i - 1] + 1) {
-				folio = NULL;
-				break;
-			}
-		}
-		if (folio) {
-			/*
-			 * The pages are bound to the folio, it doesn't
-			 * actually unpin them but drops all but one reference,
-			 * which is usually put down by io_buffer_unmap().
-			 * Note, needs a better helper.
-			 */
-			unpin_user_pages(&pages[1], nr_pages - 1);
-			nr_pages = 1;
-		}
-	}
+	/* If it's huge page(s), try to coalesce them into fewer bvec entries */
+	coalesced = io_try_coalesce_buffer(&pages, &nr_pages, &data);
 
 	imu = kvmalloc(struct_size(imu, bvec, nr_pages), GFP_KERNEL);
 	if (!imu)
@@ -1004,17 +982,24 @@  static int io_sqe_buffer_register(struct io_ring_ctx *ctx, struct iovec *iov,
 	imu->nr_bvecs = nr_pages;
 	imu->folio_shift = PAGE_SHIFT;
 	imu->folio_mask = PAGE_MASK;
+	if (coalesced) {
+		imu->folio_shift = data.folio_shift;
+		imu->folio_mask = ~((1UL << data.folio_shift) - 1);
+	}
 	*pimu = imu;
 	ret = 0;
 
-	if (folio) {
-		bvec_set_page(&imu->bvec[0], pages[0], size, off);
-		goto done;
-	}
 	for (i = 0; i < nr_pages; i++) {
 		size_t vec_len;
 
-		vec_len = min_t(size_t, size, PAGE_SIZE - off);
+		if (coalesced) {
+			size_t seg_size = i ? data.folio_size :
+				PAGE_SIZE * data.nr_pages_head;
+
+			vec_len = min_t(size_t, size, seg_size - off);
+		} else {
+			vec_len = min_t(size_t, size, PAGE_SIZE - off);
+		}
 		bvec_set_page(&imu->bvec[i], pages[i], vec_len, off);
 		off = 0;
 		size -= vec_len;
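
For reference, the userspace registration this series targets could look
roughly like the liburing program below; the 2 MiB default hugepage size,
an available hugepage pool, and the 4 MiB buffer size are assumptions made
for illustration, not something taken from the series.

/* Illustrative only: registers a fixed buffer backed by two 2 MiB
 * hugepages, the multi-hugepage case the coalescing path handles.
 * Build with: gcc demo.c -luring */
#include <liburing.h>
#include <sys/mman.h>
#include <stdio.h>

#define BUF_SIZE	(4UL * 1024 * 1024)	/* two 2 MiB hugepages */

int main(void)
{
	struct io_uring ring;
	struct iovec iov;
	void *buf;
	int ret;

	buf = mmap(NULL, BUF_SIZE, PROT_READ | PROT_WRITE,
		   MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB, -1, 0);
	if (buf == MAP_FAILED) {
		perror("mmap");
		return 1;
	}

	ret = io_uring_queue_init(8, &ring, 0);
	if (ret < 0) {
		fprintf(stderr, "queue_init: %d\n", ret);
		return 1;
	}

	iov.iov_base = buf;
	iov.iov_len = BUF_SIZE;
	/* With coalescing, the kernel can describe the two hugepages with
	 * two bvec entries instead of 1024 page-sized ones. */
	ret = io_uring_register_buffers(&ring, &iov, 1);
	if (ret < 0)
		fprintf(stderr, "register_buffers: %d\n", ret);

	io_uring_queue_exit(&ring);
	munmap(buf, BUF_SIZE);
	return ret ? 1 : 0;
}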