[v5,1/3] io_uring/rsrc: add hugepage fixed buffer coalesce helpers

Message ID 20240628084411.2371-2-cliang01.li@samsung.com
State New
Series io_uring/rsrc: coalescing multi-hugepage registered buffers

Commit Message

Chenliang Li June 28, 2024, 8:44 a.m. UTC
Introduce helper functions to check and coalesce hugepage-backed fixed
buffers. Coalescing reduces both the time and the space consumed by
mapping and storing multi-hugepage fixed buffers. Currently only
single-hugepage buffers are coalesced, so add support for coalescing
multi-hugepage fixed buffers.

A coalescable multi-hugepage buffer should fully cover its folios
(except potentially the first and last one), and these folios should
all have the same size. These requirements make later processing
easier, and io_import_fixed needs same-sized chunks for fast iov_iter
adjustment.
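
For example (illustrative sizes only), a 4MB buffer starting 1MB into
its first 2MB hugepage folio spans three folios; coalescing shrinks the
1024 pinned-page entries down to 3 folio head pages:

  folio 0 (head): last 1MB covered   -> nr_pages_head = 256
  folio 1 (mid):  fully covered      -> nr_pages_mid  = 512
  folio 2 (tail): first 1MB covered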

Signed-off-by: Chenliang Li <cliang01.li@samsung.com>
---
 io_uring/rsrc.c | 87 +++++++++++++++++++++++++++++++++++++++++++++++++
 io_uring/rsrc.h |  9 +++++
 2 files changed, 96 insertions(+)

Comments

Pavel Begunkov July 9, 2024, 1:09 p.m. UTC | #1
On 6/28/24 09:44, Chenliang Li wrote:
> Introduce helper functions to check and coalesce hugepage-backed fixed
> buffers. Coalescing reduces both the time and the space consumed by
> mapping and storing multi-hugepage fixed buffers. Currently only
> single-hugepage buffers are coalesced, so add support for coalescing
> multi-hugepage fixed buffers.
> 
> A coalescable multi-hugepage buffer should fully cover its folios
> (except potentially the first and last one), and these folios should
> all have the same size. These requirements make later processing
> easier, and io_import_fixed needs same-sized chunks for fast iov_iter
> adjustment.
> 
> Signed-off-by: Chenliang Li <cliang01.li@samsung.com>
> ---
>   io_uring/rsrc.c | 87 +++++++++++++++++++++++++++++++++++++++++++++++++
>   io_uring/rsrc.h |  9 +++++
>   2 files changed, 96 insertions(+)
> 
> diff --git a/io_uring/rsrc.c b/io_uring/rsrc.c
> index 60c00144471a..c88ce8c38515 100644
> --- a/io_uring/rsrc.c
> +++ b/io_uring/rsrc.c
> @@ -849,6 +849,93 @@ static int io_buffer_account_pin(struct io_ring_ctx *ctx, struct page **pages,
>   	return ret;
>   }
>   
> +static bool io_do_coalesce_buffer(struct page ***pages, int *nr_pages,
> +				struct io_imu_folio_data *data, int nr_folios)
> +{
> +	struct page **page_array = *pages, **new_array = NULL;
> +	int nr_pages_left = *nr_pages, i, j;
> +
> +	/* Store head pages only */
> +	new_array = kvmalloc_array(nr_folios, sizeof(struct page *),
> +					GFP_KERNEL);
> +	if (!new_array)
> +		return false;
> +
> +	new_array[0] = page_array[0];
> +	/*
> +	 * The pages are bound to the folio, so this doesn't
> +	 * actually unpin them; it drops all but one reference,
> +	 * which is later put down by io_buffer_unmap().
> +	 * Note: needs a better helper.
> +	 */
> +	if (data->nr_pages_head > 1)
> +		unpin_user_pages(&page_array[1], data->nr_pages_head - 1);
> +
> +	j = data->nr_pages_head;
> +	nr_pages_left -= data->nr_pages_head;
> +	for (i = 1; i < nr_folios; i++) {
> +		unsigned int nr_unpin;
> +
> +		new_array[i] = page_array[j];
> +		nr_unpin = min_t(unsigned int, nr_pages_left - 1,
> +					data->nr_pages_mid - 1);
> +		if (nr_unpin)
> +			unpin_user_pages(&page_array[j+1], nr_unpin);
> +		j += data->nr_pages_mid;
> +		nr_pages_left -= data->nr_pages_mid;
> +	}
> +	kvfree(page_array);
> +	*pages = new_array;
> +	*nr_pages = nr_folios;
> +	return true;
> +}
> +
> +static bool io_try_coalesce_buffer(struct page ***pages, int *nr_pages,
> +					 struct io_imu_folio_data *data)

I believe an unused static function will trigger a warning, and we
don't want that, especially since erroring on warnings is a thing.

You can either reshuffle the patches or at least add a
__maybe_unused attribute.
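
E.g. for the latter, a minimal sketch (nothing changed apart from the
attribute on the declaration from this patch):

static bool __maybe_unused io_try_coalesce_buffer(struct page ***pages,
					int *nr_pages,
					struct io_imu_folio_data *data)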


> +{
> +	struct page **page_array = *pages;
> +	struct folio *folio = page_folio(page_array[0]);
> +	unsigned int count = 1, nr_folios = 1;
> +	int i;
> +
> +	if (*nr_pages <= 1)
> +		return false;
> +
> +	data->nr_pages_mid = folio_nr_pages(folio);
> +	if (data->nr_pages_mid == 1)
> +		return false;
> +
> +	data->folio_shift = folio_shift(folio);
> +	data->folio_size = folio_size(folio);
> +	/*
> +	 * Check if pages are contiguous inside a folio, and all folios have
> +	 * the same page count except for the head and tail.
> +	 */
> +	for (i = 1; i < *nr_pages; i++) {
> +		if (page_folio(page_array[i]) == folio &&
> +			page_array[i] == page_array[i-1] + 1) {
> +			count++;
> +			continue;
> +		}

Seems like the first and last folios are not required to be
border-aligned, i.e. the first one should end at the folio_size
boundary, and the last one should start at the beginning of its folio.

Not really a bug, but we might run into problems with optimising
calculations down the road if we don't restrict it.
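
Something along these lines in the loop might do (untested sketch;
folio_page_idx() gives a page's index within its folio):

	/* the head folio's run must extend to its very last page */
	if (nr_folios == 1 &&
	    folio_page_idx(folio, page_array[i - 1]) !=
			data->nr_pages_mid - 1)
		return false;

	/* each following folio must be entered at its first page */
	if (folio_page_idx(page_folio(page_array[i]), page_array[i]) != 0)
		return false;

That would also keep the same-sized-chunk assumption in
io_import_fixed intact.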

> +
> +		if (nr_folios == 1)
> +			data->nr_pages_head = count;
> +		else if (count != data->nr_pages_mid)
> +			return false;
> +
> +		folio = page_folio(page_array[i]);
> +		if (folio_size(folio) != data->folio_size)
> +			return false;
> +
> +		count = 1;
> +		nr_folios++;
> +	}
> +	if (nr_folios == 1)
> +		data->nr_pages_head = count;
> +
> +	return io_do_coalesce_buffer(pages, nr_pages, data, nr_folios);
> +}
> +
>   static int io_sqe_buffer_register(struct io_ring_ctx *ctx, struct iovec *iov,
>   				  struct io_mapped_ubuf **pimu,
>   				  struct page **last_hpage)
> diff --git a/io_uring/rsrc.h b/io_uring/rsrc.h
> index c032ca3436ca..cc66323535f6 100644
> --- a/io_uring/rsrc.h
> +++ b/io_uring/rsrc.h
> @@ -50,6 +50,15 @@ struct io_mapped_ubuf {
>   	struct bio_vec	bvec[] __counted_by(nr_bvecs);
>   };
>   
> +struct io_imu_folio_data {
> +	/* Head folio can be partially included in the fixed buf */
> +	unsigned int	nr_pages_head;
> +	/* Non-head/tail folios must be fully included */
> +	unsigned int	nr_pages_mid;
> +	unsigned int	folio_shift;
> +	size_t		folio_size;
> +};
> +
>   void io_rsrc_node_ref_zero(struct io_rsrc_node *node);
>   void io_rsrc_node_destroy(struct io_ring_ctx *ctx, struct io_rsrc_node *ref_node);
>   struct io_rsrc_node *io_rsrc_node_alloc(struct io_ring_ctx *ctx);
Chenliang Li July 10, 2024, 2:23 a.m. UTC | #2
On 2024-07-09 13:09 UTC, Pavel Begunkov wrote:
> On 6/28/24 09:44, Chenliang Li wrote:
>> +static bool io_try_coalesce_buffer(struct page ***pages, int *nr_pages,
>> +					 struct io_imu_folio_data *data)
>
> I believe an unused static function will trigger a warning, and we
> don't want that, especially since erroring on warnings is a thing.
>
> You can either reshuffle the patches or at least add a
> __maybe_unused attribute.

OK, will reshuffle the patchset.

>> +	/*
>> +	 * Check if pages are contiguous inside a folio, and all folios have
>> +	 * the same page count except for the head and tail.
>> +	 */
>> +	for (i = 1; i < *nr_pages; i++) {
>> +		if (page_folio(page_array[i]) == folio &&
>> +			page_array[i] == page_array[i-1] + 1) {
>> +			count++;
>> +			continue;
>> +		}
>
> Seems like the first and last folios are not required to be
> border-aligned, i.e. the first one should end at the folio_size
> boundary, and the last one should start at the beginning of its folio.
>
> Not really a bug, but we might run into problems with optimising
> calculations down the road if we don't restrict it.

Will add restrictions for that.

Thanks,
Chenliang Li

Patch

diff --git a/io_uring/rsrc.c b/io_uring/rsrc.c
index 60c00144471a..c88ce8c38515 100644
--- a/io_uring/rsrc.c
+++ b/io_uring/rsrc.c
@@ -849,6 +849,93 @@  static int io_buffer_account_pin(struct io_ring_ctx *ctx, struct page **pages,
 	return ret;
 }
 
+static bool io_do_coalesce_buffer(struct page ***pages, int *nr_pages,
+				struct io_imu_folio_data *data, int nr_folios)
+{
+	struct page **page_array = *pages, **new_array = NULL;
+	int nr_pages_left = *nr_pages, i, j;
+
+	/* Store head pages only */
+	new_array = kvmalloc_array(nr_folios, sizeof(struct page *),
+					GFP_KERNEL);
+	if (!new_array)
+		return false;
+
+	new_array[0] = page_array[0];
+	/*
+	 * The pages are bound to the folio, so this doesn't
+	 * actually unpin them; it drops all but one reference,
+	 * which is later put down by io_buffer_unmap().
+	 * Note: needs a better helper.
+	 */
+	if (data->nr_pages_head > 1)
+		unpin_user_pages(&page_array[1], data->nr_pages_head - 1);
+
+	j = data->nr_pages_head;
+	nr_pages_left -= data->nr_pages_head;
+	for (i = 1; i < nr_folios; i++) {
+		unsigned int nr_unpin;
+
+		new_array[i] = page_array[j];
+		nr_unpin = min_t(unsigned int, nr_pages_left - 1,
+					data->nr_pages_mid - 1);
+		if (nr_unpin)
+			unpin_user_pages(&page_array[j+1], nr_unpin);
+		j += data->nr_pages_mid;
+		nr_pages_left -= data->nr_pages_mid;
+	}
+	kvfree(page_array);
+	*pages = new_array;
+	*nr_pages = nr_folios;
+	return true;
+}
+
+static bool io_try_coalesce_buffer(struct page ***pages, int *nr_pages,
+					 struct io_imu_folio_data *data)
+{
+	struct page **page_array = *pages;
+	struct folio *folio = page_folio(page_array[0]);
+	unsigned int count = 1, nr_folios = 1;
+	int i;
+
+	if (*nr_pages <= 1)
+		return false;
+
+	data->nr_pages_mid = folio_nr_pages(folio);
+	if (data->nr_pages_mid == 1)
+		return false;
+
+	data->folio_shift = folio_shift(folio);
+	data->folio_size = folio_size(folio);
+	/*
+	 * Check if pages are contiguous inside a folio, and all folios have
+	 * the same page count except for the head and tail.
+	 */
+	for (i = 1; i < *nr_pages; i++) {
+		if (page_folio(page_array[i]) == folio &&
+			page_array[i] == page_array[i-1] + 1) {
+			count++;
+			continue;
+		}
+
+		if (nr_folios == 1)
+			data->nr_pages_head = count;
+		else if (count != data->nr_pages_mid)
+			return false;
+
+		folio = page_folio(page_array[i]);
+		if (folio_size(folio) != data->folio_size)
+			return false;
+
+		count = 1;
+		nr_folios++;
+	}
+	if (nr_folios == 1)
+		data->nr_pages_head = count;
+
+	return io_do_coalesce_buffer(pages, nr_pages, data, nr_folios);
+}
+
 static int io_sqe_buffer_register(struct io_ring_ctx *ctx, struct iovec *iov,
 				  struct io_mapped_ubuf **pimu,
 				  struct page **last_hpage)
diff --git a/io_uring/rsrc.h b/io_uring/rsrc.h
index c032ca3436ca..cc66323535f6 100644
--- a/io_uring/rsrc.h
+++ b/io_uring/rsrc.h
@@ -50,6 +50,15 @@  struct io_mapped_ubuf {
 	struct bio_vec	bvec[] __counted_by(nr_bvecs);
 };
 
+struct io_imu_folio_data {
+	/* Head folio can be partially included in the fixed buf */
+	unsigned int	nr_pages_head;
+	/* Non-head/tail folios must be fully included */
+	unsigned int	nr_pages_mid;
+	unsigned int	folio_shift;
+	size_t		folio_size;
+};
+
 void io_rsrc_node_ref_zero(struct io_rsrc_node *node);
 void io_rsrc_node_destroy(struct io_ring_ctx *ctx, struct io_rsrc_node *ref_node);
 struct io_rsrc_node *io_rsrc_node_alloc(struct io_ring_ctx *ctx);