
[for-next,v2,07/18] RDMA/rxe: Add routine to compute the number of frags

Message ID: 20221031202805.19138-7-rpearsonhpe@gmail.com (mailing list archive)
State: Changes Requested
Delegated to: Jason Gunthorpe
Series: RDMA/rxe: Enable scatter/gather support for skbs

Commit Message

Bob Pearson Oct. 31, 2022, 8:27 p.m. UTC
Add a subroutine named rxe_num_mr_frags() to compute the
number of skb frags needed to hold length bytes in an skb
when sending data from an mr starting at iova.

Signed-off-by: Bob Pearson <rpearsonhpe@gmail.com>
---
 drivers/infiniband/sw/rxe/rxe_loc.h |  1 +
 drivers/infiniband/sw/rxe/rxe_mr.c  | 68 +++++++++++++++++++++++++++++
 2 files changed, 69 insertions(+)

Comments

Jason Gunthorpe Nov. 24, 2022, 7:15 p.m. UTC | #1
On Mon, Oct 31, 2022 at 03:27:56PM -0500, Bob Pearson wrote:
> Add a subroutine named rxe_num_mr_frags() to compute the
> number of skb frags needed to hold length bytes in an skb
> when sending data from an mr starting at iova.
> 
> Signed-off-by: Bob Pearson <rpearsonhpe@gmail.com>
> ---
>  drivers/infiniband/sw/rxe/rxe_loc.h |  1 +
>  drivers/infiniband/sw/rxe/rxe_mr.c  | 68 +++++++++++++++++++++++++++++
>  2 files changed, 69 insertions(+)
> 
> diff --git a/drivers/infiniband/sw/rxe/rxe_loc.h b/drivers/infiniband/sw/rxe/rxe_loc.h
> index 81a611778d44..87fb052c1d0a 100644
> --- a/drivers/infiniband/sw/rxe/rxe_loc.h
> +++ b/drivers/infiniband/sw/rxe/rxe_loc.h
> @@ -70,6 +70,7 @@ int rxe_mr_init_user(struct rxe_dev *rxe, u64 start, u64 length, u64 iova,
>  int rxe_mr_init_fast(int max_pages, struct rxe_mr *mr);
>  int rxe_add_frag(struct sk_buff *skb, struct rxe_phys_buf *buf,
>  		 int length, int offset);
> +int rxe_num_mr_frags(struct rxe_mr *mr, u64 iova, int length);
>  int rxe_mr_copy(struct rxe_mr *mr, u64 iova, void *addr, int length,
>  		enum rxe_mr_copy_op op);
>  int copy_data(struct rxe_pd *pd, int access, struct rxe_dma_info *dma,
> diff --git a/drivers/infiniband/sw/rxe/rxe_mr.c b/drivers/infiniband/sw/rxe/rxe_mr.c
> index 2dcf37f32330..23abcf2a0198 100644
> --- a/drivers/infiniband/sw/rxe/rxe_mr.c
> +++ b/drivers/infiniband/sw/rxe/rxe_mr.c
> @@ -320,6 +320,74 @@ int rxe_add_frag(struct sk_buff *skb, struct rxe_phys_buf *buf,
>  	return 0;
>  }
>  
> +/**
> + * rxe_num_mr_frags() - Compute the number of skb frags needed to copy
> + *			length bytes from an mr to an skb frag list.
> + * @mr: mr to copy data from
> + * @iova: iova in memory region as starting point
> + * @length: number of bytes to transfer
> + *
> + * Returns: the number of frags needed or a negative error
> + */
> +int rxe_num_mr_frags(struct rxe_mr *mr, u64 iova, int length)
> +{

This seems too complicated, and isn't quite right anyhow.

The umem code builds up the SGT by combining physically adjacent pages
into contiguous chunks. The key thing to notice is that it will
combine pages that are not part of the same folio (compound page) into
SGL entries. This is fine and well for a DMA device

However, when you build an skb frag you can only put a folio into
it, since a folio has exactly one struct page refcount that controls
a folio's worth of memory lifetime.
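
For illustration only (add_mr_frag() is a hypothetical stand-in, not
the patch's rxe_add_frag()): whatever is attached as a frag is pinned
by exactly one page reference, so it must fit within one folio.

/* Hedged sketch -- needs <linux/skbuff.h>. Each frag pins its memory
 * with a single page reference, so the span attached here must not
 * cross a folio boundary.
 */
static int add_mr_frag(struct sk_buff *skb, struct page *page,
		       unsigned int offset, unsigned int len)
{
	int i = skb_shinfo(skb)->nr_frags;

	if (i >= MAX_SKB_FRAGS)
		return -EMSGSIZE;

	get_page(page);		/* the frag's own reference */
	skb_fill_page_desc(skb, i, page, offset, len);
	skb->len += len;
	skb->data_len += len;
	skb->truesize += len;

	return 0;
}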

So, e.g., if the umem code allowed you to create a 64K page size MR,
that doesn't guarantee that the folios are 64K in size, and thus it
doesn't guarantee that you can use 64K skb frags later.

The best you can do is (after the xarray conversion) to check what
was stuffed into the xarray and determine the smallest folio size
within the MR.

Then this is just simple math: the number of frags is the number of
smallest-size folios that span the requested IOVA range.
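
As a hedged sketch of that math (min_folio_shift is a hypothetical
per-MR field that the xarray scan above would fill in):

/* Hedged sketch of the suggested computation -- min_folio_shift is a
 * hypothetical field derived from scanning the xarray at registration.
 */
static int num_mr_frags(struct rxe_mr *mr, u64 iova, int length)
{
	unsigned int shift = mr->min_folio_shift;	/* hypothetical */

	if (length <= 0)
		return 0;

	/* count of min-size folios spanning [iova, iova + length) */
	return ((iova + length - 1) >> shift) - (iova >> shift) + 1;
}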

Jason

Patch

diff --git a/drivers/infiniband/sw/rxe/rxe_loc.h b/drivers/infiniband/sw/rxe/rxe_loc.h
index 81a611778d44..87fb052c1d0a 100644
--- a/drivers/infiniband/sw/rxe/rxe_loc.h
+++ b/drivers/infiniband/sw/rxe/rxe_loc.h
@@ -70,6 +70,7 @@ int rxe_mr_init_user(struct rxe_dev *rxe, u64 start, u64 length, u64 iova,
 int rxe_mr_init_fast(int max_pages, struct rxe_mr *mr);
 int rxe_add_frag(struct sk_buff *skb, struct rxe_phys_buf *buf,
 		 int length, int offset);
+int rxe_num_mr_frags(struct rxe_mr *mr, u64 iova, int length);
 int rxe_mr_copy(struct rxe_mr *mr, u64 iova, void *addr, int length,
 		enum rxe_mr_copy_op op);
 int copy_data(struct rxe_pd *pd, int access, struct rxe_dma_info *dma,
diff --git a/drivers/infiniband/sw/rxe/rxe_mr.c b/drivers/infiniband/sw/rxe/rxe_mr.c
index 2dcf37f32330..23abcf2a0198 100644
--- a/drivers/infiniband/sw/rxe/rxe_mr.c
+++ b/drivers/infiniband/sw/rxe/rxe_mr.c
@@ -320,6 +320,74 @@ int rxe_add_frag(struct sk_buff *skb, struct rxe_phys_buf *buf,
 	return 0;
 }
 
+/**
+ * rxe_num_mr_frags() - Compute the number of skb frags needed to copy
+ *			length bytes from an mr to an skb frag list.
+ * @mr: mr to copy data from
+ * @iova: iova in memory region as starting point
+ * @length: number of bytes to transfer
+ *
+ * Returns: the number of frags needed or a negative error
+ */
+int rxe_num_mr_frags(struct rxe_mr *mr, u64 iova, int length)
+{
+	struct rxe_phys_buf *buf;
+	struct rxe_map **map;
+	size_t buf_offset;
+	int bytes;
+	int m;
+	int i;
+	int num_frags = 0;
+	int err;
+
+	if (length == 0)
+		return 0;
+
+	if (mr->type == IB_MR_TYPE_DMA) {
+		while (length > 0) {
+			buf_offset = iova & ~PAGE_MASK;
+			bytes = PAGE_SIZE - buf_offset;
+			if (bytes > length)
+				bytes = length;
+			/* advance iova so later pages start at offset 0 */
+			iova += bytes;
+			length -= bytes;
+			num_frags++;
+		}
+
+		return num_frags;
+	}
+
+	WARN_ON_ONCE(!mr->map);
+
+	err = mr_check_range(mr, iova, length);
+	if (err)
+		return err;
+
+	lookup_iova(mr, iova, &m, &i, &buf_offset);
+
+	map = mr->map + m;
+	buf = map[0]->buf + i;
+
+	while (length > 0) {
+		bytes = buf->size - buf_offset;
+		if (bytes > length)
+			bytes = length;
+		length -= bytes;
+		buf_offset = 0;
+		buf++;
+		i++;
+		num_frags++;
+
+		/* we won't overrun since we checked range above */
+		if (i == RXE_BUF_PER_MAP) {
+			i = 0;
+			map++;
+			buf = map[0]->buf;
+		}
+	}
+
+	return num_frags;
+}
+
 /* copy data from a range (vaddr, vaddr+length-1) to or from
  * a mr object starting at iova.
  */