diff mbox series

[for-next,v2,08/18] RDMA/rxe: Extend rxe_mr_copy to support skb frags

Message ID 20221031202805.19138-8-rpearsonhpe@gmail.com (mailing list archive)
State Changes Requested
Delegated to: Jason Gunthorpe
Headers show
Series RDMA/rxe: Enable scatter/gather support for skbs | expand

Commit Message

Bob Pearson Oct. 31, 2022, 8:27 p.m. UTC
rxe_mr_copy() currently supports copying between an mr and
a contiguous region of kernel memory.

Rename rxe_mr_copy() to rxe_copy_mr_data().
Extend the operations to support copying between an mr and an skb
fragment list. Fixup calls to rxe_mr_copy() to support the new
API.

This is in preparation for supporting fragmented skbs.

Signed-off-by: Bob Pearson <rpearsonhpe@gmail.com>
---
 drivers/infiniband/sw/rxe/rxe_loc.h  |   3 +
 drivers/infiniband/sw/rxe/rxe_mr.c   | 144 +++++++++++++++++++--------
 drivers/infiniband/sw/rxe/rxe_resp.c |  20 ++--
 3 files changed, 117 insertions(+), 50 deletions(-)
diff mbox series

Patch

diff --git a/drivers/infiniband/sw/rxe/rxe_loc.h b/drivers/infiniband/sw/rxe/rxe_loc.h
index 87fb052c1d0a..c62fc2613a01 100644
--- a/drivers/infiniband/sw/rxe/rxe_loc.h
+++ b/drivers/infiniband/sw/rxe/rxe_loc.h
@@ -71,6 +71,9 @@  int rxe_mr_init_fast(int max_pages, struct rxe_mr *mr);
 int rxe_add_frag(struct sk_buff *skb, struct rxe_phys_buf *buf,
 		 int length, int offset);
 int rxe_num_mr_frags(struct rxe_mr *mr, u64 iova, int length);
+int rxe_copy_mr_data(struct sk_buff *skb, struct rxe_mr *mr, u64 iova,
+		     void *addr, int skb_offset, int length,
+		     enum rxe_mr_copy_op op);
 int rxe_mr_copy(struct rxe_mr *mr, u64 iova, void *addr, int length,
 		enum rxe_mr_copy_op op);
 int copy_data(struct rxe_pd *pd, int access, struct rxe_dma_info *dma,
diff --git a/drivers/infiniband/sw/rxe/rxe_mr.c b/drivers/infiniband/sw/rxe/rxe_mr.c
index 23abcf2a0198..37d35413da94 100644
--- a/drivers/infiniband/sw/rxe/rxe_mr.c
+++ b/drivers/infiniband/sw/rxe/rxe_mr.c
@@ -343,7 +343,7 @@  int rxe_num_mr_frags(struct rxe_mr *mr, u64 iova, int length)
 	if (length == 0)
 		return 0;
 
-	if (mr->type == IB_MR_TYPE_DMA) {
+	if (mr->ibmr.type == IB_MR_TYPE_DMA) {
 		while (length > 0) {
 			buf_offset = iova & ~PAGE_MASK;
 			bytes = PAGE_SIZE - buf_offset;
@@ -388,70 +388,130 @@  int rxe_num_mr_frags(struct rxe_mr *mr, u64 iova, int length)
 	return num_frags;
 }
 
-/* copy data from a range (vaddr, vaddr+length-1) to or from
- * a mr object starting at iova.
+/**
+ * rxe_copy_mr_data() - transfer data between an MR and a packet
+ * @skb: the packet buffer
+ * @mr: the MR
+ * @iova: the address in the MR
+ * @addr: the address in the packet (TO/FROM MR only)
+ * @length: the length to transfer
+ * @op: copy operation (TO MR, FROM MR or FRAG MR)
+ *
+ * Copy data from a range (addr, addr+length-1) in a packet
+ * to or from a range in an MR object at (iova, iova+length-1).
+ * Or, build a frag list referencing the MR range.
+ *
+ * Caller must verify that the access permissions support the
+ * operation.
+ *
+ * Returns: 0 on success or an error
  */
-int rxe_mr_copy(struct rxe_mr *mr, u64 iova, void *addr, int length,
-		enum rxe_mr_copy_op op)
+int rxe_copy_mr_data(struct sk_buff *skb, struct rxe_mr *mr, u64 iova,
+		     void *addr, int skb_offset, int length,
+		     enum rxe_mr_copy_op op)
 {
-	int			err;
-	int			bytes;
-	u8			*va;
-	struct rxe_map		**map;
-	struct rxe_phys_buf	*buf;
-	int			m;
-	int			i;
-	size_t			offset;
+	struct rxe_phys_buf dmabuf;
+	struct rxe_phys_buf *buf;
+	struct rxe_map **map;
+	size_t buf_offset;
+	int bytes;
+	void *va;
+	int m;
+	int i;
+	int err = 0;
 
 	if (length == 0)
 		return 0;
 
-	if (mr->ibmr.type == IB_MR_TYPE_DMA) {
-		u8 *src, *dest;
-
-		src = (op == RXE_COPY_TO_MR) ? addr : ((void *)(uintptr_t)iova);
-
-		dest = (op == RXE_COPY_TO_MR) ? ((void *)(uintptr_t)iova) : addr;
+	switch (mr->ibmr.type) {
+	case IB_MR_TYPE_DMA:
+		va = (void *)(uintptr_t)iova;
+		switch (op) {
+		case RXE_COPY_TO_MR:
+			memcpy(va, addr, length);
+			break;
+		case RXE_COPY_FROM_MR:
+			memcpy(addr, va, length);
+			break;
+		case RXE_FRAG_TO_MR:
+			err = skb_copy_bits(skb, skb_offset, va, length);
+			if (err)
+				return err;
+			break;
+		case RXE_FRAG_FROM_MR:
+			/* limit frag length to PAGE_SIZE */
+			while (length) {
+				dmabuf.addr = iova & PAGE_MASK;
+				buf_offset = iova & ~PAGE_MASK;
+				bytes = PAGE_SIZE - buf_offset;
+				if (bytes > length)
+					bytes = length;
+				err = rxe_add_frag(skb, &dmabuf, bytes,
+						   buf_offset);
+				if (err)
+					return err;
+				iova += bytes;
+				length -= bytes;
+			}
+			break;
+		}
+		return 0;
 
-		memcpy(dest, src, length);
+	case IB_MR_TYPE_MEM_REG:
+	case IB_MR_TYPE_USER:
+		break;
 
-		return 0;
+	default:
+		pr_warn("%s: mr type (%d) not supported\n",
+			__func__, mr->ibmr.type);
+		return -EINVAL;
 	}
 
 	WARN_ON_ONCE(!mr->map);
 
 	err = mr_check_range(mr, iova, length);
-	if (err) {
-		err = -EFAULT;
-		goto err1;
-	}
+	if (err)
+		return -EFAULT;
 
-	lookup_iova(mr, iova, &m, &i, &offset);
+	lookup_iova(mr, iova, &m, &i, &buf_offset);
 
 	map = mr->map + m;
-	buf	= map[0]->buf + i;
+	buf = map[0]->buf + i;
 
 	while (length > 0) {
-		u8 *src, *dest;
-
-		va	= (u8 *)(uintptr_t)buf->addr + offset;
-		src = (op == RXE_COPY_TO_MR) ? addr : va;
-		dest = (op == RXE_COPY_TO_MR) ? va : addr;
-
-		bytes	= buf->size - offset;
-
+		va = (void *)(uintptr_t)buf->addr + buf_offset;
+		bytes = buf->size - buf_offset;
 		if (bytes > length)
 			bytes = length;
 
-		memcpy(dest, src, bytes);
+		switch (op) {
+		case RXE_COPY_TO_MR:
+			memcpy(va, addr, bytes);
+			break;
+		case RXE_COPY_FROM_MR:
+			memcpy(addr, va, bytes);
+			break;
+		case RXE_FRAG_TO_MR:
+			err = skb_copy_bits(skb, skb_offset, va, bytes);
+			if (err)
+				return err;
+			break;
+		case RXE_FRAG_FROM_MR:
+			err = rxe_add_frag(skb, buf, bytes, buf_offset);
+			if (err)
+				return err;
+			break;
+		}
 
-		length	-= bytes;
-		addr	+= bytes;
+		length -= bytes;
+		addr += bytes;
 
-		offset	= 0;
+		buf_offset = 0;
+		skb_offset += bytes;
 		buf++;
 		i++;
 
+		/* we won't overrun since we checked range above */
 		if (i == RXE_BUF_PER_MAP) {
 			i = 0;
 			map++;
@@ -460,9 +520,6 @@  int rxe_mr_copy(struct rxe_mr *mr, u64 iova, void *addr, int length,
 	}
 
 	return 0;
-
-err1:
-	return err;
 }
 
 /* copy data in or out of a wqe, i.e. sg list
@@ -535,7 +592,8 @@  int copy_data(
 		if (bytes > 0) {
 			iova = sge->addr + offset;
 
-			err = rxe_mr_copy(mr, iova, addr, bytes, op);
+			err = rxe_copy_mr_data(NULL, mr, iova, addr,
+					       0, bytes, op);
 			if (err)
 				goto err2;
 
diff --git a/drivers/infiniband/sw/rxe/rxe_resp.c b/drivers/infiniband/sw/rxe/rxe_resp.c
index 023df0562258..5f00477544fa 100644
--- a/drivers/infiniband/sw/rxe/rxe_resp.c
+++ b/drivers/infiniband/sw/rxe/rxe_resp.c
@@ -535,12 +535,15 @@  static enum resp_states send_data_in(struct rxe_qp *qp, void *data_addr,
 static enum resp_states write_data_in(struct rxe_qp *qp,
 				      struct rxe_pkt_info *pkt)
 {
+	struct sk_buff *skb = PKT_TO_SKB(pkt);
 	enum resp_states rc = RESPST_NONE;
-	int	err;
 	int data_len = payload_size(pkt);
+	int err;
+	int skb_offset = 0;
 
-	err = rxe_mr_copy(qp->resp.mr, qp->resp.va + qp->resp.offset,
-			  payload_addr(pkt), data_len, RXE_COPY_TO_MR);
+	err = rxe_copy_mr_data(skb, qp->resp.mr, qp->resp.va + qp->resp.offset,
+			  payload_addr(pkt), skb_offset, data_len,
+			  RXE_COPY_TO_MR);
 	if (err) {
 		rc = RESPST_ERR_RKEY_VIOLATION;
 		goto out;
@@ -766,6 +769,7 @@  static enum resp_states read_reply(struct rxe_qp *qp,
 	int err;
 	struct resp_res *res = qp->resp.res;
 	struct rxe_mr *mr;
+	int skb_offset = 0;
 
 	if (!res) {
 		res = rxe_prepare_res(qp, req_pkt, RXE_READ_MASK);
@@ -806,15 +810,17 @@  static enum resp_states read_reply(struct rxe_qp *qp,
 	if (!skb)
 		return RESPST_ERR_RNR;
 
-	err = rxe_mr_copy(mr, res->read.va, payload_addr(&ack_pkt),
-			  payload, RXE_COPY_FROM_MR);
-	if (mr)
-		rxe_put(mr);
+	err = rxe_copy_mr_data(skb, mr, res->read.va, payload_addr(&ack_pkt),
+			       skb_offset, payload, RXE_COPY_FROM_MR);
 	if (err) {
 		kfree_skb(skb);
+		rxe_put(mr);
 		return RESPST_ERR_RKEY_VIOLATION;
 	}
 
+	if (mr)
+		rxe_put(mr);
+
 	if (bth_pad(&ack_pkt)) {
 		u8 *pad = payload_addr(&ack_pkt) + payload;