@@ -79,6 +79,16 @@ static void rxe_init_device_param(struct rxe_dev *rxe)
/* IB_ODP_SUPPORT_IMPLICIT is not supported right now. */
rxe->attr.odp_caps.general_caps |= IB_ODP_SUPPORT;
+
+ rxe->attr.odp_caps.per_transport_caps.ud_odp_caps |= IB_ODP_SUPPORT_SEND;
+ rxe->attr.odp_caps.per_transport_caps.ud_odp_caps |= IB_ODP_SUPPORT_RECV;
+ rxe->attr.odp_caps.per_transport_caps.ud_odp_caps |= IB_ODP_SUPPORT_SRQ_RECV;
+
+ rxe->attr.odp_caps.per_transport_caps.rc_odp_caps |= IB_ODP_SUPPORT_SEND;
+ rxe->attr.odp_caps.per_transport_caps.rc_odp_caps |= IB_ODP_SUPPORT_RECV;
+ rxe->attr.odp_caps.per_transport_caps.rc_odp_caps |= IB_ODP_SUPPORT_WRITE;
+ rxe->attr.odp_caps.per_transport_caps.rc_odp_caps |= IB_ODP_SUPPORT_READ;
+ rxe->attr.odp_caps.per_transport_caps.rc_odp_caps |= IB_ODP_SUPPORT_SRQ_RECV;
}
}
@@ -192,6 +192,8 @@ static inline unsigned int wr_opcode_mask(int opcode, struct rxe_qp *qp)
#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
int rxe_odp_mr_init_user(struct rxe_dev *rxe, u64 start, u64 length,
u64 iova, int access_flags, struct rxe_mr *mr);
+int rxe_odp_mr_copy(struct rxe_mr *mr, u64 iova, void *addr, int length,
+ enum rxe_mr_copy_dir dir);
#else /* CONFIG_INFINIBAND_ON_DEMAND_PAGING */
static inline int
rxe_odp_mr_init_user(struct rxe_dev *rxe, u64 start, u64 length, u64 iova,
@@ -199,6 +201,12 @@ rxe_odp_mr_init_user(struct rxe_dev *rxe, u64 start, u64 length, u64 iova,
{
return -EOPNOTSUPP;
}
+/*
+ * Stub used when CONFIG_INFINIBAND_ON_DEMAND_PAGING is not set: there is no
+ * ODP copy path, so every call fails with -EOPNOTSUPP and no argument is
+ * examined.
+ */
+static inline int
+rxe_odp_mr_copy(struct rxe_mr *mr, u64 iova, void *addr,
+int length, enum rxe_mr_copy_dir dir)
+{
+return -EOPNOTSUPP;
+}
#endif /* CONFIG_INFINIBAND_ON_DEMAND_PAGING */
@@ -319,7 +319,7 @@ int rxe_mr_copy(struct rxe_mr *mr, u64 iova, void *addr,
}
if (mr->umem->is_odp)
- return -EOPNOTSUPP;
+ return rxe_odp_mr_copy(mr, iova, addr, length, dir);
else
return rxe_mr_copy_xarray(mr, iova, addr, length, dir);
}
@@ -184,3 +184,87 @@ int rxe_odp_mr_init_user(struct rxe_dev *rxe, u64 start, u64 length,
return err;
}
+
+/*
+ * rxe_odp_check_pages() - tell whether a range must be faulted in before use
+ * @mr: ODP memory region to inspect
+ * @iova: first address of the range
+ * @length: size of the range in bytes; must be non-zero, because the last
+ *          page is derived from iova + length - 1
+ * @flags: RXE_PAGEFAULT_RDONLY if the caller only needs read access
+ *
+ * Walks every MR page index covered by [iova, iova + length) and returns
+ * true as soon as one page is missing from the MR xarray or its pfn_list
+ * entry lacks a required HMM_PFN_* bit. Returns false when every page is
+ * present with sufficient permission.
+ *
+ * The caller must hold umem_odp->umem_mutex, which protects both the xarray
+ * and pfn_list.
+ */
+static inline bool rxe_odp_check_pages(struct rxe_mr *mr, u64 iova,
+				       int length, u32 flags)
+{
+	struct ib_umem_odp *umem_odp = to_ib_umem_odp(mr->umem);
+	unsigned long hmm_flags = HMM_PFN_VALID;
+	unsigned long lower, upper, idx;
+
+	lower = rxe_mr_iova_to_index(mr, iova);
+	upper = rxe_mr_iova_to_index(mr, iova + length - 1);
+
+	/* anything other than a read-only access also needs write permission */
+	if (!(flags & RXE_PAGEFAULT_RDONLY))
+		hmm_flags |= HMM_PFN_WRITE;
+
+	/*
+	 * NOTE(review): pfn_list is indexed with the MR xarray index here;
+	 * confirm both arrays share the same origin and page size.
+	 */
+	for (idx = lower; idx <= upper; idx++) {
+		if (!xa_load(&mr->page_list, idx))
+			return true;
+
+		/*
+		 * Every required bit must be set. Testing only for a non-zero
+		 * intersection would accept a valid but read-only page when
+		 * write access was requested.
+		 */
+		if ((umem_odp->pfn_list[idx] & hmm_flags) != hmm_flags)
+			return true;
+	}
+
+	return false;
+}
+
+/*
+ * rxe_odp_mr_copy() - copy data to or from an ODP memory region
+ * @mr: memory region; its umem must be ODP-backed
+ * @iova: start address of the range inside the MR
+ * @addr: buffer on the non-MR side of the copy
+ * @length: number of bytes to copy
+ * @dir: RXE_TO_MR_OBJ (MR pages must be writable) or RXE_FROM_MR_OBJ
+ *       (read access is enough)
+ *
+ * Checks under umem_mutex that every page of the range is present with the
+ * required permission, triggering a page fault when that is not the case,
+ * and then performs the copy with the mutex still held so that the pages
+ * cannot be invalidated before the copy completes.
+ *
+ * Return: 0 when @length is zero; -EOPNOTSUPP if the MR is not ODP-backed;
+ * -EINVAL for an unknown @dir; -EFAULT if the pages are still unusable after
+ * three page faults; a negative error from rxe_odp_do_pagefault_and_lock();
+ * otherwise the result of rxe_mr_copy_xarray().
+ */
+int rxe_odp_mr_copy(struct rxe_mr *mr, u64 iova, void *addr, int length,
+		    enum rxe_mr_copy_dir dir)
+{
+	struct ib_umem_odp *umem_odp = to_ib_umem_odp(mr->umem);
+	const int max_tries = 3;
+	u32 flags = 0;
+	int retry = 0;
+	int err;
+
+	if (unlikely(!mr->umem->is_odp))
+		return -EOPNOTSUPP;
+
+	switch (dir) {
+	case RXE_TO_MR_OBJ:
+		break;
+
+	case RXE_FROM_MR_OBJ:
+		flags = RXE_PAGEFAULT_RDONLY;
+		break;
+
+	default:
+		return -EINVAL;
+	}
+
+	/*
+	 * Nothing to copy. Returning here also keeps the page check from
+	 * evaluating iova + length - 1, which would name the byte before iova.
+	 */
+	if (length == 0)
+		return 0;
+
+	mutex_lock(&umem_odp->umem_mutex);
+
+	/* umem_mutex is held whenever the loop condition is evaluated */
+	while (rxe_odp_check_pages(mr, iova, length, flags)) {
+		mutex_unlock(&umem_odp->umem_mutex);
+
+		/* give up once max_tries page faults did not map the range */
+		if (retry >= max_tries)
+			return -EFAULT;
+
+		/* umem_mutex is locked on success */
+		err = rxe_odp_do_pagefault_and_lock(mr, iova, length, flags);
+		if (err < 0)
+			return err;
+		retry++;
+	}
+
+	err = rxe_mr_copy_xarray(mr, iova, addr, length, dir);
+
+	mutex_unlock(&umem_odp->umem_mutex);
+
+	return err;
+}
rxe_mr_copy() is used widely to copy data to/from a user MR. The requester
uses it to load payloads of requesting packets; the responder uses it to
process Send, Write, and Read operations; the completer uses it to copy data
from response packets of Read and Atomic operations to a user MR.

Allow these operations to be used with ODP by adding a subordinate function
rxe_odp_mr_copy(). It consists of the following steps:
 1. Check page presence and R/W permission.
 2. If OK, just execute data copy to/from the pages and exit.
 3. Otherwise, trigger page fault to map the pages.
 4. Update the MR xarray using PFNs in umem_odp->pfn_list.
 5. Execute data copy to/from the pages.

umem_mutex is used to ensure that mapped pages are not invalidated before
data copy completes. It also protects the lists in umem_odp and the MR xarray.

Signed-off-by: Daisuke Matsuda <matsuda-daisuke@fujitsu.com>
---
 drivers/infiniband/sw/rxe/rxe.c     | 10 ++++
 drivers/infiniband/sw/rxe/rxe_loc.h |  8 +++
 drivers/infiniband/sw/rxe/rxe_mr.c  |  2 +-
 drivers/infiniband/sw/rxe/rxe_odp.c | 84 +++++++++++++++++++++++++++++
 4 files changed, 103 insertions(+), 1 deletion(-)