@@ -130,6 +130,7 @@ static enum ib_wc_opcode wr_to_wc_opcode(enum ib_wr_opcode opcode)
case IB_WR_RDMA_READ_WITH_INV: return IB_WC_RDMA_READ;
case IB_WR_LOCAL_INV: return IB_WC_LOCAL_INV;
case IB_WR_REG_MR: return IB_WC_REG_MR;
+ case IB_WR_BIND_MW: return IB_WC_BIND_MW;
default:
return 0xff;
@@ -787,6 +788,8 @@ int rxe_completer(void *arg)
*/
WARN_ON_ONCE(skb);
rxe_drop_ref(qp);
+ /* TODO this seems plain backwards:
+ * EAGAIN normally means call me again */
return -EAGAIN;
done:
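A note on the TODO just above: the rxe completer and requester are driven by a task loop that keeps re-invoking the handler while it returns 0 and stops as soon as it returns non-zero, so -EAGAIN here effectively means "do not call me again". A minimal sketch of that assumed contract follows (illustration only, with a hypothetical name; this is not a copy of rxe_task.c):

/* Illustration only: the assumed contract between a work handler such as
 * rxe_completer()/rxe_requester() and the loop that drives it. A return
 * of 0 means "run me again immediately"; any non-zero value, including
 * -EAGAIN, means "stop until the task is rescheduled".
 */
static void demo_drive_task(int (*handler)(void *arg), void *arg)
{
	int ret;

	do {
		ret = handler(arg);
	} while (ret == 0);
}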
@@ -140,6 +140,7 @@ int advance_dma_data(struct rxe_dma_info *dma, unsigned int length);
struct ib_mw *rxe_alloc_mw(struct ib_pd *ibpd, enum ib_mw_type type,
struct ib_udata *udata);
int rxe_dealloc_mw(struct ib_mw *ibmw);
+int rxe_bind_mw(struct rxe_qp *qp, struct rxe_send_wqe *wqe);
/* rxe_net.c */
void rxe_loopback(struct sk_buff *skb);
@@ -96,3 +96,9 @@ int rxe_dealloc_mw(struct ib_mw *ibmw)
return 0;
}
+
+int rxe_bind_mw(struct rxe_qp *qp, struct rxe_send_wqe *wqe)
+{
+ pr_err("%s: not implemented\n", __func__);
+ return -EOPNOTSUPP;
+}
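Since rxe_bind_mw() is only a stub so far, here is a rough outline of the steps a real implementation will likely need. This is a sketch only; the checks described in the comments are generic MW-bind semantics, and nothing below is the final rxe code:

/* Hypothetical outline only, not the planned implementation. */
int rxe_bind_mw(struct rxe_qp *qp, struct rxe_send_wqe *wqe)
{
	/* 1. look up the MW and the target MR named by the bind wqe,
	 *    taking a reference on each
	 * 2. check that the MW and MR belong to the same PD as the qp,
	 *    that the MR was registered with bind access, and that the
	 *    requested [addr, addr + length) range lies inside the MR
	 * 3. on success record addr, length, access rights and the new
	 *    rkey in the MW and mark it bound
	 * 4. drop the references and return 0; return an error such as
	 *    -EINVAL if any check fails
	 */
	return -EOPNOTSUPP;	/* still a stub in this sketch */
}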
@@ -114,13 +114,20 @@ struct rxe_wr_opcode_info rxe_wr_opcode_info[] = {
[IB_WR_LOCAL_INV] = {
.name = "IB_WR_LOCAL_INV",
.mask = {
- [IB_QPT_RC] = WR_REG_MASK,
+ [IB_QPT_RC] = WR_LOCAL_MASK,
},
},
[IB_WR_REG_MR] = {
.name = "IB_WR_REG_MR",
.mask = {
- [IB_QPT_RC] = WR_REG_MASK,
+ [IB_QPT_RC] = WR_LOCAL_MASK,
+ },
+ },
+ [IB_WR_BIND_MW] = {
+ .name = "IB_WR_BIND_MW",
+ .mask = {
+ [IB_QPT_RC] = WR_LOCAL_MASK,
+ [IB_QPT_UC] = WR_LOCAL_MASK,
},
},
};
@@ -586,6 +586,8 @@ static void update_state(struct rxe_qp *qp, struct rxe_send_wqe *wqe,
int rxe_requester(void *arg)
{
struct rxe_qp *qp = (struct rxe_qp *)arg;
+ struct rxe_dev *rxe = to_rdev(qp->ibqp.device);
+ struct rxe_mr *rmr;
struct rxe_pkt_info pkt;
struct sk_buff *skb;
struct rxe_send_wqe *wqe;
@@ -596,9 +598,17 @@ int rxe_requester(void *arg)
int ret;
struct rxe_send_wqe rollback_wqe;
u32 rollback_psn;
+ int entered;
rxe_add_ref(qp);
+ /* this code is 'guaranteed' to never be entered more
+ * than once; check to make sure that this is the case */
+ entered = atomic_inc_return(&qp->req.task.entered);
+ if (entered > 1) {
pr_err("%s: entered %d times\n", __func__, entered);
+ }
+
next_wqe:
if (unlikely(!qp->valid || qp->req.state == QP_STATE_ERROR))
goto exit;
@@ -621,13 +631,11 @@ int rxe_requester(void *arg)
if (unlikely(!wqe))
goto exit;
- if (wqe->mask & WR_REG_MASK) {
- if (wqe->wr.opcode == IB_WR_LOCAL_INV) {
- struct rxe_dev *rxe = to_rdev(qp->ibqp.device);
- struct rxe_mr *rmr;
-
+ if (wqe->mask & WR_LOCAL_MASK) {
+ switch (wqe->wr.opcode) {
+ case IB_WR_LOCAL_INV:
rmr = rxe_pool_get_index(&rxe->mr_pool,
- wqe->wr.ex.invalidate_rkey >> 8);
+ wqe->wr.ex.invalidate_rkey >> 8);
if (!rmr) {
pr_err("No mr for key %#x\n",
wqe->wr.ex.invalidate_rkey);
@@ -635,13 +643,16 @@ int rxe_requester(void *arg)
wqe->status = IB_WC_MW_BIND_ERR;
goto exit;
}
+ /* TODO this can race with external access
+ * to the MR in rxe_resp unless you can know
+ * that all accesses are done */
rmr->state = RXE_MEM_STATE_FREE;
rxe_drop_ref(rmr);
wqe->state = wqe_state_done;
wqe->status = IB_WC_SUCCESS;
- } else if (wqe->wr.opcode == IB_WR_REG_MR) {
- struct rxe_mr *rmr = to_rmr(wqe->wr.wr.reg.mr);
-
+ break;
+ case IB_WR_REG_MR:
+ rmr = to_rmr(wqe->wr.wr.reg.mr);
rmr->state = RXE_MEM_STATE_VALID;
rmr->access = wqe->wr.wr.reg.access;
rmr->lkey = wqe->wr.wr.reg.key;
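On the race flagged in the TODO above: one conceivable guard, shown purely as an illustration, would be to refuse the invalidate while the responder may still hold a reference to the MR. The hypothetical_mr_users() accessor below does not exist in rxe; it only stands in for whatever quiescing mechanism is eventually chosen:

/* Illustration only -- hypothetical_mr_users() is a placeholder. */
if (hypothetical_mr_users(rmr) > 1) {
	/* rxe_resp may still be accessing the MR */
	rxe_drop_ref(rmr);
	wqe->state = wqe_state_error;
	wqe->status = IB_WC_MW_BIND_ERR;
	goto exit;
}
rmr->state = RXE_MEM_STATE_FREE;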
@@ -649,7 +660,21 @@ int rxe_requester(void *arg)
rmr->iova = wqe->wr.wr.reg.mr->iova;
wqe->state = wqe_state_done;
wqe->status = IB_WC_SUCCESS;
- } else {
+ break;
+ case IB_WR_BIND_MW:
+ ret = rxe_bind_mw(qp, wqe);
+ if (ret) {
+ wqe->state = wqe_state_done;
+ wqe->status = IB_WC_MW_BIND_ERR;
+ /* TODO the err path below will overwrite this status;
+ * it probably should not */
+ goto err;
+ }
+ wqe->state = wqe_state_done;
+ wqe->status = IB_WC_SUCCESS;
+ break;
+ default:
pr_err("%s: unexpected LOCAL WR opcode %d\n", __func__, wqe->wr.opcode);
goto exit;
}
if ((wqe->wr.send_flags & IB_SEND_SIGNALED) ||
@@ -704,9 +729,10 @@ int rxe_requester(void *arg)
qp->req.wqe_index);
wqe->state = wqe_state_done;
wqe->status = IB_WC_SUCCESS;
+ /* TODO why not just treat this the same as a
+ * successful wqe and go to the next wqe? */
__rxe_do_task(&qp->comp.task);
- rxe_drop_ref(qp);
- return 0;
+ goto again;
}
payload = mtu;
}
@@ -750,12 +776,36 @@ int rxe_requester(void *arg)
goto next_wqe;
+ /* TODO this can be cleaned up */
err:
+ /* we come here if an error occurred while processing
+ * a send wqe. The completer will put the qp in error
+ * state and no more wqes will be processed unless
+ * the qp is cleaned up and restarted. We do not want
+ * to be called again */
wqe->status = IB_WC_LOC_PROT_ERR;
wqe->state = wqe_state_error;
__rxe_do_task(&qp->comp.task);
+ ret = -EAGAIN;
+ goto done;
exit:
+ /* we come here if either there are no more wqes in the send
+ * queue or we are blocked waiting for some resource or event.
+ * The current wqe will be restarted or a new wqe started when
+ * there is work to do. */
+ ret = -EAGAIN;
+ goto done;
+
+again:
+ /* we come here if we are done with the current wqe but want to
+ * be called again. Mostly we loop back to next_wqe instead, so
+ * this should eventually be made consistent one way or the other */
+ ret = 0;
+ goto done;
+
+done:
+ atomic_dec(&qp->req.task.entered);
rxe_drop_ref(qp);
- return -EAGAIN;
+ return ret;
}
@@ -55,6 +55,8 @@ struct rxe_task {
int ret;
char name[16];
bool destroyed;
+ /* debug code, delete me when done */
+ atomic_t entered;
};
/*
Added code to implement the path to rxe_bind_mw(), which is still a stub.
Now the ibv_bind_mw verb can be called. Added bind_mw work requests to the
opcodes file and added the new local operation to rxe_req. Changed
WR_REG_MASK to WR_LOCAL_MASK since it is used to identify local operations.

Signed-off-by: Bob Pearson <rpearson@hpe.com>
---
 drivers/infiniband/sw/rxe/rxe_comp.c   |  3 +
 drivers/infiniband/sw/rxe/rxe_loc.h    |  1 +
 drivers/infiniband/sw/rxe/rxe_mw.c     |  6 ++
 drivers/infiniband/sw/rxe/rxe_opcode.c | 11 +++-
 drivers/infiniband/sw/rxe/rxe_req.c    | 76 +++++++++++++++++++++-----
 drivers/infiniband/sw/rxe/rxe_task.h   |  2 +
 6 files changed, 84 insertions(+), 15 deletions(-)
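For reviewers, a sketch of how this path gets exercised from user space once the stub is filled in, using standard libibverbs calls. The function name demo_bind_type2_mw() is made up, the pd/qp/mr/buf arguments are assumed to have been set up elsewhere, the MR must have been registered with IBV_ACCESS_MW_BIND, and error handling is minimal:

#include <stddef.h>
#include <stdint.h>
#include <infiniband/verbs.h>

/* Illustration only: bind a type 2 memory window over an existing MR by
 * posting a BIND_MW work request on the send queue. */
static int demo_bind_type2_mw(struct ibv_pd *pd, struct ibv_qp *qp,
			      struct ibv_mr *mr, void *buf, size_t buf_len)
{
	struct ibv_mw *mw = ibv_alloc_mw(pd, IBV_MW_TYPE_2);
	struct ibv_send_wr wr = {0}, *bad_wr;

	if (!mw)
		return -1;

	wr.wr_id = 1;
	wr.opcode = IBV_WR_BIND_MW;
	wr.send_flags = IBV_SEND_SIGNALED;
	wr.bind_mw.mw = mw;
	wr.bind_mw.rkey = ibv_inc_rkey(mw->rkey);  /* rkey for this binding */
	wr.bind_mw.bind_info.mr = mr;
	wr.bind_mw.bind_info.addr = (uintptr_t)buf;
	wr.bind_mw.bind_info.length = buf_len;
	wr.bind_mw.bind_info.mw_access_flags = IBV_ACCESS_REMOTE_WRITE;

	/* the resulting send completion is reported as a bind-MW completion,
	 * which is what the wr_to_wc_opcode() change in rxe_comp.c provides
	 * on the kernel side */
	return ibv_post_send(qp, &wr, &bad_wr);
}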