diff --git a/drivers/infiniband/sw/rxe/rxe_comp.c b/drivers/infiniband/sw/rxe/rxe_comp.c
--- a/drivers/infiniband/sw/rxe/rxe_comp.c
+++ b/drivers/infiniband/sw/rxe/rxe_comp.c
@@ -563,12 +563,20 @@ int rxe_completer(void *arg)
struct sk_buff *skb = NULL;
struct rxe_pkt_info *pkt = NULL;
enum comp_state state;
+ int entered;
rxe_add_ref(qp);
+ /* this code is 'guaranteed' to never be entered more
+ * than once; check to make sure that is the case
+ */
+ entered = atomic_inc_return(&qp->comp.task.entered);
+ if (entered > 1) {
+ pr_err("rxe_completer: entered %d times\n", entered);
+ }
+
if (!qp->valid || qp->req.state == QP_STATE_ERROR ||
qp->req.state == QP_STATE_RESET) {
- rxe_drain_resp_pkts(qp, qp->valid &&
+ rxe_drain_resp_pkts(qp, qp->valid &&
qp->req.state == QP_STATE_ERROR);
goto exit;
}
@@ -782,14 +790,14 @@ int rxe_completer(void *arg)
}
}
+ /* TODO: the exit and done paths are the same and need to be merged */
exit:
/* we come here if we are done with processing and want the task to
- * exit from the loop calling us
+ * exit from the loop calling us -- to call us again later
*/
WARN_ON_ONCE(skb);
+ atomic_dec(&qp->comp.task.entered);
rxe_drop_ref(qp);
- // TODO this seems plain backwards
- // EAGAIN normally means call me again
return -EAGAIN;
done:
@@ -797,6 +805,8 @@ int rxe_completer(void *arg)
* us again to see if there is anything else to do
*/
WARN_ON_ONCE(skb);
+ atomic_dec(&qp->comp.task.entered);
rxe_drop_ref(qp);
return 0;
}
diff --git a/drivers/infiniband/sw/rxe/rxe_loc.h b/drivers/infiniband/sw/rxe/rxe_loc.h
--- a/drivers/infiniband/sw/rxe/rxe_loc.h
+++ b/drivers/infiniband/sw/rxe/rxe_loc.h
@@ -137,6 +137,7 @@ int mr_check_range(struct rxe_mr *mr, u64 iova, size_t length);
void rxe_mr_cleanup(struct rxe_pool_entry *arg);
int advance_dma_data(struct rxe_dma_info *dma, unsigned int length);
+int rxe_invalidate_mr(struct rxe_mr *mr, int remote);
/* rxe_mw.c */
void rxe_set_mw_rkey(struct rxe_mw *mw);
@@ -144,6 +145,8 @@ struct ib_mw *rxe_alloc_mw(struct ib_pd *ibpd, enum ib_mw_type type,
struct ib_udata *udata);
int rxe_dealloc_mw(struct ib_mw *ibmw);
int rxe_bind_mw(struct rxe_qp *qp, struct rxe_send_wqe *wqe);
+void rxe_mw_cleanup(struct rxe_pool_entry *arg);
+int rxe_invalidate_mw(struct rxe_mw *mw, int remote);
/* rxe_net.c */
void rxe_loopback(struct sk_buff *skb);
diff --git a/drivers/infiniband/sw/rxe/rxe_mr.c b/drivers/infiniband/sw/rxe/rxe_mr.c
--- a/drivers/infiniband/sw/rxe/rxe_mr.c
+++ b/drivers/infiniband/sw/rxe/rxe_mr.c
@@ -47,7 +47,7 @@ void rxe_set_mr_lkey(struct rxe_mr *mr)
if (likely(lkey && (rxe_add_key(mr, &lkey) == 0)))
return;
} while (tries++ < 10);
- pr_err("rxe_set_mr_lkey: unable to get random lkey\n");
+ pr_err("unable to get random key for mr\n");
}
#if 0
@@ -114,7 +114,8 @@ void rxe_mr_cleanup(struct rxe_pool_entry *arg)
struct rxe_mr *mr = container_of(arg, typeof(*mr), pelem);
int i;
- ib_umem_release(mr->umem);
+ if (mr->umem)
+ ib_umem_release(mr->umem);
if (mr->map) {
for (i = 0; i < mr->num_map; i++)
@@ -122,6 +123,9 @@ void rxe_mr_cleanup(struct rxe_pool_entry *arg)
kfree(mr->map);
}
+
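+ /* the index and key are now returned here at cleanup time
+ * instead of in the verbs dereg/dealloc paths
+ */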
+ rxe_drop_index(mr);
+ rxe_drop_key(mr);
}
static int rxe_mr_alloc(struct rxe_mr *mr, int num_buf)
@@ -602,3 +606,12 @@ struct rxe_mr *lookup_mr(struct rxe_pd *pd, int access, u32 key,
return mr;
}
+
+int rxe_invalidate_mr(struct rxe_mr *mr, int remote)
+{
+ /* TODO: this is still a stub; a real implementation must check
+ * the mr state and access rights and can fail
+ */
+ mr->state = RXE_MEM_STATE_FREE;
+
+ return 0;
+}
diff --git a/drivers/infiniband/sw/rxe/rxe_mw.c b/drivers/infiniband/sw/rxe/rxe_mw.c
--- a/drivers/infiniband/sw/rxe/rxe_mw.c
+++ b/drivers/infiniband/sw/rxe/rxe_mw.c
@@ -49,7 +49,7 @@ void rxe_set_mw_rkey(struct rxe_mw *mw)
(rxe_add_key(mw, &rkey) == 0)))
return;
} while (tries++ < 10);
- pr_err("rxe_set_mw_rkey: unable to get random rkey\n");
+ pr_err("unable to get random rkey for mw\n");
}
@@ -91,9 +91,6 @@ struct ib_mw *rxe_alloc_mw(struct ib_pd *ibpd, enum ib_mw_type type,
rxe_add_index(mw);
rxe_set_mw_rkey(mw);
- pr_info("rxe_alloc_mw: index = 0x%08x, rkey = 0x%08x\n",
- mw->pelem.index, mw->ibmw.rkey);
-
spin_lock_init(&mw->lock);
if (type == IB_MW_TYPE_2) {
@@ -120,8 +117,6 @@ struct ib_mw *rxe_alloc_mw(struct ib_pd *ibpd, enum ib_mw_type type,
return &mw->ibmw;
err2:
- rxe_drop_key(mw);
- rxe_drop_index(mw);
rxe_drop_ref(mw);
rxe_drop_ref(pd);
err1:
@@ -138,8 +133,6 @@ int rxe_dealloc_mw(struct ib_mw *ibmw)
mw->state = RXE_MW_STATE_INVALID;
spin_unlock_irqrestore(&mw->lock, flags);
- rxe_drop_key(mw);
- rxe_drop_index(mw);
rxe_drop_ref(pd);
rxe_drop_ref(mw);
@@ -151,8 +144,6 @@ int rxe_bind_mw(struct rxe_qp *qp, struct rxe_send_wqe *wqe)
struct rxe_mw *mw;
struct rxe_mr *mr;
- pr_info("rxe_bind_mw: called\n");
-
if (qp->is_user) {
} else {
mw = to_rmw(wqe->wr.wr.kmw.ibmw);
@@ -186,3 +177,25 @@ int rxe_bind_mw(struct rxe_qp *qp, struct rxe_send_wqe *wqe)
#endif
return 0;
}
+
+void rxe_mw_cleanup(struct rxe_pool_entry *arg)
+{
+ struct rxe_mw *mw = container_of(arg, typeof(*mw), pelem);
+
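+ /* cleanup runs once the last reference to the mw is dropped,
+ * so it is safe to return the index and key to the pool here
+ */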
+ rxe_drop_index(mw);
+ rxe_drop_key(mw);
+}
+
+int rxe_invalidate_mw(struct rxe_mw *mw, int remote)
+{
+ /* type 1 MWs don't support invalidate */
+ if (mw->ibmw.type == IB_MW_TYPE_1) {
+ pr_err("attempt to %s-invalidate a type 1 mw\n",
+ remote ? "send" : "local");
+ return -EINVAL;
+ }
+
+ mw->state = RXE_MEM_STATE_FREE;
+
+ return 0;
+}
diff --git a/drivers/infiniband/sw/rxe/rxe_pool.c b/drivers/infiniband/sw/rxe/rxe_pool.c
--- a/drivers/infiniband/sw/rxe/rxe_pool.c
+++ b/drivers/infiniband/sw/rxe/rxe_pool.c
@@ -85,6 +85,7 @@ struct rxe_type_info rxe_type_info[RXE_NUM_TYPES] = {
[RXE_TYPE_MW] = {
.name = "rxe-mw",
.size = sizeof(struct rxe_mw),
+ .cleanup = rxe_mw_cleanup,
.flags = RXE_POOL_INDEX
| RXE_POOL_KEY,
.max_index = RXE_MAX_MW_INDEX,
@@ -365,6 +366,11 @@ void rxe_drop_key(void *arg)
- struct rxe_pool *pool = elem->pool;
+ struct rxe_pool *pool;
unsigned long flags;

+ if (elem == NULL) {
+ pr_warn("rxe_drop_key: called with null pointer\n");
+ return;
+ }
+
+ pool = elem->pool;
+
write_lock_irqsave(&pool->pool_lock, flags);
rb_erase(&elem->key_node, &pool->key.tree);
write_unlock_irqrestore(&pool->pool_lock, flags);
@@ -388,6 +394,11 @@ void rxe_drop_index(void *arg)
- struct rxe_pool *pool = elem->pool;
+ struct rxe_pool *pool;
unsigned long flags;

+ if (elem == NULL) {
+ pr_warn("rxe_drop_index: called with null pointer\n");
+ return;
+ }
+
+ pool = elem->pool;
+
write_lock_irqsave(&pool->pool_lock, flags);
clear_bit(elem->index - pool->index.min_index, pool->index.table);
rb_erase(&elem->index_node, &pool->index.tree);
diff --git a/drivers/infiniband/sw/rxe/rxe_req.c b/drivers/infiniband/sw/rxe/rxe_req.c
--- a/drivers/infiniband/sw/rxe/rxe_req.c
+++ b/drivers/infiniband/sw/rxe/rxe_req.c
@@ -583,11 +583,32 @@ static void update_state(struct rxe_qp *qp, struct rxe_send_wqe *wqe,
jiffies + qp->qp_timeout_jiffies);
}
+static int local_invalidate(struct rxe_dev *rxe, struct rxe_send_wqe *wqe)
+{
+ int ret;
+ struct rxe_mr *mr;
+ struct rxe_mw *mw;
+ u32 key = wqe->wr.ex.invalidate_rkey;
+
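+ /* the key may name either an mr or an mw; look in both pools */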
+ mr = rxe_pool_get_key(&rxe->mr_pool, &key);
+ if (mr) {
+ ret = rxe_invalidate_mr(mr, 0);
+ rxe_drop_ref(mr);
+ return ret;
+ }
+
+ mw = rxe_pool_get_key(&rxe->mw_pool, &key);
+ if (mw) {
+ ret = rxe_invalidate_mw(mw, 0);
+ rxe_drop_ref(mw);
+ return ret;
+ }
+
+ pr_err("No mr/mw for rkey %#x\n", key);
+ return -EINVAL;
+}
+
int rxe_requester(void *arg)
{
struct rxe_qp *qp = (struct rxe_qp *)arg;
struct rxe_dev *rxe = to_rdev(qp->ibqp.device);
- struct rxe_mr *rmr;
+ struct rxe_mr *mr;
struct rxe_pkt_info pkt;
struct sk_buff *skb;
struct rxe_send_wqe *wqe;
@@ -600,6 +621,8 @@ int rxe_requester(void *arg)
u32 rollback_psn;
int entered;
+ pr_info("rxe_requester: called\n");
+
rxe_add_ref(qp);
// this code is 'guaranteed' to never be entered more
@@ -631,57 +654,47 @@ int rxe_requester(void *arg)
if (unlikely(!wqe))
goto exit;
+ /* process local operations */
if (wqe->mask & WR_LOCAL_MASK) {
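+ /* assume success; the error cases below overwrite wqe->status */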
+ wqe->state = wqe_state_done;
+ wqe->status = IB_WC_SUCCESS;
+
switch (wqe->wr.opcode) {
case IB_WR_LOCAL_INV:
- rmr = rxe_pool_get_index(&rxe->mr_pool,
- wqe->wr.ex.invalidate_rkey >> 8);
- if (!rmr) {
- pr_err("No mr for key %#x\n",
- wqe->wr.ex.invalidate_rkey);
- wqe->state = wqe_state_error;
- wqe->status = IB_WC_MW_BIND_ERR;
- goto exit;
- }
- // TODO this can race with external access
- // to the MR in rxe_resp unless you can know
- // that all accesses are done
- rmr->state = RXE_MEM_STATE_FREE;
- rxe_drop_ref(rmr);
- wqe->state = wqe_state_done;
- wqe->status = IB_WC_SUCCESS;
+ ret = local_invalidate(rxe, wqe);
+ if (ret)
+ wqe->status = IB_WC_LOC_QP_OP_ERR;
break;
case IB_WR_REG_MR:
- rmr = to_rmr(wqe->wr.wr.reg.mr);
- rmr->state = RXE_MEM_STATE_VALID;
- rmr->access = wqe->wr.wr.reg.access;
- rmr->lkey = wqe->wr.wr.reg.key;
- rmr->rkey = wqe->wr.wr.reg.key;
- rmr->iova = wqe->wr.wr.reg.mr->iova;
- wqe->state = wqe_state_done;
- wqe->status = IB_WC_SUCCESS;
+ mr = to_rmr(wqe->wr.wr.reg.mr);
+ mr->state = RXE_MEM_STATE_VALID;
+ mr->access = wqe->wr.wr.reg.access;
+ mr->lkey = wqe->wr.wr.reg.key;
+ mr->rkey = wqe->wr.wr.reg.key;
+ mr->iova = wqe->wr.wr.reg.mr->iova;
break;
case IB_WR_BIND_MW:
- ret = rxe_bind_mw(qp, wqe);
- if (ret) {
- wqe->state = wqe_state_done;
+ ret = rxe_bind_mw(qp, wqe);
+ if (ret)
wqe->status = IB_WC_MW_BIND_ERR;
- // TODO err: will change status
- // probably should not
- goto err;
- }
- wqe->state = wqe_state_done;
- wqe->status = IB_WC_SUCCESS;
break;
default:
- pr_err("rxe_requester: unexpected LOCAL WR opcode = %d\n", wqe->wr.opcode);
- goto exit;
+ pr_err("rxe_requester: unexpected local WR opcode = %d\n",
+ wqe->wr.opcode);
+ wqe->status = IB_WC_LOC_QP_OP_ERR;
}
+
+ /* we're done processing the wqe so move index */
+ qp->req.wqe_index = next_index(qp->sq.queue, qp->req.wqe_index);
+
+ /* if an error occurred do a completion pass now
+ * (below) and then quit processing more wqes
+ */
+ if (wqe->status != IB_WC_SUCCESS)
+ goto err;
+
+ /* if the wqe is signalled schedule a completion pass */
if ((wqe->wr.send_flags & IB_SEND_SIGNALED) ||
- qp->sq_sig_type == IB_SIGNAL_ALL_WR)
+ (qp->sq_sig_type == IB_SIGNAL_ALL_WR))
rxe_run_task(&qp->comp.task, 1);
- qp->req.wqe_index = next_index(qp->sq.queue,
- qp->req.wqe_index);
+
goto next_wqe;
}
diff --git a/drivers/infiniband/sw/rxe/rxe_resp.c b/drivers/infiniband/sw/rxe/rxe_resp.c
--- a/drivers/infiniband/sw/rxe/rxe_resp.c
+++ b/drivers/infiniband/sw/rxe/rxe_resp.c
@@ -834,14 +834,38 @@ static enum resp_states execute(struct rxe_qp *qp, struct rxe_pkt_info *pkt)
return RESPST_CLEANUP;
}
+static int send_invalidate(struct rxe_dev *rxe, u32 rkey)
+{
+ int ret;
+ struct rxe_mr *mr;
+ struct rxe_mw *mw;
+
+ pr_info("send_invalidate: called\n");
+
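+ /* pass remote = 1 below: the invalidate was requested by a
+ * received send with invalidate packet
+ */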
+ mr = rxe_pool_get_key(&rxe->mr_pool, &rkey);
+ if (mr) {
+ ret = rxe_invalidate_mr(mr, 1);
+ rxe_drop_ref(mr);
+ return ret;
+ }
+
+ mw = rxe_pool_get_key(&rxe->mw_pool, &rkey);
+ if (mw) {
+ ret = rxe_invalidate_mw(mw, 1);
+ rxe_drop_ref(mw);
+ return ret;
+ }
+
+ pr_err("send invalidate failed for rkey = 0x%x\n", rkey);
+ return -EINVAL;
+}
+
static enum resp_states do_complete(struct rxe_qp *qp,
struct rxe_pkt_info *pkt)
{
+ int ret;
struct rxe_cqe cqe;
struct ib_wc *wc = &cqe.ibwc;
struct ib_uverbs_wc *uwc = &cqe.uibwc;
struct rxe_recv_wqe *wqe = qp->resp.wqe;
struct rxe_dev *rxe = to_rdev(qp->ibqp.device);
+ u32 rkey = (pkt->mask & RXE_IETH_MASK) ? ieth_rkey(pkt) : 0;
if (unlikely(!wqe))
return RESPST_CLEANUP;
@@ -858,6 +882,14 @@ static enum resp_states do_complete(struct rxe_qp *qp,
wc->wr_id = wqe->wr_id;
}
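+ /* a send with invalidate carries an rkey in the ieth header;
+ * invalidate the matching mr or mw before reporting the cqe
+ */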
+ if (pkt->mask & RXE_IETH_MASK) {
+ ret = send_invalidate(rxe, rkey);
+ if (ret) {
+ pr_err("do_complete: send invalidate failed\n");
+ /* TODO: decide how this failure should be reported */
+ }
+ }
+
if (wc->status == IB_WC_SUCCESS) {
rxe_counter_inc(rxe, RXE_CNT_RDMA_RECV);
wc->opcode = (pkt->mask & RXE_IMMDT_MASK &&
@@ -881,7 +913,7 @@ static enum resp_states do_complete(struct rxe_qp *qp,
if (pkt->mask & RXE_IETH_MASK) {
uwc->wc_flags |= IB_WC_WITH_INVALIDATE;
- uwc->ex.invalidate_rkey = ieth_rkey(pkt);
+ uwc->ex.invalidate_rkey = rkey;
}
uwc->qp_num = qp->ibqp.qp_num;
@@ -910,20 +942,8 @@ static enum resp_states do_complete(struct rxe_qp *qp,
}
if (pkt->mask & RXE_IETH_MASK) {
- struct rxe_mr *rmr;
-
wc->wc_flags |= IB_WC_WITH_INVALIDATE;
- wc->ex.invalidate_rkey = ieth_rkey(pkt);
+ wc->ex.invalidate_rkey = rkey;
-
- rmr = rxe_pool_get_index(&rxe->mr_pool,
- wc->ex.invalidate_rkey >> 8);
- if (unlikely(!rmr)) {
- pr_err("Bad rkey %#x invalidation\n",
- wc->ex.invalidate_rkey);
- return RESPST_ERROR;
- }
- rmr->state = RXE_MEM_STATE_FREE;
- rxe_drop_ref(rmr);
}
wc->qp = &qp->ibqp;
@@ -933,6 +953,8 @@ static enum resp_states do_complete(struct rxe_qp *qp,
wc->port_num = qp->attr.port_num;
}
+ } else {
+ /* TODO: work out what needs to happen on this path */
}
/* have copy for srq and reference for !srq */
@@ -1224,9 +1246,17 @@ int rxe_responder(void *arg)
enum resp_states state;
struct rxe_pkt_info *pkt = NULL;
int ret = 0;
+ int entered;
rxe_add_ref(qp);
+ /* this code is 'guaranteed' to never be entered more
+ * than once; check to make sure that is the case
+ */
+ entered = atomic_inc_return(&qp->resp.task.entered);
+ if (entered > 1) {
+ pr_err("rxe_responder: entered %d times\n", entered);
+ }
+
qp->resp.aeth_syndrome = AETH_ACK_UNLIMITED;
if (!qp->valid) {
@@ -1405,6 +1435,7 @@ int rxe_responder(void *arg)
exit:
ret = -EAGAIN;
done:
+ atomic_dec(&qp->resp.task.entered);
rxe_drop_ref(qp);
return ret;
}
diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.c b/drivers/infiniband/sw/rxe/rxe_verbs.c
--- a/drivers/infiniband/sw/rxe/rxe_verbs.c
+++ b/drivers/infiniband/sw/rxe/rxe_verbs.c
@@ -970,7 +970,6 @@ static struct ib_mr *rxe_reg_user_mr(struct ib_pd *ibpd,
return &mr->ibmr;
err3:
- rxe_drop_index(mr);
rxe_drop_ref(mr);
rxe_drop_ref(pd);
err2:
@@ -985,8 +984,6 @@ static int rxe_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata)
mr->state = RXE_MEM_STATE_ZOMBIE;
rxe_drop_ref(mr->pd);
- rxe_drop_index(mr);
- rxe_drop_key(mr);
rxe_drop_ref(mr);
return 0;
}
@@ -1020,7 +1017,6 @@ static struct ib_mr *rxe_alloc_mr(struct ib_pd *ibpd, enum ib_mr_type mr_type,
err2:
rxe_drop_ref(pd);
- rxe_drop_index(mr);
rxe_drop_ref(mr);
err1:
return ERR_PTR(err);
Fixed a bug in rxe_req that killed the last WC in error. Added a teardown/completion routine for MWs. Invalidate routines are still stubs. Still need to clean up some error path code for both MR and MW. Added atomics to detect reentrancy in the tasklets for now. Will go away later. Signed-off-by: Bob Pearson <rpearson@hpe.com> --- drivers/infiniband/sw/rxe/rxe_comp.c | 18 ++++-- drivers/infiniband/sw/rxe/rxe_loc.h | 3 + drivers/infiniband/sw/rxe/rxe_mr.c | 17 ++++- drivers/infiniband/sw/rxe/rxe_mw.c | 33 +++++++--- drivers/infiniband/sw/rxe/rxe_pool.c | 11 ++++ drivers/infiniband/sw/rxe/rxe_req.c | 91 +++++++++++++++------------ drivers/infiniband/sw/rxe/rxe_resp.c | 57 +++++++++++++---- drivers/infiniband/sw/rxe/rxe_verbs.c | 4 -- 8 files changed, 162 insertions(+), 72 deletions(-)