diff mbox series

[18/20] cleanup

Message ID 20200815045912.8626-19-rpearson@hpe.com (mailing list archive)
State Changes Requested
Delegated to: Jason Gunthorpe
Headers show
Series [01/20] Added ib_uverbs_wc_opcode to ib_user_verbs.h | expand

Commit Message

Bob Pearson Aug. 15, 2020, 4:58 a.m. UTC
This patch culls some leftover comments and makes things a little
neater.

Signed-off-by: Bob Pearson <rpearson@hpe.com>
---
 drivers/infiniband/sw/rxe/rxe_comp.c  |  10 +--
 drivers/infiniband/sw/rxe/rxe_loc.h   |  37 +++------
 drivers/infiniband/sw/rxe/rxe_mr.c    | 106 ++++++++++++------------
 drivers/infiniband/sw/rxe/rxe_mw.c    | 115 +++++++++++++++++++-------
 drivers/infiniband/sw/rxe/rxe_req.c   |  33 +++-----
 drivers/infiniband/sw/rxe/rxe_resp.c  |  57 ++++++++-----
 drivers/infiniband/sw/rxe/rxe_verbs.h |  17 ++--
 7 files changed, 208 insertions(+), 167 deletions(-)
diff mbox series

Patch

diff --git a/drivers/infiniband/sw/rxe/rxe_comp.c b/drivers/infiniband/sw/rxe/rxe_comp.c
index d2a094621486..ed9e27eeaadd 100644
--- a/drivers/infiniband/sw/rxe/rxe_comp.c
+++ b/drivers/infiniband/sw/rxe/rxe_comp.c
@@ -790,23 +790,19 @@  int rxe_completer(void *arg)
 		}
 	}
 
-	/* these are the same. need to merge them TODO */
 exit:
 	/* we come here if we are done with processing and want the task to
-	 * exit from the loop calling us -- to call us again later
-	 */
+	 * exit from the loop calling us */
 	WARN_ON_ONCE(skb);
 	atomic_dec(&qp->comp.task.entered);
 	rxe_drop_ref(qp);
 	return -EAGAIN;
 
 done:
-	/* we come here if we have processed a packet we want the task to call
-	 * us again to see if there is anything else to do
-	 */
+	/* we come here if we have processed a packet and we want
+	 * to be called again to see if there is anything else to do */
 	WARN_ON_ONCE(skb);
 	atomic_dec(&qp->comp.task.entered);
 	rxe_drop_ref(qp);
 	return 0;
-
 }
diff --git a/drivers/infiniband/sw/rxe/rxe_loc.h b/drivers/infiniband/sw/rxe/rxe_loc.h
index 652e0d67fe5c..2421ca311845 100644
--- a/drivers/infiniband/sw/rxe/rxe_loc.h
+++ b/drivers/infiniband/sw/rxe/rxe_loc.h
@@ -99,45 +99,26 @@  int rxe_mmap(struct ib_ucontext *context, struct vm_area_struct *vma);
 
 /* rxe_mr.c */
 void rxe_set_mr_lkey(struct rxe_mr *mr);
-
 enum copy_direction {
 	to_mr_obj,
 	from_mr_obj,
 };
-
-void rxe_mr_init_dma(struct rxe_pd *pd,
-		     int access, struct rxe_mr *mr);
-
-int rxe_mr_init_user(struct rxe_pd *pd, u64 start,
-		      u64 length, u64 iova, int access, struct ib_udata *udata,
-		      struct rxe_mr *mr);
-
-int rxe_mr_init_fast(struct rxe_pd *pd,
-		      int max_pages, struct rxe_mr *mr);
-
+void rxe_mr_init_dma(struct rxe_pd *pd, int access, struct rxe_mr *mr);
+int rxe_mr_init_user(struct rxe_pd *pd, u64 start, u64 length,
+		     u64 iova, int access, struct ib_udata *udata,
+		     struct rxe_mr *mr);
+int rxe_mr_init_fast(struct rxe_pd *pd, int max_pages, struct rxe_mr *mr);
 int rxe_mr_copy(struct rxe_mr *mr, u64 iova, void *addr,
-		 int length, enum copy_direction dir, u32 *crcp);
-
+		int length, enum copy_direction dir, u32 *crcp);
 int copy_data(struct rxe_pd *pd, int access,
 	      struct rxe_dma_info *dma, void *addr, int length,
 	      enum copy_direction dir, u32 *crcp);
-
 void *iova_to_vaddr(struct rxe_mr *mr, u64 iova, int length);
-
-enum lookup_type {
-	lookup_local,
-	lookup_remote,
-};
-
-struct rxe_mr *lookup_mr(struct rxe_pd *pd, int access, u32 key,
-			   enum lookup_type type);
-
-int mr_check_range(struct rxe_mr *mr, u64 iova, size_t length);
-
 void rxe_mr_cleanup(struct rxe_pool_entry *arg);
-
 int advance_dma_data(struct rxe_dma_info *dma, unsigned int length);
 int rxe_invalidate_mr(struct rxe_qp *qp, struct rxe_mr *mr);
+int rxe_mr_check_access(struct rxe_qp *qp, struct rxe_mr *mr,
+			int access, u64 va, u32 resid);
 
 /* rxe_mw.c */
 void rxe_set_mw_rkey(struct rxe_mw *mw);
@@ -147,6 +128,8 @@  int rxe_dealloc_mw(struct ib_mw *ibmw);
 int rxe_bind_mw(struct rxe_qp *qp, struct rxe_send_wqe *wqe);
 void rxe_mw_cleanup(struct rxe_pool_entry *arg);
 int rxe_invalidate_mw(struct rxe_qp *qp, struct rxe_mw *mw);
+int rxe_mw_check_access(struct rxe_qp *qp, struct rxe_mw *mw,
+			int access, u64 va, u32 resid);
 
 /* rxe_net.c */
 void rxe_loopback(struct sk_buff *skb);
diff --git a/drivers/infiniband/sw/rxe/rxe_mr.c b/drivers/infiniband/sw/rxe/rxe_mr.c
index a983a838bf4c..ce64d4101888 100644
--- a/drivers/infiniband/sw/rxe/rxe_mr.c
+++ b/drivers/infiniband/sw/rxe/rxe_mr.c
@@ -43,33 +43,14 @@  void rxe_set_mr_lkey(struct rxe_mr *mr)
 
 	do {
 		get_random_bytes(&lkey, sizeof(lkey));
-		lkey &= 0x7fffffff;
+		lkey &= ~IS_MW;
 		if (likely(lkey && (rxe_add_key(mr, &lkey) == 0)))
 			return;
 	} while (tries++ < 10);
 	pr_err("unable to get random key for mr\n");
 }
 
-#if 0
-/*
- * lfsr (linear feedback shift register) with period 255
- */
-static u8 rxe_get_key(void)
-{
-	static u32 key = 1;
-
-	key = key << 1;
-
-	key |= (0 != (key & 0x100)) ^ (0 != (key & 0x10))
-		^ (0 != (key & 0x80)) ^ (0 != (key & 0x40));
-
-	key &= 0xff;
-
-	return key;
-}
-#endif
-
-int mr_check_range(struct rxe_mr *mr, u64 iova, size_t length)
+static int mr_check_range(struct rxe_mr *mr, u64 iova, size_t length)
 {
 	switch (mr->type) {
 	case RXE_MEM_TYPE_DMA:
@@ -430,6 +411,25 @@  int rxe_mr_copy(struct rxe_mr *mr, u64 iova, void *addr, int length,
 	return err;
 }
 
+static struct rxe_mr *lookup_mr(struct rxe_pd *pd, int access, u32 lkey)
+{
+	struct rxe_mr *mr;
+	struct rxe_dev *rxe = to_rdev(pd->ibpd.device);
+
+	mr = rxe_pool_get_key(&rxe->mr_pool, &lkey);
+	if (!mr)
+		return NULL;
+
+	if (unlikely((mr->ibmr.lkey != lkey) || (mr->pd != pd) ||
+		     (access && !(access & mr->access)) ||
+		     (mr->state != RXE_MEM_STATE_VALID))) {
+		rxe_drop_ref(mr);
+		return NULL;
+	}
+
+	return mr;
+}
+
 /* copy data in or out of a wqe, i.e. sg list
  * under the control of a dma descriptor
  */
@@ -459,7 +459,7 @@  int copy_data(
 	}
 
 	if (sge->length && (offset < sge->length)) {
-		mr = lookup_mr(pd, access, sge->lkey, lookup_local);
+		mr = lookup_mr(pd, access, sge->lkey);
 		if (!mr) {
 			err = -EINVAL;
 			goto err1;
@@ -484,8 +484,7 @@  int copy_data(
 			}
 
 			if (sge->length) {
-				mr = lookup_mr(pd, access, sge->lkey,
-						 lookup_local);
+				mr = lookup_mr(pd, access, sge->lkey);
 				if (!mr) {
 					err = -EINVAL;
 					goto err1;
@@ -560,34 +559,6 @@  int advance_dma_data(struct rxe_dma_info *dma, unsigned int length)
 	return 0;
 }
 
-/* (1) find the mr (mr or mw) corresponding to lkey/rkey
- *     depending on lookup_type
- * (2) verify that the (qp) pd matches the mr pd
- * (3) verify that the mr can support the requested access
- * (4) verify that mr state is valid
- */
-struct rxe_mr *lookup_mr(struct rxe_pd *pd, int access, u32 key,
-			 enum lookup_type type)
-{
-	struct rxe_mr *mr;
-	struct rxe_dev *rxe = to_rdev(pd->ibpd.device);
-
-	mr = rxe_pool_get_key(&rxe->mr_pool, &key);
-	if (!mr)
-		return NULL;
-
-	if (unlikely((type == lookup_local && mr->ibmr.lkey != key) ||
-		     (type == lookup_remote && mr->ibmr.rkey != key) ||
-		     mr->pd != pd ||
-		     (access && !(access & mr->access)) ||
-		     mr->state != RXE_MEM_STATE_VALID)) {
-		rxe_drop_ref(mr);
-		mr = NULL;
-	}
-
-	return mr;
-}
-
 int rxe_invalidate_mr(struct rxe_qp *qp, struct rxe_mr *mr)
 {
 	// much more TODO here, can fail
@@ -599,6 +570,37 @@  int rxe_invalidate_mr(struct rxe_qp *qp, struct rxe_mr *mr)
 	return 0;
 }
 
+int rxe_mr_check_access(struct rxe_qp *qp, struct rxe_mr *mr,
+			int access, u64 va, u32 resid)
+{
+	int ret;
+	struct rxe_pd *pd = to_rpd(mr->ibmr.pd);
+
+	if (unlikely(mr->state != RXE_MEM_STATE_VALID)) {
+		pr_err("attempt to access a MR that is"
+			" not in the valid state\n");
+		return -EINVAL;
+	}
+
+	/* C10-56 */
+	if (unlikely(pd != qp->pd)) {
+		pr_err("attempt to access a MR with a"
+			" different PD than the QP\n");
+		return -EINVAL;
+	}
+
+	/* C10-57 */
+	if (unlikely(access && !(access & mr->access))) {
+		pr_err("attempt to access a MR that does"
+			" not have the required access rights\n");
+		return -EINVAL;
+	}
+
+	ret = mr_check_range(mr, va, resid);
+
+	return ret;
+}
+
 void rxe_mr_cleanup(struct rxe_pool_entry *arg)
 {
 	struct rxe_mr *mr = container_of(arg, typeof(*mr), pelem);
diff --git a/drivers/infiniband/sw/rxe/rxe_mw.c b/drivers/infiniband/sw/rxe/rxe_mw.c
index 0c774aadf6c7..6b998527b34b 100644
--- a/drivers/infiniband/sw/rxe/rxe_mw.c
+++ b/drivers/infiniband/sw/rxe/rxe_mw.c
@@ -44,8 +44,8 @@  void rxe_set_mw_rkey(struct rxe_mw *mw)
 
 	do {
 		get_random_bytes(&rkey, sizeof(rkey));
-		rkey |= 0x80000000;
-		if (likely((rkey & 0x7fffffff) &&
+		rkey |= IS_MW;
+		if (likely((rkey & ~IS_MW) &&
 			   (rxe_add_key(mw, &rkey) == 0)))
 			return;
 	} while (tries++ < 10);
@@ -77,10 +77,10 @@  struct ib_mw *rxe_alloc_mw(struct ib_pd *ibpd, enum ib_mw_type type,
 
 	switch (type) {
 	case IB_MW_TYPE_1:
-		mw->state	= RXE_MW_STATE_VALID;
+		mw->state	= RXE_MEM_STATE_VALID;
 		break;
 	case IB_MW_TYPE_2:
-		mw->state	= RXE_MW_STATE_FREE;
+		mw->state	= RXE_MEM_STATE_FREE;
 		break;
 	default:
 		pr_err("attempt to allocate MW with unknown type\n");
@@ -166,7 +166,7 @@  static int check_bind_mw(struct rxe_qp *qp, struct rxe_send_wqe *wqe,
 	}
 
 	if (unlikely((mw->ibmw.type == IB_MW_TYPE_1) &&
-			(mw->state != RXE_MW_STATE_VALID))) {
+			(mw->state != RXE_MEM_STATE_VALID))) {
 		pr_err("attempt to bind a type 1 MW not in the"
 			" valid state\n");
 		return -EINVAL;
@@ -195,7 +195,7 @@  static int check_bind_mw(struct rxe_qp *qp, struct rxe_send_wqe *wqe,
 
 	/* o10-37.2.30: */
 	if (unlikely((mw->ibmw.type == IB_MW_TYPE_2) &&
-			(mw->state != RXE_MW_STATE_FREE))) {
+			(mw->state != RXE_MEM_STATE_FREE))) {
 		pr_err("attempt to bind a type 2 MW not in the"
 			" free state\n");
 		return -EINVAL;
@@ -217,9 +217,6 @@  static int check_bind_mw(struct rxe_qp *qp, struct rxe_send_wqe *wqe,
 		return -EINVAL;
 	}
 
-	/* MR duplicates address and length in the private and ib
-	 * parts of the rxe_mr struct. TODO should only keep one. */
-
 	/* C10-75: */
 	if (mw->access & IB_ZERO_BASED) {
 		if (unlikely(wqe->wr.wr.umw.length > mr->length)) {
@@ -240,13 +237,29 @@  static int check_bind_mw(struct rxe_qp *qp, struct rxe_send_wqe *wqe,
 	return 0;
 }
 
-static void do_bind_mw(struct rxe_qp *qp, struct rxe_send_wqe *wqe,
+static int do_bind_mw(struct rxe_qp *qp, struct rxe_send_wqe *wqe,
 			struct rxe_mw *mw, struct rxe_mr *mr)
 {
+	int ret;
 	u32 rkey;
+	u32 new_rkey;
+
+	/* key part of new rkey is provided by user for type 2
+	 * and ibv_bind_mw() for type 1 MWs */
+	rkey = mw->ibmw.rkey;
+	rxe_drop_key(mw);
+	new_rkey = (rkey & 0xffffff00) | (wqe->wr.wr.umw.rkey & 0x000000ff);
+	ret = rxe_add_key(mw, &new_rkey);
+	if (ret) {
+		/* this should never happen */
+		pr_err("shouldn't happen unable to set new rkey\n");
+		/* try to put back the old one */
+		rxe_add_key(mw, &rkey);
+		return ret;
+	}
 
 	mw->access = wqe->wr.wr.umw.access;
-	mw->state = RXE_MW_STATE_VALID;
+	mw->state = RXE_MEM_STATE_VALID;
 	mw->addr = wqe->wr.wr.umw.addr;
 	mw->length = wqe->wr.wr.umw.length;
 
@@ -272,14 +285,7 @@  static void do_bind_mw(struct rxe_qp *qp, struct rxe_send_wqe *wqe,
 		mw->qp = qp;
 	}
 
-	/* key part of new rkey is provided by user for type 2
-	 * and ibv_bind_mw() for type 1 MWs */
-	rkey = mw->ibmw.rkey;
-	rxe_drop_key(mw);
-	rkey = (rkey & 0xffffff00) | (wqe->wr.wr.umw.rkey & 0x000000ff);
-	rxe_add_key(mw, &rkey);
-
-	return;
+	return 0;
 }
 
 int rxe_bind_mw(struct rxe_qp *qp, struct rxe_send_wqe *wqe)
@@ -326,7 +332,7 @@  int rxe_bind_mw(struct rxe_qp *qp, struct rxe_send_wqe *wqe)
 		goto err3;
 
 	/* implement the change */
-	do_bind_mw(qp, wqe, mw, mr);
+	ret = do_bind_mw(qp, wqe, mw, mr);
 err3:
 	spin_unlock_irqrestore(&mw->lock, flags);
 
@@ -340,15 +346,15 @@  int rxe_bind_mw(struct rxe_qp *qp, struct rxe_send_wqe *wqe)
 
 static int check_invalidate_mw(struct rxe_qp *qp, struct rxe_mw *mw)
 {
-	/* o10-37.2.26: */
-	if (unlikely(mw->ibmw.type == IB_MW_TYPE_1)) {
-		pr_err("attempt to invalidate a type 1 MW\n");
+	if (unlikely(mw->state != RXE_MEM_STATE_VALID)) {
+		pr_warn("attempt to invalidate a MW that"
+			" is not valid\n");
 		return -EINVAL;
 	}
 
-	if (unlikely(mw->state != RXE_MW_STATE_VALID)) {
-		pr_warn("attempt to invalidate a MW that"
-			" is not valid\n");
+	/* o10-37.2.26: */
+	if (unlikely(mw->ibmw.type == IB_MW_TYPE_1)) {
+		pr_err("attempt to invalidate a type 1 MW\n");
 		return -EINVAL;
 	}
 
@@ -366,7 +372,7 @@  static void do_invalidate_mw(struct rxe_mw *mw)
 	mw->access = 0;
 	mw->addr = 0; 
 	mw->length = 0;
-	mw->state = RXE_MW_STATE_FREE;
+	mw->state = RXE_MEM_STATE_FREE;
 }
 
 int rxe_invalidate_mw(struct rxe_qp *qp, struct rxe_mw *mw)
@@ -387,7 +393,7 @@  int rxe_invalidate_mw(struct rxe_qp *qp, struct rxe_mw *mw)
 	return ret;
 }
 
-static void do_deallocate_mw(struct rxe_mw *mw)
+static void do_dealloc_mw(struct rxe_mw *mw)
 {
 	mw->qp = NULL;
 
@@ -397,10 +403,11 @@  static void do_deallocate_mw(struct rxe_mw *mw)
 		mw->mr = NULL;
 	}
 
+	mw->ibmw.pd = NULL;
 	mw->access = 0;
 	mw->addr = 0; 
 	mw->length = 0;
-	mw->state = RXE_MW_STATE_INVALID;
+	mw->state = RXE_MEM_STATE_INVALID;
 }
 
 int rxe_dealloc_mw(struct ib_mw *ibmw)
@@ -411,7 +418,7 @@  int rxe_dealloc_mw(struct ib_mw *ibmw)
 
 	spin_lock_irqsave(&mw->lock, flags);
 
-	do_deallocate_mw(mw);
+	do_dealloc_mw(mw);
 
 	spin_unlock_irqrestore(&mw->lock, flags);
 
@@ -421,6 +428,54 @@  int rxe_dealloc_mw(struct ib_mw *ibmw)
 	return 0;
 }
 
+int rxe_mw_check_access(struct rxe_qp *qp, struct rxe_mw *mw,
+			int access, u64 va, u32 resid)
+{
+	struct rxe_pd *pd = to_rpd(mw->ibmw.pd);
+
+	if (unlikely(mw->state != RXE_MEM_STATE_VALID)) {
+		pr_err("attempt to access a MW that is"
+			" not in the valid state\n");
+		return -EINVAL;
+	}
+
+	/* C10-76.2.1 */
+	if (unlikely((mw->ibmw.type == IB_MW_TYPE_1) && (pd != qp->pd))) {
+		pr_err("attempt to access a type 1 MW with a"
+			" different PD than the QP\n");
+		return -EINVAL;
+	}
+
+	/* o10-37.2.43 */
+	if (unlikely((mw->ibmw.type == IB_MW_TYPE_2) && (mw->qp != qp))) {
+		pr_err("attempt to access a type 2 MW that is"
+			" associated with a different QP\n");
+		return -EINVAL;
+	}
+
+	/* C10-77 */
+	if (unlikely(access && !(access & mw->access))) {
+		pr_err("attempt to access a MW that does"
+			" not have the required access rights\n");
+		return -EINVAL;
+	}
+
+	if (mw->access & IB_ZERO_BASED) {
+		if (unlikely((va + resid) > mw->length)) {
+			pr_err("attempt to access a MW out of bounds\n");
+			return -EINVAL;
+		}
+	} else {
+		if (unlikely((va < mw->addr) ||
+			((va + resid) > (mw->addr + mw->length)))) {
+			pr_err("attempt to access a MW out of bounds\n");
+			return -EINVAL;
+		}
+	}
+
+	return 0;
+}
+
 void rxe_mw_cleanup(struct rxe_pool_entry *arg)
 {
 	struct rxe_mw *mw = container_of(arg, typeof(*mw), pelem);
diff --git a/drivers/infiniband/sw/rxe/rxe_req.c b/drivers/infiniband/sw/rxe/rxe_req.c
index ad747f230318..f0fa195fcc70 100644
--- a/drivers/infiniband/sw/rxe/rxe_req.c
+++ b/drivers/infiniband/sw/rxe/rxe_req.c
@@ -591,7 +591,7 @@  static int local_invalidate(struct rxe_qp *qp, struct rxe_send_wqe *wqe)
 	struct rxe_dev *rxe = to_rdev(qp->ibqp.device);
 	u32 key = wqe->wr.ex.invalidate_rkey;
 
-	if ((mr = rxe_pool_get_key(&rxe->mr_pool, &key))) {
+	if (!(key & IS_MW) && (mr = rxe_pool_get_key(&rxe->mr_pool, &key))) {
 		ret = rxe_invalidate_mr(qp, mr);
 		rxe_drop_ref(mr);
 	} else if ((mw = rxe_pool_get_key(&rxe->mw_pool, &key))) {
@@ -732,12 +732,7 @@  int rxe_requester(void *arg)
 	payload = (mask & RXE_WRITE_OR_SEND) ? wqe->dma.resid : 0;
 	if (payload > mtu) {
 		if (qp_type(qp) == IB_QPT_UD) {
-			/* C10-93.1.1: If the total sum of all the buffer lengths specified for a
-			 * UD message exceeds the MTU of the port as returned by QueryHCA, the CI
-			 * shall not emit any packets for this message. Further, the CI shall not
-			 * generate an error due to this condition.
-			 */
-
+			/* C10-93.1.1 */
 			/* fake a successful UD send */
 			wqe->first_psn = qp->req.psn;
 			wqe->last_psn = qp->req.psn;
@@ -747,8 +742,13 @@  int rxe_requester(void *arg)
 						       qp->req.wqe_index);
 			wqe->state = wqe_state_done;
 			wqe->status = IB_WC_SUCCESS;
-			// TODO why?? why not just treat the same as a
-			// successful wqe and go to next wqe?
+
+			/* TODO why?? why not just treat the same as a
+			 * successful wqe and go to next wqe?
+			 * __rxe_do_task probably shouldn't be used
+			 * it reenters the completion task which may
+			 * already be running
+			 */
 			__rxe_do_task(&qp->comp.task);
 			goto again;
 		}
@@ -789,7 +789,7 @@  int rxe_requester(void *arg)
 			goto exit;
 		}
 
-		wqe->status = IB_WC_LOC_PROT_ERR;	// ?? FIXME
+		wqe->status = IB_WC_LOC_PROT_ERR;
 		goto err;
 	}
 
@@ -797,17 +797,12 @@  int rxe_requester(void *arg)
 
 	goto next_wqe;
 
-	// TODO this can be cleaned up
 err:
 	/* we come here if an error occured while processing
 	 * a send wqe. The completer will put the qp in error
 	 * state and no more wqes will be processed unless
-	 * the qp is cleaned up and restarted. We do not want
-	 * to be called again */
+	 * the qp is cleaned up and restarted. */
 	wqe->state = wqe_state_error;
-	// ?? we want to force the qp into error state before
-	// anyone else has a chance to process another wqe but
-	// this could collide with an already running completer
 	__rxe_do_task(&qp->comp.task);
 	ret = -EAGAIN;
 	goto done;
@@ -816,14 +811,12 @@  int rxe_requester(void *arg)
 	/* we come here if either there are no more wqes in the send
 	 * queue or we are blocked waiting for some resource or event.
 	 * The current wqe will be restarted or new wqe started when
-	 * there is work to do. */
+	 * there is something to do. */
 	ret = -EAGAIN;
 	goto done;
 
 again:
-	/* we come here if we are done with the current wqe but want to
-	 * get called again. Mostly we loop back to next wqe so should
-	 * be all one way or the other */
+	/* we come here if we need to exit and reenter the task */
 	ret = 0;
 	goto done;
 
diff --git a/drivers/infiniband/sw/rxe/rxe_resp.c b/drivers/infiniband/sw/rxe/rxe_resp.c
index 49cd77cd6264..0bfea50505d1 100644
--- a/drivers/infiniband/sw/rxe/rxe_resp.c
+++ b/drivers/infiniband/sw/rxe/rxe_resp.c
@@ -417,7 +417,9 @@  static enum resp_states check_length(struct rxe_qp *qp,
 static enum resp_states check_rkey(struct rxe_qp *qp,
 				   struct rxe_pkt_info *pkt)
 {
-	struct rxe_mr *mr = NULL;
+	struct rxe_dev *rxe = to_rdev(qp->ibqp.device);
+	struct rxe_mr *mr;
+	struct rxe_mw *mw;
 	u64 va;
 	u32 rkey;
 	u32 resid;
@@ -425,6 +427,7 @@  static enum resp_states check_rkey(struct rxe_qp *qp,
 	int mtu = qp->mtu;
 	enum resp_states state;
 	int access;
+	unsigned long flags;
 
 	if (pkt->mask & (RXE_READ_MASK | RXE_WRITE_MASK)) {
 		if (pkt->mask & RXE_RETH_MASK) {
@@ -432,13 +435,16 @@  static enum resp_states check_rkey(struct rxe_qp *qp,
 			qp->resp.rkey = reth_rkey(pkt);
 			qp->resp.resid = reth_len(pkt);
 			qp->resp.length = reth_len(pkt);
+			qp->resp.offset = 0;
 		}
-		access = (pkt->mask & RXE_READ_MASK) ? IB_ACCESS_REMOTE_READ
-						     : IB_ACCESS_REMOTE_WRITE;
+		access = (pkt->mask & RXE_READ_MASK)
+				? IB_ACCESS_REMOTE_READ
+				: IB_ACCESS_REMOTE_WRITE;
 	} else if (pkt->mask & RXE_ATOMIC_MASK) {
 		qp->resp.va = atmeth_va(pkt);
 		qp->resp.rkey = atmeth_rkey(pkt);
 		qp->resp.resid = sizeof(u64);
+		qp->resp.offset = 0;
 		access = IB_ACCESS_REMOTE_ATOMIC;
 	} else {
 		return RESPST_EXECUTE;
@@ -456,18 +462,31 @@  static enum resp_states check_rkey(struct rxe_qp *qp,
 	resid	= qp->resp.resid;
 	pktlen	= payload_size(pkt);
 
-	mr = lookup_mr(qp->pd, access, rkey, lookup_remote);
-	if (!mr) {
-		state = RESPST_ERR_RKEY_VIOLATION;
-		goto err;
-	}
+	if ((rkey & IS_MW) && (mw = rxe_pool_get_key(&rxe->mw_pool, &rkey))) {
+		spin_lock_irqsave(&mw->lock, flags);
+		if (rxe_mw_check_access(qp, mw, access, va, resid)) {
+			spin_unlock_irqrestore(&mw->lock, flags);
+			rxe_drop_ref(mw);
+			state = RESPST_ERR_RKEY_VIOLATION;
+			goto err;
+		}
 
-	if (unlikely(mr->state == RXE_MEM_STATE_FREE)) {
-		state = RESPST_ERR_RKEY_VIOLATION;
-		goto err;
-	}
+		mr = mw->mr;
+		rxe_add_ref(mr);
+
+		if (mw->access & IB_ZERO_BASED)
+			qp->resp.offset = mw->addr;
 
-	if (mr_check_range(mr, va, resid)) {
+		spin_unlock_irqrestore(&mw->lock, flags);
+		rxe_drop_ref(mw);
+	} else if ((mr = rxe_pool_get_key(&rxe->mr_pool, &rkey)) &&
+		   (mr->rkey == rkey)) {
+		if (rxe_mr_check_access(qp, mr, access, va, resid)) {
+			state = RESPST_ERR_RKEY_VIOLATION;
+			goto err;
+		}
+	} else {
+		pr_err("no MR/MW found with rkey = 0x%08x\n", rkey);
 		state = RESPST_ERR_RKEY_VIOLATION;
 		goto err;
 	}
@@ -525,8 +544,8 @@  static enum resp_states write_data_in(struct rxe_qp *qp,
 	int	err;
 	int data_len = payload_size(pkt);
 
-	err = rxe_mr_copy(qp->resp.mr, qp->resp.va, payload_addr(pkt),
-			   data_len, to_mr_obj, NULL);
+	err = rxe_mr_copy(qp->resp.mr, qp->resp.va + qp->resp.offset,
+			payload_addr(pkt), data_len, to_mr_obj, NULL);
 	if (err) {
 		rc = RESPST_ERR_RKEY_VIOLATION;
 		goto out;
@@ -545,17 +564,11 @@  static DEFINE_SPINLOCK(atomic_ops_lock);
 static enum resp_states process_atomic(struct rxe_qp *qp,
 				       struct rxe_pkt_info *pkt)
 {
-	u64 iova = atmeth_va(pkt);
 	u64 *vaddr;
 	enum resp_states ret;
 	struct rxe_mr *mr = qp->resp.mr;
 
-	if (mr->state != RXE_MEM_STATE_VALID) {
-		ret = RESPST_ERR_RKEY_VIOLATION;
-		goto out;
-	}
-
-	vaddr = iova_to_vaddr(mr, iova, sizeof(u64));
+	vaddr = iova_to_vaddr(mr, qp->resp.va + qp->resp.offset, sizeof(u64));
 
 	/* check vaddr is 8 bytes aligned. */
 	if (!vaddr || (uintptr_t)vaddr & 7) {
diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.h b/drivers/infiniband/sw/rxe/rxe_verbs.h
index 2fe8433d0801..b4855d3ea6f4 100644
--- a/drivers/infiniband/sw/rxe/rxe_verbs.h
+++ b/drivers/infiniband/sw/rxe/rxe_verbs.h
@@ -210,6 +210,7 @@  struct rxe_resp_info {
 
 	/* RDMA read / atomic only */
 	u64			va;
+	u64			offset;
 	struct rxe_mr		*mr;
 	u32			resid;
 	u32			rkey;
@@ -289,7 +290,8 @@  struct rxe_qp {
 	struct execute_work	cleanup_work;
 };
 
-enum rxe_mr_state {
+/* common state for rxe_mr and rxe_mw */
+enum rxe_mem_state {
 	RXE_MEM_STATE_ZOMBIE,
 	RXE_MEM_STATE_INVALID,
 	RXE_MEM_STATE_FREE,
@@ -325,7 +327,7 @@  struct rxe_mr {
 	u32			lkey;
 	u32			rkey;
 
-	enum rxe_mr_state	state;
+	enum rxe_mem_state	state;
 	enum rxe_mr_type	type;
 	u64			va;
 	u64			iova;
@@ -349,24 +351,21 @@  struct rxe_mr {
 	struct rxe_map		**map;
 };
 
-enum rxe_mw_state {
-	RXE_MW_STATE_INVALID,
-	RXE_MW_STATE_FREE,
-	RXE_MW_STATE_VALID,
-};
-
 enum rxe_send_flags {
 	/* flag indicaes bind call came through verbs API */
 	RXE_BIND_MW		= (1 << 0),
 };
 
+/* use high order bit to separate MW and MR rkeys */
+#define IS_MW	(1 << 31)
+
 struct rxe_mw {
 	struct rxe_pool_entry	pelem;
 	struct ib_mw		ibmw;
 	struct rxe_mr		*mr;
 	struct rxe_qp		*qp;	/* type 2B only */
 	spinlock_t		lock;
-	enum rxe_mw_state	state;
+	enum rxe_mem_state	state;
 	u32			access;
 	u64			addr;
 	u64			length;