
[2/2] IB/qib: fix race between qib_error_qp() and receive packet processing

Message ID 20100802223929.8701.62691.stgit@chromite.mv.qlogic.com
State Accepted

Commit Message

Ralph Campbell Aug. 2, 2010, 10:39 p.m. UTC
qib_error_qp() flushes receive work requests and modifies the QP's receive state, but it only required s_lock, while the receive packet path held only r_lock, so the two could run concurrently and corrupt QP state. Close the race by requiring both r_lock and s_lock (with interrupts disabled) around qib_error_qp(): take r_lock before s_lock in rc_timeout() and in the SDMA error path, and move the r_lock acquisition out of qib_rc_rcv(), qib_uc_rcv() and qib_ud_rcv() into their caller qib_qp_rcv(), which now also checks QIB_PROCESS_RECV_OK under r_lock. With the whole receive path protected by r_lock, the redundant "double check ... now that we hold the s_lock" tests can be dropped.
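
The lock ordering the patch establishes is sketched below for reference: r_lock is always taken before s_lock, with interrupts disabled, wherever qib_error_qp() may be called. The wrapper function qp_error_under_both_locks() is hypothetical and only illustrates the rule; qib_error_qp(), the r_lock/s_lock fields and the last-WQE event handling are taken from the driver.

/*
 * Sketch only: illustrates the r_lock -> s_lock nesting this patch
 * establishes around qib_error_qp().  Assumes the qib driver headers
 * (qib_verbs.h) are included; the wrapper name is not part of the driver.
 */
static void qp_error_under_both_locks(struct qib_qp *qp, enum ib_wc_status err)
{
	unsigned long flags;
	int lastwqe;

	spin_lock_irqsave(&qp->r_lock, flags);	/* serializes with packet receive */
	spin_lock(&qp->s_lock);			/* serializes with the send engine */

	lastwqe = qib_error_qp(qp, err);	/* now requires both locks held */

	spin_unlock(&qp->s_lock);
	spin_unlock_irqrestore(&qp->r_lock, flags);

	if (lastwqe) {
		struct ib_event ev;

		ev.device = qp->ibqp.device;
		ev.element.qp = &qp->ibqp;
		ev.event = IB_EVENT_QP_LAST_WQE_REACHED;
		qp->ibqp.event_handler(&ev, qp->ibqp.qp_context);
	}
}

rc_timeout() and the SDMA error path in this patch follow the same nesting, while qib_qp_rcv() takes r_lock alone and holds it across the per-protocol receive handlers.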

Patch

diff --git a/drivers/infiniband/hw/qib/qib_qp.c b/drivers/infiniband/hw/qib/qib_qp.c
index e0f65e3..6c39851 100644
--- a/drivers/infiniband/hw/qib/qib_qp.c
+++ b/drivers/infiniband/hw/qib/qib_qp.c
@@ -450,7 +450,7 @@  static void clear_mr_refs(struct qib_qp *qp, int clr_sends)
  *
  * Flushes both send and receive work queues.
  * Returns true if last WQE event should be generated.
- * The QP s_lock should be held and interrupts disabled.
+ * The QP r_lock and s_lock should be held and interrupts disabled.
  * If we are already in error state, just return.
  */
 int qib_error_qp(struct qib_qp *qp, enum ib_wc_status err)
diff --git a/drivers/infiniband/hw/qib/qib_rc.c b/drivers/infiniband/hw/qib/qib_rc.c
index 40c0a37..a093111 100644
--- a/drivers/infiniband/hw/qib/qib_rc.c
+++ b/drivers/infiniband/hw/qib/qib_rc.c
@@ -868,7 +868,7 @@  done:
 
 /*
  * Back up requester to resend the last un-ACKed request.
- * The QP s_lock should be held and interrupts disabled.
+ * The QP r_lock and s_lock should be held and interrupts disabled.
  */
 static void qib_restart_rc(struct qib_qp *qp, u32 psn, int wait)
 {
@@ -911,7 +911,8 @@  static void rc_timeout(unsigned long arg)
 	struct qib_ibport *ibp;
 	unsigned long flags;
 
-	spin_lock_irqsave(&qp->s_lock, flags);
+	spin_lock_irqsave(&qp->r_lock, flags);
+	spin_lock(&qp->s_lock);
 	if (qp->s_flags & QIB_S_TIMER) {
 		ibp = to_iport(qp->ibqp.device, qp->port_num);
 		ibp->n_rc_timeouts++;
@@ -920,7 +921,8 @@  static void rc_timeout(unsigned long arg)
 		qib_restart_rc(qp, qp->s_last_psn + 1, 1);
 		qib_schedule_send(qp);
 	}
-	spin_unlock_irqrestore(&qp->s_lock, flags);
+	spin_unlock(&qp->s_lock);
+	spin_unlock_irqrestore(&qp->r_lock, flags);
 }
 
 /*
@@ -1414,10 +1416,6 @@  static void qib_rc_rcv_resp(struct qib_ibport *ibp,
 
 	spin_lock_irqsave(&qp->s_lock, flags);
 
-	/* Double check we can process this now that we hold the s_lock. */
-	if (!(ib_qib_state_ops[qp->state] & QIB_PROCESS_RECV_OK))
-		goto ack_done;
-
 	/* Ignore invalid responses. */
 	if (qib_cmp24(psn, qp->s_next_psn) >= 0)
 		goto ack_done;
@@ -1661,9 +1659,6 @@  static int qib_rc_rcv_error(struct qib_other_headers *ohdr,
 	ibp->n_rc_dupreq++;
 
 	spin_lock_irqsave(&qp->s_lock, flags);
-	/* Double check we can process this now that we hold the s_lock. */
-	if (!(ib_qib_state_ops[qp->state] & QIB_PROCESS_RECV_OK))
-		goto unlock_done;
 
 	for (i = qp->r_head_ack_queue; ; i = prev) {
 		if (i == qp->s_tail_ack_queue)
@@ -1878,9 +1873,6 @@  void qib_rc_rcv(struct qib_ctxtdata *rcd, struct qib_ib_header *hdr,
 	psn = be32_to_cpu(ohdr->bth[2]);
 	opcode >>= 24;
 
-	/* Prevent simultaneous processing after APM on different CPUs */
-	spin_lock(&qp->r_lock);
-
 	/*
 	 * Process responses (ACKs) before anything else.  Note that the
 	 * packet sequence number will be for something in the send work
@@ -1891,14 +1883,14 @@  void qib_rc_rcv(struct qib_ctxtdata *rcd, struct qib_ib_header *hdr,
 	    opcode <= OP(ATOMIC_ACKNOWLEDGE)) {
 		qib_rc_rcv_resp(ibp, ohdr, data, tlen, qp, opcode, psn,
 				hdrsize, pmtu, rcd);
-		goto runlock;
+		return;
 	}
 
 	/* Compute 24 bits worth of difference. */
 	diff = qib_cmp24(psn, qp->r_psn);
 	if (unlikely(diff)) {
 		if (qib_rc_rcv_error(ohdr, data, qp, opcode, psn, diff, rcd))
-			goto runlock;
+			return;
 		goto send_ack;
 	}
 
@@ -2090,9 +2082,6 @@  send_last:
 		if (next > QIB_MAX_RDMA_ATOMIC)
 			next = 0;
 		spin_lock_irqsave(&qp->s_lock, flags);
-		/* Double check we can process this while holding the s_lock. */
-		if (!(ib_qib_state_ops[qp->state] & QIB_PROCESS_RECV_OK))
-			goto srunlock;
 		if (unlikely(next == qp->s_tail_ack_queue)) {
 			if (!qp->s_ack_queue[next].sent)
 				goto nack_inv_unlck;
@@ -2146,7 +2135,7 @@  send_last:
 		qp->s_flags |= QIB_S_RESP_PENDING;
 		qib_schedule_send(qp);
 
-		goto srunlock;
+		goto sunlock;
 	}
 
 	case OP(COMPARE_SWAP):
@@ -2165,9 +2154,6 @@  send_last:
 		if (next > QIB_MAX_RDMA_ATOMIC)
 			next = 0;
 		spin_lock_irqsave(&qp->s_lock, flags);
-		/* Double check we can process this while holding the s_lock. */
-		if (!(ib_qib_state_ops[qp->state] & QIB_PROCESS_RECV_OK))
-			goto srunlock;
 		if (unlikely(next == qp->s_tail_ack_queue)) {
 			if (!qp->s_ack_queue[next].sent)
 				goto nack_inv_unlck;
@@ -2213,7 +2199,7 @@  send_last:
 		qp->s_flags |= QIB_S_RESP_PENDING;
 		qib_schedule_send(qp);
 
-		goto srunlock;
+		goto sunlock;
 	}
 
 	default:
@@ -2227,7 +2213,7 @@  send_last:
 	/* Send an ACK if requested or required. */
 	if (psn & (1 << 31))
 		goto send_ack;
-	goto runlock;
+	return;
 
 rnr_nak:
 	qp->r_nak_state = IB_RNR_NAK | qp->r_min_rnr_timer;
@@ -2238,7 +2224,7 @@  rnr_nak:
 		atomic_inc(&qp->refcount);
 		list_add_tail(&qp->rspwait, &rcd->qp_wait_list);
 	}
-	goto runlock;
+	return;
 
 nack_op_err:
 	qib_rc_error(qp, IB_WC_LOC_QP_OP_ERR);
@@ -2250,7 +2236,7 @@  nack_op_err:
 		atomic_inc(&qp->refcount);
 		list_add_tail(&qp->rspwait, &rcd->qp_wait_list);
 	}
-	goto runlock;
+	return;
 
 nack_inv_unlck:
 	spin_unlock_irqrestore(&qp->s_lock, flags);
@@ -2264,7 +2250,7 @@  nack_inv:
 		atomic_inc(&qp->refcount);
 		list_add_tail(&qp->rspwait, &rcd->qp_wait_list);
 	}
-	goto runlock;
+	return;
 
 nack_acc_unlck:
 	spin_unlock_irqrestore(&qp->s_lock, flags);
@@ -2274,13 +2260,6 @@  nack_acc:
 	qp->r_ack_psn = qp->r_psn;
 send_ack:
 	qib_send_rc_ack(qp);
-runlock:
-	spin_unlock(&qp->r_lock);
-	return;
-
-srunlock:
-	spin_unlock_irqrestore(&qp->s_lock, flags);
-	spin_unlock(&qp->r_lock);
 	return;
 
 sunlock:
diff --git a/drivers/infiniband/hw/qib/qib_sdma.c b/drivers/infiniband/hw/qib/qib_sdma.c
index b845688..cad4449 100644
--- a/drivers/infiniband/hw/qib/qib_sdma.c
+++ b/drivers/infiniband/hw/qib/qib_sdma.c
@@ -656,6 +656,7 @@  unmap:
 	}
 	qp = tx->qp;
 	qib_put_txreq(tx);
+	spin_lock(&qp->r_lock);
 	spin_lock(&qp->s_lock);
 	if (qp->ibqp.qp_type == IB_QPT_RC) {
 		/* XXX what about error sending RDMA read responses? */
@@ -664,6 +665,7 @@  unmap:
 	} else if (qp->s_wqe)
 		qib_send_complete(qp, qp->s_wqe, IB_WC_GENERAL_ERR);
 	spin_unlock(&qp->s_lock);
+	spin_unlock(&qp->r_lock);
 	/* return zero to process the next send work request */
 	goto unlock;
 
diff --git a/drivers/infiniband/hw/qib/qib_uc.c b/drivers/infiniband/hw/qib/qib_uc.c
index 6c7fe78..b9c8b63 100644
--- a/drivers/infiniband/hw/qib/qib_uc.c
+++ b/drivers/infiniband/hw/qib/qib_uc.c
@@ -272,9 +272,6 @@  void qib_uc_rcv(struct qib_ibport *ibp, struct qib_ib_header *hdr,
 	opcode >>= 24;
 	memset(&wc, 0, sizeof wc);
 
-	/* Prevent simultaneous processing after APM on different CPUs */
-	spin_lock(&qp->r_lock);
-
 	/* Compare the PSN verses the expected PSN. */
 	if (unlikely(qib_cmp24(psn, qp->r_psn) != 0)) {
 		/*
@@ -534,7 +531,6 @@  rdma_last:
 	}
 	qp->r_psn++;
 	qp->r_state = opcode;
-	spin_unlock(&qp->r_lock);
 	return;
 
 rewind:
@@ -542,12 +538,10 @@  rewind:
 	qp->r_sge.num_sge = 0;
 drop:
 	ibp->n_pkt_drops++;
-	spin_unlock(&qp->r_lock);
 	return;
 
 op_err:
 	qib_rc_error(qp, IB_WC_LOC_QP_OP_ERR);
-	spin_unlock(&qp->r_lock);
 	return;
 
 sunlock:
diff --git a/drivers/infiniband/hw/qib/qib_ud.c b/drivers/infiniband/hw/qib/qib_ud.c
index c838cda..e1b3da2 100644
--- a/drivers/infiniband/hw/qib/qib_ud.c
+++ b/drivers/infiniband/hw/qib/qib_ud.c
@@ -535,13 +535,6 @@  void qib_ud_rcv(struct qib_ibport *ibp, struct qib_ib_header *hdr,
 	wc.byte_len = tlen + sizeof(struct ib_grh);
 
 	/*
-	 * We need to serialize getting a receive work queue entry and
-	 * generating a completion for it against QPs sending to this QP
-	 * locally.
-	 */
-	spin_lock(&qp->r_lock);
-
-	/*
 	 * Get the next work request entry to find where to put the data.
 	 */
 	if (qp->r_flags & QIB_R_REUSE_SGE)
@@ -552,19 +545,19 @@  void qib_ud_rcv(struct qib_ibport *ibp, struct qib_ib_header *hdr,
 		ret = qib_get_rwqe(qp, 0);
 		if (ret < 0) {
 			qib_rc_error(qp, IB_WC_LOC_QP_OP_ERR);
-			goto bail_unlock;
+			return;
 		}
 		if (!ret) {
 			if (qp->ibqp.qp_num == 0)
 				ibp->n_vl15_dropped++;
-			goto bail_unlock;
+			return;
 		}
 	}
 	/* Silently drop packets which are too big. */
 	if (unlikely(wc.byte_len > qp->r_len)) {
 		qp->r_flags |= QIB_R_REUSE_SGE;
 		ibp->n_pkt_drops++;
-		goto bail_unlock;
+		return;
 	}
 	if (has_grh) {
 		qib_copy_sge(&qp->r_sge, &hdr->u.l.grh,
@@ -579,7 +572,7 @@  void qib_ud_rcv(struct qib_ibport *ibp, struct qib_ib_header *hdr,
 			qp->r_sge.sge = *qp->r_sge.sg_list++;
 	}
 	if (!test_and_clear_bit(QIB_R_WRID_VALID, &qp->r_aflags))
-		goto bail_unlock;
+		return;
 	wc.wr_id = qp->r_wr_id;
 	wc.status = IB_WC_SUCCESS;
 	wc.opcode = IB_WC_RECV;
@@ -601,7 +594,5 @@  void qib_ud_rcv(struct qib_ibport *ibp, struct qib_ib_header *hdr,
 	qib_cq_enter(to_icq(qp->ibqp.recv_cq), &wc,
 		     (ohdr->bth[0] &
 			cpu_to_be32(IB_BTH_SOLICITED)) != 0);
-bail_unlock:
-	spin_unlock(&qp->r_lock);
 bail:;
 }
diff --git a/drivers/infiniband/hw/qib/qib_verbs.c b/drivers/infiniband/hw/qib/qib_verbs.c
index cda8f41..9fab404 100644
--- a/drivers/infiniband/hw/qib/qib_verbs.c
+++ b/drivers/infiniband/hw/qib/qib_verbs.c
@@ -550,10 +550,12 @@  static void qib_qp_rcv(struct qib_ctxtdata *rcd, struct qib_ib_header *hdr,
 {
 	struct qib_ibport *ibp = &rcd->ppd->ibport_data;
 
+	spin_lock(&qp->r_lock);
+
 	/* Check for valid receive state. */
 	if (!(ib_qib_state_ops[qp->state] & QIB_PROCESS_RECV_OK)) {
 		ibp->n_pkt_drops++;
-		return;
+		goto unlock;
 	}
 
 	switch (qp->ibqp.qp_type) {
@@ -577,6 +579,9 @@  static void qib_qp_rcv(struct qib_ctxtdata *rcd, struct qib_ib_header *hdr,
 	default:
 		break;
 	}
+
+unlock:
+	spin_unlock(&qp->r_lock);
 }
 
 /**