diff mbox series

[for-next,6/9] IB/rdmavt: Add new completion inline

Message ID 20190411141640.19651.88619.stgit@scvm10.sc.intel.com (mailing list archive)
State Changes Requested
Delegated to: Jason Gunthorpe
Headers show
Series IB/hfi1, rdmavt: Smattering of fixes and code cleanups/improvements | expand

Commit Message

Dennis Dalessandro April 11, 2019, 2:16 p.m. UTC
From: Mike Marciniszyn <mike.marciniszyn@intel.com>

There is opencoded send completion logic all over all
the drivers.

We need to convert to this routine to enforce ordering
issues for completions.  This routine fixes an ordering
issue where the read of the SWQE fields necessary for creating
the completion can race with a post send if the post send catches
a send queue at the edge of being full.  It is possible in that situation
to read SWQE fields that are being written.

This new routine ensures that SWQE fields are read prior to advancing
the index that post send uses to determine queue fullness.

Reviewed-by: Michael J. Ruhl <michael.j.ruhl@intel.com>
Signed-off-by: Mike Marciniszyn <mike.marciniszyn@intel.com>
Signed-off-by: Dennis Dalessandro <dennis.dalessandro@intel.com>
---
 include/rdma/rdmavt_qp.h |   73 ++++++++++++++++++++++++++++++++++++++++++++++
 1 files changed, 73 insertions(+), 0 deletions(-)

Comments

Jason Gunthorpe April 24, 2019, 2:47 p.m. UTC | #1
On Thu, Apr 11, 2019 at 07:16:48AM -0700, Dennis Dalessandro wrote:
> From: Mike Marciniszyn <mike.marciniszyn@intel.com>
> 
> There is opencoded send completion logic all over all
> the drivers.
> 
> We need to convert to this routine to enforce ordering
> issues for completions.  This routine fixes an ordering
> issue where the read of the SWQE fields necessary for creating
> the completion can race with a post send if the post send catches
> a send queue at the edge of being full.  It is possible in that situation
> to read SWQE fields that are being written.
> 
> This new routine ensures that SWQE fields are read prior to advancing
> the index that post send uses to determine queue fullness.
> 
> Reviewed-by: Michael J. Ruhl <michael.j.ruhl@intel.com>
> Signed-off-by: Mike Marciniszyn <mike.marciniszyn@intel.com>
> Signed-off-by: Dennis Dalessandro <dennis.dalessandro@intel.com>
>  include/rdma/rdmavt_qp.h |   73 ++++++++++++++++++++++++++++++++++++++++++++++
>  1 files changed, 73 insertions(+), 0 deletions(-)
> 
> diff --git a/include/rdma/rdmavt_qp.h b/include/rdma/rdmavt_qp.h
> index 68e38c2..ca6b88f 100644
> +++ b/include/rdma/rdmavt_qp.h
> @@ -737,6 +737,79 @@ static inline void rvt_put_qp_swqe(struct rvt_qp *qp, struct rvt_swqe *wqe)
>  		atomic_dec(&ibah_to_rvtah(wqe->ud_wr.ah)->refcount);
>  }
>  
> +/**
> + * rvt_qp_swqe_incr - increment ring index
> + * @qp: the qp
> + * @val: the starting value
> + *
> + * Return: the new value wrapping as appropriate
> + */
> +static inline u32
> +rvt_qp_swqe_incr(struct rvt_qp *qp, u32 val)
> +{
> +	if (++val >= qp->s_size)
> +		val = 0;
> +	return val;
> +}
> +
> +/**
> + * rvt_qp_complete_swqe - insert send completion
> + * @qp: the qp
> + * @wqe: the send wqe
> + * @opcode: wc operation (driver dependent)
> + * @status: completion status
> + *
> + * Update the s_last information, and then insert a send
> + * completion into the completion
> + * queue if the qp indicates it should be done.
> + *
> + * See IBTA 10.7.3.1 for info on completion
> + * control.
> + *
> + * Return: new last
> + */
> +static inline u32
> +rvt_qp_complete_swqe(struct rvt_qp *qp,
> +		     struct rvt_swqe *wqe,
> +		     enum ib_wc_opcode opcode,
> +		     enum ib_wc_status status)
> +{
> +	bool need_completion;
> +	u64 wr_id;
> +	u32 byte_len, last;
> +	int flags = wqe->wr.send_flags;
> +
> +	rvt_put_qp_swqe(qp, wqe);
> +
> +	need_completion =
> +		!(flags & RVT_SEND_RESERVE_USED) &&
> +		(!(qp->s_flags & RVT_S_SIGNAL_REQ_WR) ||
> +		(flags & IB_SEND_SIGNALED) ||
> +		status != IB_WC_SUCCESS);
> +	if (need_completion) {
> +		wr_id = wqe->wr.wr_id;
> +		byte_len = wqe->length;
> +		/* above fields required before writing s_last */
> +		smp_rmb();

That comment doesn't really read very well..

> +	}
> +	last = rvt_qp_swqe_incr(qp, qp->s_last);
> +	/* see post_send() */

post_send() doesn't seem to actually exist. Does it mean rvt_post_send?

> +	smp_store_mb(qp->s_last, last);

Looks like wrong use of a barrier to me..

I guess this is trying to synchronize s_last so that it can read wqe
without worrying that the rvt_post_send will write to the wqe at the
same time?

In which case this is not right. 

The code here should simply do smp_store_release() which will contain
the reads before the store

And the reader side must do smp_load_acquire() which will contain the
writes to after the read.

READ_ONCE does not accomplish that, and the smp_rmb/store_mb has an
unnecessary extra barrier.

Jason
diff mbox series

Patch

diff --git a/include/rdma/rdmavt_qp.h b/include/rdma/rdmavt_qp.h
index 68e38c2..ca6b88f 100644
--- a/include/rdma/rdmavt_qp.h
+++ b/include/rdma/rdmavt_qp.h
@@ -737,6 +737,79 @@  static inline void rvt_put_qp_swqe(struct rvt_qp *qp, struct rvt_swqe *wqe)
 		atomic_dec(&ibah_to_rvtah(wqe->ud_wr.ah)->refcount);
 }
 
+/**
+ * rvt_qp_swqe_incr - increment ring index
+ * @qp: the qp
+ * @val: the starting value
+ *
+ * Return: the new value wrapping as appropriate
+ */
+static inline u32
+rvt_qp_swqe_incr(struct rvt_qp *qp, u32 val)
+{
+	if (++val >= qp->s_size)
+		val = 0;
+	return val;
+}
+
+/**
+ * rvt_qp_complete_swqe - insert send completion
+ * @qp: the qp
+ * @wqe: the send wqe
+ * @opcode: wc operation (driver dependent)
+ * @status: completion status
+ *
+ * Update the s_last information, and then insert a send
+ * completion into the completion
+ * queue if the qp indicates it should be done.
+ *
+ * See IBTA 10.7.3.1 for info on completion
+ * control.
+ *
+ * Return: new last
+ */
+static inline u32
+rvt_qp_complete_swqe(struct rvt_qp *qp,
+		     struct rvt_swqe *wqe,
+		     enum ib_wc_opcode opcode,
+		     enum ib_wc_status status)
+{
+	bool need_completion;
+	u64 wr_id;
+	u32 byte_len, last;
+	int flags = wqe->wr.send_flags;
+
+	rvt_put_qp_swqe(qp, wqe);
+
+	need_completion =
+		!(flags & RVT_SEND_RESERVE_USED) &&
+		(!(qp->s_flags & RVT_S_SIGNAL_REQ_WR) ||
+		(flags & IB_SEND_SIGNALED) ||
+		status != IB_WC_SUCCESS);
+	if (need_completion) {
+		wr_id = wqe->wr.wr_id;
+		byte_len = wqe->length;
+		/* above fields required before writing s_last */
+		smp_rmb();
+	}
+	last = rvt_qp_swqe_incr(qp, qp->s_last);
+	/* see post_send() */
+	smp_store_mb(qp->s_last, last);
+	if (need_completion) {
+		struct ib_wc w = {
+			.wr_id = wr_id,
+			.status = status,
+			.opcode = opcode,
+			.qp = &qp->ibqp,
+			.byte_len = byte_len,
+		};
+
+		rvt_cq_enter(ibcq_to_rvtcq(qp->ibqp.send_cq), &w,
+			     status != IB_WC_SUCCESS);
+	}
+	return last;
+}
+
 extern const int  ib_rvt_state_ops[];
 
 struct rvt_dev_info;