diff mbox series

[rdma-next,1/3] IB/mad: Replace MAD's refcount with a state machine

Message ID b6b991b75a7d8cce465a6c917ef0db1c264165bd.1733233636.git.leonro@nvidia.com (mailing list archive)
State New
Headers show
Series Add Flow Control for Solicited MADs | expand

Commit Message

Leon Romanovsky Dec. 3, 2024, 1:52 p.m. UTC
From: Or Har-Toov <ohartoov@nvidia.com>

Replace the refcount mechanism with a 'state' field to track the status
of MADs send work requests (WRs). The state machine better represents
the stages in the MAD lifecycle, specifically indicating whether the
MAD is waiting for a response or a completion.

The existing refcount only takes two values:
 * 1 - MAD is waiting either for completion or for response.
 * 2 - MAD is waiting for both a response and a completion. This is
   also the value when a response was received before the completion
   notification.

The current state transitions are not clearly visible, and developers
need to infer the state from the refcount's value, which is
error-prone and difficult to follow.

Thus, replace it with a state machine as follows:
 * IB_MAD_STATE_SEND_START - MAD was sent to the QP and is waiting
   for completion notification
 * IB_MAD_STATE_WAIT_RESP - MAD send completed successfully, waiting
   for a response
 * IB_MAD_STATE_EARLY_RESP - Response came early, before send
   completion notification
 * IB_MAD_STATE_DONE - MAD processing completed

Signed-off-by: Or Har-Toov <ohartoov@nvidia.com>
Reviewed-by: Maher Sanalla <msanalla@nvidia.com>
Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
---
 drivers/infiniband/core/mad.c      | 44 ++++++++++++++----------------
 drivers/infiniband/core/mad_priv.h | 10 ++++++-
 drivers/infiniband/core/mad_rmpp.c |  7 ++---
 3 files changed, 33 insertions(+), 28 deletions(-)
diff mbox series

Patch

diff --git a/drivers/infiniband/core/mad.c b/drivers/infiniband/core/mad.c
index 1fd54d5c4dd8..9b101f91ca3e 100644
--- a/drivers/infiniband/core/mad.c
+++ b/drivers/infiniband/core/mad.c
@@ -1118,8 +1118,7 @@  int ib_post_send_mad(struct ib_mad_send_buf *send_buf,
 		mad_send_wr->max_retries = send_buf->retries;
 		mad_send_wr->retries_left = send_buf->retries;
 		send_buf->retries = 0;
-		/* Reference for work request to QP + response */
-		mad_send_wr->refcount = 1 + (mad_send_wr->timeout > 0);
+		mad_send_wr->state = IB_MAD_STATE_SEND_START;
 		mad_send_wr->status = IB_WC_SUCCESS;
 
 		/* Reference MAD agent until send completes */
@@ -1773,9 +1772,13 @@  ib_find_send_mad(const struct ib_mad_agent_private *mad_agent_priv,
 void ib_mark_mad_done(struct ib_mad_send_wr_private *mad_send_wr)
 {
 	mad_send_wr->timeout = 0;
-	if (mad_send_wr->refcount == 1)
+	if (mad_send_wr->state == IB_MAD_STATE_WAIT_RESP) {
+		mad_send_wr->state = IB_MAD_STATE_DONE;
 		list_move_tail(&mad_send_wr->agent_list,
 			      &mad_send_wr->mad_agent_priv->done_list);
+	} else {
+		mad_send_wr->state = IB_MAD_STATE_EARLY_RESP;
+	}
 }
 
 static void ib_mad_complete_recv(struct ib_mad_agent_private *mad_agent_priv,
@@ -2195,6 +2198,7 @@  static void wait_for_response(struct ib_mad_send_wr_private *mad_send_wr)
 		list_item = &mad_agent_priv->wait_list;
 	}
 
+	mad_send_wr->state = IB_MAD_STATE_WAIT_RESP;
 	list_add(&mad_send_wr->agent_list, list_item);
 
 	/* Reschedule a work item if we have a shorter timeout */
@@ -2222,6 +2226,11 @@  void ib_mad_complete_send_wr(struct ib_mad_send_wr_private *mad_send_wr,
 
 	mad_agent_priv = mad_send_wr->mad_agent_priv;
 	spin_lock_irqsave(&mad_agent_priv->lock, flags);
+	if (mad_send_wr->state == IB_MAD_STATE_EARLY_RESP) {
+		mad_send_wr->state = IB_MAD_STATE_DONE;
+		goto done;
+	}
+
 	if (ib_mad_kernel_rmpp_agent(&mad_agent_priv->agent)) {
 		ret = ib_process_rmpp_send_wc(mad_send_wr, mad_send_wc);
 		if (ret == IB_RMPP_RESULT_CONSUMED)
@@ -2232,14 +2241,10 @@  void ib_mad_complete_send_wr(struct ib_mad_send_wr_private *mad_send_wr,
 	if (mad_send_wc->status != IB_WC_SUCCESS &&
 	    mad_send_wr->status == IB_WC_SUCCESS) {
 		mad_send_wr->status = mad_send_wc->status;
-		mad_send_wr->refcount -= (mad_send_wr->timeout > 0);
-	}
-
-	if (--mad_send_wr->refcount > 0) {
-		if (mad_send_wr->refcount == 1 && mad_send_wr->timeout &&
-		    mad_send_wr->status == IB_WC_SUCCESS) {
-			wait_for_response(mad_send_wr);
-		}
+	} else if (mad_send_wr->status == IB_WC_SUCCESS &&
+		   mad_send_wr->timeout &&
+		   mad_send_wr->state == IB_MAD_STATE_SEND_START) {
+		wait_for_response(mad_send_wr);
 		goto done;
 	}
 
@@ -2407,12 +2412,9 @@  static void cancel_mads(struct ib_mad_agent_private *mad_agent_priv)
 
 	spin_lock_irqsave(&mad_agent_priv->lock, flags);
 	list_for_each_entry_safe(mad_send_wr, temp_mad_send_wr,
-				 &mad_agent_priv->send_list, agent_list) {
-		if (mad_send_wr->status == IB_WC_SUCCESS) {
+				 &mad_agent_priv->send_list, agent_list)
+		if (mad_send_wr->status == IB_WC_SUCCESS)
 			mad_send_wr->status = IB_WC_WR_FLUSH_ERR;
-			mad_send_wr->refcount -= (mad_send_wr->timeout > 0);
-		}
-	}
 
 	/* Empty wait list to prevent receives from finding a request */
 	list_splice_init(&mad_agent_priv->wait_list, &cancel_list);
@@ -2459,7 +2461,6 @@  int ib_modify_mad(struct ib_mad_send_buf *send_buf, u32 timeout_ms)
 	struct ib_mad_agent_private *mad_agent_priv;
 	struct ib_mad_send_wr_private *mad_send_wr;
 	unsigned long flags;
-	int active;
 
 	if (!send_buf)
 		return -EINVAL;
@@ -2473,14 +2474,11 @@  int ib_modify_mad(struct ib_mad_send_buf *send_buf, u32 timeout_ms)
 		return -EINVAL;
 	}
 
-	active = (!mad_send_wr->timeout || mad_send_wr->refcount > 1);
-	if (!timeout_ms) {
+	if (!timeout_ms)
 		mad_send_wr->status = IB_WC_WR_FLUSH_ERR;
-		mad_send_wr->refcount -= (mad_send_wr->timeout > 0);
-	}
 
 	mad_send_wr->send_buf.timeout_ms = timeout_ms;
-	if (active)
+	if (mad_send_wr->state == IB_MAD_STATE_SEND_START)
 		mad_send_wr->timeout = msecs_to_jiffies(timeout_ms);
 	else
 		ib_reset_mad_timeout(mad_send_wr, timeout_ms);
@@ -2607,7 +2605,7 @@  static int retry_send(struct ib_mad_send_wr_private *mad_send_wr)
 		ret = ib_send_mad(mad_send_wr);
 
 	if (!ret) {
-		mad_send_wr->refcount++;
+		mad_send_wr->state = IB_MAD_STATE_SEND_START;
 		list_add_tail(&mad_send_wr->agent_list,
 			      &mad_send_wr->mad_agent_priv->send_list);
 	}
diff --git a/drivers/infiniband/core/mad_priv.h b/drivers/infiniband/core/mad_priv.h
index 1b7445a6f671..cc2de81ea6f6 100644
--- a/drivers/infiniband/core/mad_priv.h
+++ b/drivers/infiniband/core/mad_priv.h
@@ -118,6 +118,13 @@  struct ib_mad_snoop_private {
 	struct completion comp;
 };
 
+enum ib_mad_state {
+	IB_MAD_STATE_SEND_START,
+	IB_MAD_STATE_WAIT_RESP,
+	IB_MAD_STATE_EARLY_RESP,
+	IB_MAD_STATE_DONE
+};
+
 struct ib_mad_send_wr_private {
 	struct ib_mad_list_head mad_list;
 	struct list_head agent_list;
@@ -132,7 +139,6 @@  struct ib_mad_send_wr_private {
 	int max_retries;
 	int retries_left;
 	int retry;
-	int refcount;
 	enum ib_wc_status status;
 
 	/* RMPP control */
@@ -143,6 +149,8 @@  struct ib_mad_send_wr_private {
 	int seg_num;
 	int newwin;
 	int pad;
+
+	enum ib_mad_state state;
 };
 
 struct ib_mad_local_private {
diff --git a/drivers/infiniband/core/mad_rmpp.c b/drivers/infiniband/core/mad_rmpp.c
index 8af0619a39cd..dff264e9bb68 100644
--- a/drivers/infiniband/core/mad_rmpp.c
+++ b/drivers/infiniband/core/mad_rmpp.c
@@ -717,13 +717,13 @@  static void process_rmpp_ack(struct ib_mad_agent_private *agent,
 			ib_mad_complete_send_wr(mad_send_wr, &wc);
 			return;
 		}
-		if (mad_send_wr->refcount == 1)
+		if (mad_send_wr->state == IB_MAD_STATE_WAIT_RESP)
 			ib_reset_mad_timeout(mad_send_wr,
 					     mad_send_wr->send_buf.timeout_ms);
 		spin_unlock_irqrestore(&agent->lock, flags);
 		ack_ds_ack(agent, mad_recv_wc);
 		return;
-	} else if (mad_send_wr->refcount == 1 &&
+	} else if (mad_send_wr->state == IB_MAD_STATE_WAIT_RESP &&
 		   mad_send_wr->seg_num < mad_send_wr->newwin &&
 		   mad_send_wr->seg_num < mad_send_wr->send_buf.seg_count) {
 		/* Send failure will just result in a timeout/retry */
@@ -731,7 +731,7 @@  static void process_rmpp_ack(struct ib_mad_agent_private *agent,
 		if (ret)
 			goto out;
 
-		mad_send_wr->refcount++;
+		mad_send_wr->state = IB_MAD_STATE_SEND_START;
 		list_move_tail(&mad_send_wr->agent_list,
 			      &mad_send_wr->mad_agent_priv->send_list);
 	}
@@ -890,7 +890,6 @@  int ib_send_rmpp_mad(struct ib_mad_send_wr_private *mad_send_wr)
 	mad_send_wr->newwin = init_newwin(mad_send_wr);
 
 	/* We need to wait for the final ACK even if there isn't a response */
-	mad_send_wr->refcount += (mad_send_wr->timeout == 0);
 	ret = send_next_seg(mad_send_wr);
 	if (!ret)
 		return IB_RMPP_RESULT_CONSUMED;