diff mbox series

[rdma-next,3/9] RDMA/sa_query: Enforce min retry interval and deadline

Message ID eae3b880eba41734d7fe9604d7f871960a27cb26.1733405453.git.leon@kernel.org (mailing list archive)
State New
Headers show
Series Rework retry algorithm used when sending MADs | expand

Commit Message

Leon Romanovsky Dec. 5, 2024, 1:49 p.m. UTC
From: Vlad Dumitrescu <vdumitrescu@nvidia.com>

SA query users, both in-kernel and userspace (via rdma_resolve_route),
pass in a total timeout and expect the SA query layer to handle retries.
SA query relies on the MAD layer to issue a fixed number of 10 retries
at the specified interval (timeout), set to 1/10 of the requested total
timeout.

When the caller-requested total timeout is low (e.g., 1s for IPoIB), the
resulting retry interval (e.g., 100ms) to too aggressive.  There have
been reports of overloaded SA receivers.  Hence, enforce a minimum.  A
follow-up change will make this configurable via rdma tool (netlink) at
per-port granularity.

Continue to enforce the caller's total timeout by using the new MAD
layer deadline option.

Remove small-timeout special case - the total timeout option will take
care of stopping the send even when more retries are left.  Moreover,
this special case results in an extremely aggressive 1ms retries, which
is definitely not desirable.

Signed-off-by: Vlad Dumitrescu <vdumitrescu@nvidia.com>
Reviewed-by: Sean Hefty <shefty@nvidia.com>
Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
---
 drivers/infiniband/core/sa_query.c | 34 +++++++++++++++++++-----------
 1 file changed, 22 insertions(+), 12 deletions(-)
diff mbox series

Patch

diff --git a/drivers/infiniband/core/sa_query.c b/drivers/infiniband/core/sa_query.c
index 53571e6b3162..ac0d53bf91c4 100644
--- a/drivers/infiniband/core/sa_query.c
+++ b/drivers/infiniband/core/sa_query.c
@@ -39,6 +39,7 @@ 
 #include <linux/slab.h>
 #include <linux/dma-mapping.h>
 #include <linux/kref.h>
+#include <linux/minmax.h>
 #include <linux/xarray.h>
 #include <linux/workqueue.h>
 #include <uapi/linux/if_ether.h>
@@ -59,6 +60,7 @@ 
 #define IB_SA_LOCAL_SVC_TIMEOUT_MAX		200000
 #define IB_SA_CPI_MAX_RETRY_CNT			3
 #define IB_SA_CPI_RETRY_WAIT			1000 /*msecs */
+#define IB_SA_MIN_TIMEOUT_MS_DEFAULT		500
 static int sa_local_svc_timeout_ms = IB_SA_LOCAL_SVC_TIMEOUT_DEFAULT;
 
 struct ib_sa_sm_ah {
@@ -96,6 +98,7 @@  struct ib_sa_port {
 	spinlock_t                   classport_lock; /* protects class port info set */
 	spinlock_t           ah_lock;
 	u32		     port_num;
+	u32                  min_timeout_ms;
 };
 
 struct ib_sa_device {
@@ -1344,13 +1347,14 @@  static int send_mad(struct ib_sa_query *query, unsigned long timeout_ms,
 	if (ret < 0)
 		return ret;
 
-	query->mad_buf->timeout_ms  = timeout_ms / nmbr_sa_query_retries;
+	query->mad_buf->timeout_ms =
+		max(READ_ONCE(query->port->min_timeout_ms),
+		    timeout_ms / nmbr_sa_query_retries);
 	query->mad_buf->retries = nmbr_sa_query_retries;
-	if (!query->mad_buf->timeout_ms) {
-		/* Special case, very small timeout_ms */
-		query->mad_buf->timeout_ms = 1;
-		query->mad_buf->retries = timeout_ms;
-	}
+	ret = ib_set_mad_deadline(query->mad_buf, timeout_ms);
+	if (ret)
+		goto out;
+
 	query->mad_buf->context[0] = query;
 	query->id = id;
 
@@ -1364,18 +1368,22 @@  static int send_mad(struct ib_sa_query *query, unsigned long timeout_ms,
 	}
 
 	ret = ib_post_send_mad(query->mad_buf, NULL);
-	if (ret) {
-		xa_lock_irqsave(&queries, flags);
-		__xa_erase(&queries, id);
-		xa_unlock_irqrestore(&queries, flags);
-	}
 
 	/*
 	 * It's not safe to dereference query any more, because the
 	 * send may already have completed and freed the query in
 	 * another context.
 	 */
-	return ret ? ret : id;
+
+out:
+	if (ret) {
+		xa_lock_irqsave(&queries, flags);
+		__xa_erase(&queries, id);
+		xa_unlock_irqrestore(&queries, flags);
+		return ret;
+	}
+
+	return id;
 }
 
 void ib_sa_unpack_path(void *attribute, struct sa_path_rec *rec)
@@ -2192,6 +2200,8 @@  static int ib_sa_add_one(struct ib_device *device)
 		INIT_DELAYED_WORK(&sa_dev->port[i].ib_cpi_work,
 				  update_ib_cpi);
 
+		sa_dev->port[i].min_timeout_ms = IB_SA_MIN_TIMEOUT_MS_DEFAULT;
+
 		count++;
 	}