@@ -54,7 +54,9 @@
#define CREATE_TRACE_POINTS
#include <trace/events/ib_mad.h>
-#define IB_MAD_MAX_DEADLINE (jiffies + msecs_to_jiffies(5 * 60 * 1000))
+#define IB_MAD_LINEAR_TIMEOUTS_DEFAULT 4
+#define IB_MAD_MAX_TIMEOUT_MS (60 * MSEC_PER_SEC)
+#define IB_MAD_MAX_DEADLINE (jiffies + msecs_to_jiffies(5 * 60 * 1000))
#ifdef CONFIG_TRACEPOINTS
static void create_mad_addr_info(struct ib_mad_send_wr_private *mad_send_wr,
@@ -1210,10 +1212,12 @@ int ib_post_send_mad(struct ib_mad_send_buf *send_buf,
}
mad_send_wr->tid = ((struct ib_mad_hdr *) send_buf->mad)->tid;
+ mad_send_wr->var_timeout_ms = send_buf->timeout_ms;
/* Timeout will be updated after send completes */
mad_send_wr->timeout = msecs_to_jiffies(send_buf->timeout_ms);
mad_send_wr->max_retries = send_buf->retries;
mad_send_wr->retries_left = send_buf->retries;
+ mad_send_wr->backoff_retries = 0;
send_buf->retries = 0;
mad_send_wr->status = IB_WC_SUCCESS;
@@ -2662,18 +2666,34 @@ int ib_modify_mad(struct ib_mad_send_buf *send_buf, u32 timeout_ms)
return -EINVAL;
}
- if (!timeout_ms)
+ if (!timeout_ms) {
mad_send_wr->status = IB_WC_WR_FLUSH_ERR;
+ goto apply;
+ }
+
+	/* A CM MRA requested a lower timeout than ours: either a delayed MRA
+	 * (our variable backoff grew meanwhile) or a remote using a constant.
+ */
+ if (timeout_ms < mad_send_wr->var_timeout_ms)
+ goto ignore;
+
+	/* Assume the remote will no longer be overloaded once the MRA Service
+	 * Timeout passes, and restart the variable backoff algorithm.
+ */
+ mad_send_wr->var_timeout_ms = mad_send_wr->send_buf.timeout_ms;
+ mad_send_wr->backoff_retries = 0;
if (mad_send_wr->deadline)
mad_send_wr->deadline += msecs_to_jiffies(timeout_ms);
+apply:
if (mad_send_wr->state == IB_MAD_STATE_SEND_START ||
(mad_send_wr->state == IB_MAD_STATE_QUEUED && timeout_ms))
mad_send_wr->timeout = msecs_to_jiffies(timeout_ms);
else
ib_reset_mad_timeout(mad_send_wr, timeout_ms);
+ignore:
spin_unlock_irqrestore(&mad_agent_priv->lock, flags);
return 0;
}
@@ -2767,6 +2787,30 @@ static void local_completions(struct work_struct *work)
spin_unlock_irqrestore(&mad_agent_priv->lock, flags);
}
+/*
+ * Apply a variable (exponential) timeout backoff to eligible send MADs.
+ *
+ * The eligibility checks scope down the initial backoff implementation.
+ */
+static void set_next_timeout(struct ib_mad_send_wr_private *mad_send_wr)
+{
+ const struct ib_mad_agent_private *agent = mad_send_wr->mad_agent_priv;
+ const struct ib_mad_port_private *port = agent->qp_info->port_priv;
+ const struct ib_mad_hdr *hdr = mad_send_wr->send_buf.mad;
+
+ if (ib_mad_kernel_rmpp_agent(&agent->agent))
+ return;
+
+ if (hdr->base_version != IB_MGMT_BASE_VERSION)
+ return;
+
+ if (++mad_send_wr->backoff_retries < READ_ONCE(port->linear_timeouts))
+ return;
+
+ mad_send_wr->var_timeout_ms =
+ min(mad_send_wr->var_timeout_ms << 1, IB_MAD_MAX_TIMEOUT_MS);
+}
+
static int retry_send(struct ib_mad_send_wr_private *mad_send_wr)
{
int ret;
@@ -2778,7 +2822,8 @@ static int retry_send(struct ib_mad_send_wr_private *mad_send_wr)
mad_send_wr->retries_left--;
mad_send_wr->send_buf.retries++;
- mad_send_wr->timeout = msecs_to_jiffies(mad_send_wr->send_buf.timeout_ms);
+ set_next_timeout(mad_send_wr);
+ mad_send_wr->timeout = msecs_to_jiffies(mad_send_wr->var_timeout_ms);
if (ib_mad_kernel_rmpp_agent(&mad_send_wr->mad_agent_priv->agent)) {
ret = ib_retry_rmpp(mad_send_wr);
@@ -3195,6 +3240,8 @@ static int ib_mad_port_open(struct ib_device *device,
goto error8;
}
+ port_priv->linear_timeouts = IB_MAD_LINEAR_TIMEOUTS_DEFAULT;
+
spin_lock_irqsave(&ib_mad_port_list_lock, flags);
list_add_tail(&port_priv->port_list, &ib_mad_port_list);
spin_unlock_irqrestore(&ib_mad_port_list_lock, flags);
@@ -139,10 +139,12 @@ struct ib_mad_send_wr_private {
struct ib_ud_wr send_wr;
struct ib_sge sg_list[IB_MAD_SEND_REQ_MAX_SG];
__be64 tid;
+ unsigned int var_timeout_ms;
unsigned long timeout;
unsigned long deadline;
int max_retries;
int retries_left;
+ int backoff_retries;
int retry;
enum ib_wc_status status;
@@ -222,6 +224,7 @@ struct ib_mad_port_private {
struct ib_mad_mgmt_version_table version[MAX_MGMT_VERSION];
struct workqueue_struct *wq;
struct ib_mad_qp_info qp_info[IB_MAD_QPS_CORE];
+ u8 linear_timeouts;
};
int ib_send_mad(struct ib_mad_send_wr_private *mad_send_wr);