From patchwork Tue Oct 26 18:33:16 2010 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Michael Heinz X-Patchwork-Id: 283232 Received: from vger.kernel.org (vger.kernel.org [209.132.180.67]) by demeter1.kernel.org (8.14.4/8.14.3) with ESMTP id o9QIXLIu015987 for ; Tue, 26 Oct 2010 18:33:22 GMT Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S932490Ab0JZSdV (ORCPT ); Tue, 26 Oct 2010 14:33:21 -0400 Received: from avexcashub1.qlogic.com ([198.70.193.61]:53984 "EHLO avexcashub1.qlogic.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1759944Ab0JZSdU (ORCPT ); Tue, 26 Oct 2010 14:33:20 -0400 Received: from avexcashub2.qlogic.org (10.1.4.116) by avexcashub1.qlogic.org (10.1.4.161) with Microsoft SMTP Server (TLS) id 8.1.436.0; Tue, 26 Oct 2010 11:33:19 -0700 Received: from MNEXCASHUB1.qlogic.org (10.33.2.103) by avexcashub2.qlogic.org (10.1.4.162) with Microsoft SMTP Server (TLS) id 8.1.436.0; Tue, 26 Oct 2010 11:33:19 -0700 Received: from MNEXMB1.qlogic.org ([fe80::8516:7839:9549:6996]) by MNEXCASHUB1.qlogic.org ([::1]) with mapi; Tue, 26 Oct 2010 13:33:18 -0500 From: Mike Heinz To: "linux-rdma@vger.kernel.org" Date: Tue, 26 Oct 2010 13:33:16 -0500 Subject: [PATCH] Add exponential backoff + random delay to MADs when retrying after timeout. Thread-Topic: [PATCH] Add exponential backoff + random delay to MADs when retrying after timeout. 
Thread-Index: ActpWb9Kl9Na4cQ2TJ6gxePWyk48cQL4nSQA Message-ID: <4C2744E8AD2982428C5BFE523DF8CDCB49D4675DEB@MNEXMB1.qlogic.org> Accept-Language: en-US Content-Language: en-US X-MS-Has-Attach: yes X-MS-TNEF-Correlator: acceptlanguage: en-US MIME-Version: 1.0 Sender: linux-rdma-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: linux-rdma@vger.kernel.org X-Greylist: IP, sender and recipient auto-whitelisted, not delayed by milter-greylist-4.2.3 (demeter1.kernel.org [140.211.167.41]); Tue, 26 Oct 2010 18:33:22 +0000 (UTC) diff --git a/drivers/infiniband/core/mad.c b/drivers/infiniband/core/mad.c index ef1304f..3b03f1c 100644 --- a/drivers/infiniband/core/mad.c +++ b/drivers/infiniband/core/mad.c @@ -42,6 +42,11 @@ #include "smi.h" #include "agent.h" +#include <linux/random.h> + +#define MAD_MIN_TIMEOUT_MS 511 +#define MAD_RAND_TIMEOUT_MS 511 + MODULE_LICENSE("Dual BSD/GPL"); MODULE_DESCRIPTION("kernel IB MAD API"); MODULE_AUTHOR("Hal Rosenstock"); @@ -55,6 +60,10 @@ MODULE_PARM_DESC(send_queue_size, "Size of send queue in number of work requests module_param_named(recv_queue_size, mad_recvq_size, int, 0444); MODULE_PARM_DESC(recv_queue_size, "Size of receive queue in number of work requests"); +int mad_randomized_wait = 0; +module_param_named(randomized_wait, mad_randomized_wait, int, 0444); +MODULE_PARM_DESC(randomized_wait, "When true, use a randomized backoff algorithm to control retries for timeouts."); + static struct kmem_cache *ib_mad_cache; static struct list_head ib_mad_port_list; @@ -1102,11 +1111,18 @@ int ib_post_send_mad(struct ib_mad_send_buf *send_buf, } mad_send_wr->tid = ((struct ib_mad_hdr *) send_buf->mad)->tid; + + mad_send_wr->randomized_wait = mad_randomized_wait || send_buf->randomized_wait; + mad_send_wr->total_timeout = msecs_to_jiffies(send_buf->timeout_ms) * send_buf->retries; + /* Timeout will be updated after send completes */ mad_send_wr->timeout = msecs_to_jiffies(send_buf->timeout_ms); + mad_send_wr->max_retries = 
send_buf->retries; mad_send_wr->retries_left = send_buf->retries; + send_buf->retries = 0; + /* Reference for work request to QP + response */ mad_send_wr->refcount = 1 + (mad_send_wr->timeout > 0); mad_send_wr->status = IB_WC_SUCCESS; @@ -1803,6 +1819,7 @@ static void ib_mad_complete_recv(struct ib_mad_agent_private *mad_agent_priv, /* Complete corresponding request */ if (ib_response_mad(mad_recv_wc->recv_buf.mad)) { + spin_lock_irqsave(&mad_agent_priv->lock, flags); mad_send_wr = ib_find_send_mad(mad_agent_priv, mad_recv_wc); if (!mad_send_wr) { @@ -1811,6 +1828,7 @@ static void ib_mad_complete_recv(struct ib_mad_agent_private *mad_agent_priv, deref_mad_agent(mad_agent_priv); return; } + ib_mark_mad_done(mad_send_wr); spin_unlock_irqrestore(&mad_agent_priv->lock, flags); @@ -2429,14 +2447,33 @@ static int retry_send(struct ib_mad_send_wr_private *mad_send_wr) { int ret; - if (!mad_send_wr->retries_left) + if (!mad_send_wr->retries_left || (mad_send_wr->total_timeout == 0)) return -ETIMEDOUT; mad_send_wr->retries_left--; mad_send_wr->send_buf.retries++; - mad_send_wr->timeout = msecs_to_jiffies(mad_send_wr->send_buf.timeout_ms); + if (mad_send_wr->randomized_wait) { + mad_send_wr->timeout = msecs_to_jiffies(mad_send_wr->send_buf.timeout_ms + + (MAD_MIN_TIMEOUT_MS << mad_send_wr->send_buf.retries) - + (random32()&MAD_RAND_TIMEOUT_MS)); + if (mad_send_wr->timeout > mad_send_wr->total_timeout) { + mad_send_wr->timeout = mad_send_wr->total_timeout; + mad_send_wr->total_timeout = 0; + } else { + mad_send_wr->total_timeout -= mad_send_wr->timeout; + } + } else { + mad_send_wr->timeout = msecs_to_jiffies(mad_send_wr->send_buf.timeout_ms); + } + printk(KERN_DEBUG PFX "Retrying send %p: retries: %u, retries_left: %u, timeout: %lu, total_timeout: %lu\n", + mad_send_wr, + mad_send_wr->send_buf.retries, + mad_send_wr->retries_left, + mad_send_wr->timeout, + mad_send_wr->total_timeout); + if (mad_send_wr->mad_agent_priv->agent.rmpp_version) { ret = ib_retry_rmpp(mad_send_wr); switch (ret) { 
diff --git a/drivers/infiniband/core/mad_priv.h b/drivers/infiniband/core/mad_priv.h index 9430ab4..01fb7ed 100644 --- a/drivers/infiniband/core/mad_priv.h +++ b/drivers/infiniband/core/mad_priv.h @@ -132,8 +132,10 @@ struct ib_mad_send_wr_private { struct ib_sge sg_list[IB_MAD_SEND_REQ_MAX_SG]; __be64 tid; unsigned long timeout; + unsigned long total_timeout; int max_retries; int retries_left; + int randomized_wait; int retry; int refcount; enum ib_wc_status status; diff --git a/include/rdma/ib_mad.h b/include/rdma/ib_mad.h index d3b9401..c3d6efb 100644 --- a/include/rdma/ib_mad.h +++ b/include/rdma/ib_mad.h @@ -77,6 +77,15 @@ #define IB_MGMT_MAX_METHODS 128 +/* MAD Status field bit masks */ +#define IB_MGMT_MAD_STATUS_SUCCESS 0x0000 +#define IB_MGMT_MAD_STATUS_BUSY 0x0001 +#define IB_MGMT_MAD_STATUS_REDIRECT_REQD 0x0002 +#define IB_MGMT_MAD_STATUS_BAD_VERSION 0x0004 +#define IB_MGMT_MAD_STATUS_UNSUPPORTED_METHOD 0x0008 +#define IB_MGMT_MAD_STATUS_UNSUPPORTED_METHOD_ATTRIB 0x000c +#define IB_MGMT_MAD_STATUS_INVALID_ATTRIB_VALUE 0x001c + /* RMPP information */ #define IB_MGMT_RMPP_VERSION 1 @@ -246,6 +255,7 @@ struct ib_mad_send_buf { int seg_count; int seg_size; int timeout_ms; + int randomized_wait; int retries; };