@@ -144,6 +144,7 @@ ksocknal_create_conn_cb(struct sockaddr *addr)
conn_cb->ksnr_blki_conn_count = 0;
conn_cb->ksnr_blko_conn_count = 0;
conn_cb->ksnr_max_conns = 0;
+ conn_cb->ksnr_busy_retry_count = 0;
return conn_cb;
}
@@ -379,6 +379,7 @@ struct ksock_conn {
};
#define SOCKNAL_CONN_COUNT_MAX_BITS 8 /* max conn count bits */
+#define SOCKNAL_MAX_BUSY_RETRIES 3 /* max consecutive EALREADY retries */
struct ksock_conn_cb {
struct list_head ksnr_connd_list; /* chain on ksnr_connd_routes */
@@ -407,6 +408,9 @@ struct ksock_conn_cb {
unsigned int ksnr_max_conns; /* conns_per_peer at
* peer creation
*/
+ unsigned int ksnr_busy_retry_count; /* consecutive connect retries
+ * due to EALREADY rc
+ */
};
#define SOCKNAL_KEEPALIVE_PING 1 /* cookie for keepalive ping */
@@ -1785,7 +1785,7 @@ ksocknal_connect(struct ksock_conn_cb *conn_cb)
{
LIST_HEAD(zombies);
struct ksock_peer_ni *peer_ni = conn_cb->ksnr_peer;
- int type;
+ int type = SOCKLND_CONN_NONE;
int wanted;
struct socket *sock;
time64_t deadline;
@@ -1863,14 +1863,18 @@ ksocknal_connect(struct ksock_conn_cb *conn_cb)
goto failed;
}
- /*
- * A +ve RC means I have to retry because I lost the connection
+ if (rc == EALREADY && conn_cb->ksnr_conn_count > 0)
+ conn_cb->ksnr_busy_retry_count += 1;
+ else
+ conn_cb->ksnr_busy_retry_count = 0;
+
+ /* A +ve RC means I have to retry because I lost the connection
* race or I have to renegotiate protocol version
*/
- retry_later = (rc);
+ retry_later = (rc != 0);
if (retry_later)
- CDEBUG(D_NET, "peer_ni %s: conn race, retry later.\n",
- libcfs_nidstr(&peer_ni->ksnp_id.nid));
+ CDEBUG(D_NET, "peer_ni %s: conn race, retry later. rc %d\n",
+ libcfs_nidstr(&peer_ni->ksnp_id.nid), rc);
write_lock_bh(&ksocknal_data.ksnd_global_lock);
}
@@ -1878,6 +1882,15 @@ ksocknal_connect(struct ksock_conn_cb *conn_cb)
conn_cb->ksnr_scheduled = 0;
conn_cb->ksnr_connecting = 0;
+ if (conn_cb->ksnr_busy_retry_count >= SOCKNAL_MAX_BUSY_RETRIES &&
+ type > SOCKLND_CONN_NONE) {
+ /* After too many consecutive EALREADY retries assume the peer
+ * doesn't support as many conns as we want; stop retrying
+ */
+ conn_cb->ksnr_connected |= BIT(type);
+ retry_later = false;
+ }
+
if (retry_later) {
/*
* re-queue for attention; this frees me up to handle