diff mbox series

[3/6] net/rds: RDS/TCP does not initiate a connection

Message ID 20250227042638.82553-4-allison.henderson@oracle.com (mailing list archive)
State New
Delegated to: Netdev Maintainers
Headers show
Series RDS bug fix collection | expand

Checks

Context Check Description
netdev/series_format warning Target tree name not specified in the subject
netdev/tree_selection success Guessed tree name to be net-next, async
netdev/ynl success Generated files up to date; no warnings/errors; no diff in generated;
netdev/fixes_present success Fixes tag not required for -next series
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 0 this patch: 0
netdev/build_tools success No tools touched, skip
netdev/cc_maintainers warning 6 maintainers not CCed: linux-rdma@vger.kernel.org rds-devel@oss.oracle.com kuba@kernel.org horms@kernel.org edumazet@google.com pabeni@redhat.com
netdev/build_clang success Errors and warnings before: 0 this patch: 0
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/deprecated_api success None detected
netdev/check_selftest success No net selftest shell script
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn success Errors and warnings before: 12 this patch: 12
netdev/checkpatch success total: 0 errors, 0 warnings, 0 checks, 145 lines checked
netdev/build_clang_rust success No Rust files in patch. Skipping build
netdev/kdoc success Errors and warnings before: 30 this patch: 30
netdev/source_inline success Was 0 now: 0

Commit Message

Allison Henderson Feb. 27, 2025, 4:26 a.m. UTC
From: Ka-Cheong Poon <ka-cheong.poon@oracle.com>

Commit ("rds: Re-factor and avoid superfluous queuing of shutdown
work") changed rds_conn_path_connect_if_down() to call
rds_queue_reconnect() instead of queueing the connection request.  In
rds_queue_reconnect(), if the connection's transport is TCP and if the
local address is "bigger" than the peer's, no request is queued.
Beucause of this, no connection will be initiated to the peer.

This patch keeps the code re-factoring of that commit.  But it
initiates a connection request right away to make sure that a
connection is set up to the peer.

Signed-off-by: Ka-Cheong Poon <ka-cheong.poon@oracle.com>
Signed-off-by: Somasundaram Krishnasamy <somasundaram.krishnasamy@oracle.com>
Signed-off-by: Gerd Rausch <gerd.rausch@oracle.com>
Signed-off-by: Allison Henderson <allison.henderson@oracle.com>
---
 net/rds/af_rds.c      |  1 +
 net/rds/connection.c  |  3 ++-
 net/rds/rds.h         |  7 +++++--
 net/rds/send.c        | 46 +++++++++++++++++++++++++++++++++----------
 net/rds/tcp_connect.c |  1 +
 net/rds/tcp_listen.c  |  1 +
 6 files changed, 46 insertions(+), 13 deletions(-)
diff mbox series

Patch

diff --git a/net/rds/af_rds.c b/net/rds/af_rds.c
index 8435a20968ef..d6cba98f3d45 100644
--- a/net/rds/af_rds.c
+++ b/net/rds/af_rds.c
@@ -685,6 +685,7 @@  static int __rds_create(struct socket *sock, struct sock *sk, int protocol)
 	rs->rs_rx_traces = 0;
 	rs->rs_tos = 0;
 	rs->rs_conn = NULL;
+	rs->rs_conn_path = NULL;
 
 	spin_lock_bh(&rds_sock_lock);
 	list_add_tail(&rs->rs_item, &rds_sock_list);
diff --git a/net/rds/connection.c b/net/rds/connection.c
index 73de221bd7c2..84034a3c69bd 100644
--- a/net/rds/connection.c
+++ b/net/rds/connection.c
@@ -147,6 +147,7 @@  static void __rds_conn_path_init(struct rds_connection *conn,
 	INIT_WORK(&cp->cp_down_w, rds_shutdown_worker);
 	mutex_init(&cp->cp_cm_lock);
 	cp->cp_flags = 0;
+	init_waitqueue_head(&cp->cp_up_waitq);
 }
 
 /*
@@ -913,7 +914,7 @@  void rds_conn_path_connect_if_down(struct rds_conn_path *cp)
 		rcu_read_unlock();
 		return;
 	}
-	if (rds_conn_path_state(cp) == RDS_CONN_DOWN)
+	if (rds_conn_path_down(cp))
 		rds_queue_reconnect(cp);
 	rcu_read_unlock();
 }
diff --git a/net/rds/rds.h b/net/rds/rds.h
index 1fb27e1a2e46..85b47ce52266 100644
--- a/net/rds/rds.h
+++ b/net/rds/rds.h
@@ -134,6 +134,8 @@  struct rds_conn_path {
 	unsigned int		cp_unacked_packets;
 	unsigned int		cp_unacked_bytes;
 	unsigned int		cp_index;
+
+	wait_queue_head_t       cp_up_waitq;    /* start up waitq */
 };
 
 /* One rds_connection per RDS address pair */
@@ -607,10 +609,11 @@  struct rds_sock {
 	struct rds_transport    *rs_transport;
 
 	/*
-	 * rds_sendmsg caches the conn it used the last time around.
-	 * This helps avoid costly lookups.
+	 * rds_sendmsg caches the conn and conn_path it used the last time
+	 * around. This helps avoid costly lookups.
 	 */
 	struct rds_connection	*rs_conn;
+	struct rds_conn_path	*rs_conn_path;
 
 	/* flag indicating we were congested or not */
 	int			rs_congested;
diff --git a/net/rds/send.c b/net/rds/send.c
index 6329cc8ec246..85ab9e32105e 100644
--- a/net/rds/send.c
+++ b/net/rds/send.c
@@ -1044,15 +1044,15 @@  static int rds_cmsg_send(struct rds_sock *rs, struct rds_message *rm,
 static int rds_send_mprds_hash(struct rds_sock *rs,
 			       struct rds_connection *conn, int nonblock)
 {
+	struct rds_conn_path *cp;
 	int hash;
 
 	if (conn->c_npaths == 0)
 		hash = RDS_MPATH_HASH(rs, RDS_MPATH_WORKERS);
 	else
 		hash = RDS_MPATH_HASH(rs, conn->c_npaths);
-	if (conn->c_npaths == 0 && hash != 0) {
-		rds_send_ping(conn, 0);
-
+	cp = &conn->c_path[hash];
+	if (!conn->c_npaths && rds_conn_path_down(cp)) {
 		/* The underlying connection is not up yet.  Need to wait
 		 * until it is up to be sure that the non-zero c_path can be
 		 * used.  But if we are interrupted, we have to use the zero
@@ -1066,10 +1066,19 @@  static int rds_send_mprds_hash(struct rds_sock *rs,
 				return 0;
 			if (wait_event_interruptible(conn->c_hs_waitq,
 						     conn->c_npaths != 0))
-				hash = 0;
+				return 0;
 		}
 		if (conn->c_npaths == 1)
 			hash = 0;
+
+		/* Wait until the chosen path is up.  If it is interrupted,
+		 * just return as this is an optimization to make sure that
+		 * the message is sent.
+		 */
+		cp = &conn->c_path[hash];
+		if (rds_conn_path_down(cp))
+			wait_event_interruptible(cp->cp_up_waitq,
+						 !rds_conn_path_down(cp));
 	}
 	return hash;
 }
@@ -1290,6 +1299,7 @@  int rds_sendmsg(struct socket *sock, struct msghdr *msg, size_t payload_len)
 	if (rs->rs_conn && ipv6_addr_equal(&rs->rs_conn->c_faddr, &daddr) &&
 	    rs->rs_tos == rs->rs_conn->c_tos) {
 		conn = rs->rs_conn;
+		cpath = rs->rs_conn_path;
 	} else {
 		conn = rds_conn_create_outgoing(sock_net(sock->sk),
 						&rs->rs_bound_addr, &daddr,
@@ -1300,14 +1310,30 @@  int rds_sendmsg(struct socket *sock, struct msghdr *msg, size_t payload_len)
 			ret = PTR_ERR(conn);
 			goto out;
 		}
+		if (conn->c_trans->t_mp_capable) {
+			/* c_npaths == 0 if we have not talked to this peer
+			 * before.  Initiate a connection request to the
+			 * peer right away.
+			 */
+			if (!conn->c_npaths &&
+			    rds_conn_path_down(&conn->c_path[0])) {
+				/* rds_connd_queue_reconnect_work() ensures
+				 * that only one request is queued.  And
+				 * rds_send_ping() ensures that only one ping
+				 * is outstanding.
+				 */
+				rds_cond_queue_reconnect_work(&conn->c_path[0],
+							      0);
+				rds_send_ping(conn, 0);
+			}
+			cpath = &conn->c_path[rds_send_mprds_hash(rs, conn, 0)];
+		} else {
+			cpath = &conn->c_path[0];
+		}
 		rs->rs_conn = conn;
+		rs->rs_conn_path = cpath;
 	}
 
-	if (conn->c_trans->t_mp_capable)
-		cpath = &conn->c_path[rds_send_mprds_hash(rs, conn, nonblock)];
-	else
-		cpath = &conn->c_path[0];
-
 	rm->m_conn_path = cpath;
 
 	/* Parse any control messages the user may have included. */
@@ -1335,7 +1361,7 @@  int rds_sendmsg(struct socket *sock, struct msghdr *msg, size_t payload_len)
 	}
 
 	if (rds_conn_path_down(cpath))
-		rds_check_all_paths(conn);
+		rds_conn_path_connect_if_down(cpath);
 
 	ret = rds_cong_wait(conn->c_fcong, dport, nonblock, rs);
 	if (ret) {
diff --git a/net/rds/tcp_connect.c b/net/rds/tcp_connect.c
index a0046e99d6df..97596a3c346a 100644
--- a/net/rds/tcp_connect.c
+++ b/net/rds/tcp_connect.c
@@ -73,6 +73,7 @@  void rds_tcp_state_change(struct sock *sk)
 			rds_conn_path_drop(cp, false);
 		} else {
 			rds_connect_path_complete(cp, RDS_CONN_CONNECTING);
+			wake_up(&cp->cp_up_waitq);
 		}
 		break;
 	case TCP_CLOSE_WAIT:
diff --git a/net/rds/tcp_listen.c b/net/rds/tcp_listen.c
index d89bd8d0c354..60c52322b896 100644
--- a/net/rds/tcp_listen.c
+++ b/net/rds/tcp_listen.c
@@ -211,6 +211,7 @@  int rds_tcp_accept_one(struct socket *sock)
 	} else {
 		rds_tcp_set_callbacks(new_sock, cp);
 		rds_connect_path_complete(cp, RDS_CONN_CONNECTING);
+		wake_up(&cp->cp_up_waitq);
 	}
 	new_sock = NULL;
 	ret = 0;