diff mbox series

[net-next,v6,3/5] net/smc: Fallback when handshake workqueue congested

Message ID 87d49c573a15e13a26314316692fccca91741042.1644413637.git.alibuda@linux.alibaba.com (mailing list archive)
State Superseded
Delegated to: Netdev Maintainers
Headers show
Series net/smc: Optimizing performance in short-lived scenarios | expand

Checks

Context Check Description
netdev/tree_selection success Clearly marked for net-next
netdev/fixes_present success Fixes tag not required for -next series
netdev/subject_prefix success Link
netdev/cover_letter success Series has a cover letter
netdev/patch_count success Link
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 2366 this patch: 2366
netdev/cc_maintainers warning 3 maintainers not CCed: dsahern@kernel.org yoshfuji@linux-ipv6.org edumazet@google.com
netdev/build_clang success Errors and warnings before: 322 this patch: 322
netdev/module_param success Was 0 now: 0
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn success Errors and warnings before: 2496 this patch: 2496
netdev/checkpatch success total: 0 errors, 0 warnings, 0 checks, 46 lines checked
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/source_inline success Was 0 now: 0

Commit Message

D. Wythe Feb. 9, 2022, 2:11 p.m. UTC
From: "D. Wythe" <alibuda@linux.alibaba.com>

This patch intends to provide a mechanism to allow automatic fallback to
TCP according to the pressure of SMC handshake process. At present,
frequent visits will cause the incoming connections to be backlogged in
SMC handshake queue, raise the connections established time. Which is
quite unacceptable for those applications who base on short lived
connections.

There are two ways to implement this mechanism:

1. Fallback when TCP established.
2. Fallback before TCP established.

In the first way, we need to wait and receive CLC messages that the
client will potentially send, and then actively reply with a decline
message, in a sense, which is also a sort of SMC handshake, affect the
connections established time on its way.

In the second way, the only problem is that we need to inject SMC logic
into TCP when it is about to reply the incoming SYN, since we already do
that, it's seems not a problem anymore. And advantage is obvious, few
additional processes are required to complete the fallback.

This patch use the second way.

Link: https://lore.kernel.org/all/1641301961-59331-1-git-send-email-alibuda@linux.alibaba.com/
Signed-off-by: D. Wythe <alibuda@linux.alibaba.com>
---
 include/linux/tcp.h  |  1 +
 net/ipv4/tcp_input.c |  3 ++-
 net/smc/af_smc.c     | 18 ++++++++++++++++++
 3 files changed, 21 insertions(+), 1 deletion(-)

Comments

Karsten Graul Feb. 9, 2022, 4:11 p.m. UTC | #1
On 09/02/2022 15:11, D. Wythe wrote:
> From: "D. Wythe" <alibuda@linux.alibaba.com>
> 
> This patch intends to provide a mechanism to allow automatic fallback to

I would like to avoid the wording fallback all over here. The term SMC fallback
is used for SMC connections that are in our socket list, but use TCP because 
something went wrong during handshake.
What you changes result in are TCP-only connections which are not handled by
the SMC module at all. So the comments should use a different naming for that.
What the patch actually does is to disable the SMC experimental TCP header option,
so the client receives no SMC indication and does not proceed with SMC.
Is this correct?

Please also see my comments below.

> TCP according to the pressure of SMC handshake process. At present,
> frequent visits will cause the incoming connections to be backlogged in
> SMC handshake queue, raise the connections established time. Which is
> quite unacceptable for those applications who base on short lived
> connections.
> 
> There are two ways to implement this mechanism:
> 
> 1. Fallback when TCP established.
> 2. Fallback before TCP established.
> 
> In the first way, we need to wait and receive CLC messages that the
> client will potentially send, and then actively reply with a decline
> message, in a sense, which is also a sort of SMC handshake, affect the
> connections established time on its way.
> 
> In the second way, the only problem is that we need to inject SMC logic
> into TCP when it is about to reply the incoming SYN, since we already do
> that, it's seems not a problem anymore. And advantage is obvious, few
> additional processes are required to complete the fallback.
> 
> This patch use the second way.
> 
> Link: https://lore.kernel.org/all/1641301961-59331-1-git-send-email-alibuda@linux.alibaba.com/
> Signed-off-by: D. Wythe <alibuda@linux.alibaba.com>
> ---
>  include/linux/tcp.h  |  1 +
>  net/ipv4/tcp_input.c |  3 ++-
>  net/smc/af_smc.c     | 18 ++++++++++++++++++
>  3 files changed, 21 insertions(+), 1 deletion(-)
> 
> diff --git a/include/linux/tcp.h b/include/linux/tcp.h
> index 78b91bb..1c4ae5d 100644
> --- a/include/linux/tcp.h
> +++ b/include/linux/tcp.h
> @@ -394,6 +394,7 @@ struct tcp_sock {
>  	bool	is_mptcp;
>  #endif
>  #if IS_ENABLED(CONFIG_SMC)
> +	bool	(*smc_in_limited)(const struct sock *sk);

Better variable name: smc_hs_congested

>  	bool	syn_smc;	/* SYN includes SMC */
>  #endif
>  
> diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
> index af94a6d..e817ec6 100644
> --- a/net/ipv4/tcp_input.c
> +++ b/net/ipv4/tcp_input.c
> @@ -6703,7 +6703,8 @@ static void tcp_openreq_init(struct request_sock *req,
>  	ireq->ir_num = ntohs(tcp_hdr(skb)->dest);
>  	ireq->ir_mark = inet_request_mark(sk, skb);
>  #if IS_ENABLED(CONFIG_SMC)
> -	ireq->smc_ok = rx_opt->smc_ok;
> +	ireq->smc_ok = rx_opt->smc_ok && !(tcp_sk(sk)->smc_in_limited &&
> +			tcp_sk(sk)->smc_in_limited(sk));

Use new name here and elsewhere ...

>  #endif
>  }
>  
> diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c
> index ebfce3d..8175f60 100644
> --- a/net/smc/af_smc.c
> +++ b/net/smc/af_smc.c
> @@ -101,6 +101,22 @@ static struct sock *smc_tcp_syn_recv_sock(const struct sock *sk, struct sk_buff
>  	return NULL;
>  }
>  
> +static bool smc_is_in_limited(const struct sock *sk)

Better function name: smc_hs_congested()

> +{
> +	const struct smc_sock *smc;
> +
> +	smc = (const struct smc_sock *)
> +		((uintptr_t)sk->sk_user_data & ~SK_USER_DATA_NOCOPY);
> +
> +	if (!smc)
> +		return true;
> +
> +	if (workqueue_congested(WORK_CPU_UNBOUND, smc_hs_wq))
> +		return true;
> +
> +	return false;
> +}
> +
>  static struct smc_hashinfo smc_v4_hashinfo = {
>  	.lock = __RW_LOCK_UNLOCKED(smc_v4_hashinfo.lock),
>  };
> @@ -2309,6 +2325,8 @@ static int smc_listen(struct socket *sock, int backlog)
>  
>  	inet_csk(smc->clcsock->sk)->icsk_af_ops = &smc->af_ops;
>  
> +	tcp_sk(smc->clcsock->sk)->smc_in_limited = smc_is_in_limited;

Use new names here, too.

> +
>  	rc = kernel_listen(smc->clcsock, backlog);
>  	if (rc) {
>  		smc->clcsock->sk->sk_data_ready = smc->clcsk_data_ready;
D. Wythe Feb. 10, 2022, 3:43 a.m. UTC | #2
在 2022/2/10 上午12:11, Karsten Graul 写道:
> On 09/02/2022 15:11, D. Wythe wrote:
>> From: "D. Wythe" <alibuda@linux.alibaba.com>
>>
>> This patch intends to provide a mechanism to allow automatic fallback to
> 
> I would like to avoid the wording fallback all over here. The term SMC fallback
> is used for SMC connections that are in our socket list, but use TCP because
> something went wrong during handshake.
> What you changes result in are TCP-only connections which are not handled by
> the SMC module at all. So the comments should use a different naming for that.
> What the patch actually does is to disable the SMC experimental TCP header option,
> so the client receives no SMC indication and does not proceed with SMC.
> Is this correct?
> 
> Please also see my comments below.

I agree with you, the wording fallback doesn't fit here. I'll try 
limitation.

>> TCP according to the pressure of SMC handshake process. At present,
>> frequent visits will cause the incoming connections to be backlogged in
>> SMC handshake queue, raise the connections established time. Which is
>> quite unacceptable for those applications who base on short lived
>> connections.
>>
>> There are two ways to implement this mechanism:
>>
>> 1. Fallback when TCP established.
>> 2. Fallback before TCP established.
>>
>> In the first way, we need to wait and receive CLC messages that the
>> client will potentially send, and then actively reply with a decline
>> message, in a sense, which is also a sort of SMC handshake, affect the
>> connections established time on its way.
>>
>> In the second way, the only problem is that we need to inject SMC logic
>> into TCP when it is about to reply the incoming SYN, since we already do
>> that, it's seems not a problem anymore. And advantage is obvious, few
>> additional processes are required to complete the fallback.
>>
>> This patch use the second way.
>>
>> Link: https://lore.kernel.org/all/1641301961-59331-1-git-send-email-alibuda@linux.alibaba.com/
>> Signed-off-by: D. Wythe <alibuda@linux.alibaba.com>
>> ---
>>   include/linux/tcp.h  |  1 +
>>   net/ipv4/tcp_input.c |  3 ++-
>>   net/smc/af_smc.c     | 18 ++++++++++++++++++
>>   3 files changed, 21 insertions(+), 1 deletion(-)
>>
>> diff --git a/include/linux/tcp.h b/include/linux/tcp.h
>> index 78b91bb..1c4ae5d 100644
>> --- a/include/linux/tcp.h
>> +++ b/include/linux/tcp.h
>> @@ -394,6 +394,7 @@ struct tcp_sock {
>>   	bool	is_mptcp;
>>   #endif
>>   #if IS_ENABLED(CONFIG_SMC)
>> +	bool	(*smc_in_limited)(const struct sock *sk);
> 
> Better variable name: smc_hs_congested
> 
>>   	bool	syn_smc;	/* SYN includes SMC */
>>   #endif
>>   
>> diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
>> index af94a6d..e817ec6 100644
>> --- a/net/ipv4/tcp_input.c
>> +++ b/net/ipv4/tcp_input.c
>> @@ -6703,7 +6703,8 @@ static void tcp_openreq_init(struct request_sock *req,
>>   	ireq->ir_num = ntohs(tcp_hdr(skb)->dest);
>>   	ireq->ir_mark = inet_request_mark(sk, skb);
>>   #if IS_ENABLED(CONFIG_SMC)
>> -	ireq->smc_ok = rx_opt->smc_ok;
>> +	ireq->smc_ok = rx_opt->smc_ok && !(tcp_sk(sk)->smc_in_limited &&
>> +			tcp_sk(sk)->smc_in_limited(sk));
> 
> Use new name here and elsewhere ...
> 
>>   #endif
>>   }
>>   
>> diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c
>> index ebfce3d..8175f60 100644
>> --- a/net/smc/af_smc.c
>> +++ b/net/smc/af_smc.c
>> @@ -101,6 +101,22 @@ static struct sock *smc_tcp_syn_recv_sock(const struct sock *sk, struct sk_buff
>>   	return NULL;
>>   }
>>   
>> +static bool smc_is_in_limited(const struct sock *sk)
> 
> Better function name: smc_hs_congested()
> 
>> +{
>> +	const struct smc_sock *smc;
>> +
>> +	smc = (const struct smc_sock *)
>> +		((uintptr_t)sk->sk_user_data & ~SK_USER_DATA_NOCOPY);
>> +
>> +	if (!smc)
>> +		return true;
>> +
>> +	if (workqueue_congested(WORK_CPU_UNBOUND, smc_hs_wq))
>> +		return true;
>> +
>> +	return false;
>> +}
>> +
>>   static struct smc_hashinfo smc_v4_hashinfo = {
>>   	.lock = __RW_LOCK_UNLOCKED(smc_v4_hashinfo.lock),
>>   };
>> @@ -2309,6 +2325,8 @@ static int smc_listen(struct socket *sock, int backlog)
>>   
>>   	inet_csk(smc->clcsock->sk)->icsk_af_ops = &smc->af_ops;
>>   
>> +	tcp_sk(smc->clcsock->sk)->smc_in_limited = smc_is_in_limited;
> 
> Use new names here, too.
> 
>> +
>>   	rc = kernel_listen(smc->clcsock, backlog);
>>   	if (rc) {
>>   		smc->clcsock->sk->sk_data_ready = smc->clcsk_data_ready;


Copy that, i'll rename it all soon.

Thanks.
diff mbox series

Patch

diff --git a/include/linux/tcp.h b/include/linux/tcp.h
index 78b91bb..1c4ae5d 100644
--- a/include/linux/tcp.h
+++ b/include/linux/tcp.h
@@ -394,6 +394,7 @@  struct tcp_sock {
 	bool	is_mptcp;
 #endif
 #if IS_ENABLED(CONFIG_SMC)
+	bool	(*smc_in_limited)(const struct sock *sk);
 	bool	syn_smc;	/* SYN includes SMC */
 #endif
 
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index af94a6d..e817ec6 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -6703,7 +6703,8 @@  static void tcp_openreq_init(struct request_sock *req,
 	ireq->ir_num = ntohs(tcp_hdr(skb)->dest);
 	ireq->ir_mark = inet_request_mark(sk, skb);
 #if IS_ENABLED(CONFIG_SMC)
-	ireq->smc_ok = rx_opt->smc_ok;
+	ireq->smc_ok = rx_opt->smc_ok && !(tcp_sk(sk)->smc_in_limited &&
+			tcp_sk(sk)->smc_in_limited(sk));
 #endif
 }
 
diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c
index ebfce3d..8175f60 100644
--- a/net/smc/af_smc.c
+++ b/net/smc/af_smc.c
@@ -101,6 +101,22 @@  static struct sock *smc_tcp_syn_recv_sock(const struct sock *sk, struct sk_buff
 	return NULL;
 }
 
+static bool smc_is_in_limited(const struct sock *sk)
+{
+	const struct smc_sock *smc;
+
+	smc = (const struct smc_sock *)
+		((uintptr_t)sk->sk_user_data & ~SK_USER_DATA_NOCOPY);
+
+	if (!smc)
+		return true;
+
+	if (workqueue_congested(WORK_CPU_UNBOUND, smc_hs_wq))
+		return true;
+
+	return false;
+}
+
 static struct smc_hashinfo smc_v4_hashinfo = {
 	.lock = __RW_LOCK_UNLOCKED(smc_v4_hashinfo.lock),
 };
@@ -2309,6 +2325,8 @@  static int smc_listen(struct socket *sock, int backlog)
 
 	inet_csk(smc->clcsock->sk)->icsk_af_ops = &smc->af_ops;
 
+	tcp_sk(smc->clcsock->sk)->smc_in_limited = smc_is_in_limited;
+
 	rc = kernel_listen(smc->clcsock, backlog);
 	if (rc) {
 		smc->clcsock->sk->sk_data_ready = smc->clcsk_data_ready;