[v2,net] tcp: fix TLP timer not set when CA_STATE changes from DISORDER to OPEN

Message ID 1611464834-23030-1-git-send-email-yangpc@wangsu.com (mailing list archive)
State Accepted
Commit 62d9f1a6945ba69c125e548e72a36d203b30596e
Delegated to: Netdev Maintainers
Series [v2,net] tcp: fix TLP timer not set when CA_STATE changes from DISORDER to OPEN

Checks

Context                          Check    Description
netdev/cover_letter              success
netdev/fixes_present             fail     Series targets non-next tree, but doesn't contain any Fixes tags
netdev/patch_count               success
netdev/tree_selection            success  Clearly marked for net
netdev/subject_prefix            success
netdev/cc_maintainers            warning  1 maintainer not CCed: yoshfuji@linux-ipv6.org
netdev/source_inline             success  Was 0 now: 0
netdev/verify_signedoff          success
netdev/module_param              success  Was 0 now: 0
netdev/build_32bit               success  Errors and warnings before: 1498 this patch: 1498
netdev/kdoc                      success  Errors and warnings before: 0 this patch: 0
netdev/verify_fixes              success
netdev/checkpatch                warning  CHECK: extern prototypes should be avoided in .h files
netdev/build_allmodconfig_warn   success  Errors and warnings before: 1505 this patch: 1505
netdev/header_inline             success
netdev/stable                    success  Stable not CCed

Commit Message

Pengcheng Yang Jan. 24, 2021, 5:07 a.m. UTC
Upon receiving a cumulative ACK that changes the congestion state from
Disorder to Open, the TLP timer is not set. If the sender is app-limited,
it can only wait for the RTO timer to expire and retransmit.

The reason is that the TLP timer is armed before the congestion state
changes in tcp_ack(). Fix this by delaying the call to
tcp_set_xmit_timer() until after tcp_fastretrans_alert() returns, and
by removing FLAG_SET_XMIT_TIMER from ack_flag when the RACK reorder
timer is set.

This commit has two additional benefits:
1) Make sure the RTO is re-armed per RFC 6298 on every ACK, avoiding
spurious RTOs caused by the RTO timer expiring early.
2) Save one xmit-timer reschedule per ACK when the RACK reorder timer
is set.

Link: https://lore.kernel.org/netdev/1611311242-6675-1-git-send-email-yangpc@wangsu.com
Signed-off-by: Pengcheng Yang <yangpc@wangsu.com>
Cc: Neal Cardwell <ncardwell@google.com>
Cc: Yuchung Cheng <ycheng@google.com>
Cc: Eric Dumazet <edumazet@google.com>
---
v2:
 - modify the commit message according to Yuchung's suggestion

 include/net/tcp.h       |  2 +-
 net/ipv4/tcp_input.c    | 10 ++++++----
 net/ipv4/tcp_recovery.c |  5 +++--
 3 files changed, 10 insertions(+), 7 deletions(-)
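
For readers following the fix: below is a simplified sketch of the patched
tcp_ack() ordering (helper bodies and unrelated logic elided; this is an
illustration, not the verbatim kernel source). The key detail is that
tcp_schedule_loss_probe() in net/ipv4/tcp_output.c only arms a TLP while
the CA state is Open (or CWR), and the Disorder -> Open transition happens
inside tcp_fastretrans_alert():

static void tcp_set_xmit_timer(struct sock *sk)
{
	/* tcp_schedule_loss_probe() bails out unless the CA state is
	 * Open or CWR, so calling this while still in Disorder arms
	 * only the plain RTO timer.
	 */
	if (!tcp_schedule_loss_probe(sk, true))
		tcp_rearm_rto(sk);
}

static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
{
	/* ... ACK processing sets FLAG_SET_XMIT_TIMER when data was
	 * ACKed/SACKed ... */

	/* Old placement: tcp_set_xmit_timer() ran here, while the CA
	 * state could still be Disorder, so no TLP was scheduled. */

	if (tcp_ack_is_dubious(sk, flag))
		tcp_fastretrans_alert(sk, prior_snd_una, num_dupack,
				      &flag, &rexmit);
	/* May move Disorder -> Open; via tcp_identify_packet_loss(),
	 * RACK may also arm its reorder timer and clear
	 * FLAG_SET_XMIT_TIMER in flag. */

	/* New placement: arm TLP/RTO once the final CA state is known,
	 * unless RACK already claimed the timer. */
	if (flag & FLAG_SET_XMIT_TIMER)
		tcp_set_xmit_timer(sk);

	/* ... */
}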

Comments

Yuchung Cheng Jan. 24, 2021, 5 p.m. UTC | #1
On Sat, Jan 23, 2021 at 9:11 PM Pengcheng Yang <yangpc@wangsu.com> wrote:
>
> Upon receiving a cumulative ACK that changes the congestion state from
> Disorder to Open, the TLP timer is not set. If the sender is app-limited,
> it can only wait for the RTO timer to expire and retransmit.
>
> [...]
Acked-by: Yuchung Cheng <ycheng@google.com>
thanks!
Neal Cardwell Jan. 24, 2021, 6:26 p.m. UTC | #2
On Sun, Jan 24, 2021 at 12:11 AM Pengcheng Yang <yangpc@wangsu.com> wrote:
>
> Upon receiving a cumulative ACK that changes the congestion state from
> Disorder to Open, the TLP timer is not set. If the sender is app-limited,
> it can only wait for the RTO timer to expire and retransmit.
>
> [...]

Thanks, Pengcheng! This seems to be missing the Fixes tag, but I guess
the maintainers can add it:

Fixes: df92c8394e6e ("tcp: fix xmit timer to only be reset if data ACKed/SACKed")

Acked-by: Neal Cardwell <ncardwell@google.com>

neal
patchwork-bot+netdevbpf@kernel.org Jan. 25, 2021, 10 p.m. UTC | #3
Hello:

This patch was applied to netdev/net.git (refs/heads/master):

On Sun, 24 Jan 2021 13:07:14 +0800 you wrote:
> Upon receiving a cumulative ACK that changes the congestion state from
> Disorder to Open, the TLP timer is not set. If the sender is app-limited,
> it can only wait for the RTO timer to expire and retransmit.
> 
> [...]

Here is the summary with links:
  - [v2,net] tcp: fix TLP timer not set when CA_STATE changes from DISORDER to OPEN
    https://git.kernel.org/netdev/net/c/62d9f1a6945b

You are awesome, thank you!
--
Deet-doot-dot, I am a bot.
https://korg.docs.kernel.org/patchwork/pwbot.html

Patch

diff --git a/include/net/tcp.h b/include/net/tcp.h
index 78d13c8..67f7e52 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -2060,7 +2060,7 @@ static inline __u32 cookie_init_sequence(const struct tcp_request_sock_ops *ops,
 void tcp_newreno_mark_lost(struct sock *sk, bool snd_una_advanced);
 extern s32 tcp_rack_skb_timeout(struct tcp_sock *tp, struct sk_buff *skb,
 				u32 reo_wnd);
-extern void tcp_rack_mark_lost(struct sock *sk);
+extern bool tcp_rack_mark_lost(struct sock *sk);
 extern void tcp_rack_advance(struct tcp_sock *tp, u8 sacked, u32 end_seq,
 			     u64 xmit_time);
 extern void tcp_rack_reo_timeout(struct sock *sk);
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index c7e16b0..d0a9588 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -2859,7 +2859,8 @@ static void tcp_identify_packet_loss(struct sock *sk, int *ack_flag)
 	} else if (tcp_is_rack(sk)) {
 		u32 prior_retrans = tp->retrans_out;
 
-		tcp_rack_mark_lost(sk);
+		if (tcp_rack_mark_lost(sk))
+			*ack_flag &= ~FLAG_SET_XMIT_TIMER;
 		if (prior_retrans > tp->retrans_out)
 			*ack_flag |= FLAG_LOST_RETRANS;
 	}
@@ -3815,9 +3816,6 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
 
 	if (tp->tlp_high_seq)
 		tcp_process_tlp_ack(sk, ack, flag);
-	/* If needed, reset TLP/RTO timer; RACK may later override this. */
-	if (flag & FLAG_SET_XMIT_TIMER)
-		tcp_set_xmit_timer(sk);
 
 	if (tcp_ack_is_dubious(sk, flag)) {
 		if (!(flag & (FLAG_SND_UNA_ADVANCED | FLAG_NOT_DUP))) {
@@ -3830,6 +3828,10 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
 				      &rexmit);
 	}
 
+	/* If needed, reset TLP/RTO timer when RACK doesn't set. */
+	if (flag & FLAG_SET_XMIT_TIMER)
+		tcp_set_xmit_timer(sk);
+
 	if ((flag & FLAG_FORWARD_PROGRESS) || !(flag & FLAG_NOT_DUP))
 		sk_dst_confirm(sk);
 
diff --git a/net/ipv4/tcp_recovery.c b/net/ipv4/tcp_recovery.c
index 177307a..6f1b4ac 100644
--- a/net/ipv4/tcp_recovery.c
+++ b/net/ipv4/tcp_recovery.c
@@ -96,13 +96,13 @@ static void tcp_rack_detect_loss(struct sock *sk, u32 *reo_timeout)
 	}
 }
 
-void tcp_rack_mark_lost(struct sock *sk)
+bool tcp_rack_mark_lost(struct sock *sk)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 	u32 timeout;
 
 	if (!tp->rack.advanced)
-		return;
+		return false;
 
 	/* Reset the advanced flag to avoid unnecessary queue scanning */
 	tp->rack.advanced = 0;
@@ -112,6 +112,7 @@ void tcp_rack_mark_lost(struct sock *sk)
 		inet_csk_reset_xmit_timer(sk, ICSK_TIME_REO_TIMEOUT,
 					  timeout, inet_csk(sk)->icsk_rto);
 	}
+	return !!timeout;
 }
 
 /* Record the most recently (re)sent time among the (s)acked packets
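
Why clearing FLAG_SET_XMIT_TIMER matters here (a reading of the
surrounding code, not spelled out in the thread): ICSK_TIME_REO_TIMEOUT
shares icsk->icsk_pending and the single retransmit timer with
ICSK_TIME_RETRANS and ICSK_TIME_LOSS_PROBE, so whichever
inet_csk_reset_xmit_timer() call runs last wins. Condensed, the
caller-side interplay introduced by this patch looks like:

	if (tcp_rack_mark_lost(sk))	/* armed ICSK_TIME_REO_TIMEOUT */
		/* Keep the later tcp_set_xmit_timer() call in tcp_ack()
		 * from overwriting the reorder timer with a TLP/RTO rearm.
		 */
		*ack_flag &= ~FLAG_SET_XMIT_TIMER;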