@@ -419,7 +419,8 @@ enum tcp_tw_status {
TCP_TW_SUCCESS = 0,
TCP_TW_RST = 1,
TCP_TW_ACK = 2,
- TCP_TW_SYN = 3
+ TCP_TW_SYN = 3,
+ TCP_TW_ACK_OOW = 4
};
@@ -75,7 +75,6 @@
#include <net/checksum.h>
#include <net/gso.h>
#include <net/inetpeer.h>
-#include <net/inet_ecn.h>
#include <net/lwtunnel.h>
#include <net/inet_dscp.h>
#include <linux/bpf-cgroup.h>
@@ -1640,7 +1639,7 @@ void ip_send_unicast_reply(struct sock *sk, const struct sock *orig_sk,
if (IS_ERR(rt))
return;
- inet_sk(sk)->tos = arg->tos & ~INET_ECN_MASK;
+ inet_sk(sk)->tos = arg->tos;
sk->sk_protocol = ip_hdr(skb)->protocol;
sk->sk_bound_dev_if = arg->bound_dev_if;
@@ -66,6 +66,7 @@
#include <net/transp_v6.h>
#include <net/ipv6.h>
#include <net/inet_common.h>
+#include <net/inet_ecn.h>
#include <net/timewait_sock.h>
#include <net/xfrm.h>
#include <net/secure_seq.h>
@@ -887,7 +888,8 @@ static void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb,
BUILD_BUG_ON(offsetof(struct sock, sk_bound_dev_if) !=
offsetof(struct inet_timewait_sock, tw_bound_dev_if));
- arg.tos = ip_hdr(skb)->tos;
+ /* ECN bits of TW reset are cleared */
+ arg.tos = ip_hdr(skb)->tos & ~INET_ECN_MASK;
arg.uid = sock_net_uid(net, sk && sk_fullsock(sk) ? sk : NULL);
local_bh_disable();
local_lock_nested_bh(&ipv4_tcp_sk.bh_lock);
@@ -1033,11 +1035,21 @@ static void tcp_v4_send_ack(const struct sock *sk,
local_bh_enable();
}
-static void tcp_v4_timewait_ack(struct sock *sk, struct sk_buff *skb)
+static void tcp_v4_timewait_ack(struct sock *sk, struct sk_buff *skb,
+ enum tcp_tw_status tw_status)
{
struct inet_timewait_sock *tw = inet_twsk(sk);
struct tcp_timewait_sock *tcptw = tcp_twsk(sk);
struct tcp_key key = {};
+ u8 tos = tw->tw_tos;
+
+ /* Cleaning only ECN bits of TW ACKs of oow data or is paws_reject,
+ * while not cleaning ECN bits of other TW ACKs to avoid these ACKs
+ * being placed in a different service queues (Classic rather than L4S)
+ */
+ if (tw_status == TCP_TW_ACK_OOW)
+ tos &= ~INET_ECN_MASK;
+
#ifdef CONFIG_TCP_AO
struct tcp_ao_info *ao_info;
@@ -1081,7 +1093,7 @@ static void tcp_v4_timewait_ack(struct sock *sk, struct sk_buff *skb)
READ_ONCE(tcptw->tw_ts_recent),
tw->tw_bound_dev_if, &key,
tw->tw_transparent ? IP_REPLY_ARG_NOSRCCHECK : 0,
- tw->tw_tos,
+ tos,
tw->tw_txhash);
inet_twsk_put(tw);
@@ -1151,6 +1163,7 @@ static void tcp_v4_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
key.type = TCP_KEY_MD5;
}
+ /* Cleaning ECN bits of TW ACKs of oow data or is paws_reject */
tcp_v4_send_ack(sk, skb, seq,
tcp_rsk(req)->rcv_nxt,
tcp_synack_window(req) >> inet_rsk(req)->rcv_wscale,
@@ -1158,7 +1171,7 @@ static void tcp_v4_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
req->ts_recent,
0, &key,
inet_rsk(req)->no_srccheck ? IP_REPLY_ARG_NOSRCCHECK : 0,
- ip_hdr(skb)->tos,
+ ip_hdr(skb)->tos & ~INET_ECN_MASK,
READ_ONCE(tcp_rsk(req)->txhash));
if (tcp_key_is_ao(&key))
kfree(key.traffic_key);
@@ -2175,6 +2188,7 @@ int tcp_v4_rcv(struct sk_buff *skb)
{
struct net *net = dev_net_rcu(skb->dev);
enum skb_drop_reason drop_reason;
+ enum tcp_tw_status tw_status;
int sdif = inet_sdif(skb);
int dif = inet_iif(skb);
const struct iphdr *iph;
@@ -2402,7 +2416,9 @@ int tcp_v4_rcv(struct sk_buff *skb)
inet_twsk_put(inet_twsk(sk));
goto csum_error;
}
- switch (tcp_timewait_state_process(inet_twsk(sk), skb, th, &isn)) {
+
+ tw_status = tcp_timewait_state_process(inet_twsk(sk), skb, th, &isn);
+ switch (tw_status) {
case TCP_TW_SYN: {
struct sock *sk2 = inet_lookup_listener(net,
net->ipv4.tcp_death_row.hashinfo,
@@ -2423,7 +2439,8 @@ int tcp_v4_rcv(struct sk_buff *skb)
/* to ACK */
fallthrough;
case TCP_TW_ACK:
- tcp_v4_timewait_ack(sk, skb);
+ case TCP_TW_ACK_OOW:
+ tcp_v4_timewait_ack(sk, skb, tw_status);
break;
case TCP_TW_RST:
tcp_v4_send_reset(sk, skb, SK_RST_REASON_TCP_TIMEWAIT_SOCKET);
@@ -44,7 +44,7 @@ tcp_timewait_check_oow_rate_limit(struct inet_timewait_sock *tw,
/* Send ACK. Note, we do not put the bucket,
* it will be released by caller.
*/
- return TCP_TW_ACK;
+ return TCP_TW_ACK_OOW;
}
/* We are rate-limiting, so just release the tw sock and drop skb. */
@@ -999,7 +999,7 @@ static void tcp_v6_send_response(const struct sock *sk, struct sk_buff *skb, u32
if (!IS_ERR(dst)) {
skb_dst_set(buff, dst);
ip6_xmit(ctl_sk, buff, &fl6, fl6.flowi6_mark, NULL,
- tclass & ~INET_ECN_MASK, priority);
+ tclass, priority);
TCP_INC_STATS(net, TCP_MIB_OUTSEGS);
if (rst)
TCP_INC_STATS(net, TCP_MIB_OUTRSTS);
@@ -1135,7 +1135,8 @@ static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb,
trace_tcp_send_reset(sk, skb, reason);
tcp_v6_send_response(sk, skb, seq, ack_seq, 0, 0, 0, oif, 1,
- ipv6_get_dsfield(ipv6h), label, priority, txhash,
+ ipv6_get_dsfield(ipv6h) & ~INET_ECN_MASK,
+ label, priority, txhash,
&key);
#if defined(CONFIG_TCP_MD5SIG) || defined(CONFIG_TCP_AO)
@@ -1155,11 +1156,16 @@ static void tcp_v6_send_ack(const struct sock *sk, struct sk_buff *skb, u32 seq,
tclass, label, priority, txhash, key);
}
-static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb)
+static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb,
+ enum tcp_tw_status tw_status)
{
struct inet_timewait_sock *tw = inet_twsk(sk);
struct tcp_timewait_sock *tcptw = tcp_twsk(sk);
+ u8 tclass = tw->tw_tclass;
struct tcp_key key = {};
+
+ if (tw_status == TCP_TW_ACK_OOW)
+ tclass &= ~INET_ECN_MASK;
#ifdef CONFIG_TCP_AO
struct tcp_ao_info *ao_info;
@@ -1203,7 +1209,7 @@ static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb)
tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale,
tcp_tw_tsval(tcptw),
READ_ONCE(tcptw->tw_ts_recent), tw->tw_bound_dev_if,
- &key, tw->tw_tclass, cpu_to_be32(tw->tw_flowlabel),
+ &key, tclass, cpu_to_be32(tw->tw_flowlabel),
tw->tw_priority, tw->tw_txhash);
#ifdef CONFIG_TCP_AO
@@ -1280,7 +1286,8 @@ static void tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
tcp_synack_window(req) >> inet_rsk(req)->rcv_wscale,
tcp_rsk_tsval(tcp_rsk(req)),
req->ts_recent, sk->sk_bound_dev_if,
- &key, ipv6_get_dsfield(ipv6_hdr(skb)), 0,
+ &key, ipv6_get_dsfield(ipv6_hdr(skb)) & ~INET_ECN_MASK,
+ 0,
READ_ONCE(sk->sk_priority),
READ_ONCE(tcp_rsk(req)->txhash));
if (tcp_key_is_ao(&key))
@@ -1742,6 +1749,7 @@ INDIRECT_CALLABLE_SCOPE int tcp_v6_rcv(struct sk_buff *skb)
{
struct net *net = dev_net_rcu(skb->dev);
enum skb_drop_reason drop_reason;
+ enum tcp_tw_status tw_status;
int sdif = inet6_sdif(skb);
int dif = inet6_iif(skb);
const struct tcphdr *th;
@@ -1962,7 +1970,8 @@ INDIRECT_CALLABLE_SCOPE int tcp_v6_rcv(struct sk_buff *skb)
goto csum_error;
}
- switch (tcp_timewait_state_process(inet_twsk(sk), skb, th, &isn)) {
+ tw_status = tcp_timewait_state_process(inet_twsk(sk), skb, th, &isn);
+ switch (tw_status) {
case TCP_TW_SYN:
{
struct sock *sk2;
@@ -1987,7 +1996,8 @@ INDIRECT_CALLABLE_SCOPE int tcp_v6_rcv(struct sk_buff *skb)
/* to ACK */
fallthrough;
case TCP_TW_ACK:
- tcp_v6_timewait_ack(sk, skb);
+ case TCP_TW_ACK_OOW:
+ tcp_v6_timewait_ack(sk, skb, tw_status);
break;
case TCP_TW_RST:
tcp_v6_send_reset(sk, skb, SK_RST_REASON_TCP_TIMEWAIT_SOCKET);