@@ -51,11 +51,25 @@ static inline __u8 INET_ECN_encapsulate(__u8 outer, __u8 inner)
return outer;
}
+/* Apply either ECT(0) or ECT(1) */
+static inline void __INET_ECN_xmit(struct sock *sk, bool use_ect_1)
+{
+ __u8 ect = use_ect_1 ? INET_ECN_ECT_1 : INET_ECN_ECT_0;
+
+ /* Mask the complete byte in case the connection alternates between
+ * ECT(0) and ECT(1).
+ */
+ inet_sk(sk)->tos &= ~INET_ECN_MASK;
+ inet_sk(sk)->tos |= ect;
+ if (inet6_sk(sk) != NULL) {
+ inet6_sk(sk)->tclass &= ~INET_ECN_MASK;
+ inet6_sk(sk)->tclass |= ect;
+ }
+}
+
static inline void INET_ECN_xmit(struct sock *sk)
{
- inet_sk(sk)->tos |= INET_ECN_ECT_0;
- if (inet6_sk(sk) != NULL)
- inet6_sk(sk)->tclass |= INET_ECN_ECT_0;
+ __INET_ECN_xmit(sk, false);
}
static inline void INET_ECN_dontxmit(struct sock *sk)
@@ -426,6 +426,7 @@ static inline void tcp_dec_quickack_mode(struct sock *sk)
#define TCP_ECN_DEMAND_CWR BIT(2)
#define TCP_ECN_SEEN BIT(3)
#define TCP_ECN_MODE_ACCECN BIT(4)
+#define TCP_ECN_ECT_1 BIT(5)
#define TCP_ECN_DISABLED 0
#define TCP_ECN_MODE_PENDING (TCP_ECN_MODE_RFC3168|TCP_ECN_MODE_ACCECN)
@@ -1253,6 +1254,8 @@ enum tcp_ca_ack_event_flags {
#define TCP_CONG_NEEDS_ECN BIT(1)
/* Require successfully negotiated AccECN capability */
#define TCP_CONG_NEEDS_ACCECN BIT(2)
+/* Use ECT(1) instead of ECT(0) while the CA is uninitialized */
+#define TCP_CONG_WANTS_ECT_1 (TCP_CONG_NEEDS_ECN | TCP_CONG_NEEDS_ACCECN)
#define TCP_CONG_MASK (TCP_CONG_NON_RESTRICTED | TCP_CONG_NEEDS_ECN | \
TCP_CONG_NEEDS_ACCECN)
@@ -1394,6 +1397,11 @@ static inline bool tcp_ca_needs_accecn(const struct sock *sk)
return icsk->icsk_ca_ops->flags & TCP_CONG_NEEDS_ACCECN;
}
+static inline bool tcp_ca_wants_ect_1(const struct sock *sk)
+{
+ return inet_csk(sk)->icsk_ca_ops->flags & TCP_CONG_WANTS_ECT_1;
+}
+
static inline void tcp_ca_event(struct sock *sk, const enum tcp_ca_event event)
{
const struct inet_connection_sock *icsk = inet_csk(sk);
@@ -227,7 +227,7 @@ void tcp_assign_congestion_control(struct sock *sk)
memset(icsk->icsk_ca_priv, 0, sizeof(icsk->icsk_ca_priv));
if (ca->flags & TCP_CONG_NEEDS_ECN)
- INET_ECN_xmit(sk);
+ __INET_ECN_xmit(sk, tcp_ca_wants_ect_1(sk));
else
INET_ECN_dontxmit(sk);
}
@@ -240,7 +240,10 @@ void tcp_init_congestion_control(struct sock *sk)
if (icsk->icsk_ca_ops->init)
icsk->icsk_ca_ops->init(sk);
if (tcp_ca_needs_ecn(sk))
- INET_ECN_xmit(sk);
+ /* The CA is already initialized, expect it to set the
+ * appropriate flag to select ECT(1).
+ */
+ __INET_ECN_xmit(sk, tcp_sk(sk)->ecn_flags & TCP_ECN_ECT_1);
else
INET_ECN_dontxmit(sk);
icsk->icsk_ca_initialized = 1;
@@ -257,7 +260,7 @@ static void tcp_reinit_congestion_control(struct sock *sk,
memset(icsk->icsk_ca_priv, 0, sizeof(icsk->icsk_ca_priv));
if (ca->flags & TCP_CONG_NEEDS_ECN)
- INET_ECN_xmit(sk);
+ __INET_ECN_xmit(sk, tcp_ca_wants_ect_1(sk));
else
INET_ECN_dontxmit(sk);
@@ -326,7 +326,7 @@ static void tcp_ecn_send_synack(struct sock *sk, struct sk_buff *skb)
TCP_SKB_CB(skb)->tcp_flags &= ~TCPHDR_ECE;
else if (tcp_ca_needs_ecn(sk) ||
tcp_bpf_ca_needs_ecn(sk))
- INET_ECN_xmit(sk);
+ __INET_ECN_xmit(sk, tcp_ca_wants_ect_1(sk));
if (tp->ecn_flags & TCP_ECN_MODE_ACCECN) {
TCP_SKB_CB(skb)->tcp_flags &= ~TCPHDR_ACE;
@@ -366,7 +366,7 @@ static void tcp_ecn_send_syn(struct sock *sk, struct sk_buff *skb)
if (use_ecn) {
if (tcp_ca_needs_ecn(sk) || bpf_needs_ecn)
- INET_ECN_xmit(sk);
+ __INET_ECN_xmit(sk, tcp_ca_wants_ect_1(sk));
TCP_SKB_CB(skb)->tcp_flags |= TCPHDR_ECE | TCPHDR_CWR;
if (use_accecn) {
@@ -435,7 +435,8 @@ static void tcp_ecn_send(struct sock *sk, struct sk_buff *skb,
return;
if (!tcp_accecn_ace_fail_recv(tp))
- INET_ECN_xmit(sk);
+ /* The CCA could change the ECT codepoint on the fly, reset it*/
+ __INET_ECN_xmit(sk, tp->ecn_flags & TCP_ECN_ECT_1);
if (tcp_ecn_mode_accecn(tp)) {
tcp_accecn_set_ace(tp, skb, th);
skb_shinfo(skb)->gso_type |= SKB_GSO_TCP_ACCECN;