Message ID | 20241010174817.1543642-6-edumazet@google.com (mailing list archive) |
---|---|
State | Accepted |
Commit | 79636038d37e7bd4d078238f2a3f002cab4423bc |
Delegated to: | Netdev Maintainers |
Headers | show |
Series | tcp: add skb->sk to more control packets | expand |
From: Eric Dumazet <edumazet@google.com> Date: Thu, 10 Oct 2024 17:48:17 +0000 > ip_send_unicast_reply() sends orphaned 'control packets'. > > These are RST packets and also ACK packets sent from TIME_WAIT. > > Some eBPF programs would prefer to have a meaningful skb->sk > pointer as much as possible. > > This means that TCP can now attach TIME_WAIT sockets to outgoing > skbs. > > Signed-off-by: Eric Dumazet <edumazet@google.com> Reviewed-by: Kuniyuki Iwashima <kuniyu@amazon.com>
On Thu, Oct 10, 2024 at 1:48 PM Eric Dumazet <edumazet@google.com> wrote: > > ip_send_unicast_reply() send orphaned 'control packets'. > > These are RST packets and also ACK packets sent from TIME_WAIT. > > Some eBPF programs would prefer to have a meaningful skb->sk > pointer as much as possible. > > This means that TCP can now attach TIME_WAIT sockets to outgoing > skbs. > > Signed-off-by: Eric Dumazet <edumazet@google.com> Reviewed-by: Brian Vazquez <brianvv@google.com> > --- > include/net/ip.h | 3 ++- > net/ipv4/ip_output.c | 5 ++++- > net/ipv4/tcp_ipv4.c | 4 ++-- > 3 files changed, 8 insertions(+), 4 deletions(-) > > diff --git a/include/net/ip.h b/include/net/ip.h > index bab084df15677543b7400bb2832c0e83988884cb..4be0a6a603b2b5d5cfddc045a7d49d0d77be9570 100644 > --- a/include/net/ip.h > +++ b/include/net/ip.h > @@ -288,7 +288,8 @@ static inline __u8 ip_reply_arg_flowi_flags(const struct ip_reply_arg *arg) > return (arg->flags & IP_REPLY_ARG_NOSRCCHECK) ? FLOWI_FLAG_ANYSRC : 0; > } > > -void ip_send_unicast_reply(struct sock *sk, struct sk_buff *skb, > +void ip_send_unicast_reply(struct sock *sk, const struct sock *orig_sk, > + struct sk_buff *skb, > const struct ip_options *sopt, > __be32 daddr, __be32 saddr, > const struct ip_reply_arg *arg, > diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c > index e5c55a95063dd8340f9a014102408e859b4eb755..0065b1996c947078bea210c9abe5c80fa0e0ab4f 100644 > --- a/net/ipv4/ip_output.c > +++ b/net/ipv4/ip_output.c > @@ -1596,7 +1596,8 @@ static int ip_reply_glue_bits(void *dptr, char *to, int offset, > * Generic function to send a packet as reply to another packet. > * Used to send some TCP resets/acks so far. 
> */ > -void ip_send_unicast_reply(struct sock *sk, struct sk_buff *skb, > +void ip_send_unicast_reply(struct sock *sk, const struct sock *orig_sk, > + struct sk_buff *skb, > const struct ip_options *sopt, > __be32 daddr, __be32 saddr, > const struct ip_reply_arg *arg, > @@ -1662,6 +1663,8 @@ void ip_send_unicast_reply(struct sock *sk, struct sk_buff *skb, > arg->csumoffset) = csum_fold(csum_add(nskb->csum, > arg->csum)); > nskb->ip_summed = CHECKSUM_NONE; > + if (orig_sk) > + skb_set_owner_edemux(nskb, (struct sock *)orig_sk); > if (transmit_time) > nskb->tstamp_type = SKB_CLOCK_MONOTONIC; > if (txhash) > diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c > index 985028434f644c399e51d12ba8d9c2c5740dc6e1..9d3dd101ea713b14e13afe662baa49d21b3b716c 100644 > --- a/net/ipv4/tcp_ipv4.c > +++ b/net/ipv4/tcp_ipv4.c > @@ -907,7 +907,7 @@ static void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb, > ctl_sk->sk_mark = 0; > ctl_sk->sk_priority = 0; > } > - ip_send_unicast_reply(ctl_sk, > + ip_send_unicast_reply(ctl_sk, sk, > skb, &TCP_SKB_CB(skb)->header.h4.opt, > ip_hdr(skb)->saddr, ip_hdr(skb)->daddr, > &arg, arg.iov[0].iov_len, > @@ -1021,7 +1021,7 @@ static void tcp_v4_send_ack(const struct sock *sk, > ctl_sk->sk_priority = (sk->sk_state == TCP_TIME_WAIT) ? > inet_twsk(sk)->tw_priority : READ_ONCE(sk->sk_priority); > transmit_time = tcp_transmit_time(sk); > - ip_send_unicast_reply(ctl_sk, > + ip_send_unicast_reply(ctl_sk, sk, > skb, &TCP_SKB_CB(skb)->header.h4.opt, > ip_hdr(skb)->saddr, ip_hdr(skb)->daddr, > &arg, arg.iov[0].iov_len, > -- > 2.47.0.rc1.288.g06298d1525-goog >
diff --git a/include/net/ip.h b/include/net/ip.h index bab084df15677543b7400bb2832c0e83988884cb..4be0a6a603b2b5d5cfddc045a7d49d0d77be9570 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -288,7 +288,8 @@ static inline __u8 ip_reply_arg_flowi_flags(const struct ip_reply_arg *arg) return (arg->flags & IP_REPLY_ARG_NOSRCCHECK) ? FLOWI_FLAG_ANYSRC : 0; } -void ip_send_unicast_reply(struct sock *sk, struct sk_buff *skb, +void ip_send_unicast_reply(struct sock *sk, const struct sock *orig_sk, + struct sk_buff *skb, const struct ip_options *sopt, __be32 daddr, __be32 saddr, const struct ip_reply_arg *arg, diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index e5c55a95063dd8340f9a014102408e859b4eb755..0065b1996c947078bea210c9abe5c80fa0e0ab4f 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -1596,7 +1596,8 @@ static int ip_reply_glue_bits(void *dptr, char *to, int offset, * Generic function to send a packet as reply to another packet. * Used to send some TCP resets/acks so far. 
*/ -void ip_send_unicast_reply(struct sock *sk, struct sk_buff *skb, +void ip_send_unicast_reply(struct sock *sk, const struct sock *orig_sk, + struct sk_buff *skb, const struct ip_options *sopt, __be32 daddr, __be32 saddr, const struct ip_reply_arg *arg, @@ -1662,6 +1663,8 @@ void ip_send_unicast_reply(struct sock *sk, struct sk_buff *skb, arg->csumoffset) = csum_fold(csum_add(nskb->csum, arg->csum)); nskb->ip_summed = CHECKSUM_NONE; + if (orig_sk) + skb_set_owner_edemux(nskb, (struct sock *)orig_sk); if (transmit_time) nskb->tstamp_type = SKB_CLOCK_MONOTONIC; if (txhash) diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 985028434f644c399e51d12ba8d9c2c5740dc6e1..9d3dd101ea713b14e13afe662baa49d21b3b716c 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -907,7 +907,7 @@ static void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb, ctl_sk->sk_mark = 0; ctl_sk->sk_priority = 0; } - ip_send_unicast_reply(ctl_sk, + ip_send_unicast_reply(ctl_sk, sk, skb, &TCP_SKB_CB(skb)->header.h4.opt, ip_hdr(skb)->saddr, ip_hdr(skb)->daddr, &arg, arg.iov[0].iov_len, @@ -1021,7 +1021,7 @@ static void tcp_v4_send_ack(const struct sock *sk, ctl_sk->sk_priority = (sk->sk_state == TCP_TIME_WAIT) ? inet_twsk(sk)->tw_priority : READ_ONCE(sk->sk_priority); transmit_time = tcp_transmit_time(sk); - ip_send_unicast_reply(ctl_sk, + ip_send_unicast_reply(ctl_sk, sk, skb, &TCP_SKB_CB(skb)->header.h4.opt, ip_hdr(skb)->saddr, ip_hdr(skb)->daddr, &arg, arg.iov[0].iov_len,
ip_send_unicast_reply() sends orphaned 'control packets'. These are RST packets and also ACK packets sent from TIME_WAIT. Some eBPF programs would prefer to have a meaningful skb->sk pointer as much as possible. This means that TCP can now attach TIME_WAIT sockets to outgoing skbs. Signed-off-by: Eric Dumazet <edumazet@google.com> --- include/net/ip.h | 3 ++- net/ipv4/ip_output.c | 5 ++++- net/ipv4/tcp_ipv4.c | 4 ++-- 3 files changed, 8 insertions(+), 4 deletions(-)