Message ID | 20241004191644.1687638-2-edumazet@google.com (mailing list archive) |
---|---|
State | Superseded |
Delegated to: | Netdev Maintainers |
Headers | show |
Series | tcp: add skb->sk to more control packets | expand |
On Fri, Oct 4, 2024 at 9:16 PM Eric Dumazet <edumazet@google.com> wrote: > > TCP will soon attach TIME_WAIT sockets to some ACK and RST. > > Make sure sk_to_full_sk() detects this and does not return > a non full socket. > > Signed-off-by: Eric Dumazet <edumazet@google.com> > --- > include/net/inet_sock.h | 4 +++- > 1 file changed, 3 insertions(+), 1 deletion(-) > > diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h > index 394c3b66065e20d34594d6e2a2010c55bb457810..cec093b78151b9a3b95ad4b3672a72b0aa9a8305 100644 > --- a/include/net/inet_sock.h > +++ b/include/net/inet_sock.h > @@ -319,8 +319,10 @@ static inline unsigned long inet_cmsg_flags(const struct inet_sock *inet) > static inline struct sock *sk_to_full_sk(struct sock *sk) > { > #ifdef CONFIG_INET > - if (sk && sk->sk_state == TCP_NEW_SYN_RECV) > + if (sk && READ_ONCE(sk->sk_state) == TCP_NEW_SYN_RECV) > sk = inet_reqsk(sk)->rsk_listener; > + if (sk && READ_ONCE(sk->sk_state) == TCP_TIME_WAIT) > + sk = NULL; > #endif > return sk; > } It appears some callers do not check if the return value could be NULL. 
I will have to add in v2 : diff --git a/include/linux/bpf-cgroup.h b/include/linux/bpf-cgroup.h index ce91d9b2acb9f8991150ceead4475b130bead438..c3ffb45489a6924c1bc80355e862e243ec195b01 100644 --- a/include/linux/bpf-cgroup.h +++ b/include/linux/bpf-cgroup.h @@ -209,7 +209,7 @@ static inline bool cgroup_bpf_sock_enabled(struct sock *sk, int __ret = 0; \ if (cgroup_bpf_enabled(CGROUP_INET_EGRESS) && sk) { \ typeof(sk) __sk = sk_to_full_sk(sk); \ - if (sk_fullsock(__sk) && __sk == skb_to_full_sk(skb) && \ + if (__sk && sk_fullsock(__sk) && __sk == skb_to_full_sk(skb) && \ cgroup_bpf_sock_enabled(__sk, CGROUP_INET_EGRESS)) \ __ret = __cgroup_bpf_run_filter_skb(__sk, skb, \ CGROUP_INET_EGRESS); \ diff --git a/net/core/filter.c b/net/core/filter.c index bd0d08bf76bb8de39ca2ca89cda99a97c9b0a034..533025618b2c06efa31548708f21d9e0ccbdc68d 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -6778,7 +6778,7 @@ __bpf_sk_lookup(struct sk_buff *skb, struct bpf_sock_tuple *tuple, u32 len, /* sk_to_full_sk() may return (sk)->rsk_listener, so make sure the original sk * sock refcnt is decremented to prevent a request_sock leak. */ - if (!sk_fullsock(sk2)) + if (sk2 && !sk_fullsock(sk2)) sk2 = NULL; if (sk2 != sk) { sock_gen_put(sk); @@ -6826,7 +6826,7 @@ bpf_sk_lookup(struct sk_buff *skb, struct bpf_sock_tuple *tuple, u32 len, /* sk_to_full_sk() may return (sk)->rsk_listener, so make sure the original sk * sock refcnt is decremented to prevent a request_sock leak. */ - if (!sk_fullsock(sk2)) + if (sk2 && !sk_fullsock(sk2)) sk2 = NULL; if (sk2 != sk) { sock_gen_put(sk); @@ -7276,7 +7276,7 @@ BPF_CALL_1(bpf_get_listener_sock, struct sock *, sk) { sk = sk_to_full_sk(sk); - if (sk->sk_state == TCP_LISTEN && sock_flag(sk, SOCK_RCU_FREE)) + if (sk && sk->sk_state == TCP_LISTEN && sock_flag(sk, SOCK_RCU_FREE)) return (unsigned long)sk; return (unsigned long)NULL;
On 10/4/24 2:56 PM, Eric Dumazet wrote: > On Fri, Oct 4, 2024 at 9:16 PM Eric Dumazet <edumazet@google.com> wrote: >> >> TCP will soon attach TIME_WAIT sockets to some ACK and RST. >> >> Make sure sk_to_full_sk() detects this and does not return >> a non full socket. >> >> Signed-off-by: Eric Dumazet <edumazet@google.com> >> --- >> include/net/inet_sock.h | 4 +++- >> 1 file changed, 3 insertions(+), 1 deletion(-) >> >> diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h >> index 394c3b66065e20d34594d6e2a2010c55bb457810..cec093b78151b9a3b95ad4b3672a72b0aa9a8305 100644 >> --- a/include/net/inet_sock.h >> +++ b/include/net/inet_sock.h >> @@ -319,8 +319,10 @@ static inline unsigned long inet_cmsg_flags(const struct inet_sock *inet) >> static inline struct sock *sk_to_full_sk(struct sock *sk) >> { >> #ifdef CONFIG_INET >> - if (sk && sk->sk_state == TCP_NEW_SYN_RECV) >> + if (sk && READ_ONCE(sk->sk_state) == TCP_NEW_SYN_RECV) >> sk = inet_reqsk(sk)->rsk_listener; >> + if (sk && READ_ONCE(sk->sk_state) == TCP_TIME_WAIT) >> + sk = NULL; >> #endif >> return sk; >> } > > It appears some callers do not check if the return value could be NULL. > I will have to add in v2 : > > diff --git a/include/linux/bpf-cgroup.h b/include/linux/bpf-cgroup.h > index ce91d9b2acb9f8991150ceead4475b130bead438..c3ffb45489a6924c1bc80355e862e243ec195b01 > 100644 > --- a/include/linux/bpf-cgroup.h > +++ b/include/linux/bpf-cgroup.h > @@ -209,7 +209,7 @@ static inline bool cgroup_bpf_sock_enabled(struct sock *sk, > int __ret = 0; \ > if (cgroup_bpf_enabled(CGROUP_INET_EGRESS) && sk) { \ The above "&& sk" test can probably be removed after the "__sk &&" addition below. > typeof(sk) __sk = sk_to_full_sk(sk); \ > - if (sk_fullsock(__sk) && __sk == skb_to_full_sk(skb) && \ > + if (__sk && sk_fullsock(__sk) && __sk == skb_to_full_sk(skb) && \ sk_to_full_sk() includes the TCP_TIME_WAIT check now. 
I wonder if testing __sk for NULL is good enough and the sk_fullsock(__sk) check can be removed also. Thanks for working on this series. It is useful for the bpf prog. > cgroup_bpf_sock_enabled(__sk, CGROUP_INET_EGRESS)) \ > __ret = __cgroup_bpf_run_filter_skb(__sk, skb, \ > CGROUP_INET_EGRESS); \ > diff --git a/net/core/filter.c b/net/core/filter.c > index bd0d08bf76bb8de39ca2ca89cda99a97c9b0a034..533025618b2c06efa31548708f21d9e0ccbdc68d > 100644 > --- a/net/core/filter.c > +++ b/net/core/filter.c > @@ -6778,7 +6778,7 @@ __bpf_sk_lookup(struct sk_buff *skb, struct > bpf_sock_tuple *tuple, u32 len, > /* sk_to_full_sk() may return (sk)->rsk_listener, so > make sure the original sk > * sock refcnt is decremented to prevent a request_sock leak. > */ > - if (!sk_fullsock(sk2)) > + if (sk2 && !sk_fullsock(sk2)) > sk2 = NULL; > if (sk2 != sk) { > sock_gen_put(sk); > @@ -6826,7 +6826,7 @@ bpf_sk_lookup(struct sk_buff *skb, struct > bpf_sock_tuple *tuple, u32 len, > /* sk_to_full_sk() may return (sk)->rsk_listener, so > make sure the original sk > * sock refcnt is decremented to prevent a request_sock leak. > */ > - if (!sk_fullsock(sk2)) > + if (sk2 && !sk_fullsock(sk2)) > sk2 = NULL; > if (sk2 != sk) { > sock_gen_put(sk); > @@ -7276,7 +7276,7 @@ BPF_CALL_1(bpf_get_listener_sock, struct sock *, sk) > { > sk = sk_to_full_sk(sk); > > - if (sk->sk_state == TCP_LISTEN && sock_flag(sk, SOCK_RCU_FREE)) > + if (sk && sk->sk_state == TCP_LISTEN && sock_flag(sk, SOCK_RCU_FREE)) > return (unsigned long)sk; > > return (unsigned long)NULL;
On Sat, Oct 5, 2024 at 2:37 AM Martin KaFai Lau <martin.lau@linux.dev> wrote: > > On 10/4/24 2:56 PM, Eric Dumazet wrote: > > On Fri, Oct 4, 2024 at 9:16 PM Eric Dumazet <edumazet@google.com> wrote: > >> > >> TCP will soon attach TIME_WAIT sockets to some ACK and RST. > >> > >> Make sure sk_to_full_sk() detects this and does not return > >> a non full socket. > >> > >> Signed-off-by: Eric Dumazet <edumazet@google.com> > >> --- > >> include/net/inet_sock.h | 4 +++- > >> 1 file changed, 3 insertions(+), 1 deletion(-) > >> > >> diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h > >> index 394c3b66065e20d34594d6e2a2010c55bb457810..cec093b78151b9a3b95ad4b3672a72b0aa9a8305 100644 > >> --- a/include/net/inet_sock.h > >> +++ b/include/net/inet_sock.h > >> @@ -319,8 +319,10 @@ static inline unsigned long inet_cmsg_flags(const struct inet_sock *inet) > >> static inline struct sock *sk_to_full_sk(struct sock *sk) > >> { > >> #ifdef CONFIG_INET > >> - if (sk && sk->sk_state == TCP_NEW_SYN_RECV) > >> + if (sk && READ_ONCE(sk->sk_state) == TCP_NEW_SYN_RECV) > >> sk = inet_reqsk(sk)->rsk_listener; > >> + if (sk && READ_ONCE(sk->sk_state) == TCP_TIME_WAIT) > >> + sk = NULL; > >> #endif > >> return sk; > >> } > > > > It appears some callers do not check if the return value could be NULL. > > I will have to add in v2 : > > > > diff --git a/include/linux/bpf-cgroup.h b/include/linux/bpf-cgroup.h > > index ce91d9b2acb9f8991150ceead4475b130bead438..c3ffb45489a6924c1bc80355e862e243ec195b01 > > 100644 > > --- a/include/linux/bpf-cgroup.h > > +++ b/include/linux/bpf-cgroup.h > > @@ -209,7 +209,7 @@ static inline bool cgroup_bpf_sock_enabled(struct sock *sk, > > int __ret = 0; \ > > if (cgroup_bpf_enabled(CGROUP_INET_EGRESS) && sk) { \ > > The above "&& sk" test can probably be removed after the "__sk &&" addition below. Yes, I can do that. 
> > > typeof(sk) __sk = sk_to_full_sk(sk); \ > > - if (sk_fullsock(__sk) && __sk == skb_to_full_sk(skb) && \ > > + if (__sk && sk_fullsock(__sk) && __sk == skb_to_full_sk(skb) && \ > > sk_to_full_sk() includes the TCP_TIME_WAIT check now. I wonder if testing __sk > for NULL is good enough and the sk_fullsock(__sk) check can be removed also. +2 > > Thanks for working on this series. It is useful for the bpf prog. Thanks.
diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h index 394c3b66065e20d34594d6e2a2010c55bb457810..cec093b78151b9a3b95ad4b3672a72b0aa9a8305 100644 --- a/include/net/inet_sock.h +++ b/include/net/inet_sock.h @@ -319,8 +319,10 @@ static inline unsigned long inet_cmsg_flags(const struct inet_sock *inet) static inline struct sock *sk_to_full_sk(struct sock *sk) { #ifdef CONFIG_INET - if (sk && sk->sk_state == TCP_NEW_SYN_RECV) + if (sk && READ_ONCE(sk->sk_state) == TCP_NEW_SYN_RECV) sk = inet_reqsk(sk)->rsk_listener; + if (sk && READ_ONCE(sk->sk_state) == TCP_TIME_WAIT) + sk = NULL; #endif return sk; }
TCP will soon attach TIME_WAIT sockets to some ACK and RST. Make sure sk_to_full_sk() detects this and does not return a non-full socket. Signed-off-by: Eric Dumazet <edumazet@google.com> --- include/net/inet_sock.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-)