diff mbox series

[net-next,1/5] net: add TIME_WAIT logic to sk_to_full_sk()

Message ID 20241004191644.1687638-2-edumazet@google.com (mailing list archive)
State Superseded
Delegated to: Netdev Maintainers
Headers show
Series tcp: add skb->sk to more control packets | expand

Checks

Context Check Description
netdev/series_format success Posting correctly formatted
netdev/tree_selection success Clearly marked for net-next, async
netdev/ynl success Generated files up to date; no warnings/errors; no diff in generated;
netdev/fixes_present success Fixes tag not required for -next series
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 17 this patch: 17
netdev/build_tools success Errors and warnings before: 0 (+2) this patch: 0 (+2)
netdev/cc_maintainers success CCed 4 of 4 maintainers
netdev/build_clang success Errors and warnings before: 39 this patch: 39
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/deprecated_api success None detected
netdev/check_selftest success No net selftest shell script
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn success Errors and warnings before: 2078 this patch: 2078
netdev/checkpatch success total: 0 errors, 0 warnings, 0 checks, 11 lines checked
netdev/build_clang_rust success No Rust files in patch. Skipping build
netdev/kdoc success Errors and warnings before: 2 this patch: 2
netdev/source_inline success Was 0 now: 0

Commit Message

Eric Dumazet Oct. 4, 2024, 7:16 p.m. UTC
TCP will soon attach TIME_WAIT sockets to some ACK and RST.

Make sure sk_to_full_sk() detects this and does not return
a non full socket.

Signed-off-by: Eric Dumazet <edumazet@google.com>
---
 include/net/inet_sock.h | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

Comments

Eric Dumazet Oct. 4, 2024, 9:56 p.m. UTC | #1
On Fri, Oct 4, 2024 at 9:16 PM Eric Dumazet <edumazet@google.com> wrote:
>
> TCP will soon attach TIME_WAIT sockets to some ACK and RST.
>
> Make sure sk_to_full_sk() detects this and does not return
> a non full socket.
>
> Signed-off-by: Eric Dumazet <edumazet@google.com>
> ---
>  include/net/inet_sock.h | 4 +++-
>  1 file changed, 3 insertions(+), 1 deletion(-)
>
> diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h
> index 394c3b66065e20d34594d6e2a2010c55bb457810..cec093b78151b9a3b95ad4b3672a72b0aa9a8305 100644
> --- a/include/net/inet_sock.h
> +++ b/include/net/inet_sock.h
> @@ -319,8 +319,10 @@ static inline unsigned long inet_cmsg_flags(const struct inet_sock *inet)
>  static inline struct sock *sk_to_full_sk(struct sock *sk)
>  {
>  #ifdef CONFIG_INET
> -       if (sk && sk->sk_state == TCP_NEW_SYN_RECV)
> +       if (sk && READ_ONCE(sk->sk_state) == TCP_NEW_SYN_RECV)
>                 sk = inet_reqsk(sk)->rsk_listener;
> +       if (sk && READ_ONCE(sk->sk_state) == TCP_TIME_WAIT)
> +               sk = NULL;
>  #endif
>         return sk;
>  }

It appears some callers do not check if the return value could be NULL.
I will have to add in v2 :

diff --git a/include/linux/bpf-cgroup.h b/include/linux/bpf-cgroup.h
index ce91d9b2acb9f8991150ceead4475b130bead438..c3ffb45489a6924c1bc80355e862e243ec195b01
100644
--- a/include/linux/bpf-cgroup.h
+++ b/include/linux/bpf-cgroup.h
@@ -209,7 +209,7 @@ static inline bool cgroup_bpf_sock_enabled(struct sock *sk,
        int __ret = 0;                                                         \
        if (cgroup_bpf_enabled(CGROUP_INET_EGRESS) && sk) {                    \
                typeof(sk) __sk = sk_to_full_sk(sk);                           \
-               if (sk_fullsock(__sk) && __sk == skb_to_full_sk(skb) &&        \
+               if (__sk && sk_fullsock(__sk) && __sk ==
skb_to_full_sk(skb) &&        \
                    cgroup_bpf_sock_enabled(__sk, CGROUP_INET_EGRESS))         \
                        __ret = __cgroup_bpf_run_filter_skb(__sk, skb,         \
                                                      CGROUP_INET_EGRESS); \
diff --git a/net/core/filter.c b/net/core/filter.c
index bd0d08bf76bb8de39ca2ca89cda99a97c9b0a034..533025618b2c06efa31548708f21d9e0ccbdc68d
100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -6778,7 +6778,7 @@ __bpf_sk_lookup(struct sk_buff *skb, struct
bpf_sock_tuple *tuple, u32 len,
                /* sk_to_full_sk() may return (sk)->rsk_listener, so
make sure the original sk
                 * sock refcnt is decremented to prevent a request_sock leak.
                 */
-               if (!sk_fullsock(sk2))
+               if (sk2 && !sk_fullsock(sk2))
                        sk2 = NULL;
                if (sk2 != sk) {
                        sock_gen_put(sk);
@@ -6826,7 +6826,7 @@ bpf_sk_lookup(struct sk_buff *skb, struct
bpf_sock_tuple *tuple, u32 len,
                /* sk_to_full_sk() may return (sk)->rsk_listener, so
make sure the original sk
                 * sock refcnt is decremented to prevent a request_sock leak.
                 */
-               if (!sk_fullsock(sk2))
+               if (sk2 && !sk_fullsock(sk2))
                        sk2 = NULL;
                if (sk2 != sk) {
                        sock_gen_put(sk);
@@ -7276,7 +7276,7 @@ BPF_CALL_1(bpf_get_listener_sock, struct sock *, sk)
 {
        sk = sk_to_full_sk(sk);

-       if (sk->sk_state == TCP_LISTEN && sock_flag(sk, SOCK_RCU_FREE))
+       if (sk && sk->sk_state == TCP_LISTEN && sock_flag(sk, SOCK_RCU_FREE))
                return (unsigned long)sk;

        return (unsigned long)NULL;
Martin KaFai Lau Oct. 5, 2024, 12:36 a.m. UTC | #2
On 10/4/24 2:56 PM, Eric Dumazet wrote:
> On Fri, Oct 4, 2024 at 9:16 PM Eric Dumazet <edumazet@google.com> wrote:
>>
>> TCP will soon attach TIME_WAIT sockets to some ACK and RST.
>>
>> Make sure sk_to_full_sk() detects this and does not return
>> a non full socket.
>>
>> Signed-off-by: Eric Dumazet <edumazet@google.com>
>> ---
>>   include/net/inet_sock.h | 4 +++-
>>   1 file changed, 3 insertions(+), 1 deletion(-)
>>
>> diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h
>> index 394c3b66065e20d34594d6e2a2010c55bb457810..cec093b78151b9a3b95ad4b3672a72b0aa9a8305 100644
>> --- a/include/net/inet_sock.h
>> +++ b/include/net/inet_sock.h
>> @@ -319,8 +319,10 @@ static inline unsigned long inet_cmsg_flags(const struct inet_sock *inet)
>>   static inline struct sock *sk_to_full_sk(struct sock *sk)
>>   {
>>   #ifdef CONFIG_INET
>> -       if (sk && sk->sk_state == TCP_NEW_SYN_RECV)
>> +       if (sk && READ_ONCE(sk->sk_state) == TCP_NEW_SYN_RECV)
>>                  sk = inet_reqsk(sk)->rsk_listener;
>> +       if (sk && READ_ONCE(sk->sk_state) == TCP_TIME_WAIT)
>> +               sk = NULL;
>>   #endif
>>          return sk;
>>   }
> 
> It appears some callers do not check if the return value could be NULL.
> I will have to add in v2 :
> 
> diff --git a/include/linux/bpf-cgroup.h b/include/linux/bpf-cgroup.h
> index ce91d9b2acb9f8991150ceead4475b130bead438..c3ffb45489a6924c1bc80355e862e243ec195b01
> 100644
> --- a/include/linux/bpf-cgroup.h
> +++ b/include/linux/bpf-cgroup.h
> @@ -209,7 +209,7 @@ static inline bool cgroup_bpf_sock_enabled(struct sock *sk,
>          int __ret = 0;                                                         \
>          if (cgroup_bpf_enabled(CGROUP_INET_EGRESS) && sk) {                    \

The above "&& sk" test can probably be removed after the "__sk &&" addition below.

>                  typeof(sk) __sk = sk_to_full_sk(sk);                           \
> -               if (sk_fullsock(__sk) && __sk == skb_to_full_sk(skb) &&        \
> +               if (__sk && sk_fullsock(__sk) && __sk == skb_to_full_sk(skb) &&        \

sk_to_full_sk() includes the TCP_TIME_WAIT check now. I wonder if testing __sk
for NULL is good enough and the sk_fullsock(__sk) check can be removed also.

Thanks for working on this series. It is useful for the bpf prog.

>                      cgroup_bpf_sock_enabled(__sk, CGROUP_INET_EGRESS))         \
>                          __ret = __cgroup_bpf_run_filter_skb(__sk, skb,         \
>                                                        CGROUP_INET_EGRESS); \
> diff --git a/net/core/filter.c b/net/core/filter.c
> index bd0d08bf76bb8de39ca2ca89cda99a97c9b0a034..533025618b2c06efa31548708f21d9e0ccbdc68d
> 100644
> --- a/net/core/filter.c
> +++ b/net/core/filter.c
> @@ -6778,7 +6778,7 @@ __bpf_sk_lookup(struct sk_buff *skb, struct
> bpf_sock_tuple *tuple, u32 len,
>                  /* sk_to_full_sk() may return (sk)->rsk_listener, so
> make sure the original sk
>                   * sock refcnt is decremented to prevent a request_sock leak.
>                   */
> -               if (!sk_fullsock(sk2))
> +               if (sk2 && !sk_fullsock(sk2))
>                          sk2 = NULL;
>                  if (sk2 != sk) {
>                          sock_gen_put(sk);
> @@ -6826,7 +6826,7 @@ bpf_sk_lookup(struct sk_buff *skb, struct
> bpf_sock_tuple *tuple, u32 len,
>                  /* sk_to_full_sk() may return (sk)->rsk_listener, so
> make sure the original sk
>                   * sock refcnt is decremented to prevent a request_sock leak.
>                   */
> -               if (!sk_fullsock(sk2))
> +               if (sk2 && !sk_fullsock(sk2))
>                          sk2 = NULL;
>                  if (sk2 != sk) {
>                          sock_gen_put(sk);
> @@ -7276,7 +7276,7 @@ BPF_CALL_1(bpf_get_listener_sock, struct sock *, sk)
>   {
>          sk = sk_to_full_sk(sk);
> 
> -       if (sk->sk_state == TCP_LISTEN && sock_flag(sk, SOCK_RCU_FREE))
> +       if (sk && sk->sk_state == TCP_LISTEN && sock_flag(sk, SOCK_RCU_FREE))
>                  return (unsigned long)sk;
> 
>          return (unsigned long)NULL;
Eric Dumazet Oct. 6, 2024, 8:14 p.m. UTC | #3
On Sat, Oct 5, 2024 at 2:37 AM Martin KaFai Lau <martin.lau@linux.dev> wrote:
>
> On 10/4/24 2:56 PM, Eric Dumazet wrote:
> > On Fri, Oct 4, 2024 at 9:16 PM Eric Dumazet <edumazet@google.com> wrote:
> >>
> >> TCP will soon attach TIME_WAIT sockets to some ACK and RST.
> >>
> >> Make sure sk_to_full_sk() detects this and does not return
> >> a non full socket.
> >>
> >> Signed-off-by: Eric Dumazet <edumazet@google.com>
> >> ---
> >>   include/net/inet_sock.h | 4 +++-
> >>   1 file changed, 3 insertions(+), 1 deletion(-)
> >>
> >> diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h
> >> index 394c3b66065e20d34594d6e2a2010c55bb457810..cec093b78151b9a3b95ad4b3672a72b0aa9a8305 100644
> >> --- a/include/net/inet_sock.h
> >> +++ b/include/net/inet_sock.h
> >> @@ -319,8 +319,10 @@ static inline unsigned long inet_cmsg_flags(const struct inet_sock *inet)
> >>   static inline struct sock *sk_to_full_sk(struct sock *sk)
> >>   {
> >>   #ifdef CONFIG_INET
> >> -       if (sk && sk->sk_state == TCP_NEW_SYN_RECV)
> >> +       if (sk && READ_ONCE(sk->sk_state) == TCP_NEW_SYN_RECV)
> >>                  sk = inet_reqsk(sk)->rsk_listener;
> >> +       if (sk && READ_ONCE(sk->sk_state) == TCP_TIME_WAIT)
> >> +               sk = NULL;
> >>   #endif
> >>          return sk;
> >>   }
> >
> > It appears some callers do not check if the return value could be NULL.
> > I will have to add in v2 :
> >
> > diff --git a/include/linux/bpf-cgroup.h b/include/linux/bpf-cgroup.h
> > index ce91d9b2acb9f8991150ceead4475b130bead438..c3ffb45489a6924c1bc80355e862e243ec195b01
> > 100644
> > --- a/include/linux/bpf-cgroup.h
> > +++ b/include/linux/bpf-cgroup.h
> > @@ -209,7 +209,7 @@ static inline bool cgroup_bpf_sock_enabled(struct sock *sk,
> >          int __ret = 0;                                                         \
> >          if (cgroup_bpf_enabled(CGROUP_INET_EGRESS) && sk) {                    \
>
> The above "&& sk" test can probably be removed after the "__sk &&" addition below.

Yes, I can do that.

>
> >                  typeof(sk) __sk = sk_to_full_sk(sk);                           \
> > -               if (sk_fullsock(__sk) && __sk == skb_to_full_sk(skb) &&        \
> > +               if (__sk && sk_fullsock(__sk) && __sk == skb_to_full_sk(skb) &&        \
>
> sk_to_full_sk() includes the TCP_TIME_WAIT check now. I wonder if testing __sk
> for NULL is good enough and the sk_fullsock(__sk) check can be removed also.

+2

>
> Thanks for working on this series. It is useful for the bpf prog.

Thanks.
diff mbox series

Patch

diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h
index 394c3b66065e20d34594d6e2a2010c55bb457810..cec093b78151b9a3b95ad4b3672a72b0aa9a8305 100644
--- a/include/net/inet_sock.h
+++ b/include/net/inet_sock.h
@@ -319,8 +319,10 @@  static inline unsigned long inet_cmsg_flags(const struct inet_sock *inet)
 static inline struct sock *sk_to_full_sk(struct sock *sk)
 {
 #ifdef CONFIG_INET
-	if (sk && sk->sk_state == TCP_NEW_SYN_RECV)
+	if (sk && READ_ONCE(sk->sk_state) == TCP_NEW_SYN_RECV)
 		sk = inet_reqsk(sk)->rsk_listener;
+	if (sk && READ_ONCE(sk->sk_state) == TCP_TIME_WAIT)
+		sk = NULL;
 #endif
 	return sk;
 }