diff mbox series

net: Add tcp_drop_reason tracepoint

Message ID 20241023123212.15908-1-laoar.shao@gmail.com (mailing list archive)
State Handled Elsewhere
Headers show
Series net: Add tcp_drop_reason tracepoint | expand

Commit Message

Yafang Shao Oct. 23, 2024, 12:32 p.m. UTC
We previously hooked the tcp_drop_reason() function using BPF to monitor
TCP drop reasons. However, after upgrading our compiler from GCC 9 to GCC
11, tcp_drop_reason() is now inlined, preventing us from hooking into it.
To address this, it would be beneficial to introduce a dedicated tracepoint
for monitoring.

Signed-off-by: Yafang Shao <laoar.shao@gmail.com>
Cc: Menglong Dong <menglong8.dong@gmail.com>
---
 include/trace/events/tcp.h | 53 ++++++++++++++++++++++++++++++++++++++
 net/ipv4/tcp_input.c       |  1 +
 2 files changed, 54 insertions(+)

Comments

Menglong Dong Oct. 23, 2024, 1 p.m. UTC | #1
On Wed, Oct 23, 2024 at 8:33 PM Yafang Shao <laoar.shao@gmail.com> wrote:
>
> We previously hooked the tcp_drop_reason() function using BPF to monitor
> TCP drop reasons. However, after upgrading our compiler from GCC 9 to GCC
> 11, tcp_drop_reason() is now inlined, preventing us from hooking into it.
> To address this, it would be beneficial to introduce a dedicated tracepoint
> for monitoring.

Hello,

Can the existing tracepoint kfree_skb do this work? AFAIK, you
can attach you BPF to the kfree_skb tracepoint and do some filter
according to the "protocol" field, or the information "sk" field. And
this works fine in my tool.

I hope I'm not missing something :/

BTW, I do such filter in probe_parse_skb_sk() in
https://github.com/OpenCloudOS/nettrace/blob/master/shared/bpf/skb_parse.h

Thanks!
Menglong Dong
>
> Signed-off-by: Yafang Shao <laoar.shao@gmail.com>
> Cc: Menglong Dong <menglong8.dong@gmail.com>
> ---
>  include/trace/events/tcp.h | 53 ++++++++++++++++++++++++++++++++++++++
>  net/ipv4/tcp_input.c       |  1 +
>  2 files changed, 54 insertions(+)
>
> diff --git a/include/trace/events/tcp.h b/include/trace/events/tcp.h
> index a27c4b619dff..056f7026224c 100644
> --- a/include/trace/events/tcp.h
> +++ b/include/trace/events/tcp.h
> @@ -12,6 +12,7 @@
>  #include <net/tcp.h>
>  #include <linux/sock_diag.h>
>  #include <net/rstreason.h>
> +#include <net/dropreason-core.h>
>
>  /*
>   * tcp event with arguments sk and skb
> @@ -728,6 +729,58 @@ DEFINE_EVENT(tcp_ao_event_sne, tcp_ao_rcv_sne_update,
>         TP_ARGS(sk, new_sne)
>  );
>
> +#undef FN
> +#undef FNe
> +#define FN(reason)     { SKB_DROP_REASON_##reason, #reason },
> +#define FNe(reason)    { SKB_DROP_REASON_##reason, #reason }
> +
> +TRACE_EVENT(tcp_drop_reason,
> +
> +       TP_PROTO(const struct sock *sk, const struct sk_buff *skb,
> +                const enum skb_drop_reason reason),
> +
> +       TP_ARGS(sk, skb, reason),
> +
> +       TP_STRUCT__entry(
> +               __field(const void *, skbaddr)
> +               __field(const void *, skaddr)
> +               __field(int, state)
> +               __field(enum skb_drop_reason, reason)
> +               __array(__u8, saddr, sizeof(struct sockaddr_in6))
> +               __array(__u8, daddr, sizeof(struct sockaddr_in6))
> +       ),
> +
> +       TP_fast_assign(
> +               __entry->skbaddr = skb;
> +               __entry->skaddr = sk;
> +               /* Zero means unknown state. */
> +               __entry->state = sk ? sk->sk_state : 0;
> +
> +               memset(__entry->saddr, 0, sizeof(struct sockaddr_in6));
> +               memset(__entry->daddr, 0, sizeof(struct sockaddr_in6));
> +
> +               if (sk && sk_fullsock(sk)) {
> +                       const struct inet_sock *inet = inet_sk(sk);
> +
> +                       TP_STORE_ADDR_PORTS(__entry, inet, sk);
> +               } else {
> +                       const struct tcphdr *th = (const struct tcphdr *)skb->data;
> +
> +                       TP_STORE_ADDR_PORTS_SKB(skb, th, entry->saddr, entry->daddr);
> +               }
> +               __entry->reason = reason;
> +       ),
> +
> +       TP_printk("skbaddr=%p skaddr=%p src=%pISpc dest=%pISpc state=%s reason=%s",
> +                 __entry->skbaddr, __entry->skaddr,
> +                 __entry->saddr, __entry->daddr,
> +                 __entry->state ? show_tcp_state_name(__entry->state) : "UNKNOWN",
> +                 __print_symbolic(__entry->reason, DEFINE_DROP_REASON(FN, FNe)))
> +);
> +
> +#undef FN
> +#undef FNe
> +
>  #endif /* _TRACE_TCP_H */
>
>  /* This part must be outside protection */
> diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
> index cc05ec1faac8..44795555596a 100644
> --- a/net/ipv4/tcp_input.c
> +++ b/net/ipv4/tcp_input.c
> @@ -4897,6 +4897,7 @@ static bool tcp_ooo_try_coalesce(struct sock *sk,
>  static void tcp_drop_reason(struct sock *sk, struct sk_buff *skb,
>                             enum skb_drop_reason reason)
>  {
> +       trace_tcp_drop_reason(sk, skb, reason);
>         sk_drops_add(sk, skb);
>         sk_skb_reason_drop(sk, skb, reason);
>  }
> --
> 2.43.5
>
Eric Dumazet Oct. 23, 2024, 1:01 p.m. UTC | #2
On Wed, Oct 23, 2024 at 2:33 PM Yafang Shao <laoar.shao@gmail.com> wrote:
>
> We previously hooked the tcp_drop_reason() function using BPF to monitor
> TCP drop reasons. However, after upgrading our compiler from GCC 9 to GCC
> 11, tcp_drop_reason() is now inlined, preventing us from hooking into it.
> To address this, it would be beneficial to introduce a dedicated tracepoint
> for monitoring.

This patch would require changes in user space tracers.
I am surprised no one came up with a noinline variant.

__bpf_kfunc is using

#define __bpf_kfunc __used __retain noinline

I would rather not have include/trace/events/tcp.h becoming the
biggest file in TCP stack...
Yafang Shao Oct. 23, 2024, 2:33 p.m. UTC | #3
On Wed, Oct 23, 2024 at 8:59 PM Menglong Dong <menglong8.dong@gmail.com> wrote:
>
> On Wed, Oct 23, 2024 at 8:33 PM Yafang Shao <laoar.shao@gmail.com> wrote:
> >
> > We previously hooked the tcp_drop_reason() function using BPF to monitor
> > TCP drop reasons. However, after upgrading our compiler from GCC 9 to GCC
> > 11, tcp_drop_reason() is now inlined, preventing us from hooking into it.
> > To address this, it would be beneficial to introduce a dedicated tracepoint
> > for monitoring.
>
> Hello,
>
> Can the existing tracepoint kfree_skb do this work? AFAIK, you
> can attach you BPF to the kfree_skb tracepoint and do some filter
> according to the "protocol" field, or the information "sk" field. And
> this works fine in my tool.
>
> I hope I'm not missing something :/
>
> BTW, I do such filter in probe_parse_skb_sk() in
> https://github.com/OpenCloudOS/nettrace/blob/master/shared/bpf/skb_parse.h

We prefer not to hook the kfree_skb tracepoint, as we want to avoid
the overhead of parsing extensive information from @skb. Since we now
have a function that can be easily hooked, why not hook it directly?

--
Regards
Yafang
Yafang Shao Oct. 23, 2024, 2:34 p.m. UTC | #4
On Wed, Oct 23, 2024 at 9:01 PM Eric Dumazet <edumazet@google.com> wrote:
>
> On Wed, Oct 23, 2024 at 2:33 PM Yafang Shao <laoar.shao@gmail.com> wrote:
> >
> > We previously hooked the tcp_drop_reason() function using BPF to monitor
> > TCP drop reasons. However, after upgrading our compiler from GCC 9 to GCC
> > 11, tcp_drop_reason() is now inlined, preventing us from hooking into it.
> > To address this, it would be beneficial to introduce a dedicated tracepoint
> > for monitoring.
>
> This patch would require changes in user space tracers.
> I am surprised no one came up with a noinline variant.
>
> __bpf_kfunc is using
>
> #define __bpf_kfunc __used __retain noinline
>
> I would rather not have include/trace/events/tcp.h becoming the
> biggest file in TCP stack...

I’d prefer not to introduce a new tracepoint if we can easily hook it
with BPF. Does the following change look good to you?

diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 092456b8f8af..ebea844cc974 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -4720,7 +4720,7 @@ static bool tcp_ooo_try_coalesce(struct sock *sk,
        return res;
 }

-static void tcp_drop_reason(struct sock *sk, struct sk_buff *skb,
+noinline static void tcp_drop_reason(struct sock *sk, struct sk_buff *skb,
                            enum skb_drop_reason reason)
 {
        sk_drops_add(sk, skb);




--
Regards
Yafang
Eric Dumazet Oct. 23, 2024, 3:43 p.m. UTC | #5
On Wed, Oct 23, 2024 at 4:35 PM Yafang Shao <laoar.shao@gmail.com> wrote:
>
> On Wed, Oct 23, 2024 at 9:01 PM Eric Dumazet <edumazet@google.com> wrote:
> >
> > On Wed, Oct 23, 2024 at 2:33 PM Yafang Shao <laoar.shao@gmail.com> wrote:
> > >
> > > We previously hooked the tcp_drop_reason() function using BPF to monitor
> > > TCP drop reasons. However, after upgrading our compiler from GCC 9 to GCC
> > > 11, tcp_drop_reason() is now inlined, preventing us from hooking into it.
> > > To address this, it would be beneficial to introduce a dedicated tracepoint
> > > for monitoring.
> >
> > This patch would require changes in user space tracers.
> > I am surprised no one came up with a noinline variant.
> >
> > __bpf_kfunc is using
> >
> > #define __bpf_kfunc __used __retain noinline
> >
> > I would rather not have include/trace/events/tcp.h becoming the
> > biggest file in TCP stack...
>
> I’d prefer not to introduce a new tracepoint if we can easily hook it
> with BPF. Does the following change look good to you?
>
> diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
> index 092456b8f8af..ebea844cc974 100644
> --- a/net/ipv4/tcp_input.c
> +++ b/net/ipv4/tcp_input.c
> @@ -4720,7 +4720,7 @@ static bool tcp_ooo_try_coalesce(struct sock *sk,
>         return res;
>  }
>
> -static void tcp_drop_reason(struct sock *sk, struct sk_buff *skb,
> +noinline static void tcp_drop_reason(struct sock *sk, struct sk_buff *skb,
>                             enum skb_drop_reason reason)
>  {
>         sk_drops_add(sk, skb);
>

I would suggest adding an explicit keyword, like the one we have for
noinline_for_stack, for documentation purposes.

noinline_for_tracing perhaps ?

diff --git a/include/linux/compiler_types.h b/include/linux/compiler_types.h
index 1a957ea2f4fe78ed12d7f6a65e5759d07cea4449..9a687ca4bb4392583d150349ee11015bcb82ec74
100644
--- a/include/linux/compiler_types.h
+++ b/include/linux/compiler_types.h
@@ -265,6 +265,12 @@ struct ftrace_likely_data {
  */
 #define noinline_for_stack noinline

+/*
+ * Use noinline_for_tracing for functions that should not be inlined,
+ * for tracing reasons.
+ */
+#define noinline_for_tracing noinline
+
 /*
  * Sanitizer helper attributes: Because using __always_inline and
  * __no_sanitize_* conflict, provide helper attributes that will either expand
Yafang Shao Oct. 24, 2024, 2:21 a.m. UTC | #6
On Wed, Oct 23, 2024 at 11:43 PM Eric Dumazet <edumazet@google.com> wrote:
>
> On Wed, Oct 23, 2024 at 4:35 PM Yafang Shao <laoar.shao@gmail.com> wrote:
> >
> > On Wed, Oct 23, 2024 at 9:01 PM Eric Dumazet <edumazet@google.com> wrote:
> > >
> > > On Wed, Oct 23, 2024 at 2:33 PM Yafang Shao <laoar.shao@gmail.com> wrote:
> > > >
> > > > We previously hooked the tcp_drop_reason() function using BPF to monitor
> > > > TCP drop reasons. However, after upgrading our compiler from GCC 9 to GCC
> > > > 11, tcp_drop_reason() is now inlined, preventing us from hooking into it.
> > > > To address this, it would be beneficial to introduce a dedicated tracepoint
> > > > for monitoring.
> > >
> > > This patch would require changes in user space tracers.
> > > I am surprised no one came up with a noinline variant.
> > >
> > > __bpf_kfunc is using
> > >
> > > #define __bpf_kfunc __used __retain noinline
> > >
> > > I would rather not have include/trace/events/tcp.h becoming the
> > > biggest file in TCP stack...
> >
> > I’d prefer not to introduce a new tracepoint if we can easily hook it
> > with BPF. Does the following change look good to you?
> >
> > diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
> > index 092456b8f8af..ebea844cc974 100644
> > --- a/net/ipv4/tcp_input.c
> > +++ b/net/ipv4/tcp_input.c
> > @@ -4720,7 +4720,7 @@ static bool tcp_ooo_try_coalesce(struct sock *sk,
> >         return res;
> >  }
> >
> > -static void tcp_drop_reason(struct sock *sk, struct sk_buff *skb,
> > +noinline static void tcp_drop_reason(struct sock *sk, struct sk_buff *skb,
> >                             enum skb_drop_reason reason)
> >  {
> >         sk_drops_add(sk, skb);
> >
>
> I would suggest adding an explicit keyword, like the one we have for
> noinline_for_stack, for documentation purposes.
>
> noinline_for_tracing perhaps ?

Good suggestion! This approach eliminates the need to add comments for
each noinline.

>
> diff --git a/include/linux/compiler_types.h b/include/linux/compiler_types.h
> index 1a957ea2f4fe78ed12d7f6a65e5759d07cea4449..9a687ca4bb4392583d150349ee11015bcb82ec74
> 100644
> --- a/include/linux/compiler_types.h
> +++ b/include/linux/compiler_types.h
> @@ -265,6 +265,12 @@ struct ftrace_likely_data {
>   */
>  #define noinline_for_stack noinline
>
> +/*
> + * Use noinline_for_tracing for functions that should not be inlined,
> + * for tracing reasons.
> + */
> +#define noinline_for_tracing noinline
> +
>  /*
>   * Sanitizer helper attributes: Because using __always_inline and
>   * __no_sanitize_* conflict, provide helper attributes that will either expand



--
Regards
Yafang
diff mbox series

Patch

diff --git a/include/trace/events/tcp.h b/include/trace/events/tcp.h
index a27c4b619dff..056f7026224c 100644
--- a/include/trace/events/tcp.h
+++ b/include/trace/events/tcp.h
@@ -12,6 +12,7 @@ 
 #include <net/tcp.h>
 #include <linux/sock_diag.h>
 #include <net/rstreason.h>
+#include <net/dropreason-core.h>
 
 /*
  * tcp event with arguments sk and skb
@@ -728,6 +729,58 @@  DEFINE_EVENT(tcp_ao_event_sne, tcp_ao_rcv_sne_update,
 	TP_ARGS(sk, new_sne)
 );
 
+#undef FN
+#undef FNe
+#define FN(reason)	{ SKB_DROP_REASON_##reason, #reason },
+#define FNe(reason)	{ SKB_DROP_REASON_##reason, #reason }
+
+TRACE_EVENT(tcp_drop_reason,
+
+	TP_PROTO(const struct sock *sk, const struct sk_buff *skb,
+		 const enum skb_drop_reason reason),
+
+	TP_ARGS(sk, skb, reason),
+
+	TP_STRUCT__entry(
+		__field(const void *, skbaddr)
+		__field(const void *, skaddr)
+		__field(int, state)
+		__field(enum skb_drop_reason, reason)
+		__array(__u8, saddr, sizeof(struct sockaddr_in6))
+		__array(__u8, daddr, sizeof(struct sockaddr_in6))
+	),
+
+	TP_fast_assign(
+		__entry->skbaddr = skb;
+		__entry->skaddr = sk;
+		/* Zero means unknown state. */
+		__entry->state = sk ? sk->sk_state : 0;
+
+		memset(__entry->saddr, 0, sizeof(struct sockaddr_in6));
+		memset(__entry->daddr, 0, sizeof(struct sockaddr_in6));
+
+		if (sk && sk_fullsock(sk)) {
+			const struct inet_sock *inet = inet_sk(sk);
+
+			TP_STORE_ADDR_PORTS(__entry, inet, sk);
+		} else {
+			const struct tcphdr *th = (const struct tcphdr *)skb->data;
+
+			TP_STORE_ADDR_PORTS_SKB(skb, th, entry->saddr, entry->daddr);
+		}
+		__entry->reason = reason;
+	),
+
+	TP_printk("skbaddr=%p skaddr=%p src=%pISpc dest=%pISpc state=%s reason=%s",
+		  __entry->skbaddr, __entry->skaddr,
+		  __entry->saddr, __entry->daddr,
+		  __entry->state ? show_tcp_state_name(__entry->state) : "UNKNOWN",
+		  __print_symbolic(__entry->reason, DEFINE_DROP_REASON(FN, FNe)))
+);
+
+#undef FN
+#undef FNe
+
 #endif /* _TRACE_TCP_H */
 
 /* This part must be outside protection */
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index cc05ec1faac8..44795555596a 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -4897,6 +4897,7 @@  static bool tcp_ooo_try_coalesce(struct sock *sk,
 static void tcp_drop_reason(struct sock *sk, struct sk_buff *skb,
 			    enum skb_drop_reason reason)
 {
+	trace_tcp_drop_reason(sk, skb, reason);
 	sk_drops_add(sk, skb);
 	sk_skb_reason_drop(sk, skb, reason);
 }