diff mbox series

[net-next,v3] net_sched: introduce tracepoint trace_qdisc_enqueue()

Message ID 20210715060324.43337-1-xiyou.wangcong@gmail.com (mailing list archive)
State Accepted
Commit 70713dddf3d25a02d1952f8c5d2688c986d2f2fb
Delegated to: Netdev Maintainers
Headers show
Series [net-next,v3] net_sched: introduce tracepoint trace_qdisc_enqueue() | expand

Checks

Context Check Description
netdev/cover_letter success Link
netdev/fixes_present success Link
netdev/patch_count success Link
netdev/tree_selection success Clearly marked for net-next
netdev/subject_prefix success Link
netdev/cc_maintainers warning 11 maintainers not CCed: mingo@redhat.com daniel@iogearbox.net andriin@fb.com rostedt@goodmis.org ast@kernel.org ap420073@gmail.com kuba@kernel.org alobakin@pm.me atenart@kernel.org davem@davemloft.net weiwan@google.com
netdev/source_inline success Was 0 now: 0
netdev/verify_signedoff success Link
netdev/module_param success Was 0 now: 0
netdev/build_32bit success Errors and warnings before: 11 this patch: 11
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/verify_fixes success Link
netdev/checkpatch warning CHECK: Alignment should match open parenthesis CHECK: Lines should not end with a '(' WARNING: Using vsprintf specifier '%px' potentially exposes the kernel memory layout, if you don't really need the address please consider using '%p'. WARNING: line length of 87 exceeds 80 columns WARNING: line length of 91 exceeds 80 columns
netdev/build_allmodconfig_warn success Errors and warnings before: 11 this patch: 11
netdev/header_inline success Link

Commit Message

Cong Wang July 15, 2021, 6:03 a.m. UTC
From: Qitao Xu <qitao.xu@bytedance.com>

Tracepoint trace_qdisc_enqueue() is introduced to trace skb at
the entrance of TC layer on TX side. This is similar to
trace_qdisc_dequeue():

1. For both we only trace successful cases. The failure cases
   can be traced via trace_kfree_skb().

2. They are called at entrance or exit of TC layer, not for each
   ->enqueue() or ->dequeue(). This is intentional, because
   we want to make trace_qdisc_enqueue() symmetric to
   trace_qdisc_dequeue(), which is easier to use.

The return value of qdisc_enqueue() is not interesting here,
we have Qdisc's drop packets in ->dequeue(), it is impossible to
trace them even if we have the return value, the only way to trace
them is tracing kfree_skb().

We only add information we need to trace ring buffer. If any other
information is needed, it is easy to extend it without breaking ABI,
see commit 3dd344ea84e1 ("net: tracepoint: exposing sk_family in all
tcp:tracepoints").

Reviewed-by: Cong Wang <cong.wang@bytedance.com>
Signed-off-by: Qitao Xu <qitao.xu@bytedance.com>
---
v3: expand changelog
    add helper dev_qdisc_enqueue()

v2: improve changelog

 include/trace/events/qdisc.h | 26 ++++++++++++++++++++++++++
 net/core/dev.c               | 20 ++++++++++++++++----
 2 files changed, 42 insertions(+), 4 deletions(-)

Comments

Yunsheng Lin July 15, 2021, 11:24 a.m. UTC | #1
On 2021/7/15 14:03, Cong Wang wrote:
> From: Qitao Xu <qitao.xu@bytedance.com>
> 
> Tracepoint trace_qdisc_enqueue() is introduced to trace skb at
> the entrance of TC layer on TX side. This is similar to
> trace_qdisc_dequeue():
> 
> 1. For both we only trace successful cases. The failure cases
>    can be traced via trace_kfree_skb().
> 
> 2. They are called at entrance or exit of TC layer, not for each
>    ->enqueue() or ->dequeue(). This is intentional, because
>    we want to make trace_qdisc_enqueue() symmetric to
>    trace_qdisc_dequeue(), which is easier to use.
> 
> The return value of qdisc_enqueue() is not interesting here,
> we have Qdisc's drop packets in ->dequeue(), it is impossible to
> trace them even if we have the return value, the only way to trace
> them is tracing kfree_skb().
> 
> We only add information we need to trace ring buffer. If any other
> information is needed, it is easy to extend it without breaking ABI,
> see commit 3dd344ea84e1 ("net: tracepoint: exposing sk_family in all
> tcp:tracepoints").
> 
> Reviewed-by: Cong Wang <cong.wang@bytedance.com>
> Signed-off-by: Qitao Xu <qitao.xu@bytedance.com>
> ---
> v3: expand changelog
>     add helper dev_qdisc_enqueue()
> 
> v2: improve changelog
> 
>  include/trace/events/qdisc.h | 26 ++++++++++++++++++++++++++
>  net/core/dev.c               | 20 ++++++++++++++++----
>  2 files changed, 42 insertions(+), 4 deletions(-)
> 
> diff --git a/include/trace/events/qdisc.h b/include/trace/events/qdisc.h
> index 58209557cb3a..c3006c6b4a87 100644
> --- a/include/trace/events/qdisc.h
> +++ b/include/trace/events/qdisc.h
> @@ -46,6 +46,32 @@ TRACE_EVENT(qdisc_dequeue,
>  		  __entry->txq_state, __entry->packets, __entry->skbaddr )
>  );
>  
> +TRACE_EVENT(qdisc_enqueue,
> +
> +	TP_PROTO(struct Qdisc *qdisc, const struct netdev_queue *txq, struct sk_buff *skb),
> +
> +	TP_ARGS(qdisc, txq, skb),
> +
> +	TP_STRUCT__entry(
> +		__field(struct Qdisc *, qdisc)
> +		__field(void *,	skbaddr)
> +		__field(int, ifindex)
> +		__field(u32, handle)
> +		__field(u32, parent)
> +	),
> +
> +	TP_fast_assign(
> +		__entry->qdisc = qdisc;
> +		__entry->skbaddr = skb;
> +		__entry->ifindex = txq->dev ? txq->dev->ifindex : 0;
> +		__entry->handle	 = qdisc->handle;
> +		__entry->parent	 = qdisc->parent;
> +	),
> +
> +	TP_printk("enqueue ifindex=%d qdisc handle=0x%X parent=0x%X skbaddr=%px",
> +		  __entry->ifindex, __entry->handle, __entry->parent, __entry->skbaddr)
> +);
> +
>  TRACE_EVENT(qdisc_reset,
>  
>  	TP_PROTO(struct Qdisc *q),
> diff --git a/net/core/dev.c b/net/core/dev.c
> index c253c2aafe97..0dcddd077d60 100644
> --- a/net/core/dev.c
> +++ b/net/core/dev.c
> @@ -131,6 +131,7 @@
>  #include <trace/events/napi.h>
>  #include <trace/events/net.h>
>  #include <trace/events/skb.h>
> +#include <trace/events/qdisc.h>
>  #include <linux/inetdevice.h>
>  #include <linux/cpu_rmap.h>
>  #include <linux/static_key.h>
> @@ -3844,6 +3845,18 @@ static void qdisc_pkt_len_init(struct sk_buff *skb)
>  	}
>  }
>  
> +static int dev_qdisc_enqueue(struct sk_buff *skb, struct Qdisc *q,
> +			     struct sk_buff **to_free,
> +			     struct netdev_queue *txq)
> +{
> +	int rc;
> +
> +	rc = q->enqueue(skb, q, to_free) & NET_XMIT_MASK;
> +	if (rc == NET_XMIT_SUCCESS)

I do not really understand the usecase you metioned previously,
but tracepointing the rc in trace_qdisc_enqueue() will avoid the
above checking, which is in the fast path.

If there is no extra checking overhead when this tracepoint is
disabled, then ignore my comment.

> +		trace_qdisc_enqueue(q, txq, skb);
> +	return rc;
> +}
> +
>  static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q,
>  				 struct net_device *dev,
>  				 struct netdev_queue *txq)
> @@ -3862,8 +3875,7 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q,
>  			 * of q->seqlock to protect from racing with requeuing.
>  			 */
>  			if (unlikely(!nolock_qdisc_is_empty(q))) {
> -				rc = q->enqueue(skb, q, &to_free) &
> -					NET_XMIT_MASK;
> +				rc = dev_qdisc_enqueue(skb, q, &to_free, txq);
>  				__qdisc_run(q);
>  				qdisc_run_end(q);
>  
> @@ -3879,7 +3891,7 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q,
>  			return NET_XMIT_SUCCESS;
>  		}
>  
> -		rc = q->enqueue(skb, q, &to_free) & NET_XMIT_MASK;
> +		rc = dev_qdisc_enqueue(skb, q, &to_free, txq);
>  		qdisc_run(q);
>  
>  no_lock_out:
> @@ -3923,7 +3935,7 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q,
>  		qdisc_run_end(q);
>  		rc = NET_XMIT_SUCCESS;
>  	} else {
> -		rc = q->enqueue(skb, q, &to_free) & NET_XMIT_MASK;
> +		rc = dev_qdisc_enqueue(skb, q, &to_free, txq);
>  		if (qdisc_run_begin(q)) {
>  			if (unlikely(contended)) {
>  				spin_unlock(&q->busylock);
>
Cong Wang July 15, 2021, 3:25 p.m. UTC | #2
On Thu, Jul 15, 2021 at 4:24 AM Yunsheng Lin <linyunsheng@huawei.com> wrote:
>
> On 2021/7/15 14:03, Cong Wang wrote:
> > From: Qitao Xu <qitao.xu@bytedance.com>
> >
> > Tracepoint trace_qdisc_enqueue() is introduced to trace skb at
> > the entrance of TC layer on TX side. This is similar to
> > trace_qdisc_dequeue():
> >
> > 1. For both we only trace successful cases. The failure cases
> >    can be traced via trace_kfree_skb().
> >
> > 2. They are called at entrance or exit of TC layer, not for each
> >    ->enqueue() or ->dequeue(). This is intentional, because
> >    we want to make trace_qdisc_enqueue() symmetric to
> >    trace_qdisc_dequeue(), which is easier to use.
> >
> > The return value of qdisc_enqueue() is not interesting here,
> > we have Qdisc's drop packets in ->dequeue(), it is impossible to
> > trace them even if we have the return value, the only way to trace
> > them is tracing kfree_skb().
> >
> > We only add information we need to trace ring buffer. If any other
> > information is needed, it is easy to extend it without breaking ABI,
> > see commit 3dd344ea84e1 ("net: tracepoint: exposing sk_family in all
> > tcp:tracepoints").
> >
> > Reviewed-by: Cong Wang <cong.wang@bytedance.com>
> > Signed-off-by: Qitao Xu <qitao.xu@bytedance.com>
> > ---
> > v3: expand changelog
> >     add helper dev_qdisc_enqueue()
> >
> > v2: improve changelog
> >
> >  include/trace/events/qdisc.h | 26 ++++++++++++++++++++++++++
> >  net/core/dev.c               | 20 ++++++++++++++++----
> >  2 files changed, 42 insertions(+), 4 deletions(-)
> >
> > diff --git a/include/trace/events/qdisc.h b/include/trace/events/qdisc.h
> > index 58209557cb3a..c3006c6b4a87 100644
> > --- a/include/trace/events/qdisc.h
> > +++ b/include/trace/events/qdisc.h
> > @@ -46,6 +46,32 @@ TRACE_EVENT(qdisc_dequeue,
> >                 __entry->txq_state, __entry->packets, __entry->skbaddr )
> >  );
> >
> > +TRACE_EVENT(qdisc_enqueue,
> > +
> > +     TP_PROTO(struct Qdisc *qdisc, const struct netdev_queue *txq, struct sk_buff *skb),
> > +
> > +     TP_ARGS(qdisc, txq, skb),
> > +
> > +     TP_STRUCT__entry(
> > +             __field(struct Qdisc *, qdisc)
> > +             __field(void *, skbaddr)
> > +             __field(int, ifindex)
> > +             __field(u32, handle)
> > +             __field(u32, parent)
> > +     ),
> > +
> > +     TP_fast_assign(
> > +             __entry->qdisc = qdisc;
> > +             __entry->skbaddr = skb;
> > +             __entry->ifindex = txq->dev ? txq->dev->ifindex : 0;
> > +             __entry->handle  = qdisc->handle;
> > +             __entry->parent  = qdisc->parent;
> > +     ),
> > +
> > +     TP_printk("enqueue ifindex=%d qdisc handle=0x%X parent=0x%X skbaddr=%px",
> > +               __entry->ifindex, __entry->handle, __entry->parent, __entry->skbaddr)
> > +);
> > +
> >  TRACE_EVENT(qdisc_reset,
> >
> >       TP_PROTO(struct Qdisc *q),
> > diff --git a/net/core/dev.c b/net/core/dev.c
> > index c253c2aafe97..0dcddd077d60 100644
> > --- a/net/core/dev.c
> > +++ b/net/core/dev.c
> > @@ -131,6 +131,7 @@
> >  #include <trace/events/napi.h>
> >  #include <trace/events/net.h>
> >  #include <trace/events/skb.h>
> > +#include <trace/events/qdisc.h>
> >  #include <linux/inetdevice.h>
> >  #include <linux/cpu_rmap.h>
> >  #include <linux/static_key.h>
> > @@ -3844,6 +3845,18 @@ static void qdisc_pkt_len_init(struct sk_buff *skb)
> >       }
> >  }
> >
> > +static int dev_qdisc_enqueue(struct sk_buff *skb, struct Qdisc *q,
> > +                          struct sk_buff **to_free,
> > +                          struct netdev_queue *txq)
> > +{
> > +     int rc;
> > +
> > +     rc = q->enqueue(skb, q, to_free) & NET_XMIT_MASK;
> > +     if (rc == NET_XMIT_SUCCESS)
>
> I do not really understand the usecase you metioned previously,
> but tracepointing the rc in trace_qdisc_enqueue() will avoid the
> above checking, which is in the fast path.

Good point, the solution is clearly not just tracing return value, instead
I use trace_qdisc_enqueue_enabled() which is based on jump label:

        static inline bool                                              \
        trace_##name##_enabled(void)                                    \
        {                                                               \
                return static_key_false(&__tracepoint_##name.key);      \
        }


>
> If there is no extra checking overhead when this tracepoint is
> disabled, then ignore my comment.

Yes, trace_qdisc_enqueue_enabled() should give 0 overhead.

Thanks.
patchwork-bot+netdevbpf@kernel.org July 15, 2021, 6:10 p.m. UTC | #3
Hello:

This patch was applied to netdev/net.git (refs/heads/master):

On Wed, 14 Jul 2021 23:03:24 -0700 you wrote:
> From: Qitao Xu <qitao.xu@bytedance.com>
> 
> Tracepoint trace_qdisc_enqueue() is introduced to trace skb at
> the entrance of TC layer on TX side. This is similar to
> trace_qdisc_dequeue():
> 
> 1. For both we only trace successful cases. The failure cases
>    can be traced via trace_kfree_skb().
> 
> [...]

Here is the summary with links:
  - [net-next,v3] net_sched: introduce tracepoint trace_qdisc_enqueue()
    https://git.kernel.org/netdev/net/c/70713dddf3d2

You are awesome, thank you!
--
Deet-doot-dot, I am a bot.
https://korg.docs.kernel.org/patchwork/pwbot.html
diff mbox series

Patch

diff --git a/include/trace/events/qdisc.h b/include/trace/events/qdisc.h
index 58209557cb3a..c3006c6b4a87 100644
--- a/include/trace/events/qdisc.h
+++ b/include/trace/events/qdisc.h
@@ -46,6 +46,32 @@  TRACE_EVENT(qdisc_dequeue,
 		  __entry->txq_state, __entry->packets, __entry->skbaddr )
 );
 
+TRACE_EVENT(qdisc_enqueue,
+
+	TP_PROTO(struct Qdisc *qdisc, const struct netdev_queue *txq, struct sk_buff *skb),
+
+	TP_ARGS(qdisc, txq, skb),
+
+	TP_STRUCT__entry(
+		__field(struct Qdisc *, qdisc)
+		__field(void *,	skbaddr)
+		__field(int, ifindex)
+		__field(u32, handle)
+		__field(u32, parent)
+	),
+
+	TP_fast_assign(
+		__entry->qdisc = qdisc;
+		__entry->skbaddr = skb;
+		__entry->ifindex = txq->dev ? txq->dev->ifindex : 0;
+		__entry->handle	 = qdisc->handle;
+		__entry->parent	 = qdisc->parent;
+	),
+
+	TP_printk("enqueue ifindex=%d qdisc handle=0x%X parent=0x%X skbaddr=%px",
+		  __entry->ifindex, __entry->handle, __entry->parent, __entry->skbaddr)
+);
+
 TRACE_EVENT(qdisc_reset,
 
 	TP_PROTO(struct Qdisc *q),
diff --git a/net/core/dev.c b/net/core/dev.c
index c253c2aafe97..0dcddd077d60 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -131,6 +131,7 @@ 
 #include <trace/events/napi.h>
 #include <trace/events/net.h>
 #include <trace/events/skb.h>
+#include <trace/events/qdisc.h>
 #include <linux/inetdevice.h>
 #include <linux/cpu_rmap.h>
 #include <linux/static_key.h>
@@ -3844,6 +3845,18 @@  static void qdisc_pkt_len_init(struct sk_buff *skb)
 	}
 }
 
+static int dev_qdisc_enqueue(struct sk_buff *skb, struct Qdisc *q,
+			     struct sk_buff **to_free,
+			     struct netdev_queue *txq)
+{
+	int rc;
+
+	rc = q->enqueue(skb, q, to_free) & NET_XMIT_MASK;
+	if (rc == NET_XMIT_SUCCESS)
+		trace_qdisc_enqueue(q, txq, skb);
+	return rc;
+}
+
 static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q,
 				 struct net_device *dev,
 				 struct netdev_queue *txq)
@@ -3862,8 +3875,7 @@  static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q,
 			 * of q->seqlock to protect from racing with requeuing.
 			 */
 			if (unlikely(!nolock_qdisc_is_empty(q))) {
-				rc = q->enqueue(skb, q, &to_free) &
-					NET_XMIT_MASK;
+				rc = dev_qdisc_enqueue(skb, q, &to_free, txq);
 				__qdisc_run(q);
 				qdisc_run_end(q);
 
@@ -3879,7 +3891,7 @@  static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q,
 			return NET_XMIT_SUCCESS;
 		}
 
-		rc = q->enqueue(skb, q, &to_free) & NET_XMIT_MASK;
+		rc = dev_qdisc_enqueue(skb, q, &to_free, txq);
 		qdisc_run(q);
 
 no_lock_out:
@@ -3923,7 +3935,7 @@  static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q,
 		qdisc_run_end(q);
 		rc = NET_XMIT_SUCCESS;
 	} else {
-		rc = q->enqueue(skb, q, &to_free) & NET_XMIT_MASK;
+		rc = dev_qdisc_enqueue(skb, q, &to_free, txq);
 		if (qdisc_run_begin(q)) {
 			if (unlikely(contended)) {
 				spin_unlock(&q->busylock);