Message ID | 20210715060324.43337-1-xiyou.wangcong@gmail.com (mailing list archive) |
---|---|
State | Accepted |
Commit | 70713dddf3d25a02d1952f8c5d2688c986d2f2fb |
Delegated to: | Netdev Maintainers |
Headers | show |
Series | [net-next,v3] net_sched: introduce tracepoint trace_qdisc_enqueue() | expand |
On 2021/7/15 14:03, Cong Wang wrote: > From: Qitao Xu <qitao.xu@bytedance.com> > > Tracepoint trace_qdisc_enqueue() is introduced to trace skb at > the entrance of TC layer on TX side. This is similar to > trace_qdisc_dequeue(): > > 1. For both we only trace successful cases. The failure cases > can be traced via trace_kfree_skb(). > > 2. They are called at entrance or exit of TC layer, not for each > ->enqueue() or ->dequeue(). This is intentional, because > we want to make trace_qdisc_enqueue() symmetric to > trace_qdisc_dequeue(), which is easier to use. > > The return value of qdisc_enqueue() is not interesting here, > we have Qdisc's drop packets in ->dequeue(), it is impossible to > trace them even if we have the return value, the only way to trace > them is tracing kfree_skb(). > > We only add information we need to trace ring buffer. If any other > information is needed, it is easy to extend it without breaking ABI, > see commit 3dd344ea84e1 ("net: tracepoint: exposing sk_family in all > tcp:tracepoints"). 
> > Reviewed-by: Cong Wang <cong.wang@bytedance.com> > Signed-off-by: Qitao Xu <qitao.xu@bytedance.com> > --- > v3: expand changelog > add helper dev_qdisc_enqueue() > > v2: improve changelog > > include/trace/events/qdisc.h | 26 ++++++++++++++++++++++++++ > net/core/dev.c | 20 ++++++++++++++++---- > 2 files changed, 42 insertions(+), 4 deletions(-) > > diff --git a/include/trace/events/qdisc.h b/include/trace/events/qdisc.h > index 58209557cb3a..c3006c6b4a87 100644 > --- a/include/trace/events/qdisc.h > +++ b/include/trace/events/qdisc.h > @@ -46,6 +46,32 @@ TRACE_EVENT(qdisc_dequeue, > __entry->txq_state, __entry->packets, __entry->skbaddr ) > ); > > +TRACE_EVENT(qdisc_enqueue, > + > + TP_PROTO(struct Qdisc *qdisc, const struct netdev_queue *txq, struct sk_buff *skb), > + > + TP_ARGS(qdisc, txq, skb), > + > + TP_STRUCT__entry( > + __field(struct Qdisc *, qdisc) > + __field(void *, skbaddr) > + __field(int, ifindex) > + __field(u32, handle) > + __field(u32, parent) > + ), > + > + TP_fast_assign( > + __entry->qdisc = qdisc; > + __entry->skbaddr = skb; > + __entry->ifindex = txq->dev ? 
txq->dev->ifindex : 0; > + __entry->handle = qdisc->handle; > + __entry->parent = qdisc->parent; > + ), > + > + TP_printk("enqueue ifindex=%d qdisc handle=0x%X parent=0x%X skbaddr=%px", > + __entry->ifindex, __entry->handle, __entry->parent, __entry->skbaddr) > +); > + > TRACE_EVENT(qdisc_reset, > > TP_PROTO(struct Qdisc *q), > diff --git a/net/core/dev.c b/net/core/dev.c > index c253c2aafe97..0dcddd077d60 100644 > --- a/net/core/dev.c > +++ b/net/core/dev.c > @@ -131,6 +131,7 @@ > #include <trace/events/napi.h> > #include <trace/events/net.h> > #include <trace/events/skb.h> > +#include <trace/events/qdisc.h> > #include <linux/inetdevice.h> > #include <linux/cpu_rmap.h> > #include <linux/static_key.h> > @@ -3844,6 +3845,18 @@ static void qdisc_pkt_len_init(struct sk_buff *skb) > } > } > > +static int dev_qdisc_enqueue(struct sk_buff *skb, struct Qdisc *q, > + struct sk_buff **to_free, > + struct netdev_queue *txq) > +{ > + int rc; > + > + rc = q->enqueue(skb, q, to_free) & NET_XMIT_MASK; > + if (rc == NET_XMIT_SUCCESS) I do not really understand the use case you mentioned previously, but tracepointing the rc in trace_qdisc_enqueue() will avoid the above checking, which is in the fast path. If there is no extra checking overhead when this tracepoint is disabled, then ignore my comment. > + trace_qdisc_enqueue(q, txq, skb); > + return rc; > +} > + > static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q, > struct net_device *dev, > struct netdev_queue *txq) > @@ -3862,8 +3875,7 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q, > * of q->seqlock to protect from racing with requeuing. 
> */ > if (unlikely(!nolock_qdisc_is_empty(q))) { > - rc = q->enqueue(skb, q, &to_free) & > - NET_XMIT_MASK; > + rc = dev_qdisc_enqueue(skb, q, &to_free, txq); > __qdisc_run(q); > qdisc_run_end(q); > > @@ -3879,7 +3891,7 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q, > return NET_XMIT_SUCCESS; > } > > - rc = q->enqueue(skb, q, &to_free) & NET_XMIT_MASK; > + rc = dev_qdisc_enqueue(skb, q, &to_free, txq); > qdisc_run(q); > > no_lock_out: > @@ -3923,7 +3935,7 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q, > qdisc_run_end(q); > rc = NET_XMIT_SUCCESS; > } else { > - rc = q->enqueue(skb, q, &to_free) & NET_XMIT_MASK; > + rc = dev_qdisc_enqueue(skb, q, &to_free, txq); > if (qdisc_run_begin(q)) { > if (unlikely(contended)) { > spin_unlock(&q->busylock); >
On Thu, Jul 15, 2021 at 4:24 AM Yunsheng Lin <linyunsheng@huawei.com> wrote: > > On 2021/7/15 14:03, Cong Wang wrote: > > From: Qitao Xu <qitao.xu@bytedance.com> > > > > Tracepoint trace_qdisc_enqueue() is introduced to trace skb at > > the entrance of TC layer on TX side. This is similar to > > trace_qdisc_dequeue(): > > > > 1. For both we only trace successful cases. The failure cases > > can be traced via trace_kfree_skb(). > > > > 2. They are called at entrance or exit of TC layer, not for each > > ->enqueue() or ->dequeue(). This is intentional, because > > we want to make trace_qdisc_enqueue() symmetric to > > trace_qdisc_dequeue(), which is easier to use. > > > > The return value of qdisc_enqueue() is not interesting here, > > we have Qdisc's drop packets in ->dequeue(), it is impossible to > > trace them even if we have the return value, the only way to trace > > them is tracing kfree_skb(). > > > > We only add information we need to trace ring buffer. If any other > > information is needed, it is easy to extend it without breaking ABI, > > see commit 3dd344ea84e1 ("net: tracepoint: exposing sk_family in all > > tcp:tracepoints"). 
> > > > Reviewed-by: Cong Wang <cong.wang@bytedance.com> > > Signed-off-by: Qitao Xu <qitao.xu@bytedance.com> > > --- > > v3: expand changelog > > add helper dev_qdisc_enqueue() > > > > v2: improve changelog > > > > include/trace/events/qdisc.h | 26 ++++++++++++++++++++++++++ > > net/core/dev.c | 20 ++++++++++++++++---- > > 2 files changed, 42 insertions(+), 4 deletions(-) > > > > diff --git a/include/trace/events/qdisc.h b/include/trace/events/qdisc.h > > index 58209557cb3a..c3006c6b4a87 100644 > > --- a/include/trace/events/qdisc.h > > +++ b/include/trace/events/qdisc.h > > @@ -46,6 +46,32 @@ TRACE_EVENT(qdisc_dequeue, > > __entry->txq_state, __entry->packets, __entry->skbaddr ) > > ); > > > > +TRACE_EVENT(qdisc_enqueue, > > + > > + TP_PROTO(struct Qdisc *qdisc, const struct netdev_queue *txq, struct sk_buff *skb), > > + > > + TP_ARGS(qdisc, txq, skb), > > + > > + TP_STRUCT__entry( > > + __field(struct Qdisc *, qdisc) > > + __field(void *, skbaddr) > > + __field(int, ifindex) > > + __field(u32, handle) > > + __field(u32, parent) > > + ), > > + > > + TP_fast_assign( > > + __entry->qdisc = qdisc; > > + __entry->skbaddr = skb; > > + __entry->ifindex = txq->dev ? 
txq->dev->ifindex : 0; > > + __entry->handle = qdisc->handle; > > + __entry->parent = qdisc->parent; > > + ), > > + > > + TP_printk("enqueue ifindex=%d qdisc handle=0x%X parent=0x%X skbaddr=%px", > > + __entry->ifindex, __entry->handle, __entry->parent, __entry->skbaddr) > > +); > > + > > TRACE_EVENT(qdisc_reset, > > > > TP_PROTO(struct Qdisc *q), > > diff --git a/net/core/dev.c b/net/core/dev.c > > index c253c2aafe97..0dcddd077d60 100644 > > --- a/net/core/dev.c > > +++ b/net/core/dev.c > > @@ -131,6 +131,7 @@ > > #include <trace/events/napi.h> > > #include <trace/events/net.h> > > #include <trace/events/skb.h> > > +#include <trace/events/qdisc.h> > > #include <linux/inetdevice.h> > > #include <linux/cpu_rmap.h> > > #include <linux/static_key.h> > > @@ -3844,6 +3845,18 @@ static void qdisc_pkt_len_init(struct sk_buff *skb) > > } > > } > > > > +static int dev_qdisc_enqueue(struct sk_buff *skb, struct Qdisc *q, > > + struct sk_buff **to_free, > > + struct netdev_queue *txq) > > +{ > > + int rc; > > + > > + rc = q->enqueue(skb, q, to_free) & NET_XMIT_MASK; > > + if (rc == NET_XMIT_SUCCESS) > > I do not really understand the use case you mentioned previously, > but tracepointing the rc in trace_qdisc_enqueue() will avoid the > above checking, which is in the fast path. Good point, the solution is clearly not just tracing the return value, instead I use trace_qdisc_enqueue_enabled() which is based on jump label: static inline bool \ trace_##name##_enabled(void) \ { \ return static_key_false(&__tracepoint_##name.key); \ } > > If there is no extra checking overhead when this tracepoint is > disabled, then ignore my comment. Yes, trace_qdisc_enqueue_enabled() should give 0 overhead. Thanks.
Hello: This patch was applied to netdev/net.git (refs/heads/master): On Wed, 14 Jul 2021 23:03:24 -0700 you wrote: > From: Qitao Xu <qitao.xu@bytedance.com> > > Tracepoint trace_qdisc_enqueue() is introduced to trace skb at > the entrance of TC layer on TX side. This is similar to > trace_qdisc_dequeue(): > > 1. For both we only trace successful cases. The failure cases > can be traced via trace_kfree_skb(). > > [...] Here is the summary with links: - [net-next,v3] net_sched: introduce tracepoint trace_qdisc_enqueue() https://git.kernel.org/netdev/net/c/70713dddf3d2 You are awesome, thank you! -- Deet-doot-dot, I am a bot. https://korg.docs.kernel.org/patchwork/pwbot.html
diff --git a/include/trace/events/qdisc.h b/include/trace/events/qdisc.h index 58209557cb3a..c3006c6b4a87 100644 --- a/include/trace/events/qdisc.h +++ b/include/trace/events/qdisc.h @@ -46,6 +46,32 @@ TRACE_EVENT(qdisc_dequeue, __entry->txq_state, __entry->packets, __entry->skbaddr ) ); +TRACE_EVENT(qdisc_enqueue, + + TP_PROTO(struct Qdisc *qdisc, const struct netdev_queue *txq, struct sk_buff *skb), + + TP_ARGS(qdisc, txq, skb), + + TP_STRUCT__entry( + __field(struct Qdisc *, qdisc) + __field(void *, skbaddr) + __field(int, ifindex) + __field(u32, handle) + __field(u32, parent) + ), + + TP_fast_assign( + __entry->qdisc = qdisc; + __entry->skbaddr = skb; + __entry->ifindex = txq->dev ? txq->dev->ifindex : 0; + __entry->handle = qdisc->handle; + __entry->parent = qdisc->parent; + ), + + TP_printk("enqueue ifindex=%d qdisc handle=0x%X parent=0x%X skbaddr=%px", + __entry->ifindex, __entry->handle, __entry->parent, __entry->skbaddr) +); + TRACE_EVENT(qdisc_reset, TP_PROTO(struct Qdisc *q), diff --git a/net/core/dev.c b/net/core/dev.c index c253c2aafe97..0dcddd077d60 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -131,6 +131,7 @@ #include <trace/events/napi.h> #include <trace/events/net.h> #include <trace/events/skb.h> +#include <trace/events/qdisc.h> #include <linux/inetdevice.h> #include <linux/cpu_rmap.h> #include <linux/static_key.h> @@ -3844,6 +3845,18 @@ static void qdisc_pkt_len_init(struct sk_buff *skb) } } +static int dev_qdisc_enqueue(struct sk_buff *skb, struct Qdisc *q, + struct sk_buff **to_free, + struct netdev_queue *txq) +{ + int rc; + + rc = q->enqueue(skb, q, to_free) & NET_XMIT_MASK; + if (rc == NET_XMIT_SUCCESS) + trace_qdisc_enqueue(q, txq, skb); + return rc; +} + static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q, struct net_device *dev, struct netdev_queue *txq) @@ -3862,8 +3875,7 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q, * of q->seqlock to protect from racing with requeuing. 
*/ if (unlikely(!nolock_qdisc_is_empty(q))) { - rc = q->enqueue(skb, q, &to_free) & - NET_XMIT_MASK; + rc = dev_qdisc_enqueue(skb, q, &to_free, txq); __qdisc_run(q); qdisc_run_end(q); @@ -3879,7 +3891,7 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q, return NET_XMIT_SUCCESS; } - rc = q->enqueue(skb, q, &to_free) & NET_XMIT_MASK; + rc = dev_qdisc_enqueue(skb, q, &to_free, txq); qdisc_run(q); no_lock_out: @@ -3923,7 +3935,7 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q, qdisc_run_end(q); rc = NET_XMIT_SUCCESS; } else { - rc = q->enqueue(skb, q, &to_free) & NET_XMIT_MASK; + rc = dev_qdisc_enqueue(skb, q, &to_free, txq); if (qdisc_run_begin(q)) { if (unlikely(contended)) { spin_unlock(&q->busylock);