@@ -80,6 +80,12 @@
FN(IPV6_NDISC_BAD_OPTIONS) \
FN(IPV6_NDISC_NS_OTHERHOST) \
FN(QUEUE_PURGE) \
+ FN(TC_EXT_COOKIE_NOTFOUND) \
+ FN(TC_COOKIE_EXT_MISMATCH) \
+ FN(TC_COOKIE_MISMATCH) \
+ FN(TC_CHAIN_NOTFOUND) \
+ FN(TC_ALLOC_SKB_EXT) \
+ FN(TC_RECLASSIFY_LOOP) \
FNe(MAX)
/**
@@ -345,6 +351,26 @@ enum skb_drop_reason {
SKB_DROP_REASON_IPV6_NDISC_NS_OTHERHOST,
/** @SKB_DROP_REASON_QUEUE_PURGE: bulk free. */
SKB_DROP_REASON_QUEUE_PURGE,
+ /** @SKB_DROP_REASON_TC_EXT_COOKIE_NOTFOUND: tc cookie from the skb ext was
+ * looked up, but no match was found.
+ */
+ SKB_DROP_REASON_TC_EXT_COOKIE_NOTFOUND,
+ /** @SKB_DROP_REASON_TC_COOKIE_EXT_MISMATCH: tc exts were looked up using the
+ * cookie and were either not found or differed from what was expected.
+ */
+ SKB_DROP_REASON_TC_COOKIE_EXT_MISMATCH,
+ /** @SKB_DROP_REASON_TC_COOKIE_MISMATCH: tc cookie was available but could
+ * not be matched to a filter.
+ */
+ SKB_DROP_REASON_TC_COOKIE_MISMATCH,
+ /** @SKB_DROP_REASON_TC_CHAIN_NOTFOUND: tc chain lookup failed. */
+ SKB_DROP_REASON_TC_CHAIN_NOTFOUND,
+ /** @SKB_DROP_REASON_TC_ALLOC_SKB_EXT: tc failed to allocate skb ext. */
+ SKB_DROP_REASON_TC_ALLOC_SKB_EXT,
+ /** @SKB_DROP_REASON_TC_RECLASSIFY_LOOP: tc exceeded the maximum number of
+ * reclassify loop iterations.
+ */
+ SKB_DROP_REASON_TC_RECLASSIFY_LOOP,
/**
* @SKB_DROP_REASON_MAX: the maximum of core drop reasons, which
* shouldn't be used as a real 'reason' - only for tracing code gen
@@ -154,6 +154,12 @@ __cls_set_class(unsigned long *clp, unsigned long cl)
return xchg(clp, cl);
}
+static inline void tcf_set_drop_reason(struct tcf_result *res,
+ enum skb_drop_reason reason)
+{
+ res->drop_reason = reason;
+}
+
static inline void
__tcf_bind_filter(struct Qdisc *q, struct tcf_result *r, unsigned long base)
{
@@ -324,7 +324,6 @@ struct Qdisc_ops {
struct module *owner;
};
-
struct tcf_result {
union {
struct {
@@ -332,8 +331,8 @@ struct tcf_result {
u32 classid;
};
const struct tcf_proto *goto_tp;
-
};
+ enum skb_drop_reason drop_reason;
};
struct tcf_chain;
@@ -3910,24 +3910,27 @@ EXPORT_SYMBOL_GPL(netdev_xmit_skip_txqueue);
#endif /* CONFIG_NET_EGRESS */
#ifdef CONFIG_NET_XGRESS
-static int tc_run(struct tcx_entry *entry, struct sk_buff *skb)
+static int tc_run(struct tcx_entry *entry, struct sk_buff *skb,
+ enum skb_drop_reason *drop_reason)
{
int ret = TC_ACT_UNSPEC;
#ifdef CONFIG_NET_CLS_ACT
struct mini_Qdisc *miniq = rcu_dereference_bh(entry->miniq);
- struct tcf_result res;
+ struct tcf_result res = {0};
if (!miniq)
return ret;
tc_skb_cb(skb)->mru = 0;
tc_skb_cb(skb)->post_ct = false;
+ res.drop_reason = *drop_reason;
mini_qdisc_bstats_cpu_update(miniq, skb);
ret = tcf_classify(skb, miniq->block, miniq->filter_list, &res, false);
/* Only tcf related quirks below. */
switch (ret) {
case TC_ACT_SHOT:
+ *drop_reason = res.drop_reason;
mini_qdisc_qstats_cpu_drop(miniq);
break;
case TC_ACT_OK:
@@ -3977,6 +3980,7 @@ sch_handle_ingress(struct sk_buff *skb, struct packet_type **pt_prev, int *ret,
struct net_device *orig_dev, bool *another)
{
struct bpf_mprog_entry *entry = rcu_dereference_bh(skb->dev->tcx_ingress);
+ enum skb_drop_reason drop_reason = SKB_DROP_REASON_TC_INGRESS;
int sch_ret;
if (!entry)
@@ -3994,7 +3998,7 @@ sch_handle_ingress(struct sk_buff *skb, struct packet_type **pt_prev, int *ret,
if (sch_ret != TC_ACT_UNSPEC)
goto ingress_verdict;
}
- sch_ret = tc_run(tcx_entry(entry), skb);
+ sch_ret = tc_run(tcx_entry(entry), skb, &drop_reason);
ingress_verdict:
switch (sch_ret) {
case TC_ACT_REDIRECT:
@@ -4011,7 +4015,7 @@ sch_handle_ingress(struct sk_buff *skb, struct packet_type **pt_prev, int *ret,
*ret = NET_RX_SUCCESS;
return NULL;
case TC_ACT_SHOT:
- kfree_skb_reason(skb, SKB_DROP_REASON_TC_INGRESS);
+ kfree_skb_reason(skb, drop_reason);
*ret = NET_RX_DROP;
return NULL;
/* used by tc_run */
@@ -4032,6 +4036,7 @@ static __always_inline struct sk_buff *
sch_handle_egress(struct sk_buff *skb, int *ret, struct net_device *dev)
{
struct bpf_mprog_entry *entry = rcu_dereference_bh(dev->tcx_egress);
+ enum skb_drop_reason drop_reason = SKB_DROP_REASON_TC_EGRESS;
int sch_ret;
if (!entry)
@@ -4045,7 +4050,7 @@ sch_handle_egress(struct sk_buff *skb, int *ret, struct net_device *dev)
if (sch_ret != TC_ACT_UNSPEC)
goto egress_verdict;
}
- sch_ret = tc_run(tcx_entry(entry), skb);
+ sch_ret = tc_run(tcx_entry(entry), skb, &drop_reason);
egress_verdict:
switch (sch_ret) {
case TC_ACT_REDIRECT:
@@ -4054,7 +4059,7 @@ sch_handle_egress(struct sk_buff *skb, int *ret, struct net_device *dev)
*ret = NET_XMIT_SUCCESS;
return NULL;
case TC_ACT_SHOT:
- kfree_skb_reason(skb, SKB_DROP_REASON_TC_EGRESS);
+ kfree_skb_reason(skb, drop_reason);
*ret = NET_XMIT_DROP;
return NULL;
/* used by tc_run */
@@ -1681,12 +1681,20 @@ static inline int __tcf_classify(struct sk_buff *skb,
* time we got here with a cookie from hardware.
*/
if (unlikely(n->tp != tp || n->tp->chain != n->chain ||
- !tp->ops->get_exts))
+ !tp->ops->get_exts)) {
+ u32 drop_reason = SKB_DROP_REASON_TC_COOKIE_MISMATCH;
+
+ tcf_set_drop_reason(res, drop_reason);
return TC_ACT_SHOT;
+ }
exts = tp->ops->get_exts(tp, n->handle);
- if (unlikely(!exts || n->exts != exts))
+ if (unlikely(!exts || n->exts != exts)) {
+ u32 drop_reason = SKB_DROP_REASON_TC_COOKIE_EXT_MISMATCH;
+
+ tcf_set_drop_reason(res, drop_reason);
return TC_ACT_SHOT;
+ }
n = NULL;
err = tcf_exts_exec_ex(skb, exts, act_index, res);
@@ -1712,8 +1720,11 @@ static inline int __tcf_classify(struct sk_buff *skb,
return err;
}
- if (unlikely(n))
+ if (unlikely(n)) {
+ tcf_set_drop_reason(res,
+ SKB_DROP_REASON_TC_COOKIE_MISMATCH);
return TC_ACT_SHOT;
+ }
return TC_ACT_UNSPEC; /* signal: continue lookup */
#ifdef CONFIG_NET_CLS_ACT
@@ -1723,6 +1734,8 @@ static inline int __tcf_classify(struct sk_buff *skb,
tp->chain->block->index,
tp->prio & 0xffff,
ntohs(tp->protocol));
+ tcf_set_drop_reason(res,
+ SKB_DROP_REASON_TC_RECLASSIFY_LOOP);
return TC_ACT_SHOT;
}
@@ -1759,7 +1772,11 @@ int tcf_classify(struct sk_buff *skb,
if (ext->act_miss) {
n = tcf_exts_miss_cookie_lookup(ext->act_miss_cookie,
&act_index);
- if (!n)
+ if (!n) {
+ u32 drop_reason = SKB_DROP_REASON_TC_EXT_COOKIE_NOTFOUND;
+
+ tcf_set_drop_reason(res, drop_reason);
return TC_ACT_SHOT;
+ }
chain = n->chain_index;
@@ -1768,8 +1784,13 @@ int tcf_classify(struct sk_buff *skb,
}
fchain = tcf_chain_lookup_rcu(block, chain);
- if (!fchain)
+ if (!fchain) {
+ u32 drop_reason = SKB_DROP_REASON_TC_CHAIN_NOTFOUND;
+
+ tcf_set_drop_reason(res, drop_reason);
+
return TC_ACT_SHOT;
+ }
/* Consume, so cloned/redirect skbs won't inherit ext */
skb_ext_del(skb, TC_SKB_EXT);
@@ -1788,8 +1809,12 @@ int tcf_classify(struct sk_buff *skb,
struct tc_skb_cb *cb = tc_skb_cb(skb);
ext = tc_skb_ext_alloc(skb);
- if (WARN_ON_ONCE(!ext))
+ if (WARN_ON_ONCE(!ext)) {
+ u32 drop_reason = SKB_DROP_REASON_TC_ALLOC_SKB_EXT;
+
+ tcf_set_drop_reason(res, drop_reason);
return TC_ACT_SHOT;
+ }
ext->chain = last_executed_chain;
ext->mru = cb->mru;
ext->post_ct = cb->post_ct;
@@ -374,8 +374,8 @@ static struct ets_class *ets_classify(struct sk_buff *skb, struct Qdisc *sch,
int *qerr)
{
struct ets_sched *q = qdisc_priv(sch);
+ struct tcf_result res = {0};
u32 band = skb->priority;
- struct tcf_result res;
struct tcf_proto *fl;
int err;
@@ -383,7 +383,6 @@ static struct ets_class *ets_classify(struct sk_buff *skb, struct Qdisc *sch,
if (TC_H_MAJ(skb->priority) != sch->handle) {
fl = rcu_dereference_bh(q->filter_list);
err = tcf_classify(skb, NULL, fl, &res, false);
-#ifdef CONFIG_NET_CLS_ACT
switch (err) {
case TC_ACT_STOLEN:
case TC_ACT_QUEUED:
@@ -393,7 +392,6 @@ static struct ets_class *ets_classify(struct sk_buff *skb, struct Qdisc *sch,
case TC_ACT_SHOT:
return NULL;
}
-#endif
if (!fl || err < 0) {
if (TC_H_MAJ(band))
band = 0;
@@ -77,8 +77,8 @@ static unsigned int fq_codel_classify(struct sk_buff *skb, struct Qdisc *sch,
int *qerr)
{
struct fq_codel_sched_data *q = qdisc_priv(sch);
+ struct tcf_result res = {0};
struct tcf_proto *filter;
- struct tcf_result res;
int result;
if (TC_H_MAJ(skb->priority) == sch->handle &&
@@ -93,7 +93,6 @@ static unsigned int fq_codel_classify(struct sk_buff *skb, struct Qdisc *sch,
*qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
result = tcf_classify(skb, NULL, filter, &res, false);
if (result >= 0) {
-#ifdef CONFIG_NET_CLS_ACT
switch (result) {
case TC_ACT_STOLEN:
case TC_ACT_QUEUED:
@@ -103,7 +102,6 @@ static unsigned int fq_codel_classify(struct sk_buff *skb, struct Qdisc *sch,
case TC_ACT_SHOT:
return 0;
}
-#endif
if (TC_H_MIN(res.classid) <= q->flows_cnt)
return TC_H_MIN(res.classid);
}
@@ -81,8 +81,8 @@ static unsigned int fq_pie_classify(struct sk_buff *skb, struct Qdisc *sch,
int *qerr)
{
struct fq_pie_sched_data *q = qdisc_priv(sch);
+ struct tcf_result res = {0};
struct tcf_proto *filter;
- struct tcf_result res;
int result;
if (TC_H_MAJ(skb->priority) == sch->handle &&
@@ -97,7 +97,6 @@ static unsigned int fq_pie_classify(struct sk_buff *skb, struct Qdisc *sch,
*qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
result = tcf_classify(skb, NULL, filter, &res, false);
if (result >= 0) {
-#ifdef CONFIG_NET_CLS_ACT
switch (result) {
case TC_ACT_STOLEN:
case TC_ACT_QUEUED:
@@ -107,7 +106,6 @@ static unsigned int fq_pie_classify(struct sk_buff *skb, struct Qdisc *sch,
case TC_ACT_SHOT:
return 0;
}
-#endif
if (TC_H_MIN(res.classid) <= q->flows_cnt)
return TC_H_MIN(res.classid);
}
@@ -1122,7 +1122,7 @@ hfsc_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr)
{
struct hfsc_sched *q = qdisc_priv(sch);
struct hfsc_class *head, *cl;
- struct tcf_result res;
+ struct tcf_result res = {0};
struct tcf_proto *tcf;
int result;
@@ -1135,7 +1135,6 @@ hfsc_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr)
head = &q->root;
tcf = rcu_dereference_bh(q->root.filter_list);
while (tcf && (result = tcf_classify(skb, NULL, tcf, &res, false)) >= 0) {
-#ifdef CONFIG_NET_CLS_ACT
switch (result) {
case TC_ACT_QUEUED:
case TC_ACT_STOLEN:
@@ -1145,7 +1144,6 @@ hfsc_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr)
case TC_ACT_SHOT:
return NULL;
}
-#endif
cl = (struct hfsc_class *)res.class;
if (!cl) {
cl = hfsc_find_class(res.classid, sch);
@@ -220,8 +220,8 @@ static struct htb_class *htb_classify(struct sk_buff *skb, struct Qdisc *sch,
int *qerr)
{
struct htb_sched *q = qdisc_priv(sch);
+ struct tcf_result res = {0};
struct htb_class *cl;
- struct tcf_result res;
struct tcf_proto *tcf;
int result;
@@ -243,7 +243,6 @@ static struct htb_class *htb_classify(struct sk_buff *skb, struct Qdisc *sch,
*qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
while (tcf && (result = tcf_classify(skb, NULL, tcf, &res, false)) >= 0) {
-#ifdef CONFIG_NET_CLS_ACT
switch (result) {
case TC_ACT_QUEUED:
case TC_ACT_STOLEN:
@@ -253,7 +252,6 @@ static struct htb_class *htb_classify(struct sk_buff *skb, struct Qdisc *sch,
case TC_ACT_SHOT:
return NULL;
}
-#endif
cl = (void *)res.class;
if (!cl) {
if (res.classid == sch->handle)
@@ -631,13 +629,11 @@ static int htb_enqueue(struct sk_buff *skb, struct Qdisc *sch,
} else {
return qdisc_drop(skb, sch, to_free);
}
-#ifdef CONFIG_NET_CLS_ACT
} else if (!cl) {
if (ret & __NET_XMIT_BYPASS)
qdisc_qstats_drop(sch);
__qdisc_drop(skb, to_free);
return ret;
-#endif
} else if ((ret = qdisc_enqueue(skb, cl->leaf.q,
to_free)) != NET_XMIT_SUCCESS) {
if (net_xmit_drop_count(ret)) {
@@ -30,14 +30,13 @@ static struct Qdisc *
multiq_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr)
{
struct multiq_sched_data *q = qdisc_priv(sch);
+ struct tcf_result res = {0};
u32 band;
- struct tcf_result res;
struct tcf_proto *fl = rcu_dereference_bh(q->filter_list);
int err;
*qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
err = tcf_classify(skb, NULL, fl, &res, false);
-#ifdef CONFIG_NET_CLS_ACT
switch (err) {
case TC_ACT_STOLEN:
case TC_ACT_QUEUED:
@@ -47,7 +46,6 @@ multiq_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr)
case TC_ACT_SHOT:
return NULL;
}
-#endif
band = skb_get_queue_mapping(skb);
if (band >= q->bands)
@@ -64,7 +62,6 @@ multiq_enqueue(struct sk_buff *skb, struct Qdisc *sch,
int ret;
qdisc = multiq_classify(skb, sch, &ret);
-#ifdef CONFIG_NET_CLS_ACT
if (qdisc == NULL) {
if (ret & __NET_XMIT_BYPASS)
@@ -72,7 +69,6 @@ multiq_enqueue(struct sk_buff *skb, struct Qdisc *sch,
__qdisc_drop(skb, to_free);
return ret;
}
-#endif
ret = qdisc_enqueue(skb, qdisc, to_free);
if (ret == NET_XMIT_SUCCESS) {
@@ -31,8 +31,8 @@ static struct Qdisc *
prio_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr)
{
struct prio_sched_data *q = qdisc_priv(sch);
+ struct tcf_result res = {0};
u32 band = skb->priority;
- struct tcf_result res;
struct tcf_proto *fl;
int err;
@@ -40,7 +40,6 @@ prio_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr)
if (TC_H_MAJ(skb->priority) != sch->handle) {
fl = rcu_dereference_bh(q->filter_list);
err = tcf_classify(skb, NULL, fl, &res, false);
-#ifdef CONFIG_NET_CLS_ACT
switch (err) {
case TC_ACT_STOLEN:
case TC_ACT_QUEUED:
@@ -50,7 +49,6 @@ prio_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr)
case TC_ACT_SHOT:
return NULL;
}
-#endif
if (!fl || err < 0) {
if (TC_H_MAJ(band))
band = 0;
@@ -73,7 +71,6 @@ prio_enqueue(struct sk_buff *skb, struct Qdisc *sch, struct sk_buff **to_free)
int ret;
qdisc = prio_classify(skb, sch, &ret);
-#ifdef CONFIG_NET_CLS_ACT
if (qdisc == NULL) {
if (ret & __NET_XMIT_BYPASS)
@@ -81,7 +78,6 @@ prio_enqueue(struct sk_buff *skb, struct Qdisc *sch, struct sk_buff **to_free)
__qdisc_drop(skb, to_free);
return ret;
}
-#endif
ret = qdisc_enqueue(skb, qdisc, to_free);
if (ret == NET_XMIT_SUCCESS) {
@@ -680,8 +680,8 @@ static struct qfq_class *qfq_classify(struct sk_buff *skb, struct Qdisc *sch,
int *qerr)
{
struct qfq_sched *q = qdisc_priv(sch);
+ struct tcf_result res = {0};
struct qfq_class *cl;
- struct tcf_result res;
struct tcf_proto *fl;
int result;
@@ -696,7 +696,6 @@ static struct qfq_class *qfq_classify(struct sk_buff *skb, struct Qdisc *sch,
fl = rcu_dereference_bh(q->filter_list);
result = tcf_classify(skb, NULL, fl, &res, false);
if (result >= 0) {
-#ifdef CONFIG_NET_CLS_ACT
switch (result) {
case TC_ACT_QUEUED:
case TC_ACT_STOLEN:
@@ -706,7 +705,6 @@ static struct qfq_class *qfq_classify(struct sk_buff *skb, struct Qdisc *sch,
case TC_ACT_SHOT:
return NULL;
}
-#endif
cl = (struct qfq_class *)res.class;
if (cl == NULL)
cl = qfq_find_class(sch, res.classid);
@@ -254,12 +254,11 @@ static bool sfb_rate_limit(struct sk_buff *skb, struct sfb_sched_data *q)
static bool sfb_classify(struct sk_buff *skb, struct tcf_proto *fl,
int *qerr, u32 *salt)
{
- struct tcf_result res;
+ struct tcf_result res = {0};
int result;
result = tcf_classify(skb, NULL, fl, &res, false);
if (result >= 0) {
-#ifdef CONFIG_NET_CLS_ACT
switch (result) {
case TC_ACT_STOLEN:
case TC_ACT_QUEUED:
@@ -269,7 +268,6 @@ static bool sfb_classify(struct sk_buff *skb, struct tcf_proto *fl,
case TC_ACT_SHOT:
return false;
}
-#endif
*salt = TC_H_MIN(res.classid);
return true;
}
@@ -164,7 +164,7 @@ static unsigned int sfq_classify(struct sk_buff *skb, struct Qdisc *sch,
int *qerr)
{
struct sfq_sched_data *q = qdisc_priv(sch);
- struct tcf_result res;
+ struct tcf_result res = {0};
struct tcf_proto *fl;
int result;
@@ -180,7 +180,6 @@ static unsigned int sfq_classify(struct sk_buff *skb, struct Qdisc *sch,
*qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
result = tcf_classify(skb, NULL, fl, &res, false);
if (result >= 0) {
-#ifdef CONFIG_NET_CLS_ACT
switch (result) {
case TC_ACT_STOLEN:
case TC_ACT_QUEUED:
@@ -190,7 +189,6 @@ static unsigned int sfq_classify(struct sk_buff *skb, struct Qdisc *sch,
case TC_ACT_SHOT:
return 0;
}
-#endif
if (TC_H_MIN(res.classid) <= q->divisor)
return TC_H_MIN(res.classid);
}
Currently there is no way to distinguish between an error and a
classification verdict, which has caused us a lot of pain with buggy
qdiscs and syzkaller.

This patch does two things. First, it disambiguates between errors and
policy decisions: the drop reason is added as part of struct tcf_result,
so that tcf_classify() can set a proper drop reason when it fails while
keeping the classification verdict as its return value. Second, it adds
a variety of more fine-grained drop reasons, under the auspices of the
skb drop reason infrastructure, describing why a packet was dropped by
the TC classification/action subsystem.

Co-developed-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: Victor Nogueira <victor@mojatatu.com>
---
v1 -> v2:
- Make tcf_classify set drop reason instead of verdict in struct
  tcf_result
- Make tcf_classify return verdict (as it was doing before)
- Only initialise struct tcf_result in tc_run
- Add new drop reasons specific to TC
- Merged v1 patch with Daniel's patch
  (https://lore.kernel.org/bpf/20231013141722.21165ef3@kernel.org/T/)
  for completeness

 include/net/dropreason-core.h | 26 ++++++++++++++++++++++++
 include/net/pkt_cls.h         |  6 ++++++
 include/net/sch_generic.h     |  3 +--
 net/core/dev.c                | 17 ++++++++++------
 net/sched/cls_api.c           | 37 +++++++++++++++++++++++++++++------
 net/sched/sch_ets.c           |  4 +---
 net/sched/sch_fq_codel.c      |  4 +---
 net/sched/sch_fq_pie.c        |  4 +---
 net/sched/sch_hfsc.c          |  4 +---
 net/sched/sch_htb.c           |  6 +-----
 net/sched/sch_multiq.c        |  6 +-----
 net/sched/sch_prio.c          |  6 +-----
 net/sched/sch_qfq.c           |  4 +---
 net/sched/sch_sfb.c           |  4 +---
 net/sched/sch_sfq.c           |  4 +---
 15 files changed, 85 insertions(+), 50 deletions(-)
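As an illustrative addendum (not part of the patch): a minimal sketch of how a
tcf_classify() caller could consume the drop reason now carried in struct
tcf_result, mirroring what tc_run()/sch_handle_ingress() do above. The function
name, the NULL block argument, and the choice of SKB_DROP_REASON_TC_INGRESS as
the fallback reason are assumptions made for the example.

/* Illustrative sketch only, not part of the patch: a hypothetical
 * ingress-style caller that seeds a coarse fallback reason and lets
 * tcf_classify() refine it on failure.
 */
#include <linux/netdevice.h>
#include <linux/skbuff.h>
#include <net/pkt_cls.h>

static int example_classify_and_drop(struct sk_buff *skb,
				     struct tcf_proto *filter_list)
{
	struct tcf_result res = {0};
	int verdict;

	/* Coarse default; refined by the classifier, e.g. to
	 * SKB_DROP_REASON_TC_CHAIN_NOTFOUND, when it returns TC_ACT_SHOT.
	 */
	res.drop_reason = SKB_DROP_REASON_TC_INGRESS;

	verdict = tcf_classify(skb, NULL, filter_list, &res, false);
	if (verdict == TC_ACT_SHOT) {
		kfree_skb_reason(skb, res.drop_reason);
		return NET_RX_DROP;
	}

	/* Policy verdicts (TC_ACT_OK, redirects, etc.) flow as before. */
	return NET_RX_SUCCESS;
}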