@@ -31,6 +31,7 @@
#include <linux/init.h>
#include <linux/interrupt.h>
#include <linux/moduleparam.h>
+#include <linux/netfilter_netdev.h>
#include <net/pkt_sched.h>
#include <net/net_namespace.h>
@@ -75,8 +76,10 @@ static void ifb_ri_tasklet(struct tasklet_struct *t)
}
while ((skb = __skb_dequeue(&txp->tq)) != NULL) {
+ /* Skip tc and netfilter to prevent redirection loop. */
skb->redirected = 0;
skb->tc_skip_classify = 1;
+ nf_skip_egress(skb, true);
u64_stats_update_begin(&txp->tsync);
txp->tx_packets++;
@@ -1861,6 +1861,7 @@ enum netdev_ml_priv_type {
* @xps_maps: XXX: need comments on this one
* @miniq_egress: clsact qdisc specific data for
* egress processing
+ * @nf_hooks_egress: netfilter hooks executed for egress packets
* @qdisc_hash: qdisc hash table
* @watchdog_timeo: Represents the timeout that is used by
* the watchdog (see dev_watchdog())
@@ -2161,6 +2162,9 @@ struct net_device {
#ifdef CONFIG_NET_CLS_ACT
struct mini_Qdisc __rcu *miniq_egress;
#endif
+#ifdef CONFIG_NETFILTER_EGRESS
+ struct nf_hook_entries __rcu *nf_hooks_egress;
+#endif
#ifdef CONFIG_NET_SCHED
DECLARE_HASHTABLE (qdisc_hash, 4);
@@ -50,11 +50,97 @@ static inline int nf_hook_ingress(struct sk_buff *skb)
}
#endif /* CONFIG_NETFILTER_INGRESS */
+#ifdef CONFIG_NETFILTER_EGRESS
+static inline bool nf_hook_egress_active(void)
+{
+#ifdef CONFIG_JUMP_LABEL
+ if (!static_key_false(&nf_hooks_needed[NFPROTO_NETDEV][NF_NETDEV_EGRESS]))
+ return false;
+#endif
+ return true;
+}
+
+/**
+ * nf_hook_egress - classify packets before transmission
+ * @skb: packet to be classified
+ * @rc: result code which shall be returned by __dev_queue_xmit() on failure
+ * @dev: netdev whose egress hooks shall be applied to @skb
+ *
+ * Returns @skb on success or %NULL if the packet was consumed or filtered.
+ * Caller must hold rcu_read_lock.
+ *
+ * On ingress, packets are classified first by tc, then by netfilter.
+ * On egress, the order is reversed for symmetry. Conceptually, tc and
+ * netfilter can be thought of as layers, with netfilter layered above tc:
+ * When tc redirects a packet to another interface, netfilter is not applied
+ * because the packet is on the tc layer.
+ *
+ * The nf_skip_egress flag controls whether netfilter is applied on egress.
+ * It is updated by __netif_receive_skb_core() and __dev_queue_xmit() when the
+ * packet passes through tc and netfilter. Because __dev_queue_xmit() may be
+ * called recursively by tunnel drivers such as vxlan, the flag is reverted to
+ * false after sch_handle_egress(). This ensures that netfilter is applied
+ * both on the overlay and underlying network.
+ */
+static inline struct sk_buff *nf_hook_egress(struct sk_buff *skb, int *rc,
+ struct net_device *dev)
+{
+ struct nf_hook_entries *e;
+ struct nf_hook_state state;
+ int ret;
+
+#ifdef CONFIG_NETFILTER_SKIP_EGRESS
+ if (skb->nf_skip_egress)
+ return skb;
+#endif
+
+ e = rcu_dereference(dev->nf_hooks_egress);
+ if (!e)
+ return skb;
+
+ nf_hook_state_init(&state, NF_NETDEV_EGRESS,
+ NFPROTO_NETDEV, dev, NULL, NULL,
+ dev_net(dev), NULL);
+ ret = nf_hook_slow(skb, &state, e, 0);
+
+ if (ret == 1) {
+ return skb;
+ } else if (ret < 0) {
+ *rc = NET_XMIT_DROP;
+ return NULL;
+ } else { /* ret == 0 */
+ *rc = NET_XMIT_SUCCESS;
+ return NULL;
+ }
+}
+#else /* CONFIG_NETFILTER_EGRESS */
+static inline bool nf_hook_egress_active(void)
+{
+ return false;
+}
+
+static inline struct sk_buff *nf_hook_egress(struct sk_buff *skb, int *rc,
+ struct net_device *dev)
+{
+ return skb;
+}
+#endif /* CONFIG_NETFILTER_EGRESS */
+
+static inline void nf_skip_egress(struct sk_buff *skb, bool skip)
+{
+#ifdef CONFIG_NETFILTER_SKIP_EGRESS
+ skb->nf_skip_egress = skip;
+#endif
+}
+
static inline void nf_hook_netdev_init(struct net_device *dev)
{
#ifdef CONFIG_NETFILTER_INGRESS
RCU_INIT_POINTER(dev->nf_hooks_ingress, NULL);
#endif
+#ifdef CONFIG_NETFILTER_EGRESS
+ RCU_INIT_POINTER(dev->nf_hooks_egress, NULL);
+#endif
}
#endif /* _NETFILTER_NETDEV_H_ */
@@ -652,6 +652,7 @@ typedef unsigned char *sk_buff_data_t;
* @tc_at_ingress: used within tc_classify to distinguish in/egress
* @redirected: packet was redirected by packet classifier
* @from_ingress: packet was redirected from the ingress path
+ * @nf_skip_egress: packet shall skip nf egress - see netfilter_netdev.h
* @peeked: this packet has been seen already, so stats have been
* done for it, don't do them again
* @nf_trace: netfilter packet trace flag
@@ -868,6 +869,9 @@ struct sk_buff {
#ifdef CONFIG_NET_REDIRECT
__u8 from_ingress:1;
#endif
+#ifdef CONFIG_NETFILTER_SKIP_EGRESS
+ __u8 nf_skip_egress:1;
+#endif
#ifdef CONFIG_TLS_DEVICE
__u8 decrypted:1;
#endif
@@ -51,6 +51,7 @@ enum nf_inet_hooks {
enum nf_dev_hooks {
NF_NETDEV_INGRESS,
+ NF_NETDEV_EGRESS,
NF_NETDEV_NUMHOOKS
};
@@ -3920,6 +3920,7 @@ EXPORT_SYMBOL(dev_loopback_xmit);
static struct sk_buff *
sch_handle_egress(struct sk_buff *skb, int *ret, struct net_device *dev)
{
+#ifdef CONFIG_NET_CLS_ACT
struct mini_Qdisc *miniq = rcu_dereference_bh(dev->miniq_egress);
struct tcf_result cl_res;
@@ -3955,6 +3956,7 @@ sch_handle_egress(struct sk_buff *skb, int *ret, struct net_device *dev)
default:
break;
}
+#endif /* CONFIG_NET_CLS_ACT */
return skb;
}
@@ -4148,13 +4150,20 @@ static int __dev_queue_xmit(struct sk_buff *skb, struct net_device *sb_dev)
qdisc_pkt_len_init(skb);
#ifdef CONFIG_NET_CLS_ACT
skb->tc_at_ingress = 0;
-# ifdef CONFIG_NET_EGRESS
+#endif
+#ifdef CONFIG_NET_EGRESS
if (static_branch_unlikely(&egress_needed_key)) {
+ if (nf_hook_egress_active()) {
+ skb = nf_hook_egress(skb, &rc, dev);
+ if (!skb)
+ goto out;
+ }
+ nf_skip_egress(skb, true);
skb = sch_handle_egress(skb, &rc, dev);
if (!skb)
goto out;
+ nf_skip_egress(skb, false);
}
-# endif
#endif
/* If device/qdisc don't need skb->dst, release it right now while
* its hot in this cpu cache.
@@ -5296,6 +5305,7 @@ static int __netif_receive_skb_core(struct sk_buff **pskb, bool pfmemalloc,
if (static_branch_unlikely(&ingress_needed_key)) {
bool another = false;
+ nf_skip_egress(skb, true);
skb = sch_handle_ingress(skb, &pt_prev, &ret, orig_dev,
&another);
if (another)
@@ -5303,6 +5313,7 @@ static int __netif_receive_skb_core(struct sk_buff **pskb, bool pfmemalloc,
if (!skb)
goto out;
+ nf_skip_egress(skb, false);
if (nf_ingress(skb, &pt_prev, &ret, orig_dev) < 0)
goto out;
}
@@ -10,6 +10,17 @@ config NETFILTER_INGRESS
This allows you to classify packets from ingress using the Netfilter
infrastructure.
+config NETFILTER_EGRESS
+ bool "Netfilter egress support"
+ default y
+ select NET_EGRESS
+ help
+ This allows you to classify packets before transmission using the
+ Netfilter infrastructure.
+
+config NETFILTER_SKIP_EGRESS
+ def_bool NETFILTER_EGRESS && (NET_CLS_ACT || IFB)
+
config NETFILTER_NETLINK
tristate
@@ -316,6 +316,12 @@ nf_hook_entry_head(struct net *net, int pf, unsigned int hooknum,
if (dev && dev_net(dev) == net)
return &dev->nf_hooks_ingress;
}
+#endif
+#ifdef CONFIG_NETFILTER_EGRESS
+ if (hooknum == NF_NETDEV_EGRESS) {
+ if (dev && dev_net(dev) == net)
+ return &dev->nf_hooks_egress;
+ }
#endif
WARN_ON_ONCE(1);
return NULL;
@@ -344,6 +350,11 @@ static inline bool nf_ingress_hook(const struct nf_hook_ops *reg, int pf)
return false;
}
+static inline bool nf_egress_hook(const struct nf_hook_ops *reg, int pf)
+{
+ return pf == NFPROTO_NETDEV && reg->hooknum == NF_NETDEV_EGRESS;
+}
+
static void nf_static_key_inc(const struct nf_hook_ops *reg, int pf)
{
#ifdef CONFIG_JUMP_LABEL
@@ -383,9 +394,18 @@ static int __nf_register_net_hook(struct net *net, int pf,
switch (pf) {
case NFPROTO_NETDEV:
- err = nf_ingress_check(net, reg, NF_NETDEV_INGRESS);
- if (err < 0)
- return err;
+#ifndef CONFIG_NETFILTER_INGRESS
+ if (reg->hooknum == NF_NETDEV_INGRESS)
+ return -EOPNOTSUPP;
+#endif
+#ifndef CONFIG_NETFILTER_EGRESS
+ if (reg->hooknum == NF_NETDEV_EGRESS)
+ return -EOPNOTSUPP;
+#endif
+ if ((reg->hooknum != NF_NETDEV_INGRESS &&
+ reg->hooknum != NF_NETDEV_EGRESS) ||
+ !reg->dev || dev_net(reg->dev) != net)
+ return -EINVAL;
break;
case NFPROTO_INET:
if (reg->hooknum != NF_INET_INGRESS)
@@ -417,6 +437,10 @@ static int __nf_register_net_hook(struct net *net, int pf,
#ifdef CONFIG_NETFILTER_INGRESS
if (nf_ingress_hook(reg, pf))
net_inc_ingress_queue();
+#endif
+#ifdef CONFIG_NETFILTER_EGRESS
+ if (nf_egress_hook(reg, pf))
+ net_inc_egress_queue();
#endif
nf_static_key_inc(reg, pf);
@@ -474,6 +498,10 @@ static void __nf_unregister_net_hook(struct net *net, int pf,
#ifdef CONFIG_NETFILTER_INGRESS
if (nf_ingress_hook(reg, pf))
net_dec_ingress_queue();
+#endif
+#ifdef CONFIG_NETFILTER_EGRESS
+ if (nf_egress_hook(reg, pf))
+ net_dec_egress_queue();
#endif
nf_static_key_dec(reg, pf);
} else {
@@ -185,7 +185,7 @@ static const struct nf_hook_entries *
nfnl_hook_entries_head(u8 pf, unsigned int hook, struct net *net, const char *dev)
{
const struct nf_hook_entries *hook_head = NULL;
-#ifdef CONFIG_NETFILTER_INGRESS
+#if defined(CONFIG_NETFILTER_INGRESS) || defined(CONFIG_NETFILTER_EGRESS)
struct net_device *netdev;
#endif
@@ -221,9 +221,9 @@ nfnl_hook_entries_head(u8 pf, unsigned int hook, struct net *net, const char *de
hook_head = rcu_dereference(net->nf.hooks_decnet[hook]);
break;
#endif
-#ifdef CONFIG_NETFILTER_INGRESS
+#if defined(CONFIG_NETFILTER_INGRESS) || defined(CONFIG_NETFILTER_EGRESS)
case NFPROTO_NETDEV:
- if (hook != NF_NETDEV_INGRESS)
+ if (hook >= NF_NETDEV_NUMHOOKS)
return ERR_PTR(-EOPNOTSUPP);
if (!dev)
@@ -233,7 +233,15 @@ nfnl_hook_entries_head(u8 pf, unsigned int hook, struct net *net, const char *de
if (!netdev)
return ERR_PTR(-ENODEV);
- return rcu_dereference(netdev->nf_hooks_ingress);
+#ifdef CONFIG_NETFILTER_INGRESS
+ if (hook == NF_NETDEV_INGRESS)
+ return rcu_dereference(netdev->nf_hooks_ingress);
+#endif
+#ifdef CONFIG_NETFILTER_EGRESS
+ if (hook == NF_NETDEV_EGRESS)
+ return rcu_dereference(netdev->nf_hooks_egress);
+#endif
+ fallthrough;
#endif
default:
return ERR_PTR(-EPROTONOSUPPORT);
@@ -310,9 +310,11 @@ static const struct nft_chain_type nft_chain_filter_netdev = {
.name = "filter",
.type = NFT_CHAIN_T_DEFAULT,
.family = NFPROTO_NETDEV,
- .hook_mask = (1 << NF_NETDEV_INGRESS),
+ .hook_mask = (1 << NF_NETDEV_INGRESS) |
+ (1 << NF_NETDEV_EGRESS),
.hooks = {
[NF_NETDEV_INGRESS] = nft_do_chain_netdev,
+ [NF_NETDEV_EGRESS] = nft_do_chain_netdev,
},
};