diff mbox series

[bpf-next,v2,4/4] devmap: Exclude XDP broadcast to master device

Message ID 20210624091843.5151-5-joamaki@gmail.com (mailing list archive)
State Superseded
Delegated to: BPF
Headers show
Series XDP bonding support | expand

Checks

Context Check Description
netdev/cover_letter success Link
netdev/fixes_present success Link
netdev/patch_count success Link
netdev/tree_selection success Clearly marked for bpf-next
netdev/subject_prefix success Link
netdev/cc_maintainers warning 9 maintainers not CCed: yhs@fb.com kpsingh@kernel.org hawk@kernel.org kafai@fb.com ast@kernel.org john.fastabend@gmail.com songliubraving@fb.com davem@davemloft.net kuba@kernel.org
netdev/source_inline success Was 0 now: 0
netdev/verify_signedoff success Link
netdev/module_param success Was 0 now: 0
netdev/build_32bit success Errors and warnings before: 1 this patch: 1
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/verify_fixes success Link
netdev/checkpatch warning WARNING: line length of 81 exceeds 80 columns WARNING: line length of 84 exceeds 80 columns WARNING: line length of 91 exceeds 80 columns WARNING: line length of 93 exceeds 80 columns
netdev/build_allmodconfig_warn success Errors and warnings before: 1 this patch: 1
netdev/header_inline success Link

Commit Message

Jussi Maki June 24, 2021, 9:18 a.m. UTC
From: Jussi Maki <joamaki@gmail.com>

If the ingress device is bond slave, do not broadcast back
through it or the bond master.

Signed-off-by: Jussi Maki <joamaki@gmail.com>
---
 kernel/bpf/devmap.c | 34 ++++++++++++++++++++++++++++------
 1 file changed, 28 insertions(+), 6 deletions(-)

Comments

Jay Vosburgh July 1, 2021, 6:12 p.m. UTC | #1
joamaki@gmail.com wrote:

>From: Jussi Maki <joamaki@gmail.com>
>
>If the ingress device is bond slave, do not broadcast back
>through it or the bond master.
>
>Signed-off-by: Jussi Maki <joamaki@gmail.com>
>---
> kernel/bpf/devmap.c | 34 ++++++++++++++++++++++++++++------
> 1 file changed, 28 insertions(+), 6 deletions(-)
>
>diff --git a/kernel/bpf/devmap.c b/kernel/bpf/devmap.c
>index 2a75e6c2d27d..0864fb28c8b5 100644
>--- a/kernel/bpf/devmap.c
>+++ b/kernel/bpf/devmap.c
>@@ -514,9 +514,11 @@ int dev_map_enqueue(struct bpf_dtab_netdev *dst, struct xdp_buff *xdp,
> }
> 
> static bool is_valid_dst(struct bpf_dtab_netdev *obj, struct xdp_buff *xdp,
>-			 int exclude_ifindex)
>+			 int exclude_ifindex, int exclude_ifindex_master)
> {
>-	if (!obj || obj->dev->ifindex == exclude_ifindex ||
>+	if (!obj ||
>+	    obj->dev->ifindex == exclude_ifindex ||
>+	    obj->dev->ifindex == exclude_ifindex_master ||
> 	    !obj->dev->netdev_ops->ndo_xdp_xmit)
> 		return false;
> 
>@@ -546,12 +548,19 @@ int dev_map_enqueue_multi(struct xdp_buff *xdp, struct net_device *dev_rx,
> {
> 	struct bpf_dtab *dtab = container_of(map, struct bpf_dtab, map);
> 	int exclude_ifindex = exclude_ingress ? dev_rx->ifindex : 0;
>+	int exclude_ifindex_master = 0;
> 	struct bpf_dtab_netdev *dst, *last_dst = NULL;
> 	struct hlist_head *head;
> 	struct xdp_frame *xdpf;
> 	unsigned int i;
> 	int err;
> 
>+	if (static_branch_unlikely(&bpf_master_redirect_enabled_key)) {
>+		struct net_device *master = netdev_master_upper_dev_get_rcu(dev_rx);
>+
>+		exclude_ifindex_master = (master && exclude_ingress) ? master->ifindex : 0;
>+	}
>+

	Will the above logic do what is intended if the device stacking
isn't a simple bond -> ethX arrangement?  I.e., bond -> VLAN.?? -> ethX
or perhaps even bondA -> VLAN.?? -> bondB -> ethX ?

	-J

> 	xdpf = xdp_convert_buff_to_frame(xdp);
> 	if (unlikely(!xdpf))
> 		return -EOVERFLOW;
>@@ -559,7 +568,7 @@ int dev_map_enqueue_multi(struct xdp_buff *xdp, struct net_device *dev_rx,
> 	if (map->map_type == BPF_MAP_TYPE_DEVMAP) {
> 		for (i = 0; i < map->max_entries; i++) {
> 			dst = READ_ONCE(dtab->netdev_map[i]);
>-			if (!is_valid_dst(dst, xdp, exclude_ifindex))
>+			if (!is_valid_dst(dst, xdp, exclude_ifindex, exclude_ifindex_master))
> 				continue;
> 
> 			/* we only need n-1 clones; last_dst enqueued below */
>@@ -579,7 +588,9 @@ int dev_map_enqueue_multi(struct xdp_buff *xdp, struct net_device *dev_rx,
> 			head = dev_map_index_hash(dtab, i);
> 			hlist_for_each_entry_rcu(dst, head, index_hlist,
> 						 lockdep_is_held(&dtab->index_lock)) {
>-				if (!is_valid_dst(dst, xdp, exclude_ifindex))
>+				if (!is_valid_dst(dst, xdp,
>+						  exclude_ifindex,
>+						  exclude_ifindex_master))
> 					continue;
> 
> 				/* we only need n-1 clones; last_dst enqueued below */
>@@ -646,16 +657,25 @@ int dev_map_redirect_multi(struct net_device *dev, struct sk_buff *skb,
> {
> 	struct bpf_dtab *dtab = container_of(map, struct bpf_dtab, map);
> 	int exclude_ifindex = exclude_ingress ? dev->ifindex : 0;
>+	int exclude_ifindex_master = 0;
> 	struct bpf_dtab_netdev *dst, *last_dst = NULL;
> 	struct hlist_head *head;
> 	struct hlist_node *next;
> 	unsigned int i;
> 	int err;
> 
>+	if (static_branch_unlikely(&bpf_master_redirect_enabled_key)) {
>+		struct net_device *master = netdev_master_upper_dev_get_rcu(dev);
>+
>+		exclude_ifindex_master = (master && exclude_ingress) ? master->ifindex : 0;
>+	}
>+
> 	if (map->map_type == BPF_MAP_TYPE_DEVMAP) {
> 		for (i = 0; i < map->max_entries; i++) {
> 			dst = READ_ONCE(dtab->netdev_map[i]);
>-			if (!dst || dst->dev->ifindex == exclude_ifindex)
>+			if (!dst ||
>+			    dst->dev->ifindex == exclude_ifindex ||
>+			    dst->dev->ifindex == exclude_ifindex_master)
> 				continue;
> 
> 			/* we only need n-1 clones; last_dst enqueued below */
>@@ -674,7 +694,9 @@ int dev_map_redirect_multi(struct net_device *dev, struct sk_buff *skb,
> 		for (i = 0; i < dtab->n_buckets; i++) {
> 			head = dev_map_index_hash(dtab, i);
> 			hlist_for_each_entry_safe(dst, next, head, index_hlist) {
>-				if (!dst || dst->dev->ifindex == exclude_ifindex)
>+				if (!dst ||
>+				    dst->dev->ifindex == exclude_ifindex ||
>+				    dst->dev->ifindex == exclude_ifindex_master)
> 					continue;
> 
> 				/* we only need n-1 clones; last_dst enqueued below */
>-- 
>2.27.0
>

---
	-Jay Vosburgh, jay.vosburgh@canonical.com
Jussi Maki July 5, 2021, 11:44 a.m. UTC | #2
On Thu, Jul 1, 2021 at 9:12 PM Jay Vosburgh <jay.vosburgh@canonical.com> wrote:
> >+      if (static_branch_unlikely(&bpf_master_redirect_enabled_key)) {
> >+              struct net_device *master = netdev_master_upper_dev_get_rcu(dev_rx);
> >+
> >+              exclude_ifindex_master = (master && exclude_ingress) ? master->ifindex : 0;
> >+      }
> >+
>
>         Will the above logic do what is intended if the device stacking
> isn't a simple bond -> ethX arrangement?  I.e., bond -> VLAN.?? -> ethX
> or perhaps even bondA -> VLAN.?? -> bondB -> ethX ?

Good point. "bond -> VLAN -> eth" isn't an issue currently as vlan
devices do not support XDP. "bondA -> bondB -> ethX" however would be
supported, so I think it makes sense to change the code to collect all
upper devices and exclude them. I'll try to follow up with an updated
patch for this soon.
diff mbox series

Patch

diff --git a/kernel/bpf/devmap.c b/kernel/bpf/devmap.c
index 2a75e6c2d27d..0864fb28c8b5 100644
--- a/kernel/bpf/devmap.c
+++ b/kernel/bpf/devmap.c
@@ -514,9 +514,11 @@  int dev_map_enqueue(struct bpf_dtab_netdev *dst, struct xdp_buff *xdp,
 }
 
 static bool is_valid_dst(struct bpf_dtab_netdev *obj, struct xdp_buff *xdp,
-			 int exclude_ifindex)
+			 int exclude_ifindex, int exclude_ifindex_master)
 {
-	if (!obj || obj->dev->ifindex == exclude_ifindex ||
+	if (!obj ||
+	    obj->dev->ifindex == exclude_ifindex ||
+	    obj->dev->ifindex == exclude_ifindex_master ||
 	    !obj->dev->netdev_ops->ndo_xdp_xmit)
 		return false;
 
@@ -546,12 +548,19 @@  int dev_map_enqueue_multi(struct xdp_buff *xdp, struct net_device *dev_rx,
 {
 	struct bpf_dtab *dtab = container_of(map, struct bpf_dtab, map);
 	int exclude_ifindex = exclude_ingress ? dev_rx->ifindex : 0;
+	int exclude_ifindex_master = 0;
 	struct bpf_dtab_netdev *dst, *last_dst = NULL;
 	struct hlist_head *head;
 	struct xdp_frame *xdpf;
 	unsigned int i;
 	int err;
 
+	if (static_branch_unlikely(&bpf_master_redirect_enabled_key)) {
+		struct net_device *master = netdev_master_upper_dev_get_rcu(dev_rx);
+
+		exclude_ifindex_master = (master && exclude_ingress) ? master->ifindex : 0;
+	}
+
 	xdpf = xdp_convert_buff_to_frame(xdp);
 	if (unlikely(!xdpf))
 		return -EOVERFLOW;
@@ -559,7 +568,7 @@  int dev_map_enqueue_multi(struct xdp_buff *xdp, struct net_device *dev_rx,
 	if (map->map_type == BPF_MAP_TYPE_DEVMAP) {
 		for (i = 0; i < map->max_entries; i++) {
 			dst = READ_ONCE(dtab->netdev_map[i]);
-			if (!is_valid_dst(dst, xdp, exclude_ifindex))
+			if (!is_valid_dst(dst, xdp, exclude_ifindex, exclude_ifindex_master))
 				continue;
 
 			/* we only need n-1 clones; last_dst enqueued below */
@@ -579,7 +588,9 @@  int dev_map_enqueue_multi(struct xdp_buff *xdp, struct net_device *dev_rx,
 			head = dev_map_index_hash(dtab, i);
 			hlist_for_each_entry_rcu(dst, head, index_hlist,
 						 lockdep_is_held(&dtab->index_lock)) {
-				if (!is_valid_dst(dst, xdp, exclude_ifindex))
+				if (!is_valid_dst(dst, xdp,
+						  exclude_ifindex,
+						  exclude_ifindex_master))
 					continue;
 
 				/* we only need n-1 clones; last_dst enqueued below */
@@ -646,16 +657,25 @@  int dev_map_redirect_multi(struct net_device *dev, struct sk_buff *skb,
 {
 	struct bpf_dtab *dtab = container_of(map, struct bpf_dtab, map);
 	int exclude_ifindex = exclude_ingress ? dev->ifindex : 0;
+	int exclude_ifindex_master = 0;
 	struct bpf_dtab_netdev *dst, *last_dst = NULL;
 	struct hlist_head *head;
 	struct hlist_node *next;
 	unsigned int i;
 	int err;
 
+	if (static_branch_unlikely(&bpf_master_redirect_enabled_key)) {
+		struct net_device *master = netdev_master_upper_dev_get_rcu(dev);
+
+		exclude_ifindex_master = (master && exclude_ingress) ? master->ifindex : 0;
+	}
+
 	if (map->map_type == BPF_MAP_TYPE_DEVMAP) {
 		for (i = 0; i < map->max_entries; i++) {
 			dst = READ_ONCE(dtab->netdev_map[i]);
-			if (!dst || dst->dev->ifindex == exclude_ifindex)
+			if (!dst ||
+			    dst->dev->ifindex == exclude_ifindex ||
+			    dst->dev->ifindex == exclude_ifindex_master)
 				continue;
 
 			/* we only need n-1 clones; last_dst enqueued below */
@@ -674,7 +694,9 @@  int dev_map_redirect_multi(struct net_device *dev, struct sk_buff *skb,
 		for (i = 0; i < dtab->n_buckets; i++) {
 			head = dev_map_index_hash(dtab, i);
 			hlist_for_each_entry_safe(dst, next, head, index_hlist) {
-				if (!dst || dst->dev->ifindex == exclude_ifindex)
+				if (!dst ||
+				    dst->dev->ifindex == exclude_ifindex ||
+				    dst->dev->ifindex == exclude_ifindex_master)
 					continue;
 
 				/* we only need n-1 clones; last_dst enqueued below */