
[net-next,2/4] veth: allow enabling NAPI even without XDP

Message ID: dbc26ec87852a112126c83ae546f367841ec554d.1617965243.git.pabeni@redhat.com
State: Accepted
Commit: d3256efd8e8b234a6251e4d4580bd2c3c31fdc4c
Delegated to: Netdev Maintainers
Series: veth: allow GRO even without XDP

Checks

Context                         Check    Description
netdev/cover_letter             success
netdev/fixes_present            success
netdev/patch_count              success
netdev/tree_selection           success  Clearly marked for net-next
netdev/subject_prefix           success
netdev/cc_maintainers           warning  5 maintainers not CCed: hawk@kernel.org daniel@iogearbox.net bpf@vger.kernel.org ast@kernel.org john.fastabend@gmail.com
netdev/source_inline            success  Was 0 now: 0
netdev/verify_signedoff         success
netdev/module_param             success  Was 0 now: 0
netdev/build_32bit              success  Errors and warnings before: 0 this patch: 0
netdev/kdoc                     success  Errors and warnings before: 0 this patch: 0
netdev/verify_fixes             success
netdev/checkpatch               warning  WARNING: line length of 81 exceeds 80 columns
                                         WARNING: line length of 82 exceeds 80 columns
                                         WARNING: line length of 95 exceeds 80 columns
                                         WARNING: line length of 96 exceeds 80 columns
netdev/build_allmodconfig_warn  success  Errors and warnings before: 0 this patch: 0
netdev/header_inline            success

Commit Message

Paolo Abeni April 9, 2021, 11:04 a.m. UTC
Currently the veth device has the GRO feature bit set, even though
no GRO aggregation is possible with the default configuration,
as the veth device does not hook into the GRO engine.

Flipping the GRO feature bit from user-space is a no-op, unless
XDP is enabled. In such a scenario GRO could actually take place, but
TSO is forced off on the peer device.

This change allows user-space to really control the GRO feature, with
no need for an XDP program.

The GRO feature bit is now cleared by default - so that there are no
user-visible behavior changes with the default configuration.
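
User-space can flip the bit with the usual ethtool knob, e.g.:

    ethtool -K veth0 gro on

(veth0 here is just an example device name).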

When the GRO bit is set, the per-queue NAPI instances are initialized
and registered. On xmit, when NAPI instances are available, we try
to use them.
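
Condensed from veth_xmit() in the diff below (not a standalone
snippet; the drop path is omitted), the decision now looks like:

	/* condensed from veth_xmit() below: the rq->napi pointer, not
	 * rq->xdp_prog, now gates the NAPI processing path
	 */
	use_napi = rcu_access_pointer(rq->napi);
	skb_record_rx_queue(skb, rxq);

	skb_tx_timestamp(skb);
	if (likely(veth_forward_skb(rcv, skb, rq, use_napi) == NET_RX_SUCCESS)) {
		if (!use_napi)
			dev_lstats_add(dev, length);
	}

	if (use_napi)
		__veth_xdp_flush(rq);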

Some additional checks are in place to ensure we initialize/delete NAPIs
only when needed in case of overlapping XDP and GRO configuration
changes.
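
Those checks boil down to one condition, sketched here as a
hypothetical helper (not part of the patch; the actual logic is
spread across veth_open()/veth_close(), veth_enable_xdp()/
veth_disable_xdp() and veth_set_features() in the diff below):

	/* hypothetical summary helper, not in the patch: the per-queue
	 * NAPI instances must exist while the device is up and either
	 * an XDP program is attached or user-space wants GRO
	 */
	static bool veth_napi_wanted(const struct net_device *dev)
	{
		const struct veth_priv *priv = netdev_priv(dev);

		return (dev->flags & IFF_UP) &&
		       (priv->_xdp_prog || veth_gro_requested(dev));
	}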

Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
 drivers/net/veth.c | 129 ++++++++++++++++++++++++++++++++++++++++-----
 1 file changed, 116 insertions(+), 13 deletions(-)

Comments

Toke Høiland-Jørgensen April 9, 2021, 2:58 p.m. UTC | #1
Paolo Abeni <pabeni@redhat.com> writes:

> Currently the veth device has the GRO feature bit set, even if
> no GRO aggregation is possible with the default configuration,
> as the veth device does not hook into the GRO engine.
>
> Flipping the GRO feature bit from user-space is a no-op, unless
> XDP is enabled. In such scenario GRO could actually take place, but
> TSO is forced to off on the peer device.
>
> This change allow user-space to really control the GRO feature, with
> no need for an XDP program.
>
> The GRO feature bit is now cleared by default - so that there are no
> user-visible behavior changes with the default configuration.
>
> When the GRO bit is set, the per-queue NAPI instances are initialized
> and registered. On xmit, when napi instances are available, we try
> to use them.

Am I mistaken in thinking that this also makes XDP redirect into a veth
work without having to load an XDP program on the peer device? That's
been a long-outstanding thing we've been meaning to fix, so that would
be awesome! :)

-Toke
Paolo Abeni April 9, 2021, 3:20 p.m. UTC | #2
On Fri, 2021-04-09 at 16:58 +0200, Toke Høiland-Jørgensen wrote:
> Paolo Abeni <pabeni@redhat.com> writes:
> 
> > Currently the veth device has the GRO feature bit set, even if
> > no GRO aggregation is possible with the default configuration,
> > as the veth device does not hook into the GRO engine.
> > 
> > Flipping the GRO feature bit from user-space is a no-op, unless
> > XDP is enabled. In such scenario GRO could actually take place, but
> > TSO is forced to off on the peer device.
> > 
> > This change allow user-space to really control the GRO feature, with
> > no need for an XDP program.
> > 
> > The GRO feature bit is now cleared by default - so that there are no
> > user-visible behavior changes with the default configuration.
> > 
> > When the GRO bit is set, the per-queue NAPI instances are initialized
> > and registered. On xmit, when napi instances are available, we try
> > to use them.
> 
> Am I mistaken in thinking that this also makes XDP redirect into a veth
> work without having to load an XDP program on the peer device? That's
> been a long-outstanding thing we've been meaning to fix, so that would
> be awesome! :)

I have not experimented with that, and I admit gross ignorance WRT this
topic, but AFAICS the bits needed to get XDP redirect working on
veth are the ptr_ring initialization and an available NAPI instance.

With this patch both are in place when GRO is enabled, so I guess XDP
redirect should work, too (modulo bugs in an untested scenario).

Thanks!

Paolo
Toke Høiland-Jørgensen April 16, 2021, 3:29 p.m. UTC | #3
Paolo Abeni <pabeni@redhat.com> writes:

> On Fri, 2021-04-09 at 16:58 +0200, Toke Høiland-Jørgensen wrote:
>> Paolo Abeni <pabeni@redhat.com> writes:
>> 
>> > Currently the veth device has the GRO feature bit set, even if
>> > no GRO aggregation is possible with the default configuration,
>> > as the veth device does not hook into the GRO engine.
>> > 
>> > Flipping the GRO feature bit from user-space is a no-op, unless
>> > XDP is enabled. In such scenario GRO could actually take place, but
>> > TSO is forced to off on the peer device.
>> > 
>> > This change allow user-space to really control the GRO feature, with
>> > no need for an XDP program.
>> > 
>> > The GRO feature bit is now cleared by default - so that there are no
>> > user-visible behavior changes with the default configuration.
>> > 
>> > When the GRO bit is set, the per-queue NAPI instances are initialized
>> > and registered. On xmit, when napi instances are available, we try
>> > to use them.
>> 
>> Am I mistaken in thinking that this also makes XDP redirect into a veth
>> work without having to load an XDP program on the peer device? That's
>> been a long-outstanding thing we've been meaning to fix, so that would
>> be awesome! :)
>
> I have not experimented that, and I admit gross ignorance WRT this
> argument, but AFAICS the needed bits to get XDP redirect working on
> veth are the ptr_ring initialization and the napi instance available.
>
> With this patch both are in place when GRO is enabled, so I guess XPD
> redirect should work, too (modulo bugs for untested scenario).

OK, finally got around to testing this; it doesn't quite work with just
your patch, because veth_xdp_xmit() still checks for rq->xdp_prog
instead of rq->napi. Fixing this indeed enabled veth to be an
XDP_REDIRECT target without an XDP program loaded on the peer. So yay!
I'll send a followup fixing that check.
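Presumably along these lines (just a sketch of the idea, not the
actual followup patch; error handling elided):

-	if (!rcu_access_pointer(rq->xdp_prog))
+	/* rq->napi is non-NULL whenever NAPI is enabled (by XDP or GRO),
+	 * which also guarantees the ptr_ring is initialized
+	 */
+	if (!rcu_access_pointer(rq->napi))
 		goto drop;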

So with this we seem to have some nice improvements in both
functionality and performance when GRO is turned on; so any reason why
we shouldn't just flip the default to on?

-Toke
Paolo Abeni April 16, 2021, 5:26 p.m. UTC | #4
On Fri, 2021-04-16 at 17:29 +0200, Toke Høiland-Jørgensen wrote:
> Paolo Abeni <pabeni@redhat.com> writes:
> 
> > On Fri, 2021-04-09 at 16:58 +0200, Toke Høiland-Jørgensen wrote:
> > > Paolo Abeni <pabeni@redhat.com> writes:
> > > 
> > > > Currently the veth device has the GRO feature bit set, even if
> > > > no GRO aggregation is possible with the default configuration,
> > > > as the veth device does not hook into the GRO engine.
> > > > 
> > > > Flipping the GRO feature bit from user-space is a no-op, unless
> > > > XDP is enabled. In such scenario GRO could actually take place, but
> > > > TSO is forced to off on the peer device.
> > > > 
> > > > This change allow user-space to really control the GRO feature, with
> > > > no need for an XDP program.
> > > > 
> > > > The GRO feature bit is now cleared by default - so that there are no
> > > > user-visible behavior changes with the default configuration.
> > > > 
> > > > When the GRO bit is set, the per-queue NAPI instances are initialized
> > > > and registered. On xmit, when napi instances are available, we try
> > > > to use them.
> > > 
> > > Am I mistaken in thinking that this also makes XDP redirect into a veth
> > > work without having to load an XDP program on the peer device? That's
> > > been a long-outstanding thing we've been meaning to fix, so that would
> > > be awesome! :)
> > 
> > I have not experimented that, and I admit gross ignorance WRT this
> > argument, but AFAICS the needed bits to get XDP redirect working on
> > veth are the ptr_ring initialization and the napi instance available.
> > 
> > With this patch both are in place when GRO is enabled, so I guess XPD
> > redirect should work, too (modulo bugs for untested scenario).
> 
> OK, finally got around to testing this; it doesn't quite work with just
> your patch, because veth_xdp_xmit() still checks for rq->xdp_prog
> instead of rq->napi. Fixing this indeed enabled veth to be an
> XDP_REDIRECT target without an XDP program loaded on the peer. So yay!
> I'll send a followup fixing that check.

Thank you for double checking!

> So with this we seem to have some nice improvements in both
> functionality and performance when GRO is turned on; so any reason why
> we shouldn't just flip the default to on?

Uhmmm... patch 3/4 should avoid the GRO overhead for most cases where
we can't leverage the aggregation benefit, but I'm not 110% sure that
enabling GRO by default will not cause performance regressions in some
scenarios.

If this proves to always be a win, we can still change the default
later, I think.

Cheers,

Paolo
Toke Høiland-Jørgensen April 16, 2021, 6:19 p.m. UTC | #5
Paolo Abeni <pabeni@redhat.com> writes:

> On Fri, 2021-04-16 at 17:29 +0200, Toke Høiland-Jørgensen wrote:
>> Paolo Abeni <pabeni@redhat.com> writes:
>> 
>> > On Fri, 2021-04-09 at 16:58 +0200, Toke Høiland-Jørgensen wrote:
>> > > Paolo Abeni <pabeni@redhat.com> writes:
>> > > 
>> > > > Currently the veth device has the GRO feature bit set, even if
>> > > > no GRO aggregation is possible with the default configuration,
>> > > > as the veth device does not hook into the GRO engine.
>> > > > 
>> > > > Flipping the GRO feature bit from user-space is a no-op, unless
>> > > > XDP is enabled. In such scenario GRO could actually take place, but
>> > > > TSO is forced to off on the peer device.
>> > > > 
>> > > > This change allow user-space to really control the GRO feature, with
>> > > > no need for an XDP program.
>> > > > 
>> > > > The GRO feature bit is now cleared by default - so that there are no
>> > > > user-visible behavior changes with the default configuration.
>> > > > 
>> > > > When the GRO bit is set, the per-queue NAPI instances are initialized
>> > > > and registered. On xmit, when napi instances are available, we try
>> > > > to use them.
>> > > 
>> > > Am I mistaken in thinking that this also makes XDP redirect into a veth
>> > > work without having to load an XDP program on the peer device? That's
>> > > been a long-outstanding thing we've been meaning to fix, so that would
>> > > be awesome! :)
>> > 
>> > I have not experimented that, and I admit gross ignorance WRT this
>> > argument, but AFAICS the needed bits to get XDP redirect working on
>> > veth are the ptr_ring initialization and the napi instance available.
>> > 
>> > With this patch both are in place when GRO is enabled, so I guess XPD
>> > redirect should work, too (modulo bugs for untested scenario).
>> 
>> OK, finally got around to testing this; it doesn't quite work with just
>> your patch, because veth_xdp_xmit() still checks for rq->xdp_prog
>> instead of rq->napi. Fixing this indeed enabled veth to be an
>> XDP_REDIRECT target without an XDP program loaded on the peer. So yay!
>> I'll send a followup fixing that check.
>
> Thank you for double checking!
>
>> So with this we seem to have some nice improvements in both
>> functionality and performance when GRO is turned on; so any reason why
>> we shouldn't just flip the default to on?
>
> Uhmmm... patch 3/4 should avoid the GRO overhead for most cases where
> we can't leverage the aggregation benefit, but I'm not 110% sure that
> enabling GRO by default will not cause performance regressions in some
> scenarios.
>
> It this proves to be always a win we can still change the default
> later, I think.

Alright, sure, let's hold off on that and revisit once this has had some
more testing :)

-Toke

Patch

diff --git a/drivers/net/veth.c b/drivers/net/veth.c
index ad36e7ed16134..ca44e82d1edeb 100644
--- a/drivers/net/veth.c
+++ b/drivers/net/veth.c
@@ -57,6 +57,7 @@  struct veth_rq_stats {
 
 struct veth_rq {
 	struct napi_struct	xdp_napi;
+	struct napi_struct __rcu *napi; /* points to xdp_napi when the latter is initialized */
 	struct net_device	*dev;
 	struct bpf_prog __rcu	*xdp_prog;
 	struct xdp_mem_info	xdp_mem;
@@ -287,7 +288,7 @@  static netdev_tx_t veth_xmit(struct sk_buff *skb, struct net_device *dev)
 	struct veth_rq *rq = NULL;
 	struct net_device *rcv;
 	int length = skb->len;
-	bool rcv_xdp = false;
+	bool use_napi = false;
 	int rxq;
 
 	rcu_read_lock();
@@ -301,20 +302,24 @@  static netdev_tx_t veth_xmit(struct sk_buff *skb, struct net_device *dev)
 	rxq = skb_get_queue_mapping(skb);
 	if (rxq < rcv->real_num_rx_queues) {
 		rq = &rcv_priv->rq[rxq];
-		rcv_xdp = rcu_access_pointer(rq->xdp_prog);
+
+		/* The napi pointer is available when an XDP program is
+		 * attached or when GRO is enabled
+		 */
+		use_napi = rcu_access_pointer(rq->napi);
 		skb_record_rx_queue(skb, rxq);
 	}
 
 	skb_tx_timestamp(skb);
-	if (likely(veth_forward_skb(rcv, skb, rq, rcv_xdp) == NET_RX_SUCCESS)) {
-		if (!rcv_xdp)
+	if (likely(veth_forward_skb(rcv, skb, rq, use_napi) == NET_RX_SUCCESS)) {
+		if (!use_napi)
 			dev_lstats_add(dev, length);
 	} else {
 drop:
 		atomic64_inc(&priv->dropped);
 	}
 
-	if (rcv_xdp)
+	if (use_napi)
 		__veth_xdp_flush(rq);
 
 	rcu_read_unlock();
@@ -891,7 +896,7 @@  static int veth_poll(struct napi_struct *napi, int budget)
 	return done;
 }
 
-static int veth_napi_add(struct net_device *dev)
+static int __veth_napi_enable(struct net_device *dev)
 {
 	struct veth_priv *priv = netdev_priv(dev);
 	int err, i;
@@ -908,6 +913,7 @@  static int veth_napi_add(struct net_device *dev)
 		struct veth_rq *rq = &priv->rq[i];
 
 		napi_enable(&rq->xdp_napi);
+		rcu_assign_pointer(priv->rq[i].napi, &priv->rq[i].xdp_napi);
 	}
 
 	return 0;
@@ -926,6 +932,7 @@  static void veth_napi_del(struct net_device *dev)
 	for (i = 0; i < dev->real_num_rx_queues; i++) {
 		struct veth_rq *rq = &priv->rq[i];
 
+		rcu_assign_pointer(priv->rq[i].napi, NULL);
 		napi_disable(&rq->xdp_napi);
 		__netif_napi_del(&rq->xdp_napi);
 	}
@@ -939,8 +946,14 @@  static void veth_napi_del(struct net_device *dev)
 	}
 }
 
+static bool veth_gro_requested(const struct net_device *dev)
+{
+	return !!(dev->wanted_features & NETIF_F_GRO);
+}
+
 static int veth_enable_xdp(struct net_device *dev)
 {
+	bool napi_already_on = veth_gro_requested(dev) && (dev->flags & IFF_UP);
 	struct veth_priv *priv = netdev_priv(dev);
 	int err, i;
 
@@ -948,7 +961,8 @@  static int veth_enable_xdp(struct net_device *dev)
 		for (i = 0; i < dev->real_num_rx_queues; i++) {
 			struct veth_rq *rq = &priv->rq[i];
 
-			netif_napi_add(dev, &rq->xdp_napi, veth_poll, NAPI_POLL_WEIGHT);
+			if (!napi_already_on)
+				netif_napi_add(dev, &rq->xdp_napi, veth_poll, NAPI_POLL_WEIGHT);
 			err = xdp_rxq_info_reg(&rq->xdp_rxq, dev, i, rq->xdp_napi.napi_id);
 			if (err < 0)
 				goto err_rxq_reg;
@@ -963,13 +977,25 @@  static int veth_enable_xdp(struct net_device *dev)
 			rq->xdp_mem = rq->xdp_rxq.mem;
 		}
 
-		err = veth_napi_add(dev);
-		if (err)
-			goto err_rxq_reg;
+		if (!napi_already_on) {
+			err = __veth_napi_enable(dev);
+			if (err)
+				goto err_rxq_reg;
+
+			if (!veth_gro_requested(dev)) {
+				/* user-space did not require GRO, but adding XDP
+				 * is supposed to get GRO working
+				 */
+				dev->features |= NETIF_F_GRO;
+				netdev_features_change(dev);
+			}
+		}
 	}
 
-	for (i = 0; i < dev->real_num_rx_queues; i++)
+	for (i = 0; i < dev->real_num_rx_queues; i++) {
 		rcu_assign_pointer(priv->rq[i].xdp_prog, priv->_xdp_prog);
+		rcu_assign_pointer(priv->rq[i].napi, &priv->rq[i].xdp_napi);
+	}
 
 	return 0;
 err_reg_mem:
@@ -979,7 +1005,8 @@  static int veth_enable_xdp(struct net_device *dev)
 		struct veth_rq *rq = &priv->rq[i];
 
 		xdp_rxq_info_unreg(&rq->xdp_rxq);
-		netif_napi_del(&rq->xdp_napi);
+		if (!napi_already_on)
+			netif_napi_del(&rq->xdp_napi);
 	}
 
 	return err;
@@ -992,7 +1019,19 @@  static void veth_disable_xdp(struct net_device *dev)
 
 	for (i = 0; i < dev->real_num_rx_queues; i++)
 		rcu_assign_pointer(priv->rq[i].xdp_prog, NULL);
-	veth_napi_del(dev);
+
+	if (!netif_running(dev) || !veth_gro_requested(dev)) {
+		veth_napi_del(dev);
+
+		/* if user-space did not require GRO, since adding XDP
+		 * enabled it, clear it now
+		 */
+		if (!veth_gro_requested(dev) && netif_running(dev)) {
+			dev->features &= ~NETIF_F_GRO;
+			netdev_features_change(dev);
+		}
+	}
+
 	for (i = 0; i < dev->real_num_rx_queues; i++) {
 		struct veth_rq *rq = &priv->rq[i];
 
@@ -1001,6 +1040,29 @@  static void veth_disable_xdp(struct net_device *dev)
 	}
 }
 
+static int veth_napi_enable(struct net_device *dev)
+{
+	struct veth_priv *priv = netdev_priv(dev);
+	int err, i;
+
+	for (i = 0; i < dev->real_num_rx_queues; i++) {
+		struct veth_rq *rq = &priv->rq[i];
+
+		netif_napi_add(dev, &rq->xdp_napi, veth_poll, NAPI_POLL_WEIGHT);
+	}
+
+	err = __veth_napi_enable(dev);
+	if (err) {
+		for (i = 0; i < dev->real_num_rx_queues; i++) {
+			struct veth_rq *rq = &priv->rq[i];
+
+			netif_napi_del(&rq->xdp_napi);
+		}
+		return err;
+	}
+	return err;
+}
+
 static int veth_open(struct net_device *dev)
 {
 	struct veth_priv *priv = netdev_priv(dev);
@@ -1014,6 +1076,10 @@  static int veth_open(struct net_device *dev)
 		err = veth_enable_xdp(dev);
 		if (err)
 			return err;
+	} else if (veth_gro_requested(dev)) {
+		err = veth_napi_enable(dev);
+		if (err)
+			return err;
 	}
 
 	if (peer->flags & IFF_UP) {
@@ -1035,6 +1101,8 @@  static int veth_close(struct net_device *dev)
 
 	if (priv->_xdp_prog)
 		veth_disable_xdp(dev);
+	else if (veth_gro_requested(dev))
+		veth_napi_del(dev);
 
 	return 0;
 }
@@ -1133,10 +1201,32 @@  static netdev_features_t veth_fix_features(struct net_device *dev,
 		if (peer_priv->_xdp_prog)
 			features &= ~NETIF_F_GSO_SOFTWARE;
 	}
+	if (priv->_xdp_prog)
+		features |= NETIF_F_GRO;
 
 	return features;
 }
 
+static int veth_set_features(struct net_device *dev,
+			     netdev_features_t features)
+{
+	netdev_features_t changed = features ^ dev->features;
+	struct veth_priv *priv = netdev_priv(dev);
+	int err;
+
+	if (!(changed & NETIF_F_GRO) || !(dev->flags & IFF_UP) || priv->_xdp_prog)
+		return 0;
+
+	if (features & NETIF_F_GRO) {
+		err = veth_napi_enable(dev);
+		if (err)
+			return err;
+	} else {
+		veth_napi_del(dev);
+	}
+	return 0;
+}
+
 static void veth_set_rx_headroom(struct net_device *dev, int new_hr)
 {
 	struct veth_priv *peer_priv, *priv = netdev_priv(dev);
@@ -1255,6 +1345,7 @@  static const struct net_device_ops veth_netdev_ops = {
 #endif
 	.ndo_get_iflink		= veth_get_iflink,
 	.ndo_fix_features	= veth_fix_features,
+	.ndo_set_features	= veth_set_features,
 	.ndo_features_check	= passthru_features_check,
 	.ndo_set_rx_headroom	= veth_set_rx_headroom,
 	.ndo_bpf		= veth_xdp,
@@ -1317,6 +1408,13 @@  static int veth_validate(struct nlattr *tb[], struct nlattr *data[],
 
 static struct rtnl_link_ops veth_link_ops;
 
+static void veth_disable_gro(struct net_device *dev)
+{
+	dev->features &= ~NETIF_F_GRO;
+	dev->wanted_features &= ~NETIF_F_GRO;
+	netdev_update_features(dev);
+}
+
 static int veth_newlink(struct net *src_net, struct net_device *dev,
 			struct nlattr *tb[], struct nlattr *data[],
 			struct netlink_ext_ack *extack)
@@ -1389,6 +1487,10 @@  static int veth_newlink(struct net *src_net, struct net_device *dev,
 	if (err < 0)
 		goto err_register_peer;
 
+	/* keep GRO disabled by default to be consistent with the established
+	 * veth behavior
+	 */
+	veth_disable_gro(peer);
 	netif_carrier_off(peer);
 
 	err = rtnl_configure_link(peer, ifmp);
@@ -1426,6 +1528,7 @@  static int veth_newlink(struct net *src_net, struct net_device *dev,
 	priv = netdev_priv(peer);
 	rcu_assign_pointer(priv->peer, dev);
 
+	veth_disable_gro(dev);
 	return 0;
 
 err_register_dev: