diff mbox series

[net-next,02/15] ipv6: add dev->gso_ipv6_max_size

Message ID 20220203015140.3022854-3-eric.dumazet@gmail.com (mailing list archive)
State Changes Requested
Delegated to: Netdev Maintainers
Headers show
Series tcp: BIG TCP implementation | expand

Checks

Context Check Description
netdev/tree_selection success Clearly marked for net-next
netdev/fixes_present success Fixes tag not required for -next series
netdev/subject_prefix success Link
netdev/cover_letter success Series has a cover letter
netdev/patch_count success Link
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 4834 this patch: 4834
netdev/cc_maintainers warning 1 maintainers not CCed: liuhangbin@gmail.com
netdev/build_clang success Errors and warnings before: 823 this patch: 823
netdev/module_param success Was 0 now: 0
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn success Errors and warnings before: 4987 this patch: 4987
netdev/checkpatch warning CHECK: Alignment should match open parenthesis WARNING: line length of 83 exceeds 80 columns WARNING: line length of 93 exceeds 80 columns
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/source_inline success Was 0 now: 0

Commit Message

Eric Dumazet Feb. 3, 2022, 1:51 a.m. UTC
From: Eric Dumazet <edumazet@google.com>

This enables the TCP stack to build TSO packets bigger than
64KB if the driver is LSOv2 compatible.

This patch introduces a new variable, gso_ipv6_max_size,
that is modifiable through ip link.

ip link set dev eth0 gso_ipv6_max_size 185000

User input is capped by the driver limit (dev->tso_ipv6_max_size).

Signed-off-by: Coco Li <lixiaoyan@google.com>
Signed-off-by: Eric Dumazet <edumazet@google.com>
---
 include/linux/netdevice.h          | 12 ++++++++++++
 include/uapi/linux/if_link.h       |  1 +
 net/core/dev.c                     |  1 +
 net/core/rtnetlink.c               | 15 +++++++++++++++
 net/core/sock.c                    |  6 ++++++
 tools/include/uapi/linux/if_link.h |  1 +
 6 files changed, 36 insertions(+)

Comments

Paolo Abeni Feb. 3, 2022, 8:57 a.m. UTC | #1
Hello,

On Wed, 2022-02-02 at 17:51 -0800, Eric Dumazet wrote:
> From: Eric Dumazet <edumazet@google.com>
> 
> This enable TCP stack to build TSO packets bigger than
> 64KB if the driver is LSOv2 compatible.
> 
> This patch introduces new variable gso_ipv6_max_size
> that is modifiable through ip link.
> 
> ip link set dev eth0 gso_ipv6_max_size 185000
> 
> User input is capped by driver limit.
> 
> Signed-off-by: Coco Li <lixiaoyan@google.com>
> Signed-off-by: Eric Dumazet <edumazet@google.com>
> ---
>  include/linux/netdevice.h          | 12 ++++++++++++
>  include/uapi/linux/if_link.h       |  1 +
>  net/core/dev.c                     |  1 +
>  net/core/rtnetlink.c               | 15 +++++++++++++++
>  net/core/sock.c                    |  6 ++++++
>  tools/include/uapi/linux/if_link.h |  1 +
>  6 files changed, 36 insertions(+)
> 
> diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
> index b1f68df2b37bc4b623f61cc2c6f0c02ba2afbe02..2a563869ba44f7d48095d36b1395e3fbd8cfff87 100644
> --- a/include/linux/netdevice.h
> +++ b/include/linux/netdevice.h
> @@ -1949,6 +1949,7 @@ enum netdev_ml_priv_type {
>   *	@linkwatch_dev_tracker:	refcount tracker used by linkwatch.
>   *	@watchdog_dev_tracker:	refcount tracker used by watchdog.
>   *	@tso_ipv6_max_size:	Maximum size of IPv6 TSO packets (driver/NIC limit)
> + *	@gso_ipv6_max_size:	Maximum size of IPv6 GSO packets (user/admin limit)
>   *
>   *	FIXME: cleanup struct net_device such that network protocol info
>   *	moves out.
> @@ -2284,6 +2285,7 @@ struct net_device {
>  	netdevice_tracker	linkwatch_dev_tracker;
>  	netdevice_tracker	watchdog_dev_tracker;
>  	unsigned int		tso_ipv6_max_size;
> +	unsigned int		gso_ipv6_max_size;
>  };
>  #define to_net_dev(d) container_of(d, struct net_device, dev)
>  
> @@ -4804,6 +4806,10 @@ static inline void netif_set_gso_max_size(struct net_device *dev,
>  {
>  	/* dev->gso_max_size is read locklessly from sk_setup_caps() */
>  	WRITE_ONCE(dev->gso_max_size, size);
> +
> +	/* legacy drivers want to lower gso_max_size, regardless of family. */
> +	size = min(size, dev->gso_ipv6_max_size);
> +	WRITE_ONCE(dev->gso_ipv6_max_size, size);
>  }
>  
>  static inline void netif_set_gso_max_segs(struct net_device *dev,
> @@ -4827,6 +4833,12 @@ static inline void netif_set_tso_ipv6_max_size(struct net_device *dev,
>  	dev->tso_ipv6_max_size = size;
>  }
>  
> +static inline void netif_set_gso_ipv6_max_size(struct net_device *dev,
> +					       unsigned int size)
> +{
> +	size = min(size, dev->tso_ipv6_max_size);
> +	WRITE_ONCE(dev->gso_ipv6_max_size, size);

Dumb questions on my side: should the above be limited to
tso_ipv6_max_size? Or does increasing gso_ipv6_max_size help even if the
egress NIC does not support LSOv2?

Should gso_ipv6_max_size be capped to some reasonable value (well lower
than 4G), to avoid the stack building very complex skbs?

Thanks!

Paolo
Eric Dumazet Feb. 3, 2022, 3:34 p.m. UTC | #2
On Thu, Feb 3, 2022 at 12:57 AM Paolo Abeni <pabeni@redhat.com> wrote:
>
> Hello,
>
> On Wed, 2022-02-02 at 17:51 -0800, Eric Dumazet wrote:
> > From: Eric Dumazet <edumazet@google.com>
> >
> > This enable TCP stack to build TSO packets bigger than
> > 64KB if the driver is LSOv2 compatible.
> >
> > This patch introduces new variable gso_ipv6_max_size
> > that is modifiable through ip link.
> >
> > ip link set dev eth0 gso_ipv6_max_size 185000
> >
> > User input is capped by driver limit.
> >
> > Signed-off-by: Coco Li <lixiaoyan@google.com>
> > Signed-off-by: Eric Dumazet <edumazet@google.com>
> > ---
> >  include/linux/netdevice.h          | 12 ++++++++++++
> >  include/uapi/linux/if_link.h       |  1 +
> >  net/core/dev.c                     |  1 +
> >  net/core/rtnetlink.c               | 15 +++++++++++++++
> >  net/core/sock.c                    |  6 ++++++
> >  tools/include/uapi/linux/if_link.h |  1 +
> >  6 files changed, 36 insertions(+)
> >
> > diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
> > index b1f68df2b37bc4b623f61cc2c6f0c02ba2afbe02..2a563869ba44f7d48095d36b1395e3fbd8cfff87 100644
> > --- a/include/linux/netdevice.h
> > +++ b/include/linux/netdevice.h
> > @@ -1949,6 +1949,7 @@ enum netdev_ml_priv_type {
> >   *   @linkwatch_dev_tracker: refcount tracker used by linkwatch.
> >   *   @watchdog_dev_tracker:  refcount tracker used by watchdog.
> >   *   @tso_ipv6_max_size:     Maximum size of IPv6 TSO packets (driver/NIC limit)
> > + *   @gso_ipv6_max_size:     Maximum size of IPv6 GSO packets (user/admin limit)
> >   *
> >   *   FIXME: cleanup struct net_device such that network protocol info
> >   *   moves out.
> > @@ -2284,6 +2285,7 @@ struct net_device {
> >       netdevice_tracker       linkwatch_dev_tracker;
> >       netdevice_tracker       watchdog_dev_tracker;
> >       unsigned int            tso_ipv6_max_size;
> > +     unsigned int            gso_ipv6_max_size;
> >  };
> >  #define to_net_dev(d) container_of(d, struct net_device, dev)
> >
> > @@ -4804,6 +4806,10 @@ static inline void netif_set_gso_max_size(struct net_device *dev,
> >  {
> >       /* dev->gso_max_size is read locklessly from sk_setup_caps() */
> >       WRITE_ONCE(dev->gso_max_size, size);
> > +
> > +     /* legacy drivers want to lower gso_max_size, regardless of family. */
> > +     size = min(size, dev->gso_ipv6_max_size);
> > +     WRITE_ONCE(dev->gso_ipv6_max_size, size);
> >  }
> >
> >  static inline void netif_set_gso_max_segs(struct net_device *dev,
> > @@ -4827,6 +4833,12 @@ static inline void netif_set_tso_ipv6_max_size(struct net_device *dev,
> >       dev->tso_ipv6_max_size = size;
> >  }
> >
> > +static inline void netif_set_gso_ipv6_max_size(struct net_device *dev,
> > +                                            unsigned int size)
> > +{
> > +     size = min(size, dev->tso_ipv6_max_size);
> > +     WRITE_ONCE(dev->gso_ipv6_max_size, size);
>
> Dumb questions on my side: should the above be limited to
> tso_ipv6_max_size ? or increasing gso_ipv6_max_size helps even if the
> egress NIC does not support LSOv2?

I thought that "size = min(size, dev->tso_ipv6_max_size);" was doing
exactly that?

I will fix the From: tag because the patch author is Coco Li.

>
> Should gso_ipv6_max_size be capped to some reasonable value (well lower
> than 4G), to avoid the stack building very complex skbs?
>

Drivers are responsible for choosing the max value, then admins choose
optimal operational values based on their constraints (like device MTU)

Typical LSOv2 values are 256KB or 512KB, but we actually tested BIG TCP
with 45 4K segments per packet.

> Thanks!
>
> Paolo
>
diff mbox series

Patch

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index b1f68df2b37bc4b623f61cc2c6f0c02ba2afbe02..2a563869ba44f7d48095d36b1395e3fbd8cfff87 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1949,6 +1949,7 @@  enum netdev_ml_priv_type {
  *	@linkwatch_dev_tracker:	refcount tracker used by linkwatch.
  *	@watchdog_dev_tracker:	refcount tracker used by watchdog.
  *	@tso_ipv6_max_size:	Maximum size of IPv6 TSO packets (driver/NIC limit)
+ *	@gso_ipv6_max_size:	Maximum size of IPv6 GSO packets (user/admin limit)
  *
  *	FIXME: cleanup struct net_device such that network protocol info
  *	moves out.
@@ -2284,6 +2285,7 @@  struct net_device {
 	netdevice_tracker	linkwatch_dev_tracker;
 	netdevice_tracker	watchdog_dev_tracker;
 	unsigned int		tso_ipv6_max_size;
+	unsigned int		gso_ipv6_max_size;
 };
 #define to_net_dev(d) container_of(d, struct net_device, dev)
 
@@ -4804,6 +4806,10 @@  static inline void netif_set_gso_max_size(struct net_device *dev,
 {
 	/* dev->gso_max_size is read locklessly from sk_setup_caps() */
 	WRITE_ONCE(dev->gso_max_size, size);
+
+	/* legacy drivers want to lower gso_max_size, regardless of family. */
+	size = min(size, dev->gso_ipv6_max_size);
+	WRITE_ONCE(dev->gso_ipv6_max_size, size);
 }
 
 static inline void netif_set_gso_max_segs(struct net_device *dev,
@@ -4827,6 +4833,12 @@  static inline void netif_set_tso_ipv6_max_size(struct net_device *dev,
 	dev->tso_ipv6_max_size = size;
 }
 
+static inline void netif_set_gso_ipv6_max_size(struct net_device *dev,
+					       unsigned int size)
+{
+	size = min(size, dev->tso_ipv6_max_size);
+	WRITE_ONCE(dev->gso_ipv6_max_size, size);
+}
 
 static inline void skb_gso_error_unwind(struct sk_buff *skb, __be16 protocol,
 					int pulled_hlen, u16 mac_offset,
diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h
index 79b9d399cd297a1f79dca5ce89762800c38ed4a8..024b3bd0467e1360917001dba6bcfd1f30391894 100644
--- a/include/uapi/linux/if_link.h
+++ b/include/uapi/linux/if_link.h
@@ -349,6 +349,7 @@  enum {
 	IFLA_PARENT_DEV_BUS_NAME,
 	IFLA_GRO_MAX_SIZE,
 	IFLA_TSO_IPV6_MAX_SIZE,
+	IFLA_GSO_IPV6_MAX_SIZE,
 
 	__IFLA_MAX
 };
diff --git a/net/core/dev.c b/net/core/dev.c
index b6ca3c348d41a097baf210f2a5d966b71308c69b..53c947e6fdb7c47e6cc92fd4e38b71e9b90d921c 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -10189,6 +10189,7 @@  struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name,
 	dev->gso_max_segs = GSO_MAX_SEGS;
 	dev->gro_max_size = GRO_MAX_SIZE;
 	dev->tso_ipv6_max_size = GSO_MAX_SIZE;
+	dev->gso_ipv6_max_size = GSO_MAX_SIZE;
 
 	dev->upper_level = 1;
 	dev->lower_level = 1;
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 4cefa07195ba3b67e7b724194b5d729d395ba466..0a0b26261f6d9e4e40bf9cfbda31a29c1f2e3aaa 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -1028,6 +1028,7 @@  static noinline size_t if_nlmsg_size(const struct net_device *dev,
 	       + nla_total_size(4) /* IFLA_GSO_MAX_SIZE */
 	       + nla_total_size(4) /* IFLA_GRO_MAX_SIZE */
 	       + nla_total_size(4) /* IFLA_TSO_IPV6_MAX_SIZE */
+	       + nla_total_size(4) /* IFLA_GSO_IPV6_MAX_SIZE */
 	       + nla_total_size(1) /* IFLA_OPERSTATE */
 	       + nla_total_size(1) /* IFLA_LINKMODE */
 	       + nla_total_size(4) /* IFLA_CARRIER_CHANGES */
@@ -1732,6 +1733,7 @@  static int rtnl_fill_ifinfo(struct sk_buff *skb,
 	    nla_put_u32(skb, IFLA_GSO_MAX_SIZE, dev->gso_max_size) ||
 	    nla_put_u32(skb, IFLA_GRO_MAX_SIZE, dev->gro_max_size) ||
 	    nla_put_u32(skb, IFLA_TSO_IPV6_MAX_SIZE, dev->tso_ipv6_max_size) ||
+	    nla_put_u32(skb, IFLA_GSO_IPV6_MAX_SIZE, dev->gso_ipv6_max_size) ||
 #ifdef CONFIG_RPS
 	    nla_put_u32(skb, IFLA_NUM_RX_QUEUES, dev->num_rx_queues) ||
 #endif
@@ -1886,6 +1888,7 @@  static const struct nla_policy ifla_policy[IFLA_MAX+1] = {
 	[IFLA_PARENT_DEV_NAME]	= { .type = NLA_NUL_STRING },
 	[IFLA_GRO_MAX_SIZE]	= { .type = NLA_U32 },
 	[IFLA_TSO_IPV6_MAX_SIZE]	= { .type = NLA_U32 },
+	[IFLA_GSO_IPV6_MAX_SIZE]	= { .type = NLA_U32 },
 };
 
 static const struct nla_policy ifla_info_policy[IFLA_INFO_MAX+1] = {
@@ -2772,6 +2775,15 @@  static int do_setlink(const struct sk_buff *skb,
 		}
 	}
 
+	if (tb[IFLA_GSO_IPV6_MAX_SIZE]) {
+		u32 max_size = nla_get_u32(tb[IFLA_GSO_IPV6_MAX_SIZE]);
+
+		if (dev->gso_ipv6_max_size ^ max_size) {
+			netif_set_gso_ipv6_max_size(dev, max_size);
+			status |= DO_SETLINK_MODIFIED;
+		}
+	}
+
 	if (tb[IFLA_GSO_MAX_SEGS]) {
 		u32 max_segs = nla_get_u32(tb[IFLA_GSO_MAX_SEGS]);
 
@@ -3247,6 +3259,9 @@  struct net_device *rtnl_create_link(struct net *net, const char *ifname,
 		netif_set_gso_max_segs(dev, nla_get_u32(tb[IFLA_GSO_MAX_SEGS]));
 	if (tb[IFLA_GRO_MAX_SIZE])
 		netif_set_gro_max_size(dev, nla_get_u32(tb[IFLA_GRO_MAX_SIZE]));
+	if (tb[IFLA_GSO_IPV6_MAX_SIZE])
+		netif_set_gso_ipv6_max_size(dev,
+			nla_get_u32(tb[IFLA_GSO_IPV6_MAX_SIZE]));
 
 	return dev;
 }
diff --git a/net/core/sock.c b/net/core/sock.c
index 09d31a7dc68f88af42f75f3f445818fe273b04fb..aec1e156548ea0818f025fd8f448f5e353f79a3b 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -2279,6 +2279,12 @@  void sk_setup_caps(struct sock *sk, struct dst_entry *dst)
 			sk->sk_route_caps |= NETIF_F_SG | NETIF_F_HW_CSUM;
 			/* pairs with the WRITE_ONCE() in netif_set_gso_max_size() */
 			sk->sk_gso_max_size = READ_ONCE(dst->dev->gso_max_size);
+#if IS_ENABLED(CONFIG_IPV6)
+			if (sk->sk_family == AF_INET6 &&
+			    sk_is_tcp(sk) &&
+			    !ipv6_addr_v4mapped(&sk->sk_v6_rcv_saddr))
+				sk->sk_gso_max_size = READ_ONCE(dst->dev->gso_ipv6_max_size);
+#endif
 			sk->sk_gso_max_size -= (MAX_TCP_HEADER + 1);
 			/* pairs with the WRITE_ONCE() in netif_set_gso_max_segs() */
 			max_segs = max_t(u32, READ_ONCE(dst->dev->gso_max_segs), 1);
diff --git a/tools/include/uapi/linux/if_link.h b/tools/include/uapi/linux/if_link.h
index 79b9d399cd297a1f79dca5ce89762800c38ed4a8..024b3bd0467e1360917001dba6bcfd1f30391894 100644
--- a/tools/include/uapi/linux/if_link.h
+++ b/tools/include/uapi/linux/if_link.h
@@ -349,6 +349,7 @@  enum {
 	IFLA_PARENT_DEV_BUS_NAME,
 	IFLA_GRO_MAX_SIZE,
 	IFLA_TSO_IPV6_MAX_SIZE,
+	IFLA_GSO_IPV6_MAX_SIZE,
 
 	__IFLA_MAX
 };