Message ID | 20220203015140.3022854-3-eric.dumazet@gmail.com (mailing list archive) |
---|---|
State | Changes Requested |
Delegated to: | Netdev Maintainers |
Headers | show |
Series | tcp: BIG TCP implementation | expand |
Hello, On Wed, 2022-02-02 at 17:51 -0800, Eric Dumazet wrote: > From: Eric Dumazet <edumazet@google.com> > > This enable TCP stack to build TSO packets bigger than > 64KB if the driver is LSOv2 compatible. > > This patch introduces new variable gso_ipv6_max_size > that is modifiable through ip link. > > ip link set dev eth0 gso_ipv6_max_size 185000 > > User input is capped by driver limit. > > Signed-off-by: Coco Li <lixiaoyan@google.com> > Signed-off-by: Eric Dumazet <edumazet@google.com> > --- > include/linux/netdevice.h | 12 ++++++++++++ > include/uapi/linux/if_link.h | 1 + > net/core/dev.c | 1 + > net/core/rtnetlink.c | 15 +++++++++++++++ > net/core/sock.c | 6 ++++++ > tools/include/uapi/linux/if_link.h | 1 + > 6 files changed, 36 insertions(+) > > diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h > index b1f68df2b37bc4b623f61cc2c6f0c02ba2afbe02..2a563869ba44f7d48095d36b1395e3fbd8cfff87 100644 > --- a/include/linux/netdevice.h > +++ b/include/linux/netdevice.h > @@ -1949,6 +1949,7 @@ enum netdev_ml_priv_type { > * @linkwatch_dev_tracker: refcount tracker used by linkwatch. > * @watchdog_dev_tracker: refcount tracker used by watchdog. > * @tso_ipv6_max_size: Maximum size of IPv6 TSO packets (driver/NIC limit) > + * @gso_ipv6_max_size: Maximum size of IPv6 GSO packets (user/admin limit) > * > * FIXME: cleanup struct net_device such that network protocol info > * moves out. > @@ -2284,6 +2285,7 @@ struct net_device { > netdevice_tracker linkwatch_dev_tracker; > netdevice_tracker watchdog_dev_tracker; > unsigned int tso_ipv6_max_size; > + unsigned int gso_ipv6_max_size; > }; > #define to_net_dev(d) container_of(d, struct net_device, dev) > > @@ -4804,6 +4806,10 @@ static inline void netif_set_gso_max_size(struct net_device *dev, > { > /* dev->gso_max_size is read locklessly from sk_setup_caps() */ > WRITE_ONCE(dev->gso_max_size, size); > + > + /* legacy drivers want to lower gso_max_size, regardless of family. 
*/ > + size = min(size, dev->gso_ipv6_max_size); > + WRITE_ONCE(dev->gso_ipv6_max_size, size); > } > > static inline void netif_set_gso_max_segs(struct net_device *dev, > @@ -4827,6 +4833,12 @@ static inline void netif_set_tso_ipv6_max_size(struct net_device *dev, > dev->tso_ipv6_max_size = size; > } > > +static inline void netif_set_gso_ipv6_max_size(struct net_device *dev, > + unsigned int size) > +{ > + size = min(size, dev->tso_ipv6_max_size); > + WRITE_ONCE(dev->gso_ipv6_max_size, size); Dumb questions on my side: should the above be limited to tso_ipv6_max_size ? or increasing gso_ipv6_max_size helps even if the egress NIC does not support LSOv2? Should gso_ipv6_max_size be capped to some reasonable value (well lower than 4G), to avoid the stack building very complex skbs? Thanks! Paolo
On Thu, Feb 3, 2022 at 12:57 AM Paolo Abeni <pabeni@redhat.com> wrote: > > Hello, > > On Wed, 2022-02-02 at 17:51 -0800, Eric Dumazet wrote: > > From: Eric Dumazet <edumazet@google.com> > > > > This enable TCP stack to build TSO packets bigger than > > 64KB if the driver is LSOv2 compatible. > > > > This patch introduces new variable gso_ipv6_max_size > > that is modifiable through ip link. > > > > ip link set dev eth0 gso_ipv6_max_size 185000 > > > > User input is capped by driver limit. > > > > Signed-off-by: Coco Li <lixiaoyan@google.com> > > Signed-off-by: Eric Dumazet <edumazet@google.com> > > --- > > include/linux/netdevice.h | 12 ++++++++++++ > > include/uapi/linux/if_link.h | 1 + > > net/core/dev.c | 1 + > > net/core/rtnetlink.c | 15 +++++++++++++++ > > net/core/sock.c | 6 ++++++ > > tools/include/uapi/linux/if_link.h | 1 + > > 6 files changed, 36 insertions(+) > > > > diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h > > index b1f68df2b37bc4b623f61cc2c6f0c02ba2afbe02..2a563869ba44f7d48095d36b1395e3fbd8cfff87 100644 > > --- a/include/linux/netdevice.h > > +++ b/include/linux/netdevice.h > > @@ -1949,6 +1949,7 @@ enum netdev_ml_priv_type { > > * @linkwatch_dev_tracker: refcount tracker used by linkwatch. > > * @watchdog_dev_tracker: refcount tracker used by watchdog. > > * @tso_ipv6_max_size: Maximum size of IPv6 TSO packets (driver/NIC limit) > > + * @gso_ipv6_max_size: Maximum size of IPv6 GSO packets (user/admin limit) > > * > > * FIXME: cleanup struct net_device such that network protocol info > > * moves out. 
> > @@ -2284,6 +2285,7 @@ struct net_device { > > netdevice_tracker linkwatch_dev_tracker; > > netdevice_tracker watchdog_dev_tracker; > > unsigned int tso_ipv6_max_size; > > + unsigned int gso_ipv6_max_size; > > }; > > #define to_net_dev(d) container_of(d, struct net_device, dev) > > > > @@ -4804,6 +4806,10 @@ static inline void netif_set_gso_max_size(struct net_device *dev, > > { > > /* dev->gso_max_size is read locklessly from sk_setup_caps() */ > > WRITE_ONCE(dev->gso_max_size, size); > > + > > + /* legacy drivers want to lower gso_max_size, regardless of family. */ > > + size = min(size, dev->gso_ipv6_max_size); > > + WRITE_ONCE(dev->gso_ipv6_max_size, size); > > } > > > > static inline void netif_set_gso_max_segs(struct net_device *dev, > > @@ -4827,6 +4833,12 @@ static inline void netif_set_tso_ipv6_max_size(struct net_device *dev, > > dev->tso_ipv6_max_size = size; > > } > > > > +static inline void netif_set_gso_ipv6_max_size(struct net_device *dev, > > + unsigned int size) > > +{ > > + size = min(size, dev->tso_ipv6_max_size); > > + WRITE_ONCE(dev->gso_ipv6_max_size, size); > > Dumb questions on my side: should the above be limited to > tso_ipv6_max_size ? or increasing gso_ipv6_max_size helps even if the > egress NIC does not support LSOv2? I thought that " size = min(size, dev->tso_ipv6_max_size);" was doing exactly that? I will fix the From: tag because the patch author is Coco Li. > > Should gso_ipv6_max_size be capped to some reasonable value (well lower > than 4G), to avoid the stack building very complex skbs? > Drivers are responsible for choosing the max value, then admins choose optimal operational values based on their constraints (like device MTU). Typical LSOv2 values are 256KB or 512KB, but we really tested BIG TCP with 45 4K segments per packet. > Thanks! > > Paolo >
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index b1f68df2b37bc4b623f61cc2c6f0c02ba2afbe02..2a563869ba44f7d48095d36b1395e3fbd8cfff87 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1949,6 +1949,7 @@ enum netdev_ml_priv_type { * @linkwatch_dev_tracker: refcount tracker used by linkwatch. * @watchdog_dev_tracker: refcount tracker used by watchdog. * @tso_ipv6_max_size: Maximum size of IPv6 TSO packets (driver/NIC limit) + * @gso_ipv6_max_size: Maximum size of IPv6 GSO packets (user/admin limit) * * FIXME: cleanup struct net_device such that network protocol info * moves out. @@ -2284,6 +2285,7 @@ struct net_device { netdevice_tracker linkwatch_dev_tracker; netdevice_tracker watchdog_dev_tracker; unsigned int tso_ipv6_max_size; + unsigned int gso_ipv6_max_size; }; #define to_net_dev(d) container_of(d, struct net_device, dev) @@ -4804,6 +4806,10 @@ static inline void netif_set_gso_max_size(struct net_device *dev, { /* dev->gso_max_size is read locklessly from sk_setup_caps() */ WRITE_ONCE(dev->gso_max_size, size); + + /* legacy drivers want to lower gso_max_size, regardless of family. 
*/ + size = min(size, dev->gso_ipv6_max_size); + WRITE_ONCE(dev->gso_ipv6_max_size, size); } static inline void netif_set_gso_max_segs(struct net_device *dev, @@ -4827,6 +4833,12 @@ static inline void netif_set_tso_ipv6_max_size(struct net_device *dev, dev->tso_ipv6_max_size = size; } +static inline void netif_set_gso_ipv6_max_size(struct net_device *dev, + unsigned int size) +{ + size = min(size, dev->tso_ipv6_max_size); + WRITE_ONCE(dev->gso_ipv6_max_size, size); +} static inline void skb_gso_error_unwind(struct sk_buff *skb, __be16 protocol, int pulled_hlen, u16 mac_offset, diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index 79b9d399cd297a1f79dca5ce89762800c38ed4a8..024b3bd0467e1360917001dba6bcfd1f30391894 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -349,6 +349,7 @@ enum { IFLA_PARENT_DEV_BUS_NAME, IFLA_GRO_MAX_SIZE, IFLA_TSO_IPV6_MAX_SIZE, + IFLA_GSO_IPV6_MAX_SIZE, __IFLA_MAX }; diff --git a/net/core/dev.c b/net/core/dev.c index b6ca3c348d41a097baf210f2a5d966b71308c69b..53c947e6fdb7c47e6cc92fd4e38b71e9b90d921c 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -10189,6 +10189,7 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name, dev->gso_max_segs = GSO_MAX_SEGS; dev->gro_max_size = GRO_MAX_SIZE; dev->tso_ipv6_max_size = GSO_MAX_SIZE; + dev->gso_ipv6_max_size = GSO_MAX_SIZE; dev->upper_level = 1; dev->lower_level = 1; diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 4cefa07195ba3b67e7b724194b5d729d395ba466..0a0b26261f6d9e4e40bf9cfbda31a29c1f2e3aaa 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -1028,6 +1028,7 @@ static noinline size_t if_nlmsg_size(const struct net_device *dev, + nla_total_size(4) /* IFLA_GSO_MAX_SIZE */ + nla_total_size(4) /* IFLA_GRO_MAX_SIZE */ + nla_total_size(4) /* IFLA_TSO_IPV6_MAX_SIZE */ + + nla_total_size(4) /* IFLA_GSO_IPV6_MAX_SIZE */ + nla_total_size(1) /* IFLA_OPERSTATE */ + nla_total_size(1) /* IFLA_LINKMODE */ + 
nla_total_size(4) /* IFLA_CARRIER_CHANGES */ @@ -1732,6 +1733,7 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, nla_put_u32(skb, IFLA_GSO_MAX_SIZE, dev->gso_max_size) || nla_put_u32(skb, IFLA_GRO_MAX_SIZE, dev->gro_max_size) || nla_put_u32(skb, IFLA_TSO_IPV6_MAX_SIZE, dev->tso_ipv6_max_size) || + nla_put_u32(skb, IFLA_GSO_IPV6_MAX_SIZE, dev->gso_ipv6_max_size) || #ifdef CONFIG_RPS nla_put_u32(skb, IFLA_NUM_RX_QUEUES, dev->num_rx_queues) || #endif @@ -1886,6 +1888,7 @@ static const struct nla_policy ifla_policy[IFLA_MAX+1] = { [IFLA_PARENT_DEV_NAME] = { .type = NLA_NUL_STRING }, [IFLA_GRO_MAX_SIZE] = { .type = NLA_U32 }, [IFLA_TSO_IPV6_MAX_SIZE] = { .type = NLA_U32 }, + [IFLA_GSO_IPV6_MAX_SIZE] = { .type = NLA_U32 }, }; static const struct nla_policy ifla_info_policy[IFLA_INFO_MAX+1] = { @@ -2772,6 +2775,15 @@ static int do_setlink(const struct sk_buff *skb, } } + if (tb[IFLA_GSO_IPV6_MAX_SIZE]) { + u32 max_size = nla_get_u32(tb[IFLA_GSO_IPV6_MAX_SIZE]); + + if (dev->gso_ipv6_max_size ^ max_size) { + netif_set_gso_ipv6_max_size(dev, max_size); + status |= DO_SETLINK_MODIFIED; + } + } + if (tb[IFLA_GSO_MAX_SEGS]) { u32 max_segs = nla_get_u32(tb[IFLA_GSO_MAX_SEGS]); @@ -3247,6 +3259,9 @@ struct net_device *rtnl_create_link(struct net *net, const char *ifname, netif_set_gso_max_segs(dev, nla_get_u32(tb[IFLA_GSO_MAX_SEGS])); if (tb[IFLA_GRO_MAX_SIZE]) netif_set_gro_max_size(dev, nla_get_u32(tb[IFLA_GRO_MAX_SIZE])); + if (tb[IFLA_GSO_IPV6_MAX_SIZE]) + netif_set_gso_ipv6_max_size(dev, + nla_get_u32(tb[IFLA_GSO_IPV6_MAX_SIZE])); return dev; } diff --git a/net/core/sock.c b/net/core/sock.c index 09d31a7dc68f88af42f75f3f445818fe273b04fb..aec1e156548ea0818f025fd8f448f5e353f79a3b 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -2279,6 +2279,12 @@ void sk_setup_caps(struct sock *sk, struct dst_entry *dst) sk->sk_route_caps |= NETIF_F_SG | NETIF_F_HW_CSUM; /* pairs with the WRITE_ONCE() in netif_set_gso_max_size() */ sk->sk_gso_max_size = 
READ_ONCE(dst->dev->gso_max_size); +#if IS_ENABLED(CONFIG_IPV6) + if (sk->sk_family == AF_INET6 && + sk_is_tcp(sk) && + !ipv6_addr_v4mapped(&sk->sk_v6_rcv_saddr)) + sk->sk_gso_max_size = READ_ONCE(dst->dev->gso_ipv6_max_size); +#endif sk->sk_gso_max_size -= (MAX_TCP_HEADER + 1); /* pairs with the WRITE_ONCE() in netif_set_gso_max_segs() */ max_segs = max_t(u32, READ_ONCE(dst->dev->gso_max_segs), 1); diff --git a/tools/include/uapi/linux/if_link.h b/tools/include/uapi/linux/if_link.h index 79b9d399cd297a1f79dca5ce89762800c38ed4a8..024b3bd0467e1360917001dba6bcfd1f30391894 100644 --- a/tools/include/uapi/linux/if_link.h +++ b/tools/include/uapi/linux/if_link.h @@ -349,6 +349,7 @@ enum { IFLA_PARENT_DEV_BUS_NAME, IFLA_GRO_MAX_SIZE, IFLA_TSO_IPV6_MAX_SIZE, + IFLA_GSO_IPV6_MAX_SIZE, __IFLA_MAX };