Message ID | 20220506153048.3695721-8-eric.dumazet@gmail.com (mailing list archive) |
---|---|
State | Superseded |
Delegated to: | Netdev Maintainers |
Headers | show |
Series | tcp: BIG TCP implementation | expand |
On Fri, 2022-05-06 at 08:30 -0700, Eric Dumazet wrote: > From: Coco Li <lixiaoyan@google.com> > > Enable GRO to have IPv6 specific limit for max packet size. > > This patch introduces new dev->gro_ipv6_max_size > that is modifiable through ip link. > > ip link set dev eth0 gro_ipv6_max_size 185000 > > Note that this value is only considered if bigger than > gro_max_size, and for non encapsulated TCP/ipv6 packets. > > Signed-off-by: Coco Li <lixiaoyan@google.com> > Signed-off-by: Eric Dumazet <edumazet@google.com> This is another spot where it doesn't make much sense to me to add yet another control. Instead it would make much more sense to simply remove the cap from the existing control and simply add a check that caps the non-IPv6 protocols at GRO_MAX_SIZE. > --- > include/linux/netdevice.h | 3 +++ > include/uapi/linux/if_link.h | 1 + > net/core/dev.c | 1 + > net/core/gro.c | 20 ++++++++++++++++++-- > net/core/rtnetlink.c | 22 ++++++++++++++++++++++ > tools/include/uapi/linux/if_link.h | 1 + > 6 files changed, 46 insertions(+), 2 deletions(-) > > diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h > index 47f413dac12e901700045f4b73d47ecdca0f4f3c..df12c9843d94cb847e0ce5ba1b3b36bde7d476ed 100644 > --- a/include/linux/netdevice.h > +++ b/include/linux/netdevice.h > @@ -1962,6 +1962,8 @@ enum netdev_ml_priv_type { > * keep a list of interfaces to be deleted. > * @gro_max_size: Maximum size of aggregated packet in generic > * receive offload (GRO) > + * @gro_ipv6_max_size: Maximum size of aggregated packet in generic > + * receive offload (GRO), for IPv6 > * > * @dev_addr_shadow: Copy of @dev_addr to catch direct writes. > * @linkwatch_dev_tracker: refcount tracker used by linkwatch. 
> @@ -2154,6 +2156,7 @@ struct net_device { > int napi_defer_hard_irqs; > #define GRO_MAX_SIZE 65536 > unsigned int gro_max_size; > + unsigned int gro_ipv6_max_size; > rx_handler_func_t __rcu *rx_handler; > void __rcu *rx_handler_data; > > diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h > index aa05fc9cc23f4ccf92f4cbba57f43472749cd42a..9ece3a391105c171057cc491c1458ee8a45e07e0 100644 > --- a/include/uapi/linux/if_link.h > +++ b/include/uapi/linux/if_link.h > @@ -371,6 +371,7 @@ enum { > IFLA_TSO_MAX_SIZE, > IFLA_TSO_MAX_SEGS, > IFLA_GSO_IPV6_MAX_SIZE, > + IFLA_GRO_IPV6_MAX_SIZE, > > __IFLA_MAX > }; > diff --git a/net/core/dev.c b/net/core/dev.c > index aa8757215b2a9f14683f95086732668eb99a875b..582b7fe052a6fb06437f95bd6a451b79e188cc57 100644 > --- a/net/core/dev.c > +++ b/net/core/dev.c > @@ -10608,6 +10608,7 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name, > dev->tso_max_size = TSO_LEGACY_MAX_SIZE; > dev->tso_max_segs = TSO_MAX_SEGS; > dev->gso_ipv6_max_size = GSO_MAX_SIZE; > + dev->gro_ipv6_max_size = GRO_MAX_SIZE; > > dev->upper_level = 1; > dev->lower_level = 1; > diff --git a/net/core/gro.c b/net/core/gro.c > index 78110edf5d4b36d2fa6f8a2676096efe0112aa0e..8b35403dd7e909a8d7df591d952a4600c13f360b 100644 > --- a/net/core/gro.c > +++ b/net/core/gro.c > @@ -161,11 +161,27 @@ int skb_gro_receive(struct sk_buff *p, struct sk_buff *skb) > unsigned int new_truesize; > struct sk_buff *lp; > > + if (unlikely(NAPI_GRO_CB(skb)->flush)) > + return -E2BIG; > + > /* pairs with WRITE_ONCE() in netif_set_gro_max_size() */ > gro_max_size = READ_ONCE(p->dev->gro_max_size); > > - if (unlikely(p->len + len >= gro_max_size || NAPI_GRO_CB(skb)->flush)) > - return -E2BIG; So if we just overwrite the existing gro_max_size we could skip the changes above and all the extra netlink overhead. 
> + if (unlikely(p->len + len >= gro_max_size)) { > + /* pairs with WRITE_ONCE() in netif_set_gro_ipv6_max_size() */ > + unsigned int gro6_max_size = READ_ONCE(p->dev->gro_ipv6_max_size); > + > + if (gro6_max_size > gro_max_size && > + p->protocol == htons(ETH_P_IPV6) && > + skb_headroom(p) >= sizeof(struct hop_jumbo_hdr) && > + ipv6_hdr(p)->nexthdr == IPPROTO_TCP && > + !p->encapsulation) > + gro_max_size = gro6_max_size; > + > + if (p->len + len >= gro_max_size) > + return -E2BIG; > + } > + Instead all we would need to do is add an extra section here along the lines of: if (p->len + len > GRO_MAX_SIZE && (p->protocol != htons(ETH_P_IPV6) || skb_headroom(p) < sizeof(struct hop_jumbo_hdr) || ipv6_hdr(p)->nexthdr != IPPROTO_TCP || p->encapsulation)) return -E2BIG;
On Fri, May 6, 2022 at 2:06 PM Alexander H Duyck <alexander.duyck@gmail.com> wrote: > > On Fri, 2022-05-06 at 08:30 -0700, Eric Dumazet wrote: > > From: Coco Li <lixiaoyan@google.com> > > > > Enable GRO to have IPv6 specific limit for max packet size. > > > > This patch introduces new dev->gro_ipv6_max_size > > that is modifiable through ip link. > > > > ip link set dev eth0 gro_ipv6_max_size 185000 > > > > Note that this value is only considered if bigger than > > gro_max_size, and for non encapsulated TCP/ipv6 packets. > > > > Signed-off-by: Coco Li <lixiaoyan@google.com> > > Signed-off-by: Eric Dumazet <edumazet@google.com> > > This is another spot where it doesn't make much sense to me to add yet > another control. Instead it would make much more sense to simply remove > the cap from the existing control and simply add a check that caps the > non-IPv6 protocols at GRO_MAX_SIZE. Can you please send a diff on top of our patch series ? It is kind of hard to see what you want, and _why_ you want this. Note that GRO_MAX_SIZE has been replaced by dev->gro_max_size last year. Yes, yet another control, but some people want more control than others I guess.
On Fri, May 6, 2022 at 2:22 PM Eric Dumazet <edumazet@google.com> wrote: > > On Fri, May 6, 2022 at 2:06 PM Alexander H Duyck > <alexander.duyck@gmail.com> wrote: > > > > On Fri, 2022-05-06 at 08:30 -0700, Eric Dumazet wrote: > > > From: Coco Li <lixiaoyan@google.com> > > > > > > Enable GRO to have IPv6 specific limit for max packet size. > > > > > > This patch introduces new dev->gro_ipv6_max_size > > > that is modifiable through ip link. > > > > > > ip link set dev eth0 gro_ipv6_max_size 185000 > > > > > > Note that this value is only considered if bigger than > > > gro_max_size, and for non encapsulated TCP/ipv6 packets. > > > > > > Signed-off-by: Coco Li <lixiaoyan@google.com> > > > Signed-off-by: Eric Dumazet <edumazet@google.com> > > > > This is another spot where it doesn't make much sense to me to add yet > > another control. Instead it would make much more sense to simply remove > > the cap from the existing control and simply add a check that caps the > > non-IPv6 protocols at GRO_MAX_SIZE. > > Can you please send a diff on top of our patch series ? I would rather not as it would essentially just be a revert of the two problematic patches since what I am suggesting is significantly smaller. > It is kind of hard to see what you want, and _why_ you want this. > > Note that GRO_MAX_SIZE has been replaced by dev->gro_max_size last year. I am using GRO_MAX_SIZE as a legacy value for everything that is not IPv6. If it would help you could go back and take a look at Jakub's patch series and see what he did with TSO_LEGACY_MAX_SIZE. You could think of my use here as GRO_LEGACY_MAX_SIZE. What I am doing is capping all the non-ipv6/tcp flows at the default maximum limit for legacy setups. > Yes, yet another control, but some people want more control than others I guess. Basically these patches are reducing functionality from an existing control. The g[sr]o_max_size values were applied to all incoming or outgoing traffic. 
The patches are adding a special control that only applies to a subset of ipv6 traffic. Instead of taking that route I would rather have the max_size values allowed to exceed the legacy limits, and in those cases that cannot support the new sizes we default back to the legacy maxes. Doing that I feel like we would get much more consistent behavior and if somebody is wanting to use these values for their original intended purpose which was limiting the traffic they will be able to affect all traffic, not just the non-ipv6/tcp traffic.
On Fri, May 6, 2022 at 3:01 PM Alexander Duyck <alexander.duyck@gmail.com> wrote: > > On Fri, May 6, 2022 at 2:22 PM Eric Dumazet <edumazet@google.com> wrote: > > > > On Fri, May 6, 2022 at 2:06 PM Alexander H Duyck > > <alexander.duyck@gmail.com> wrote: > > > > > > On Fri, 2022-05-06 at 08:30 -0700, Eric Dumazet wrote: > > > > From: Coco Li <lixiaoyan@google.com> > > > > > > > > Enable GRO to have IPv6 specific limit for max packet size. > > > > > > > > This patch introduces new dev->gro_ipv6_max_size > > > > that is modifiable through ip link. > > > > > > > > ip link set dev eth0 gro_ipv6_max_size 185000 > > > > > > > > Note that this value is only considered if bigger than > > > > gro_max_size, and for non encapsulated TCP/ipv6 packets. > > > > > > > > Signed-off-by: Coco Li <lixiaoyan@google.com> > > > > Signed-off-by: Eric Dumazet <edumazet@google.com> > > > > > > This is another spot where it doesn't make much sense to me to add yet > > > another control. Instead it would make much more sense to simply remove > > > the cap from the existing control and simply add a check that caps the > > > non-IPv6 protocols at GRO_MAX_SIZE. > > > > Can you please send a diff on top of our patch series ? > > I would rather not as it would essentially just be a revert of the two > problematic patches since what I am suggesting is significantly > smaller. > > > It is kind of hard to see what you want, and _why_ you want this. > > > > Note that GRO_MAX_SIZE has been replaced by dev->gro_max_size last year. > > I am using GRO_MAX_SIZE as a legacy value for everything that is not > IPv6. If it would help you could go back and take a look at Jakub's > patch series and see what he did with TSO_LEGACY_MAX_SIZE. Yes, I was the one suggesting this TSO_LEGACY_MAX_SIZE. > You could > think of my use here as GRO_LEGACY_MAX_SIZE. What I am doing is > capping all the non-ipv6/tcp flows at the default maximum limit for > legacy setups. 
> > > Yes, yet another control, but some people want more control than others I guess. > > Basically these patches are reducing functionality from an existing > control. The g[sr]o_max_size values were applied to all incoming or > outgoing traffic. Yes, and we need to change that, otherwise we are stuck at 65536, because legacy. > The patches are adding a special control that only applies to a subset of ipv6 traffic. Exactly. This is not an accident. > Instead of taking that route I > would rather have the max_size values allowed to exceed the legacy > limits, and in those cases that cannot support the new sizes we > default back to the legacy maxes. Please send a tested patch. I think it will break drivers. We spent months doing extensive tests, and I do not see any reason to spend more time on something that you suggest that I feel is wrong. > Doing that I feel like we would get > much more consistent behavior and if somebody is wanting to use these > values for their original intended purpose which was limiting the > traffic they will be able to affect all traffic, not just the > non-ipv6/tcp traffic. Some people (not us) want to add BIG-TCP with IPv4 as well in a future evolution.
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 47f413dac12e901700045f4b73d47ecdca0f4f3c..df12c9843d94cb847e0ce5ba1b3b36bde7d476ed 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1962,6 +1962,8 @@ enum netdev_ml_priv_type { * keep a list of interfaces to be deleted. * @gro_max_size: Maximum size of aggregated packet in generic * receive offload (GRO) + * @gro_ipv6_max_size: Maximum size of aggregated packet in generic + * receive offload (GRO), for IPv6 * * @dev_addr_shadow: Copy of @dev_addr to catch direct writes. * @linkwatch_dev_tracker: refcount tracker used by linkwatch. @@ -2154,6 +2156,7 @@ struct net_device { int napi_defer_hard_irqs; #define GRO_MAX_SIZE 65536 unsigned int gro_max_size; + unsigned int gro_ipv6_max_size; rx_handler_func_t __rcu *rx_handler; void __rcu *rx_handler_data; diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index aa05fc9cc23f4ccf92f4cbba57f43472749cd42a..9ece3a391105c171057cc491c1458ee8a45e07e0 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -371,6 +371,7 @@ enum { IFLA_TSO_MAX_SIZE, IFLA_TSO_MAX_SEGS, IFLA_GSO_IPV6_MAX_SIZE, + IFLA_GRO_IPV6_MAX_SIZE, __IFLA_MAX }; diff --git a/net/core/dev.c b/net/core/dev.c index aa8757215b2a9f14683f95086732668eb99a875b..582b7fe052a6fb06437f95bd6a451b79e188cc57 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -10608,6 +10608,7 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name, dev->tso_max_size = TSO_LEGACY_MAX_SIZE; dev->tso_max_segs = TSO_MAX_SEGS; dev->gso_ipv6_max_size = GSO_MAX_SIZE; + dev->gro_ipv6_max_size = GRO_MAX_SIZE; dev->upper_level = 1; dev->lower_level = 1; diff --git a/net/core/gro.c b/net/core/gro.c index 78110edf5d4b36d2fa6f8a2676096efe0112aa0e..8b35403dd7e909a8d7df591d952a4600c13f360b 100644 --- a/net/core/gro.c +++ b/net/core/gro.c @@ -161,11 +161,27 @@ int skb_gro_receive(struct sk_buff *p, struct sk_buff *skb) unsigned int new_truesize; 
struct sk_buff *lp; + if (unlikely(NAPI_GRO_CB(skb)->flush)) + return -E2BIG; + /* pairs with WRITE_ONCE() in netif_set_gro_max_size() */ gro_max_size = READ_ONCE(p->dev->gro_max_size); - if (unlikely(p->len + len >= gro_max_size || NAPI_GRO_CB(skb)->flush)) - return -E2BIG; + if (unlikely(p->len + len >= gro_max_size)) { + /* pairs with WRITE_ONCE() in netif_set_gro_ipv6_max_size() */ + unsigned int gro6_max_size = READ_ONCE(p->dev->gro_ipv6_max_size); + + if (gro6_max_size > gro_max_size && + p->protocol == htons(ETH_P_IPV6) && + skb_headroom(p) >= sizeof(struct hop_jumbo_hdr) && + ipv6_hdr(p)->nexthdr == IPPROTO_TCP && + !p->encapsulation) + gro_max_size = gro6_max_size; + + if (p->len + len >= gro_max_size) + return -E2BIG; + } + lp = NAPI_GRO_CB(p)->last; pinfo = skb_shinfo(lp); diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 847cf80f81754451e5f220f846db734a7625695b..5fa3ff835aaf6601c31458ec88e88837d353eabd 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -1067,6 +1067,7 @@ static noinline size_t if_nlmsg_size(const struct net_device *dev, + nla_total_size(4) /* IFLA_TSO_MAX_SIZE */ + nla_total_size(4) /* IFLA_TSO_MAX_SEGS */ + nla_total_size(4) /* IFLA_GSO_IPV6_MAX_SIZE */ + + nla_total_size(4) /* IFLA_GRO_IPV6_MAX_SIZE */ + nla_total_size(1) /* IFLA_OPERSTATE */ + nla_total_size(1) /* IFLA_LINKMODE */ + nla_total_size(4) /* IFLA_CARRIER_CHANGES */ @@ -1775,6 +1776,7 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, nla_put_u32(skb, IFLA_TSO_MAX_SIZE, dev->tso_max_size) || nla_put_u32(skb, IFLA_TSO_MAX_SEGS, dev->tso_max_segs) || nla_put_u32(skb, IFLA_GSO_IPV6_MAX_SIZE, dev->gso_ipv6_max_size) || + nla_put_u32(skb, IFLA_GRO_IPV6_MAX_SIZE, dev->gro_ipv6_max_size) || #ifdef CONFIG_RPS nla_put_u32(skb, IFLA_NUM_RX_QUEUES, dev->num_rx_queues) || #endif @@ -1931,6 +1933,7 @@ static const struct nla_policy ifla_policy[IFLA_MAX+1] = { [IFLA_TSO_MAX_SIZE] = { .type = NLA_REJECT }, [IFLA_TSO_MAX_SEGS] = { .type = NLA_REJECT }, 
[IFLA_GSO_IPV6_MAX_SIZE] = { .type = NLA_U32 }, + [IFLA_GRO_IPV6_MAX_SIZE] = { .type = NLA_U32 }, }; static const struct nla_policy ifla_info_policy[IFLA_INFO_MAX+1] = { @@ -2655,6 +2658,13 @@ static void netif_set_gso_ipv6_max_size(struct net_device *dev, WRITE_ONCE(dev->gso_ipv6_max_size, size); } +static void netif_set_gro_ipv6_max_size(struct net_device *dev, + unsigned int size) +{ + /* This pairs with the READ_ONCE() in skb_gro_receive() */ + WRITE_ONCE(dev->gro_ipv6_max_size, size); +} + #define DO_SETLINK_MODIFIED 0x01 /* notify flag means notify + modified. */ #define DO_SETLINK_NOTIFY 0x03 @@ -2840,6 +2850,15 @@ static int do_setlink(const struct sk_buff *skb, } } + if (tb[IFLA_GRO_IPV6_MAX_SIZE]) { + u32 max_size = nla_get_u32(tb[IFLA_GRO_IPV6_MAX_SIZE]); + + if (dev->gro_ipv6_max_size ^ max_size) { + netif_set_gro_ipv6_max_size(dev, max_size); + status |= DO_SETLINK_MODIFIED; + } + } + if (tb[IFLA_GSO_MAX_SEGS]) { u32 max_segs = nla_get_u32(tb[IFLA_GSO_MAX_SEGS]); @@ -3306,6 +3325,9 @@ struct net_device *rtnl_create_link(struct net *net, const char *ifname, if (tb[IFLA_GSO_IPV6_MAX_SIZE]) netif_set_gso_ipv6_max_size(dev, nla_get_u32(tb[IFLA_GSO_IPV6_MAX_SIZE])); + if (tb[IFLA_GRO_IPV6_MAX_SIZE]) + netif_set_gro_ipv6_max_size(dev, + nla_get_u32(tb[IFLA_GRO_IPV6_MAX_SIZE])); return dev; } diff --git a/tools/include/uapi/linux/if_link.h b/tools/include/uapi/linux/if_link.h index 443eddd285f37198566fa1357f0d394ec5270ab9..5aead1be6b99623fb6ffd31cfcfd44976eb8794f 100644 --- a/tools/include/uapi/linux/if_link.h +++ b/tools/include/uapi/linux/if_link.h @@ -351,6 +351,7 @@ enum { IFLA_TSO_MAX_SIZE, IFLA_TSO_MAX_SEGS, IFLA_GSO_IPV6_MAX_SIZE, + IFLA_GRO_IPV6_MAX_SIZE, __IFLA_MAX };