Message ID | 7e1f733cc96c7f7658fbf3276a90281b2f37acd1.1674921359.git.lucien.xin@gmail.com (mailing list archive) |
---|---|
State | Accepted |
Commit | 9eefedd58ae1daece2ba907849a44db2941fb4b0 |
Delegated to: | Netdev Maintainers |
Headers | show |
Series | net: support ipv4 big tcp | expand |
On Sat, 2023-01-28 at 10:58 -0500, Xin Long wrote: > This patch introduces gso_ipv4_max_size and gro_ipv4_max_size > per device and adds netlink attributes for them, so that IPV4 > BIG TCP can be guarded by a separate tunable in the next patch. > > To not break the old application using "gso/gro_max_size" for > IPv4 GSO packets, this patch updates "gso/gro_ipv4_max_size" > in netif_set_gso/gro_max_size() if the new size isn't greater > than GSO_LEGACY_MAX_SIZE, so that nothing will change even if > userspace doesn't realize the new netlink attributes. Not a big deal, but I think it would be nice to include the pahole info showing where the new fields are located and why those are good locations. No need to send a new version just for the above, unless Eric asks otherwise ;) Cheers, Paolo
On Tue, Jan 31, 2023 at 9:59 AM Paolo Abeni <pabeni@redhat.com> wrote: > > On Sat, 2023-01-28 at 10:58 -0500, Xin Long wrote: > > This patch introduces gso_ipv4_max_size and gro_ipv4_max_size > > per device and adds netlink attributes for them, so that IPV4 > > BIG TCP can be guarded by a separate tunable in the next patch. > > > > To not break the old application using "gso/gro_max_size" for > > IPv4 GSO packets, this patch updates "gso/gro_ipv4_max_size" > > in netif_set_gso/gro_max_size() if the new size isn't greater > > than GSO_LEGACY_MAX_SIZE, so that nothing will change even if > > userspace doesn't realize the new netlink attributes. > > Not a big deal, but I think it would be nice to include the pahole info > showing where the new fields are located and why that are good > locations. > > No need to send a new version for just for the above, unless Eric asks > otherwise ;) > The pahole info without and with the patch is shown below: - Without the Patch: # pahole --hex -C net_device vmlinux struct net_device { ... long unsigned int gro_flush_timeout; /* 0x330 0x8 */ int napi_defer_hard_irqs; /* 0x338 0x4 */ unsigned int gro_max_size; /* 0x33c 0x4 */ <--------- /* --- cacheline 13 boundary (832 bytes) --- */ rx_handler_func_t * rx_handler; /* 0x340 0x8 */ void * rx_handler_data; /* 0x348 0x8 */ struct mini_Qdisc * miniq_ingress; /* 0x350 0x8 */ struct netdev_queue * ingress_queue; /* 0x358 0x8 */ struct nf_hook_entries * nf_hooks_ingress; /* 0x360 0x8 */ unsigned char broadcast[32]; /* 0x368 0x20 */ /* --- cacheline 14 boundary (896 bytes) was 8 bytes ago --- */ struct cpu_rmap * rx_cpu_rmap; /* 0x388 0x8 */ struct hlist_node index_hlist; /* 0x390 0x10 */ /* XXX 32 bytes hole, try to pack */ /* --- cacheline 15 boundary (960 bytes) --- */ struct netdev_queue * _tx __attribute__((__aligned__(64))); /* 0x3c0 0x8 */ ... 
/* --- cacheline 32 boundary (2048 bytes) was 24 bytes ago --- */ const struct attribute_group * sysfs_groups[4]; /* 0x818 0x20 */ const struct attribute_group * sysfs_rx_queue_group; /* 0x838 0x8 */ /* --- cacheline 33 boundary (2112 bytes) --- */ const struct rtnl_link_ops * rtnl_link_ops; /* 0x840 0x8 */ unsigned int gso_max_size; /* 0x848 0x4 */ unsigned int tso_max_size; /* 0x84c 0x4 */ u16 gso_max_segs; /* 0x850 0x2 */ u16 tso_max_segs; /* 0x852 0x2 */ <--------- /* XXX 4 bytes hole, try to pack */ const struct dcbnl_rtnl_ops * dcbnl_ops; /* 0x858 0x8 */ s16 num_tc; /* 0x860 0x2 */ struct netdev_tc_txq tc_to_txq[16]; /* 0x862 0x40 */ /* --- cacheline 34 boundary (2176 bytes) was 34 bytes ago --- */ u8 prio_tc_map[16]; /* 0x8a2 0x10 */ ... } - With the Patch: For "gso_ipv4_max_size", it filled the hole as expected. /* --- cacheline 33 boundary (2112 bytes) --- */ const struct rtnl_link_ops * rtnl_link_ops; /* 0x840 0x8 */ unsigned int gso_max_size; /* 0x848 0x4 */ unsigned int tso_max_size; /* 0x84c 0x4 */ u16 gso_max_segs; /* 0x850 0x2 */ u16 tso_max_segs; /* 0x852 0x2 */ unsigned int gso_ipv4_max_size; /* 0x854 0x4 */ <------- const struct dcbnl_rtnl_ops * dcbnl_ops; /* 0x858 0x8 */ s16 num_tc; /* 0x860 0x2 */ struct netdev_tc_txq tc_to_txq[16]; /* 0x862 0x40 */ /* --- cacheline 34 boundary (2176 bytes) was 34 bytes ago --- */ u8 prio_tc_map[16]; /* 0x8a2 0x10 */ For "gro_ipv4_max_size", there are no byte holes, I just put it in the "Cache lines mostly used on receive path" area, and next to gro_max_size. 
long unsigned int gro_flush_timeout; /* 0x330 0x8 */ int napi_defer_hard_irqs; /* 0x338 0x4 */ unsigned int gro_max_size; /* 0x33c 0x4 */ /* --- cacheline 13 boundary (832 bytes) --- */ unsigned int gro_ipv4_max_size; /* 0x340 0x4 */ <------ /* XXX 4 bytes hole, try to pack */ rx_handler_func_t * rx_handler; /* 0x348 0x8 */ void * rx_handler_data; /* 0x350 0x8 */ struct mini_Qdisc * miniq_ingress; /* 0x358 0x8 */ struct netdev_queue * ingress_queue; /* 0x360 0x8 */ struct nf_hook_entries * nf_hooks_ingress; /* 0x368 0x8 */ unsigned char broadcast[32]; /* 0x370 0x20 */ /* --- cacheline 14 boundary (896 bytes) was 16 bytes ago --- */ struct cpu_rmap * rx_cpu_rmap; /* 0x390 0x8 */ struct hlist_node index_hlist; /* 0x398 0x10 */ /* XXX 24 bytes hole, try to pack */ /* --- cacheline 15 boundary (960 bytes) --- */ struct netdev_queue * _tx __attribute__((__aligned__(64))); /* 0x3c0 0x8 */ Thanks.
On 1/28/23 8:58 AM, Xin Long wrote: > This patch introduces gso_ipv4_max_size and gro_ipv4_max_size > per device and adds netlink attributes for them, so that IPV4 > BIG TCP can be guarded by a separate tunable in the next patch. > > To not break the old application using "gso/gro_max_size" for > IPv4 GSO packets, this patch updates "gso/gro_ipv4_max_size" > in netif_set_gso/gro_max_size() if the new size isn't greater > than GSO_LEGACY_MAX_SIZE, so that nothing will change even if > userspace doesn't realize the new netlink attributes. > > Signed-off-by: Xin Long <lucien.xin@gmail.com> > --- > include/linux/netdevice.h | 6 ++++++ > include/uapi/linux/if_link.h | 3 +++ > net/core/dev.c | 4 ++++ > net/core/dev.h | 18 ++++++++++++++++++ > net/core/rtnetlink.c | 33 +++++++++++++++++++++++++++++++++ > 5 files changed, 64 insertions(+) > Reviewed-by: David Ahern <dsahern@kernel.org>
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 2466afa25078..d5ef4c1fedd2 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1964,6 +1964,8 @@ enum netdev_ml_priv_type { * @gso_max_segs: Maximum number of segments that can be passed to the * NIC for GSO * @tso_max_segs: Device (as in HW) limit on the max TSO segment count + * @gso_ipv4_max_size: Maximum size of generic segmentation offload, + * for IPv4. * * @dcbnl_ops: Data Center Bridging netlink ops * @num_tc: Number of traffic classes in the net device @@ -2004,6 +2006,8 @@ enum netdev_ml_priv_type { * keep a list of interfaces to be deleted. * @gro_max_size: Maximum size of aggregated packet in generic * receive offload (GRO) + * @gro_ipv4_max_size: Maximum size of aggregated packet in generic + * receive offload (GRO), for IPv4. * * @dev_addr_shadow: Copy of @dev_addr to catch direct writes. * @linkwatch_dev_tracker: refcount tracker used by linkwatch. @@ -2207,6 +2211,7 @@ struct net_device { */ #define GRO_MAX_SIZE (8 * 65535u) unsigned int gro_max_size; + unsigned int gro_ipv4_max_size; rx_handler_func_t __rcu *rx_handler; void __rcu *rx_handler_data; @@ -2330,6 +2335,7 @@ struct net_device { u16 gso_max_segs; #define TSO_MAX_SEGS U16_MAX u16 tso_max_segs; + unsigned int gso_ipv4_max_size; #ifdef CONFIG_DCB const struct dcbnl_rtnl_ops *dcbnl_ops; diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index 1021a7e47a86..02b87e4c65be 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -374,6 +374,9 @@ enum { IFLA_DEVLINK_PORT, + IFLA_GSO_IPV4_MAX_SIZE, + IFLA_GRO_IPV4_MAX_SIZE, + __IFLA_MAX }; diff --git a/net/core/dev.c b/net/core/dev.c index f72f5c4ee7e2..bb42150a38ec 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -3001,6 +3001,8 @@ void netif_set_tso_max_size(struct net_device *dev, unsigned int size) dev->tso_max_size = min(GSO_MAX_SIZE, size); if (size < READ_ONCE(dev->gso_max_size)) 
netif_set_gso_max_size(dev, size); + if (size < READ_ONCE(dev->gso_ipv4_max_size)) + netif_set_gso_ipv4_max_size(dev, size); } EXPORT_SYMBOL(netif_set_tso_max_size); @@ -10614,6 +10616,8 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name, dev->gso_max_size = GSO_LEGACY_MAX_SIZE; dev->gso_max_segs = GSO_MAX_SEGS; dev->gro_max_size = GRO_LEGACY_MAX_SIZE; + dev->gso_ipv4_max_size = GSO_LEGACY_MAX_SIZE; + dev->gro_ipv4_max_size = GRO_LEGACY_MAX_SIZE; dev->tso_max_size = TSO_LEGACY_MAX_SIZE; dev->tso_max_segs = TSO_MAX_SEGS; dev->upper_level = 1; diff --git a/net/core/dev.h b/net/core/dev.h index 814ed5b7b960..a065b7571441 100644 --- a/net/core/dev.h +++ b/net/core/dev.h @@ -100,6 +100,8 @@ static inline void netif_set_gso_max_size(struct net_device *dev, { /* dev->gso_max_size is read locklessly from sk_setup_caps() */ WRITE_ONCE(dev->gso_max_size, size); + if (size <= GSO_LEGACY_MAX_SIZE) + WRITE_ONCE(dev->gso_ipv4_max_size, size); } static inline void netif_set_gso_max_segs(struct net_device *dev, @@ -114,6 +116,22 @@ static inline void netif_set_gro_max_size(struct net_device *dev, { /* This pairs with the READ_ONCE() in skb_gro_receive() */ WRITE_ONCE(dev->gro_max_size, size); + if (size <= GRO_LEGACY_MAX_SIZE) + WRITE_ONCE(dev->gro_ipv4_max_size, size); +} + +static inline void netif_set_gso_ipv4_max_size(struct net_device *dev, + unsigned int size) +{ + /* dev->gso_ipv4_max_size is read locklessly from sk_setup_caps() */ + WRITE_ONCE(dev->gso_ipv4_max_size, size); +} + +static inline void netif_set_gro_ipv4_max_size(struct net_device *dev, + unsigned int size) +{ + /* This pairs with the READ_ONCE() in skb_gro_receive() */ + WRITE_ONCE(dev->gro_ipv4_max_size, size); } #endif diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 64289bc98887..b9f584955b77 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -1074,6 +1074,8 @@ static noinline size_t if_nlmsg_size(const struct net_device *dev, + nla_total_size(4) /* 
IFLA_GSO_MAX_SEGS */ + nla_total_size(4) /* IFLA_GSO_MAX_SIZE */ + nla_total_size(4) /* IFLA_GRO_MAX_SIZE */ + + nla_total_size(4) /* IFLA_GSO_IPV4_MAX_SIZE */ + + nla_total_size(4) /* IFLA_GRO_IPV4_MAX_SIZE */ + nla_total_size(4) /* IFLA_TSO_MAX_SIZE */ + nla_total_size(4) /* IFLA_TSO_MAX_SEGS */ + nla_total_size(1) /* IFLA_OPERSTATE */ @@ -1807,6 +1809,8 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, nla_put_u32(skb, IFLA_GSO_MAX_SEGS, dev->gso_max_segs) || nla_put_u32(skb, IFLA_GSO_MAX_SIZE, dev->gso_max_size) || nla_put_u32(skb, IFLA_GRO_MAX_SIZE, dev->gro_max_size) || + nla_put_u32(skb, IFLA_GSO_IPV4_MAX_SIZE, dev->gso_ipv4_max_size) || + nla_put_u32(skb, IFLA_GRO_IPV4_MAX_SIZE, dev->gro_ipv4_max_size) || nla_put_u32(skb, IFLA_TSO_MAX_SIZE, dev->tso_max_size) || nla_put_u32(skb, IFLA_TSO_MAX_SEGS, dev->tso_max_segs) || #ifdef CONFIG_RPS @@ -1968,6 +1972,8 @@ static const struct nla_policy ifla_policy[IFLA_MAX+1] = { [IFLA_TSO_MAX_SIZE] = { .type = NLA_REJECT }, [IFLA_TSO_MAX_SEGS] = { .type = NLA_REJECT }, [IFLA_ALLMULTI] = { .type = NLA_REJECT }, + [IFLA_GSO_IPV4_MAX_SIZE] = { .type = NLA_U32 }, + [IFLA_GRO_IPV4_MAX_SIZE] = { .type = NLA_U32 }, }; static const struct nla_policy ifla_info_policy[IFLA_INFO_MAX+1] = { @@ -2883,6 +2889,29 @@ static int do_setlink(const struct sk_buff *skb, } } + if (tb[IFLA_GSO_IPV4_MAX_SIZE]) { + u32 max_size = nla_get_u32(tb[IFLA_GSO_IPV4_MAX_SIZE]); + + if (max_size > dev->tso_max_size) { + err = -EINVAL; + goto errout; + } + + if (dev->gso_ipv4_max_size ^ max_size) { + netif_set_gso_ipv4_max_size(dev, max_size); + status |= DO_SETLINK_MODIFIED; + } + } + + if (tb[IFLA_GRO_IPV4_MAX_SIZE]) { + u32 gro_max_size = nla_get_u32(tb[IFLA_GRO_IPV4_MAX_SIZE]); + + if (dev->gro_ipv4_max_size ^ gro_max_size) { + netif_set_gro_ipv4_max_size(dev, gro_max_size); + status |= DO_SETLINK_MODIFIED; + } + } + if (tb[IFLA_OPERSTATE]) set_operstate(dev, nla_get_u8(tb[IFLA_OPERSTATE])); @@ -3325,6 +3354,10 @@ struct net_device 
*rtnl_create_link(struct net *net, const char *ifname, netif_set_gso_max_segs(dev, nla_get_u32(tb[IFLA_GSO_MAX_SEGS])); if (tb[IFLA_GRO_MAX_SIZE]) netif_set_gro_max_size(dev, nla_get_u32(tb[IFLA_GRO_MAX_SIZE])); + if (tb[IFLA_GSO_IPV4_MAX_SIZE]) + netif_set_gso_ipv4_max_size(dev, nla_get_u32(tb[IFLA_GSO_IPV4_MAX_SIZE])); + if (tb[IFLA_GRO_IPV4_MAX_SIZE]) + netif_set_gro_ipv4_max_size(dev, nla_get_u32(tb[IFLA_GRO_IPV4_MAX_SIZE])); return dev; }
This patch introduces gso_ipv4_max_size and gro_ipv4_max_size per device and adds netlink attributes for them, so that IPV4 BIG TCP can be guarded by a separate tunable in the next patch. To not break the old application using "gso/gro_max_size" for IPv4 GSO packets, this patch updates "gso/gro_ipv4_max_size" in netif_set_gso/gro_max_size() if the new size isn't greater than GSO_LEGACY_MAX_SIZE, so that nothing will change even if userspace doesn't realize the new netlink attributes. Signed-off-by: Xin Long <lucien.xin@gmail.com> --- include/linux/netdevice.h | 6 ++++++ include/uapi/linux/if_link.h | 3 +++ net/core/dev.c | 4 ++++ net/core/dev.h | 18 ++++++++++++++++++ net/core/rtnetlink.c | 33 +++++++++++++++++++++++++++++++++ 5 files changed, 64 insertions(+)