Message ID | 20220506153048.3695721-13-eric.dumazet@gmail.com (mailing list archive) |
---|---|
State | Superseded |
Delegated to: | Netdev Maintainers |
Headers | show |
Series | tcp: BIG TCP implementation | expand |
On Fri, 6 May 2022 08:30:48 -0700 Eric Dumazet wrote: > From: Coco Li <lixiaoyan@google.com> > > mlx5 supports LSOv2. > > IPv6 gro/tcp stacks insert a temporary Hop-by-Hop header > with JUMBO TLV for big packets. > > We need to ignore/skip this HBH header when populating TX descriptor. > > Note that ipv6_has_hopopt_jumbo() only recognizes very specific packet > layout, thus mlx5e_sq_xmit_wqe() is taking care of this layout only. > > v2: clear hopbyhop in mlx5e_tx_get_gso_ihs() > v4: fix compile error for CONFIG_MLX5_CORE_IPOIB=y In file included from ../include/linux/string.h:253, from ../arch/x86/include/asm/page_32.h:22, from ../arch/x86/include/asm/page.h:14, from ../arch/x86/include/asm/processor.h:19, from ../arch/x86/include/asm/timex.h:5, from ../include/linux/timex.h:65, from ../include/linux/time32.h:13, from ../include/linux/time.h:60, from ../include/linux/skbuff.h:15, from ../include/linux/tcp.h:17, from ../drivers/net/ethernet/mellanox/mlx5/core/en_tx.c:33: In function ‘fortify_memcpy_chk’, inlined from ‘mlx5e_insert_vlan’ at ../drivers/net/ethernet/mellanox/mlx5/core/en_tx.c:104:2, inlined from ‘mlx5e_sq_xmit_wqe’ at ../drivers/net/ethernet/mellanox/mlx5/core/en_tx.c:404:5: ../include/linux/fortify-string.h:328:25: warning: call to ‘__write_overflow_field’ declared with attribute warning: detected write beyond size of field (1st parameter); maybe use struct_group()? [-Wattribute-warning] 328 | __write_overflow_field(p_size_field, size); | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ In function ‘fortify_memcpy_chk’, inlined from ‘mlx5e_sq_xmit_wqe’ at ../drivers/net/ethernet/mellanox/mlx5/core/en_tx.c:408:5: ../include/linux/fortify-string.h:328:25: warning: call to ‘__write_overflow_field’ declared with attribute warning: detected write beyond size of field (1st parameter); maybe use struct_group()? [-Wattribute-warning] 328 | __write_overflow_field(p_size_field, size); | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ In function ‘fortify_memcpy_chk’, inlined from ‘mlx5i_sq_xmit’ at ../drivers/net/ethernet/mellanox/mlx5/core/en_tx.c:962:4: ../include/linux/fortify-string.h:328:25: warning: call to ‘__write_overflow_field’ declared with attribute warning: detected write beyond size of field (1st parameter); maybe use struct_group()? [-Wattribute-warning] 328 | __write_overflow_field(p_size_field, size); | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
On Fri, May 6, 2022 at 3:34 PM Jakub Kicinski <kuba@kernel.org> wrote: > > On Fri, 6 May 2022 08:30:48 -0700 Eric Dumazet wrote: > > From: Coco Li <lixiaoyan@google.com> > > > > mlx5 supports LSOv2. > > > > IPv6 gro/tcp stacks insert a temporary Hop-by-Hop header > > with JUMBO TLV for big packets. > > > > We need to ignore/skip this HBH header when populating TX descriptor. > > > > Note that ipv6_has_hopopt_jumbo() only recognizes very specific packet > > layout, thus mlx5e_sq_xmit_wqe() is taking care of this layout only. > > > > v2: clear hopbyhop in mlx5e_tx_get_gso_ihs() > > v4: fix compile error for CONFIG_MLX5_CORE_IPOIB=y > > In file included from ../include/linux/string.h:253, > from ../arch/x86/include/asm/page_32.h:22, > from ../arch/x86/include/asm/page.h:14, > from ../arch/x86/include/asm/processor.h:19, > from ../arch/x86/include/asm/timex.h:5, > from ../include/linux/timex.h:65, > from ../include/linux/time32.h:13, > from ../include/linux/time.h:60, > from ../include/linux/skbuff.h:15, > from ../include/linux/tcp.h:17, > from ../drivers/net/ethernet/mellanox/mlx5/core/en_tx.c:33: > In function ‘fortify_memcpy_chk’, > inlined from ‘mlx5e_insert_vlan’ at ../drivers/net/ethernet/mellanox/mlx5/core/en_tx.c:104:2, > inlined from ‘mlx5e_sq_xmit_wqe’ at ../drivers/net/ethernet/mellanox/mlx5/core/en_tx.c:404:5: > ../include/linux/fortify-string.h:328:25: warning: call to ‘__write_overflow_field’ declared with attribute warning: detected write beyond size of field (1st parameter); maybe use struct_group()? [-Wattribute-warning] > 328 | __write_overflow_field(p_size_field, size); > | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ > In function ‘fortify_memcpy_chk’, > inlined from ‘mlx5e_sq_xmit_wqe’ at ../drivers/net/ethernet/mellanox/mlx5/core/en_tx.c:408:5: > ../include/linux/fortify-string.h:328:25: warning: call to ‘__write_overflow_field’ declared with attribute warning: detected write beyond size of field (1st parameter); maybe use struct_group()? [-Wattribute-warning] > 328 | __write_overflow_field(p_size_field, size); > | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ > In function ‘fortify_memcpy_chk’, > inlined from ‘mlx5i_sq_xmit’ at ../drivers/net/ethernet/mellanox/mlx5/core/en_tx.c:962:4: > ../include/linux/fortify-string.h:328:25: warning: call to ‘__write_overflow_field’ declared with attribute warning: detected write beyond size of field (1st parameter); maybe use struct_group()? [-Wattribute-warning] > 328 | __write_overflow_field(p_size_field, size); > | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ I guess these warnings show up before this BIG TCP patch ? I do not see any struct_group() being used in mlx5 May I ask which compiler is used here, and what CONFIG_ option needs to be set ? Thanks.
On Fri, 6 May 2022 17:32:43 -0700 Eric Dumazet wrote: > On Fri, May 6, 2022 at 3:34 PM Jakub Kicinski <kuba@kernel.org> wrote: > > > > On Fri, 6 May 2022 08:30:48 -0700 Eric Dumazet wrote: > > > From: Coco Li <lixiaoyan@google.com> > > > > > > mlx5 supports LSOv2. > > > > > > IPv6 gro/tcp stacks insert a temporary Hop-by-Hop header > > > with JUMBO TLV for big packets. > > > > > > We need to ignore/skip this HBH header when populating TX descriptor. > > > > > > Note that ipv6_has_hopopt_jumbo() only recognizes very specific packet > > > layout, thus mlx5e_sq_xmit_wqe() is taking care of this layout only. > > > > > > v2: clear hopbyhop in mlx5e_tx_get_gso_ihs() > > > v4: fix compile error for CONFIG_MLX5_CORE_IPOIB=y > > > > In file included from ../include/linux/string.h:253, > > from ../arch/x86/include/asm/page_32.h:22, > > from ../arch/x86/include/asm/page.h:14, > > from ../arch/x86/include/asm/processor.h:19, > > from ../arch/x86/include/asm/timex.h:5, > > from ../include/linux/timex.h:65, > > from ../include/linux/time32.h:13, > > from ../include/linux/time.h:60, > > from ../include/linux/skbuff.h:15, > > from ../include/linux/tcp.h:17, > > from ../drivers/net/ethernet/mellanox/mlx5/core/en_tx.c:33: > > In function ‘fortify_memcpy_chk’, > > inlined from ‘mlx5e_insert_vlan’ at ../drivers/net/ethernet/mellanox/mlx5/core/en_tx.c:104:2, > > inlined from ‘mlx5e_sq_xmit_wqe’ at ../drivers/net/ethernet/mellanox/mlx5/core/en_tx.c:404:5: > > ../include/linux/fortify-string.h:328:25: warning: call to ‘__write_overflow_field’ declared with attribute warning: detected write beyond size of field (1st parameter); maybe use struct_group()? [-Wattribute-warning] > > 328 | __write_overflow_field(p_size_field, size); > > | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ > > In function ‘fortify_memcpy_chk’, > > inlined from ‘mlx5e_sq_xmit_wqe’ at ../drivers/net/ethernet/mellanox/mlx5/core/en_tx.c:408:5: > > ../include/linux/fortify-string.h:328:25: warning: call to ‘__write_overflow_field’ declared with attribute warning: detected write beyond size of field (1st parameter); maybe use struct_group()? [-Wattribute-warning] > > 328 | __write_overflow_field(p_size_field, size); > > | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ > > In function ‘fortify_memcpy_chk’, > > inlined from ‘mlx5i_sq_xmit’ at ../drivers/net/ethernet/mellanox/mlx5/core/en_tx.c:962:4: > > ../include/linux/fortify-string.h:328:25: warning: call to ‘__write_overflow_field’ declared with attribute warning: detected write beyond size of field (1st parameter); maybe use struct_group()? [-Wattribute-warning] > > 328 | __write_overflow_field(p_size_field, size); > > | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ > > I guess these warnings show up before this BIG TCP patch ? > > I do not see any struct_group() being used in mlx5 > > May I ask which compiler is used here, and what CONFIG_ option needs to be set ? > > Thanks. Without our patches drivers/net/ethernet/mellanox/mlx5/core/ builds cleanly. Gotta be the new W=1 filed overflow warnings, let's bother Kees. I believe this is the code in question: @@ -379,15 +393,36 @@ mlx5e_sq_xmit_wqe(struct mlx5e_txqsq *sq, struct sk_buff *skb, + u8 *start = eseg->inline_hdr.start; + + if (unlikely(attr->hopbyhop)) { + /* remove the HBH header. + * Layout: [Ethernet header][IPv6 header][HBH][TCP header] + */ + if (skb_vlan_tag_present(skb)) { + mlx5e_insert_vlan(start, skb, ETH_HLEN + sizeof(*h6)); Unhappiness #1 ^^^ Where mlx5e_insert_vlan() is: static inline void mlx5e_insert_vlan(void *start, struct sk_buff *skb, u16 ihs) { struct vlan_ethhdr *vhdr = (struct vlan_ethhdr *)start; int cpy1_sz = 2 * ETH_ALEN; int cpy2_sz = ihs - cpy1_sz; memcpy(&vhdr->addrs, skb->data, cpy1_sz); vhdr->h_vlan_proto = skb->vlan_proto; vhdr->h_vlan_TCI = cpu_to_be16(skb_vlan_tag_get(skb)); memcpy(&vhdr->h_vlan_encapsulated_proto, skb->data + cpy1_sz, cpy2_sz); } indeed ihs == ETH_HLEN + sizeof(*h6) will make cpy2_sz come out as something much bigger than the vhdr->h_vlan_encapsulated_proto field. + ihs += VLAN_HLEN; + h6 = (struct ipv6hdr *)(start + sizeof(struct vlan_ethhdr)); + } else { + memcpy(start, skb->data, ETH_HLEN + sizeof(*h6)); Unhappiness #2 ^^^ Again, ETH_HLEN + sizeof(*h6) will be larger than eseg->inline_hdr.start which is what start is pointing at. + h6 = (struct ipv6hdr *)(start + ETH_HLEN); + } I didn't look where #3 is.
On Fri, 6 May 2022 18:54:05 -0700 Jakub Kicinski wrote:
> Without our patches drivers/net/ethernet/mellanox/mlx5/core/ builds
s/our/your/
On Fri, May 6, 2022 at 6:54 PM Jakub Kicinski <kuba@kernel.org> wrote: > > On Fri, 6 May 2022 17:32:43 -0700 Eric Dumazet wrote: > > On Fri, May 6, 2022 at 3:34 PM Jakub Kicinski <kuba@kernel.org> wrote: > > > > > > On Fri, 6 May 2022 08:30:48 -0700 Eric Dumazet wrote: > > > > From: Coco Li <lixiaoyan@google.com> > > > > > > > > mlx5 supports LSOv2. > > > > > > > > IPv6 gro/tcp stacks insert a temporary Hop-by-Hop header > > > > with JUMBO TLV for big packets. > > > > > > > > We need to ignore/skip this HBH header when populating TX descriptor. > > > > > > > > Note that ipv6_has_hopopt_jumbo() only recognizes very specific packet > > > > layout, thus mlx5e_sq_xmit_wqe() is taking care of this layout only. > > > > > > > > v2: clear hopbyhop in mlx5e_tx_get_gso_ihs() > > > > v4: fix compile error for CONFIG_MLX5_CORE_IPOIB=y > > > > > > In file included from ../include/linux/string.h:253, > > > from ../arch/x86/include/asm/page_32.h:22, > > > from ../arch/x86/include/asm/page.h:14, > > > from ../arch/x86/include/asm/processor.h:19, > > > from ../arch/x86/include/asm/timex.h:5, > > > from ../include/linux/timex.h:65, > > > from ../include/linux/time32.h:13, > > > from ../include/linux/time.h:60, > > > from ../include/linux/skbuff.h:15, > > > from ../include/linux/tcp.h:17, > > > from ../drivers/net/ethernet/mellanox/mlx5/core/en_tx.c:33: > > > In function ‘fortify_memcpy_chk’, > > > inlined from ‘mlx5e_insert_vlan’ at ../drivers/net/ethernet/mellanox/mlx5/core/en_tx.c:104:2, > > > inlined from ‘mlx5e_sq_xmit_wqe’ at ../drivers/net/ethernet/mellanox/mlx5/core/en_tx.c:404:5: > > > ../include/linux/fortify-string.h:328:25: warning: call to ‘__write_overflow_field’ declared with attribute warning: detected write beyond size of field (1st parameter); maybe use struct_group()? [-Wattribute-warning] > > > 328 | __write_overflow_field(p_size_field, size); > > > | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ > > > In function ‘fortify_memcpy_chk’, > > > inlined from ‘mlx5e_sq_xmit_wqe’ at ../drivers/net/ethernet/mellanox/mlx5/core/en_tx.c:408:5: > > > ../include/linux/fortify-string.h:328:25: warning: call to ‘__write_overflow_field’ declared with attribute warning: detected write beyond size of field (1st parameter); maybe use struct_group()? [-Wattribute-warning] > > > 328 | __write_overflow_field(p_size_field, size); > > > | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ > > > In function ‘fortify_memcpy_chk’, > > > inlined from ‘mlx5i_sq_xmit’ at ../drivers/net/ethernet/mellanox/mlx5/core/en_tx.c:962:4: > > > ../include/linux/fortify-string.h:328:25: warning: call to ‘__write_overflow_field’ declared with attribute warning: detected write beyond size of field (1st parameter); maybe use struct_group()? [-Wattribute-warning] > > > 328 | __write_overflow_field(p_size_field, size); > > > | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ > > > > I guess these warnings show up before this BIG TCP patch ? > > > > I do not see any struct_group() being used in mlx5 > > > > May I ask which compiler is used here, and what CONFIG_ option needs to be set ? > > > > Thanks. > > Without our patches drivers/net/ethernet/mellanox/mlx5/core/ builds > cleanly. Gotta be the new W=1 filed overflow warnings, let's bother > Kees. Note that inline_hdr.start is a 2 byte array. Obviously mlx5 driver copies more than 2 bytes of inlined headers. mlx5e_insert_vlan(eseg->inline_hdr.start, skb, attr->ihs) is called already with attr->ihs > 2 So it should already complain ? static inline void mlx5e_insert_vlan(void *start, struct sk_buff *skb, u16 ihs) { struct vlan_ethhdr *vhdr = (struct vlan_ethhdr *)start; int cpy1_sz = 2 * ETH_ALEN; int cpy2_sz = ihs - cpy1_sz; memcpy(&vhdr->addrs, skb->data, cpy1_sz); vhdr->h_vlan_proto = skb->vlan_proto; vhdr->h_vlan_TCI = cpu_to_be16(skb_vlan_tag_get(skb)); memcpy(&vhdr->h_vlan_encapsulated_proto, skb->data + cpy1_sz, cpy2_sz); // Here, more than 2 bytes are copied already }
On Fri, 6 May 2022 19:10:48 -0700 Eric Dumazet wrote: > On Fri, May 6, 2022 at 6:54 PM Jakub Kicinski <kuba@kernel.org> wrote: > > Without our patches drivers/net/ethernet/mellanox/mlx5/core/ builds > > cleanly. Gotta be the new W=1 filed overflow warnings, let's bother > > Kees. > > Note that inline_hdr.start is a 2 byte array. > > Obviously mlx5 driver copies more than 2 bytes of inlined headers. > > mlx5e_insert_vlan(eseg->inline_hdr.start, skb, attr->ihs) > is called already with attr->ihs > 2 > > So it should already complain ? It's a static checker, I presume it ignores attr->ihs because it can't prove its value is indeed > 2. Unpleasant :/ > static inline void mlx5e_insert_vlan(void *start, struct sk_buff *skb, u16 ihs) > { > struct vlan_ethhdr *vhdr = (struct vlan_ethhdr *)start; > int cpy1_sz = 2 * ETH_ALEN; > int cpy2_sz = ihs - cpy1_sz; > > memcpy(&vhdr->addrs, skb->data, cpy1_sz); > vhdr->h_vlan_proto = skb->vlan_proto; > vhdr->h_vlan_TCI = cpu_to_be16(skb_vlan_tag_get(skb)); > memcpy(&vhdr->h_vlan_encapsulated_proto, skb->data + cpy1_sz, > cpy2_sz); // Here, more than 2 bytes are copied already > }
On Fri, May 6, 2022 at 7:37 PM Jakub Kicinski <kuba@kernel.org> wrote: > > On Fri, 6 May 2022 19:10:48 -0700 Eric Dumazet wrote: > > On Fri, May 6, 2022 at 6:54 PM Jakub Kicinski <kuba@kernel.org> wrote: > > > Without our patches drivers/net/ethernet/mellanox/mlx5/core/ builds > > > cleanly. Gotta be the new W=1 filed overflow warnings, let's bother > > > Kees. > > > > Note that inline_hdr.start is a 2 byte array. > > > > Obviously mlx5 driver copies more than 2 bytes of inlined headers. > > > > mlx5e_insert_vlan(eseg->inline_hdr.start, skb, attr->ihs) > > is called already with attr->ihs > 2 > > > > So it should already complain ? > > It's a static checker, I presume it ignores attr->ihs because > it can't prove its value is indeed > 2. Unpleasant :/ Well, the unpleasant thing is that I do not see a way to get rid of this warning. Networking is full of variable sized headers. > > > static inline void mlx5e_insert_vlan(void *start, struct sk_buff *skb, u16 ihs) > > { > > struct vlan_ethhdr *vhdr = (struct vlan_ethhdr *)start; > > int cpy1_sz = 2 * ETH_ALEN; > > int cpy2_sz = ihs - cpy1_sz; > > > > memcpy(&vhdr->addrs, skb->data, cpy1_sz); > > vhdr->h_vlan_proto = skb->vlan_proto; > > vhdr->h_vlan_TCI = cpu_to_be16(skb_vlan_tag_get(skb)); > > memcpy(&vhdr->h_vlan_encapsulated_proto, skb->data + cpy1_sz, > > cpy2_sz); // Here, more than 2 bytes are copied already > > } >
On Fri, May 06, 2022 at 06:54:05PM -0700, Jakub Kicinski wrote: > On Fri, 6 May 2022 17:32:43 -0700 Eric Dumazet wrote: > > On Fri, May 6, 2022 at 3:34 PM Jakub Kicinski <kuba@kernel.org> wrote: > > > > > > On Fri, 6 May 2022 08:30:48 -0700 Eric Dumazet wrote: > > > > From: Coco Li <lixiaoyan@google.com> > > > > > > > > mlx5 supports LSOv2. > > > > > > > > IPv6 gro/tcp stacks insert a temporary Hop-by-Hop header > > > > with JUMBO TLV for big packets. > > > > > > > > We need to ignore/skip this HBH header when populating TX descriptor. > > > > > > > > Note that ipv6_has_hopopt_jumbo() only recognizes very specific packet > > > > layout, thus mlx5e_sq_xmit_wqe() is taking care of this layout only. > > > > > > > > v2: clear hopbyhop in mlx5e_tx_get_gso_ihs() > > > > v4: fix compile error for CONFIG_MLX5_CORE_IPOIB=y > > > > > > In file included from ../include/linux/string.h:253, > > > from ../arch/x86/include/asm/page_32.h:22, > > > from ../arch/x86/include/asm/page.h:14, > > > from ../arch/x86/include/asm/processor.h:19, > > > from ../arch/x86/include/asm/timex.h:5, > > > from ../include/linux/timex.h:65, > > > from ../include/linux/time32.h:13, > > > from ../include/linux/time.h:60, > > > from ../include/linux/skbuff.h:15, > > > from ../include/linux/tcp.h:17, > > > from ../drivers/net/ethernet/mellanox/mlx5/core/en_tx.c:33: > > > In function ‘fortify_memcpy_chk’, > > > inlined from ‘mlx5e_insert_vlan’ at ../drivers/net/ethernet/mellanox/mlx5/core/en_tx.c:104:2, > > > inlined from ‘mlx5e_sq_xmit_wqe’ at ../drivers/net/ethernet/mellanox/mlx5/core/en_tx.c:404:5: > > > ../include/linux/fortify-string.h:328:25: warning: call to ‘__write_overflow_field’ declared with attribute warning: detected write beyond size of field (1st parameter); maybe use struct_group()? [-Wattribute-warning] > > > 328 | __write_overflow_field(p_size_field, size); > > > | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ > > > In function ‘fortify_memcpy_chk’, > > > inlined from ‘mlx5e_sq_xmit_wqe’ at ../drivers/net/ethernet/mellanox/mlx5/core/en_tx.c:408:5: > > > ../include/linux/fortify-string.h:328:25: warning: call to ‘__write_overflow_field’ declared with attribute warning: detected write beyond size of field (1st parameter); maybe use struct_group()? [-Wattribute-warning] > > > 328 | __write_overflow_field(p_size_field, size); > > > | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ > > > In function ‘fortify_memcpy_chk’, > > > inlined from ‘mlx5i_sq_xmit’ at ../drivers/net/ethernet/mellanox/mlx5/core/en_tx.c:962:4: > > > ../include/linux/fortify-string.h:328:25: warning: call to ‘__write_overflow_field’ declared with attribute warning: detected write beyond size of field (1st parameter); maybe use struct_group()? [-Wattribute-warning] > > > 328 | __write_overflow_field(p_size_field, size); > > > | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ > > > > I guess these warnings show up before this BIG TCP patch ? > > > > I do not see any struct_group() being used in mlx5 > > > > May I ask which compiler is used here, and what CONFIG_ option needs to be set ? > > > > Thanks. > > Without our patches drivers/net/ethernet/mellanox/mlx5/core/ builds > cleanly. Gotta be the new W=1 filed overflow warnings, let's bother > Kees. Hello! These aren't from W=1. The read overflows are hidden behind W=1. I imagine this is due to gcc getting smarter and being able to introspect the possible values of ihs during inlining. > I believe this is the code in question: > > @@ -379,15 +393,36 @@ mlx5e_sq_xmit_wqe(struct mlx5e_txqsq *sq, struct sk_buff *skb, > > + u8 *start = eseg->inline_hdr.start; > + > + if (unlikely(attr->hopbyhop)) { > + /* remove the HBH header. > + * Layout: [Ethernet header][IPv6 header][HBH][TCP header] > + */ > + if (skb_vlan_tag_present(skb)) { > + mlx5e_insert_vlan(start, skb, ETH_HLEN + sizeof(*h6)); > > Unhappiness #1 ^^^ > > Where mlx5e_insert_vlan() is: > > static inline void mlx5e_insert_vlan(void *start, struct sk_buff *skb, u16 ihs) > { > struct vlan_ethhdr *vhdr = (struct vlan_ethhdr *)start; > int cpy1_sz = 2 * ETH_ALEN; > int cpy2_sz = ihs - cpy1_sz; Why are these "int"? Seems like they should be u16? > > memcpy(&vhdr->addrs, skb->data, cpy1_sz); ^^^^^ this line was actually fixed earlier. > vhdr->h_vlan_proto = skb->vlan_proto; > vhdr->h_vlan_TCI = cpu_to_be16(skb_vlan_tag_get(skb)); > memcpy(&vhdr->h_vlan_encapsulated_proto, skb->data + cpy1_sz, cpy2_sz); ^^^^^ This one, though, is the new problem. The lack of annotation in the struct made me miss it -- this code is asking the compiler to potentially copy beyond the end of the struct declaration. If this is intentional, I could suggest a solution, but ... > } > > indeed ihs == ETH_HLEN + sizeof(*h6) will make cpy2_sz come out as something > much bigger than the vhdr->h_vlan_encapsulated_proto field. It sounds like it's not. In which case, I would ask: "what validates the size of ihs?" because neither I nor the compiler can see it. :P If nothing validates it, then this looks like a potential heap overflow, though I haven't studied how these is laid out in memory. Maybe it's harmless, but I never assume that. :)
On Fri, May 06, 2022 at 07:43:13PM -0700, Eric Dumazet wrote: > On Fri, May 6, 2022 at 7:37 PM Jakub Kicinski <kuba@kernel.org> wrote: > > > > On Fri, 6 May 2022 19:10:48 -0700 Eric Dumazet wrote: > > > On Fri, May 6, 2022 at 6:54 PM Jakub Kicinski <kuba@kernel.org> wrote: > > > > Without our patches drivers/net/ethernet/mellanox/mlx5/core/ builds > > > > cleanly. Gotta be the new W=1 filed overflow warnings, let's bother > > > > Kees. > > > > > > Note that inline_hdr.start is a 2 byte array. > > > > > > Obviously mlx5 driver copies more than 2 bytes of inlined headers. > > > > > > mlx5e_insert_vlan(eseg->inline_hdr.start, skb, attr->ihs) > > > is called already with attr->ihs > 2 > > > > > > So it should already complain ? > > > > It's a static checker, I presume it ignores attr->ihs because > > it can't prove its value is indeed > 2. Unpleasant :/ > > Well, the unpleasant thing is that I do not see a way to get rid of > this warning. > Networking is full of variable sized headers. So... this _is_ supposed to be copying off the end of struct vlan_ethhdr? In that case, either don't use the vhdr cast, or add a flex array to the end of the header. e.g. (untested): diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c index 2dc48406cd08..990476b2e595 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c @@ -94,13 +94,18 @@ static inline u16 mlx5e_calc_min_inline(enum mlx5_inline_modes mode, static inline void mlx5e_insert_vlan(void *start, struct sk_buff *skb, u16 ihs) { struct vlan_ethhdr *vhdr = (struct vlan_ethhdr *)start; - int cpy1_sz = 2 * ETH_ALEN; - int cpy2_sz = ihs - cpy1_sz; + void *data = skb->data; + const u16 cpy1_sz = sizeof(vhdr->addrs); + const u16 cpy2_sz = sizeof(vhdr->h_vlan_encapsulated_proto); + const u16 cpy3_sz = ihs - cpy1_sz - cpy2_sz; - memcpy(&vhdr->addrs, skb->data, cpy1_sz); + memcpy(&vhdr->addrs, data, cpy1_sz); + data += sizeof(cpy1_sz); vhdr->h_vlan_proto = skb->vlan_proto; vhdr->h_vlan_TCI = cpu_to_be16(skb_vlan_tag_get(skb)); - memcpy(&vhdr->h_vlan_encapsulated_proto, skb->data + cpy1_sz, cpy2_sz); + memcpy(&vhdr->h_vlan_encapsulated_proto, data, cpy2_sz); + data += sizeof(cpy2_sz); + memcpy(&vhdr->h_vlan_contents, data, cpy3_sz); } static inline void diff --git a/include/linux/if_vlan.h b/include/linux/if_vlan.h index 2be4dd7e90a9..8178e20ce5b3 100644 --- a/include/linux/if_vlan.h +++ b/include/linux/if_vlan.h @@ -44,6 +44,7 @@ struct vlan_hdr { * @h_vlan_proto: ethernet protocol * @h_vlan_TCI: priority and VLAN ID * @h_vlan_encapsulated_proto: packet type ID or len + * @h_vlan_contents: The rest of the packet */ struct vlan_ethhdr { struct_group(addrs, @@ -53,6 +54,7 @@ struct vlan_ethhdr { __be16 h_vlan_proto; __be16 h_vlan_TCI; __be16 h_vlan_encapsulated_proto; + u8 h_vlan_contents[]; }; #include <linux/skbuff.h> I'm still learning the skb helpers, but shouldn't this be using something similar to skb_pull() that would do bounds checking, etc? Open-coded accesses of skb->data have shown a repeated pattern of being a source of flaws: https://github.com/KSPP/linux/issues/140 And speaking to the existing code, even if skb->data were bounds-checked, what are the bounds of "start"?
On Fri, May 06, 2022 at 07:37:34PM -0700, Jakub Kicinski wrote: > On Fri, 6 May 2022 19:10:48 -0700 Eric Dumazet wrote: > > On Fri, May 6, 2022 at 6:54 PM Jakub Kicinski <kuba@kernel.org> wrote: > > > Without our patches drivers/net/ethernet/mellanox/mlx5/core/ builds > > > cleanly. Gotta be the new W=1 filed overflow warnings, let's bother > > > Kees. > > > > Note that inline_hdr.start is a 2 byte array. > > > > Obviously mlx5 driver copies more than 2 bytes of inlined headers. > > > > mlx5e_insert_vlan(eseg->inline_hdr.start, skb, attr->ihs) > > is called already with attr->ihs > 2 > > > > So it should already complain ? > > It's a static checker, I presume it ignores attr->ihs because > it can't prove its value is indeed > 2. Unpleasant :/ I think it's actually the reverse. GCC keeps getting better about tracking potential variable value ranges. In this case it thinks ihs WILL be > 2. And this is bumping up against the kernel's lack of "intentional overflow" annotations in source (i.e. structure layouts). But we can't protect against unintentional overflow unless we've been able to explicitly describe to the compiler what is intended.
On Fri, May 06, 2022 at 06:54:05PM -0700, Jakub Kicinski wrote: > On Fri, 6 May 2022 17:32:43 -0700 Eric Dumazet wrote: > > On Fri, May 6, 2022 at 3:34 PM Jakub Kicinski <kuba@kernel.org> wrote: > > > In function ‘fortify_memcpy_chk’, > > > inlined from ‘mlx5e_sq_xmit_wqe’ at ../drivers/net/ethernet/mellanox/mlx5/core/en_tx.c:408:5: > > > ../include/linux/fortify-string.h:328:25: warning: call to ‘__write_overflow_field’ declared with attribute warning: detected write beyond size of field (1st parameter); maybe use struct_group()? [-Wattribute-warning] > > > 328 | __write_overflow_field(p_size_field, size); > > > | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Ah, my old friend, inline_hdr.start. Looks a lot like another one I fixed earlier in ad5185735f7d ("net/mlx5e: Avoid field-overflowing memcpy()"): if (attr->ihs) { if (skb_vlan_tag_present(skb)) { eseg->inline_hdr.sz |= cpu_to_be16(attr->ihs + VLAN_HLEN); mlx5e_insert_vlan(eseg->inline_hdr.start, skb, attr->ihs); stats->added_vlan_packets++; } else { eseg->inline_hdr.sz |= cpu_to_be16(attr->ihs); memcpy(eseg->inline_hdr.start, skb->data, attr->ihs); ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ } dseg += wqe_attr->ds_cnt_inl; This is actually two regions, 2 bytes in eseg and everything else in dseg. Splitting the memcpy() will work: memcpy(eseg->inline_hdr.start, skb->data, sizeof(eseg->inline_hdr.start)); memcpy(dseg, skb->data + sizeof(eseg->inline_hdr.start), ihs - sizeof(eseg->inline_hdr.start)); But this begs the question, what is validating that ihs -2 is equal to wqe_attr->ds_cnt_inl * sizeof(*desg) ? And how is wqe bounds checked? > > > In function ‘fortify_memcpy_chk’, > > > inlined from ‘mlx5i_sq_xmit’ at ../drivers/net/ethernet/mellanox/mlx5/core/en_tx.c:962:4: > > > ../include/linux/fortify-string.h:328:25: warning: call to ‘__write_overflow_field’ declared with attribute warning: detected write beyond size of field (1st parameter); maybe use struct_group()? [-Wattribute-warning] > > > 328 | __write_overflow_field(p_size_field, size); > > > | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ And moar inline_hdr.start: if (attr.ihs) { memcpy(eseg->inline_hdr.start, skb->data, attr.ihs); eseg->inline_hdr.sz = cpu_to_be16(attr.ihs); dseg += wqe_attr.ds_cnt_inl; } again, a split: memcpy(eseg->inline_hdr.start, skb->data, sizeof(eseg->inline_hdr.start)); eseg->inline_hdr.sz = cpu_to_be16(attr.ihs); memcpy(dseg, skb->data + sizeof(eseg->inline_hdr.start), ihs - sizeof(eseg->inline_hdr.start)); dseg += wqe_attr.ds_cnt_inl; And the same bounds questions come up. It'd be really nice to get some kind of generalized "copy out of skb->data with bounds checking that may likely all get reduced to constant checks".
On Sat, May 7, 2022 at 12:46 AM Kees Cook <keescook@chromium.org> wrote: > > On Fri, May 06, 2022 at 06:54:05PM -0700, Jakub Kicinski wrote: > > On Fri, 6 May 2022 17:32:43 -0700 Eric Dumazet wrote: > > > On Fri, May 6, 2022 at 3:34 PM Jakub Kicinski <kuba@kernel.org> wrote: > > > > In function ‘fortify_memcpy_chk’, > > > > inlined from ‘mlx5e_sq_xmit_wqe’ at ../drivers/net/ethernet/mellanox/mlx5/core/en_tx.c:408:5: > > > > ../include/linux/fortify-string.h:328:25: warning: call to ‘__write_overflow_field’ declared with attribute warning: detected write beyond size of field (1st parameter); maybe use struct_group()? [-Wattribute-warning] > > > > 328 | __write_overflow_field(p_size_field, size); > > > > | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ > > Ah, my old friend, inline_hdr.start. Looks a lot like another one I fixed > earlier in ad5185735f7d ("net/mlx5e: Avoid field-overflowing memcpy()"): > > if (attr->ihs) { > if (skb_vlan_tag_present(skb)) { > eseg->inline_hdr.sz |= cpu_to_be16(attr->ihs + VLAN_HLEN); > mlx5e_insert_vlan(eseg->inline_hdr.start, skb, attr->ihs); > stats->added_vlan_packets++; > } else { > eseg->inline_hdr.sz |= cpu_to_be16(attr->ihs); > memcpy(eseg->inline_hdr.start, skb->data, attr->ihs); > ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ > } > dseg += wqe_attr->ds_cnt_inl; > > This is actually two regions, 2 bytes in eseg and everything else in > dseg. Splitting the memcpy() will work: > > memcpy(eseg->inline_hdr.start, skb->data, sizeof(eseg->inline_hdr.start)); > memcpy(dseg, skb->data + sizeof(eseg->inline_hdr.start), ihs - sizeof(eseg->inline_hdr.start)); > > But this begs the question, what is validating that ihs -2 is equal to > wqe_attr->ds_cnt_inl * sizeof(*desg) ? > > And how is wqe bounds checked? Look at the definition of struct mlx5i_tx_wqe Then mlx5i_sq_calc_wqe_attr() computes the number of ds_cnt (16 bytes granularity) units needed. Then look at mlx5e_txqsq_get_next_pi() I doubt a compiler can infer that the driver is correct. Basically this is variable length structure, quite common in NIC world, given number of dma descriptor can vary from 1 to XX, and variable size of headers. (Typically, fast NIC want to get the headers inlined in TX descriptor) > > > > > > In function ‘fortify_memcpy_chk’, > > > > inlined from ‘mlx5i_sq_xmit’ at ../drivers/net/ethernet/mellanox/mlx5/core/en_tx.c:962:4: > > > > ../include/linux/fortify-string.h:328:25: warning: call to ‘__write_overflow_field’ declared with attribute warning: detected write beyond size of field (1st parameter); maybe use struct_group()? [-Wattribute-warning] > > > > 328 | __write_overflow_field(p_size_field, size); > > > > | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ > > And moar inline_hdr.start: > > if (attr.ihs) { > memcpy(eseg->inline_hdr.start, skb->data, attr.ihs); > eseg->inline_hdr.sz = cpu_to_be16(attr.ihs); > dseg += wqe_attr.ds_cnt_inl; > } > > again, a split: > > memcpy(eseg->inline_hdr.start, skb->data, sizeof(eseg->inline_hdr.start)); > eseg->inline_hdr.sz = cpu_to_be16(attr.ihs); > memcpy(dseg, skb->data + sizeof(eseg->inline_hdr.start), ihs - sizeof(eseg->inline_hdr.start)); > dseg += wqe_attr.ds_cnt_inl; > > And the same bounds questions come up. > > It'd be really nice to get some kind of generalized "copy out of > skb->data with bounds checking that may likely all get reduced to > constant checks". NIC drivers send millions of packets per second. We can not really afford copying each component of a frame one byte at a time. The memcpy() here can typically copy IPv6 header (40 bytes) + TCP header (up to 60 bytes), plus more headers if encapsulation is added. Thanks.
From: Eric Dumazet > Sent: 07 May 2022 12:19 .... > NIC drivers send millions of packets per second. > We can not really afford copying each component of a frame one byte at a time. Any run-time checks om memcpy() are also going to kill performance. The 'user copy hardening' tests already have a measurable effect on system calls like sendmsg(). David - Registered Address Lakeside, Bramley Road, Mount Farm, Milton Keynes, MK1 1PT, UK Registration No: 1397386 (Wales)
On Sat, May 07, 2022 at 04:19:06AM -0700, Eric Dumazet wrote: > On Sat, May 7, 2022 at 12:46 AM Kees Cook <keescook@chromium.org> wrote: > > > > On Fri, May 06, 2022 at 06:54:05PM -0700, Jakub Kicinski wrote: > > > On Fri, 6 May 2022 17:32:43 -0700 Eric Dumazet wrote: > > > > On Fri, May 6, 2022 at 3:34 PM Jakub Kicinski <kuba@kernel.org> wrote: > > > > > In function ‘fortify_memcpy_chk’, > > > > > inlined from ‘mlx5e_sq_xmit_wqe’ at ../drivers/net/ethernet/mellanox/mlx5/core/en_tx.c:408:5: > > > > > ../include/linux/fortify-string.h:328:25: warning: call to ‘__write_overflow_field’ declared with attribute warning: detected write beyond size of field (1st parameter); maybe use struct_group()? [-Wattribute-warning] > > > > > 328 | __write_overflow_field(p_size_field, size); > > > > > | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ > > > > Ah, my old friend, inline_hdr.start. Looks a lot like another one I fixed > > earlier in ad5185735f7d ("net/mlx5e: Avoid field-overflowing memcpy()"): > > > > if (attr->ihs) { > > if (skb_vlan_tag_present(skb)) { > > eseg->inline_hdr.sz |= cpu_to_be16(attr->ihs + VLAN_HLEN); > > mlx5e_insert_vlan(eseg->inline_hdr.start, skb, attr->ihs); > > stats->added_vlan_packets++; > > } else { > > eseg->inline_hdr.sz |= cpu_to_be16(attr->ihs); > > memcpy(eseg->inline_hdr.start, skb->data, attr->ihs); > > ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ > > } > > dseg += wqe_attr->ds_cnt_inl; > > > > This is actually two regions, 2 bytes in eseg and everything else in > > dseg. Splitting the memcpy() will work: > > > > memcpy(eseg->inline_hdr.start, skb->data, sizeof(eseg->inline_hdr.start)); > > memcpy(dseg, skb->data + sizeof(eseg->inline_hdr.start), ihs - sizeof(eseg->inline_hdr.start)); > > > > But this begs the question, what is validating that ihs -2 is equal to > > wqe_attr->ds_cnt_inl * sizeof(*desg) ? > > > > And how is wqe bounds checked? > > Look at the definition of struct mlx5i_tx_wqe > > Then mlx5i_sq_calc_wqe_attr() computes the number of ds_cnt (16 bytes > granularity) > units needed. > > Then look at mlx5e_txqsq_get_next_pi() Thanks! I'll study the paths. > I doubt a compiler can infer that the driver is correct. Agreed; this layering visibility is a bit strange to deal with. I'll see if I can come up with a sane solution that doesn't split the memcpy but establishes some way to do compile-time (or run-time) bounds checking. If I can't, I suspect I'll have to create an "unsafe_memcpy" wrapper that expressly ignores the structure layouts, etc. That's basically what memcpy() currently is, so it's not a regression from that perspective. I'd just prefer to find a way to refactor things so that the compiler can actually help us do the bounds checking. > Basically this is variable length structure, quite common in NIC > world, given number of dma descriptor can vary from 1 to XX, > and variable size of headers. (Typically, fast NIC want to get the > headers inlined in TX descriptor) Yup; most of the refactoring patches I've sent for the memcpy bounds checking have been in networking. :) (But then, also, all the recent security flaws with memcpy overflows have also been in networking, so no real surprise, I guess.) > NIC drivers send millions of packets per second. > We can not really afford copying each component of a frame one byte at a time. > > The memcpy() here can typically copy IPv6 header (40 bytes) + TCP > header (up to 60 bytes), plus more headers if encapsulation is added. Right; I need to make sure this gets fixed without wrecking performance. :)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index d27986869b8ba070d1a4f8bcdc7e14ab54ae984e..226825410a1aa55b5b7941a7389a78abdb800521 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -4920,6 +4920,7 @@ static void mlx5e_build_nic_netdev(struct net_device *netdev) netdev->priv_flags |= IFF_UNICAST_FLT; + netif_set_tso_max_size(netdev, 512 * 1024); mlx5e_set_netdev_dev_addr(netdev); mlx5e_ipsec_build_netdev(priv); mlx5e_ktls_build_netdev(priv); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c index 2dc48406cd08d21ff94f665cd61ab9227f351215..b4fc45ba1b347fb9ad0f46b9c091cc45e4d3d84f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c @@ -40,6 +40,7 @@ #include "en_accel/en_accel.h" #include "en_accel/ipsec_rxtx.h" #include "en/ptp.h" +#include <net/ipv6.h> static void mlx5e_dma_unmap_wqe_err(struct mlx5e_txqsq *sq, u8 num_dma) { @@ -130,23 +131,32 @@ mlx5e_txwqe_build_eseg_csum(struct mlx5e_txqsq *sq, struct sk_buff *skb, sq->stats->csum_none++; } +/* Returns the number of header bytes that we plan + * to inline later in the transmit descriptor + */ static inline u16 -mlx5e_tx_get_gso_ihs(struct mlx5e_txqsq *sq, struct sk_buff *skb) +mlx5e_tx_get_gso_ihs(struct mlx5e_txqsq *sq, struct sk_buff *skb, int *hopbyhop) { struct mlx5e_sq_stats *stats = sq->stats; u16 ihs; + *hopbyhop = 0; if (skb->encapsulation) { ihs = skb_inner_transport_offset(skb) + inner_tcp_hdrlen(skb); stats->tso_inner_packets++; stats->tso_inner_bytes += skb->len - ihs; } else { - if (skb_shinfo(skb)->gso_type & SKB_GSO_UDP_L4) + if (skb_shinfo(skb)->gso_type & SKB_GSO_UDP_L4) { ihs = skb_transport_offset(skb) + sizeof(struct udphdr); - else + } else { ihs = skb_transport_offset(skb) + tcp_hdrlen(skb); + if (ipv6_has_hopopt_jumbo(skb)) { + *hopbyhop = sizeof(struct hop_jumbo_hdr); + ihs -= sizeof(struct hop_jumbo_hdr); + } + } stats->tso_packets++; - stats->tso_bytes += skb->len - ihs; + stats->tso_bytes += skb->len - ihs - *hopbyhop; } return ihs; @@ -208,6 +218,7 @@ struct mlx5e_tx_attr { __be16 mss; u16 insz; u8 opcode; + u8 hopbyhop; }; struct mlx5e_tx_wqe_attr { @@ -244,14 +255,16 @@ static void mlx5e_sq_xmit_prepare(struct mlx5e_txqsq *sq, struct sk_buff *skb, struct mlx5e_sq_stats *stats = sq->stats; if (skb_is_gso(skb)) { - u16 ihs = mlx5e_tx_get_gso_ihs(sq, skb); + int hopbyhop; + u16 ihs = mlx5e_tx_get_gso_ihs(sq, skb, &hopbyhop); *attr = (struct mlx5e_tx_attr) { .opcode = MLX5_OPCODE_LSO, .mss = cpu_to_be16(skb_shinfo(skb)->gso_size), .ihs = ihs, .num_bytes = skb->len + (skb_shinfo(skb)->gso_segs - 1) * ihs, - .headlen = skb_headlen(skb) - ihs, + .headlen = skb_headlen(skb) - ihs - hopbyhop, + .hopbyhop = hopbyhop, }; stats->packets += skb_shinfo(skb)->gso_segs; @@ -365,7 +378,8 @@ mlx5e_sq_xmit_wqe(struct mlx5e_txqsq *sq, struct sk_buff *skb, struct mlx5_wqe_eth_seg *eseg; struct mlx5_wqe_data_seg *dseg; struct mlx5e_tx_wqe_info *wi; - + u16 ihs = attr->ihs; + struct ipv6hdr *h6; struct mlx5e_sq_stats *stats = sq->stats; int num_dma; @@ -379,15 +393,36 @@ mlx5e_sq_xmit_wqe(struct mlx5e_txqsq *sq, struct sk_buff *skb, eseg->mss = attr->mss; - if (attr->ihs) { - if (skb_vlan_tag_present(skb)) { - eseg->inline_hdr.sz |= cpu_to_be16(attr->ihs + VLAN_HLEN); - mlx5e_insert_vlan(eseg->inline_hdr.start, skb, attr->ihs); + if (ihs) { + u8 *start = eseg->inline_hdr.start; + + if (unlikely(attr->hopbyhop)) { + /* remove the HBH header. + * Layout: [Ethernet header][IPv6 header][HBH][TCP header] + */ + if (skb_vlan_tag_present(skb)) { + mlx5e_insert_vlan(start, skb, ETH_HLEN + sizeof(*h6)); + ihs += VLAN_HLEN; + h6 = (struct ipv6hdr *)(start + sizeof(struct vlan_ethhdr)); + } else { + memcpy(start, skb->data, ETH_HLEN + sizeof(*h6)); + h6 = (struct ipv6hdr *)(start + ETH_HLEN); + } + h6->nexthdr = IPPROTO_TCP; + /* Copy the TCP header after the IPv6 one */ + memcpy(h6 + 1, + skb->data + ETH_HLEN + sizeof(*h6) + + sizeof(struct hop_jumbo_hdr), + tcp_hdrlen(skb)); + /* Leave ipv6 payload_len set to 0, as LSO v2 specs request. */ + } else if (skb_vlan_tag_present(skb)) { + mlx5e_insert_vlan(start, skb, ihs); + ihs += VLAN_HLEN; stats->added_vlan_packets++; } else { - eseg->inline_hdr.sz |= cpu_to_be16(attr->ihs); - memcpy(eseg->inline_hdr.start, skb->data, attr->ihs); + memcpy(start, skb->data, ihs); } + eseg->inline_hdr.sz |= cpu_to_be16(ihs); dseg += wqe_attr->ds_cnt_inl; } else if (skb_vlan_tag_present(skb)) { eseg->insert.type = cpu_to_be16(MLX5_ETH_WQE_INSERT_VLAN); @@ -398,7 +433,7 @@ mlx5e_sq_xmit_wqe(struct mlx5e_txqsq *sq, struct sk_buff *skb, } dseg += wqe_attr->ds_cnt_ids; - num_dma = mlx5e_txwqe_build_dsegs(sq, skb, skb->data + attr->ihs, + num_dma = mlx5e_txwqe_build_dsegs(sq, skb, skb->data + attr->ihs + attr->hopbyhop, attr->headlen, dseg); if (unlikely(num_dma < 0)) goto err_drop; @@ -918,12 +953,29 @@ void mlx5i_sq_xmit(struct mlx5e_txqsq *sq, struct sk_buff *skb, eseg->mss = attr.mss; if (attr.ihs) { - memcpy(eseg->inline_hdr.start, skb->data, attr.ihs); + if (unlikely(attr.hopbyhop)) { + struct ipv6hdr *h6; + + /* remove the HBH header. + * Layout: [Ethernet header][IPv6 header][HBH][TCP header] + */ + memcpy(eseg->inline_hdr.start, skb->data, ETH_HLEN + sizeof(*h6)); + h6 = (struct ipv6hdr *)((char *)eseg->inline_hdr.start + ETH_HLEN); + h6->nexthdr = IPPROTO_TCP; + /* Copy the TCP header after the IPv6 one */ + memcpy(h6 + 1, + skb->data + ETH_HLEN + sizeof(*h6) + + sizeof(struct hop_jumbo_hdr), + tcp_hdrlen(skb)); + /* Leave ipv6 payload_len set to 0, as LSO v2 specs request. */ + } else { + memcpy(eseg->inline_hdr.start, skb->data, attr.ihs); + } eseg->inline_hdr.sz = cpu_to_be16(attr.ihs); dseg += wqe_attr.ds_cnt_inl; } - num_dma = mlx5e_txwqe_build_dsegs(sq, skb, skb->data + attr.ihs, + num_dma = mlx5e_txwqe_build_dsegs(sq, skb, skb->data + attr.ihs + attr.hopbyhop, attr.headlen, dseg); if (unlikely(num_dma < 0)) goto err_drop;