Message ID | 20220124172028.2410761-1-keescook@chromium.org (mailing list archive) |
---|---|
State | Not Applicable |
Headers | show |
Series | [v2,RESEND] net/mlx5e: Avoid field-overflowing memcpy() | expand |
On 24 Jan 09:20, Kees Cook wrote: >In preparation for FORTIFY_SOURCE performing compile-time and run-time >field bounds checking for memcpy(), memmove(), and memset(), avoid >intentionally writing across neighboring fields. > >Use flexible arrays instead of zero-element arrays (which look like they >are always overflowing) and split the cross-field memcpy() into two halves >that can be appropriately bounds-checked by the compiler. > >We were doing: > > #define ETH_HLEN 14 > #define VLAN_HLEN 4 > ... > #define MLX5E_XDP_MIN_INLINE (ETH_HLEN + VLAN_HLEN) > ... > struct mlx5e_tx_wqe *wqe = mlx5_wq_cyc_get_wqe(wq, pi); > ... > struct mlx5_wqe_eth_seg *eseg = &wqe->eth; > struct mlx5_wqe_data_seg *dseg = wqe->data; > ... > memcpy(eseg->inline_hdr.start, xdptxd->data, MLX5E_XDP_MIN_INLINE); > >target is wqe->eth.inline_hdr.start (which the compiler sees as being >2 bytes in size), but copying 18, intending to write across start >(really vlan_tci, 2 bytes). The remaining 16 bytes get written into >wqe->data[0], covering byte_count (4 bytes), lkey (4 bytes), and addr >(8 bytes). 
> >struct mlx5e_tx_wqe { > struct mlx5_wqe_ctrl_seg ctrl; /* 0 16 */ > struct mlx5_wqe_eth_seg eth; /* 16 16 */ > struct mlx5_wqe_data_seg data[]; /* 32 0 */ > > /* size: 32, cachelines: 1, members: 3 */ > /* last cacheline: 32 bytes */ >}; > >struct mlx5_wqe_eth_seg { > u8 swp_outer_l4_offset; /* 0 1 */ > u8 swp_outer_l3_offset; /* 1 1 */ > u8 swp_inner_l4_offset; /* 2 1 */ > u8 swp_inner_l3_offset; /* 3 1 */ > u8 cs_flags; /* 4 1 */ > u8 swp_flags; /* 5 1 */ > __be16 mss; /* 6 2 */ > __be32 flow_table_metadata; /* 8 4 */ > union { > struct { > __be16 sz; /* 12 2 */ > u8 start[2]; /* 14 2 */ > } inline_hdr; /* 12 4 */ > struct { > __be16 type; /* 12 2 */ > __be16 vlan_tci; /* 14 2 */ > } insert; /* 12 4 */ > __be32 trailer; /* 12 4 */ > }; /* 12 4 */ > > /* size: 16, cachelines: 1, members: 9 */ > /* last cacheline: 16 bytes */ >}; > >struct mlx5_wqe_data_seg { > __be32 byte_count; /* 0 4 */ > __be32 lkey; /* 4 4 */ > __be64 addr; /* 8 8 */ > > /* size: 16, cachelines: 1, members: 3 */ > /* last cacheline: 16 bytes */ >}; > >So, split the memcpy() so the compiler can reason about the buffer >sizes. > >"pahole" shows no size nor member offset changes to struct mlx5e_tx_wqe >nor struct mlx5e_umr_wqe. "objdump -d" shows no meaningful object >code changes (i.e. only source line number induced differences and >optimizations). > >Cc: Saeed Mahameed <saeedm@nvidia.com> >Cc: Leon Romanovsky <leon@kernel.org> >Cc: "David S. Miller" <davem@davemloft.net> >Cc: Jakub Kicinski <kuba@kernel.org> >Cc: Alexei Starovoitov <ast@kernel.org> >Cc: Daniel Borkmann <daniel@iogearbox.net> >Cc: Jesper Dangaard Brouer <hawk@kernel.org> >Cc: John Fastabend <john.fastabend@gmail.com> >Cc: netdev@vger.kernel.org >Cc: linux-rdma@vger.kernel.org >Cc: bpf@vger.kernel.org >Signed-off-by: Kees Cook <keescook@chromium.org> >--- >Since this results in no binary differences, I will carry this in my tree >unless someone else wants to pick it up. 
It's one of the last remaining >clean-ups needed for the next step in memcpy() hardening. applied to net-next-mlx5. Thanks, Saeed
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index 812e6810cb3b..c14e06ca64d8 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -224,7 +224,7 @@ static inline int mlx5e_get_max_num_channels(struct mlx5_core_dev *mdev) struct mlx5e_tx_wqe { struct mlx5_wqe_ctrl_seg ctrl; struct mlx5_wqe_eth_seg eth; - struct mlx5_wqe_data_seg data[0]; + struct mlx5_wqe_data_seg data[]; }; struct mlx5e_rx_wqe_ll { @@ -241,8 +241,8 @@ struct mlx5e_umr_wqe { struct mlx5_wqe_umr_ctrl_seg uctrl; struct mlx5_mkey_seg mkc; union { - struct mlx5_mtt inline_mtts[0]; - struct mlx5_klm inline_klms[0]; + DECLARE_FLEX_ARRAY(struct mlx5_mtt, inline_mtts); + DECLARE_FLEX_ARRAY(struct mlx5_klm, inline_klms); }; }; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c index 338d65e2c9ce..56e10c84a706 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c @@ -341,8 +341,10 @@ mlx5e_xmit_xdp_frame(struct mlx5e_xdpsq *sq, struct mlx5e_xmit_data *xdptxd, /* copy the inline part if required */ if (sq->min_inline_mode != MLX5_INLINE_MODE_NONE) { - memcpy(eseg->inline_hdr.start, xdptxd->data, MLX5E_XDP_MIN_INLINE); + memcpy(eseg->inline_hdr.start, xdptxd->data, sizeof(eseg->inline_hdr.start)); eseg->inline_hdr.sz = cpu_to_be16(MLX5E_XDP_MIN_INLINE); + memcpy(dseg, xdptxd->data + sizeof(eseg->inline_hdr.start), + MLX5E_XDP_MIN_INLINE - sizeof(eseg->inline_hdr.start)); dma_len -= MLX5E_XDP_MIN_INLINE; dma_addr += MLX5E_XDP_MIN_INLINE; dseg++;
In preparation for FORTIFY_SOURCE performing compile-time and run-time field bounds checking for memcpy(), memmove(), and memset(), avoid intentionally writing across neighboring fields. Use flexible arrays instead of zero-element arrays (which look like they are always overflowing) and split the cross-field memcpy() into two halves that can be appropriately bounds-checked by the compiler. We were doing: #define ETH_HLEN 14 #define VLAN_HLEN 4 ... #define MLX5E_XDP_MIN_INLINE (ETH_HLEN + VLAN_HLEN) ... struct mlx5e_tx_wqe *wqe = mlx5_wq_cyc_get_wqe(wq, pi); ... struct mlx5_wqe_eth_seg *eseg = &wqe->eth; struct mlx5_wqe_data_seg *dseg = wqe->data; ... memcpy(eseg->inline_hdr.start, xdptxd->data, MLX5E_XDP_MIN_INLINE); The target of this memcpy() is wqe->eth.inline_hdr.start (which the compiler sees as being 2 bytes in size), but the call copies 18 bytes, intending to write across start (really vlan_tci, 2 bytes). The remaining 16 bytes get written into wqe->data[0], covering byte_count (4 bytes), lkey (4 bytes), and addr (8 bytes). 
struct mlx5e_tx_wqe { struct mlx5_wqe_ctrl_seg ctrl; /* 0 16 */ struct mlx5_wqe_eth_seg eth; /* 16 16 */ struct mlx5_wqe_data_seg data[]; /* 32 0 */ /* size: 32, cachelines: 1, members: 3 */ /* last cacheline: 32 bytes */ }; struct mlx5_wqe_eth_seg { u8 swp_outer_l4_offset; /* 0 1 */ u8 swp_outer_l3_offset; /* 1 1 */ u8 swp_inner_l4_offset; /* 2 1 */ u8 swp_inner_l3_offset; /* 3 1 */ u8 cs_flags; /* 4 1 */ u8 swp_flags; /* 5 1 */ __be16 mss; /* 6 2 */ __be32 flow_table_metadata; /* 8 4 */ union { struct { __be16 sz; /* 12 2 */ u8 start[2]; /* 14 2 */ } inline_hdr; /* 12 4 */ struct { __be16 type; /* 12 2 */ __be16 vlan_tci; /* 14 2 */ } insert; /* 12 4 */ __be32 trailer; /* 12 4 */ }; /* 12 4 */ /* size: 16, cachelines: 1, members: 9 */ /* last cacheline: 16 bytes */ }; struct mlx5_wqe_data_seg { __be32 byte_count; /* 0 4 */ __be32 lkey; /* 4 4 */ __be64 addr; /* 8 8 */ /* size: 16, cachelines: 1, members: 3 */ /* last cacheline: 16 bytes */ }; So, split the memcpy() so the compiler can reason about the buffer sizes. "pahole" shows no size nor member offset changes to struct mlx5e_tx_wqe nor struct mlx5e_umr_wqe. "objdump -d" shows no meaningful object code changes (i.e. only source line number induced differences and optimizations). Cc: Saeed Mahameed <saeedm@nvidia.com> Cc: Leon Romanovsky <leon@kernel.org> Cc: "David S. Miller" <davem@davemloft.net> Cc: Jakub Kicinski <kuba@kernel.org> Cc: Alexei Starovoitov <ast@kernel.org> Cc: Daniel Borkmann <daniel@iogearbox.net> Cc: Jesper Dangaard Brouer <hawk@kernel.org> Cc: John Fastabend <john.fastabend@gmail.com> Cc: netdev@vger.kernel.org Cc: linux-rdma@vger.kernel.org Cc: bpf@vger.kernel.org Signed-off-by: Kees Cook <keescook@chromium.org> --- Since this results in no binary differences, I will carry this in my tree unless someone else wants to pick it up. It's one of the last remaining clean-ups needed for the next step in memcpy() hardening. 
--- drivers/net/ethernet/mellanox/mlx5/core/en.h | 6 +++--- drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c | 4 +++- 2 files changed, 6 insertions(+), 4 deletions(-)