@@ -604,6 +604,63 @@ static inline int set_tso_eth_seg(void **seg, struct ibv_send_wr *wr,
return 0;
}
+static inline int mlx5_post_send_underlay(struct mlx5_qp *qp, struct ibv_send_wr *wr,
+ void **pseg, int *total_size,
+ struct mlx5_sg_copy_ptr *sg_copy_ptr)
+{
+ struct mlx5_wqe_eth_seg *eseg;
+ int inl_hdr_copy_size;
+ void *seg = *pseg;
+ int size = 0;
+
+ if (unlikely(wr->opcode == IBV_WR_SEND_WITH_IMM))
+ return EINVAL;
+
+ memset(seg, 0, sizeof(struct mlx5_wqe_eth_pad));
+ size += sizeof(struct mlx5_wqe_eth_pad);
+ seg += sizeof(struct mlx5_wqe_eth_pad);
+ eseg = seg;
+ *((uint64_t *)eseg) = 0;
+ eseg->rsvd2 = 0;
+
+ if (wr->send_flags & IBV_SEND_IP_CSUM) {
+ if (!(qp->qp_cap_cache & MLX5_CSUM_SUPPORT_UNDERLAY_UD))
+ return EINVAL;
+
+ eseg->cs_flags |= MLX5_ETH_WQE_L3_CSUM | MLX5_ETH_WQE_L4_CSUM;
+ }
+
+ if (likely(wr->sg_list[0].length >= MLX5_SOURCE_QPN_INLINE_MAX_HEADER_SIZE))
+ /* Copying the minimum required data unless inline mode is set */
+ inl_hdr_copy_size = (wr->send_flags & IBV_SEND_INLINE) ?
+ MLX5_SOURCE_QPN_INLINE_MAX_HEADER_SIZE :
+ MLX5_IPOIB_INLINE_MIN_HEADER_SIZE;
+ else {
+ inl_hdr_copy_size = MLX5_IPOIB_INLINE_MIN_HEADER_SIZE;
+ /* We expect at least 4 bytes as part of first entry to hold the IPoIB header */
+ if (unlikely(wr->sg_list[0].length < inl_hdr_copy_size))
+ return EINVAL;
+ }
+
+ memcpy(eseg->inline_hdr_start, (void *)(uintptr_t)wr->sg_list[0].addr,
+ inl_hdr_copy_size);
+ eseg->inline_hdr_sz = htobe16(inl_hdr_copy_size);
+ size += sizeof(struct mlx5_wqe_eth_seg);
+ seg += sizeof(struct mlx5_wqe_eth_seg);
+
+ /* If we copied all the sge into the inline-headers, then we need to
+ * start copying from the next sge into the data-segment.
+ */
+ if (unlikely(wr->sg_list[0].length == inl_hdr_copy_size))
+ sg_copy_ptr->index++;
+ else
+ sg_copy_ptr->offset = inl_hdr_copy_size;
+
+ *pseg = seg;
+ *total_size += (size / 16);
+ return 0;
+}
+
static inline int _mlx5_post_send(struct ibv_qp *ibqp, struct ibv_send_wr *wr,
struct ibv_send_wr **bad_wr)
{
@@ -806,6 +863,14 @@ static inline int _mlx5_post_send(struct ibv_qp *ibqp, struct ibv_send_wr *wr,
size += sizeof(struct mlx5_wqe_datagram_seg) / 16;
if (unlikely((seg == qend)))
seg = mlx5_get_send_wqe(qp, 0);
+
+ if (unlikely(qp->flags & MLX5_QP_FLAGS_USE_UNDERLAY)) {
+ err = mlx5_post_send_underlay(qp, wr, &seg, &size, &sg_copy_ptr);
+ if (unlikely(err)) {
+ *bad_wr = wr;
+ goto out;
+ }
+ }
break;
case IBV_QPT_RAW_PACKET:
@@ -756,7 +756,7 @@ int mlx5_destroy_srq(struct ibv_srq *srq)
return 0;
}
-static int sq_overhead(enum ibv_qp_type qp_type)
+static int sq_overhead(struct mlx5_qp *qp, enum ibv_qp_type qp_type)
{
size_t size = 0;
size_t mw_bind_size =
@@ -781,6 +781,10 @@ static int sq_overhead(enum ibv_qp_type qp_type)
case IBV_QPT_UD:
size = sizeof(struct mlx5_wqe_ctrl_seg) +
sizeof(struct mlx5_wqe_datagram_seg);
+
+ if (qp->flags & MLX5_QP_FLAGS_USE_UNDERLAY)
+ size += (sizeof(struct mlx5_wqe_eth_seg) + sizeof(struct mlx5_wqe_eth_pad));
+
break;
case IBV_QPT_XRC_SEND:
@@ -814,7 +818,7 @@ static int mlx5_calc_send_wqe(struct mlx5_context *ctx,
int max_gather;
int tot_size;
- size = sq_overhead(attr->qp_type);
+ size = sq_overhead(qp, attr->qp_type);
if (size < 0)
return size;
@@ -887,7 +891,7 @@ static int mlx5_calc_sq_size(struct mlx5_context *ctx,
return -EINVAL;
}
- qp->max_inline_data = wqe_size - sq_overhead(attr->qp_type) -
+ qp->max_inline_data = wqe_size - sq_overhead(qp, attr->qp_type) -
sizeof(struct mlx5_wqe_inl_data_seg);
attr->cap.max_inline_data = qp->max_inline_data;
@@ -50,6 +50,10 @@ struct mlx5_eqe_qp_srq {
uint32_t qp_srq_n;
};
+struct mlx5_wqe_eth_pad {
+ uint8_t rsvd0[16];
+};
+
struct mlx5_wqe_xrc_seg {
__be32 xrc_srqn;
uint8_t rsvd[12];
@@ -63,6 +67,8 @@ struct mlx5_wqe_masked_atomic_seg {
};
enum {
+ MLX5_IPOIB_INLINE_MIN_HEADER_SIZE = 4,
+ MLX5_SOURCE_QPN_INLINE_MAX_HEADER_SIZE = 18,
MLX5_ETH_L2_INLINE_HEADER_SIZE = 18,
MLX5_ETH_L2_MIN_HEADER_SIZE = 14,
};
It includes: - Update WQE size to match the addition required segments. - Build a proper packet as part of the post send flow. The specific IPoIB handling as of copying from the application gather data into the header is some device limitation to support sending IPoIB packets. This is not required in the general case but will work as well. Future devices may work properly without that need, once be ready we may have some device capability exposed from kernel and clean this specific handling. Signed-off-by: Yishai Hadas <yishaih@mellanox.com> --- providers/mlx5/qp.c | 65 ++++++++++++++++++++++++++++++++++++++++++++++++++ providers/mlx5/verbs.c | 10 +++++--- providers/mlx5/wqe.h | 6 +++++ 3 files changed, 78 insertions(+), 3 deletions(-)