@@ -41,6 +41,13 @@ enum mlx5_cap_mode {
enum {
MLX5_CMD_OP_QUERY_HCA_CAP = 0x100,
+ MLX5_CMD_OP_INIT_HCA = 0x102,
+ MLX5_CMD_OP_TEARDOWN_HCA = 0x103,
+ MLX5_CMD_OP_ENABLE_HCA = 0x104,
+ MLX5_CMD_OP_QUERY_PAGES = 0x107,
+ MLX5_CMD_OP_MANAGE_PAGES = 0x108,
+ MLX5_CMD_OP_QUERY_ISSI = 0x10a,
+ MLX5_CMD_OP_SET_ISSI = 0x10b,
MLX5_CMD_OP_CREATE_MKEY = 0x200,
MLX5_CMD_OP_CREATE_QP = 0x500,
MLX5_CMD_OP_RST2INIT_QP = 0x502,
@@ -55,6 +62,7 @@ enum {
MLX5_CMD_OP_QUERY_ESW_VPORT_CONTEXT = 0x752,
MLX5_CMD_OP_QUERY_NIC_VPORT_CONTEXT = 0x754,
MLX5_CMD_OP_QUERY_ROCE_ADDRESS = 0x760,
+ MLX5_CMD_OP_ACCESS_REG = 0x805,
MLX5_CMD_OP_QUERY_LAG = 0x842,
MLX5_CMD_OP_CREATE_TIR = 0x900,
MLX5_CMD_OP_MODIFY_SQ = 0x905,
@@ -92,6 +100,16 @@ enum {
MLX5_CMD_STAT_BAD_SIZE_OUTS_CQES_ERR = 0x40,
};
+enum {
+ MLX5_PAGES_CANT_GIVE = 0,
+ MLX5_PAGES_GIVE = 1,
+ MLX5_PAGES_TAKE = 2,
+};
+
+enum {
+ MLX5_REG_HOST_ENDIANNESS = 0x7004,
+};
+
struct mlx5_ifc_atomic_caps_bits {
u8 reserved_at_0[0x40];
@@ -4131,4 +4149,201 @@ struct mlx5_ifc_mbox_in_bits {
u8 reserved_at_40[0x40];
};
+struct mlx5_ifc_enable_hca_in_bits {
+ u8 opcode[0x10];
+ u8 reserved_at_10[0x10];
+
+ u8 reserved_at_20[0x10];
+ u8 op_mod[0x10];
+
+ u8 reserved_at_40[0x10];
+ u8 function_id[0x10];
+
+ u8 reserved_at_60[0x20];
+};
+
+struct mlx5_ifc_enable_hca_out_bits {
+ u8 status[0x8];
+ u8 reserved_at_8[0x18];
+
+ u8 syndrome[0x20];
+
+ u8 reserved_at_40[0x20];
+};
+
+struct mlx5_ifc_query_issi_out_bits {
+ u8 status[0x8];
+ u8 reserved_at_8[0x18];
+
+ u8 syndrome[0x20];
+
+ u8 reserved_at_40[0x10];
+ u8 current_issi[0x10];
+
+ u8 reserved_at_60[0xa0];
+
+ u8 reserved_at_100[76][0x8];
+ u8 supported_issi_dw0[0x20];
+};
+
+struct mlx5_ifc_query_issi_in_bits {
+ u8 opcode[0x10];
+ u8 reserved_at_10[0x10];
+
+ u8 reserved_at_20[0x10];
+ u8 op_mod[0x10];
+
+ u8 reserved_at_40[0x40];
+};
+
+struct mlx5_ifc_set_issi_out_bits {
+ u8 status[0x8];
+ u8 reserved_at_8[0x18];
+
+ u8 syndrome[0x20];
+
+ u8 reserved_at_40[0x40];
+};
+
+struct mlx5_ifc_set_issi_in_bits {
+ u8 opcode[0x10];
+ u8 reserved_at_10[0x10];
+
+ u8 reserved_at_20[0x10];
+ u8 op_mod[0x10];
+
+ u8 reserved_at_40[0x10];
+ u8 current_issi[0x10];
+
+ u8 reserved_at_60[0x20];
+};
+
+struct mlx5_ifc_query_pages_out_bits {
+ u8 status[0x8];
+ u8 reserved_at_8[0x18];
+
+ u8 syndrome[0x20];
+
+ u8 embedded_cpu_function[0x1];
+ u8 reserved_at_41[0xf];
+ u8 function_id[0x10];
+
+ u8 num_pages[0x20];
+};
+
+struct mlx5_ifc_query_pages_in_bits {
+ u8 opcode[0x10];
+ u8 reserved_at_10[0x10];
+
+ u8 reserved_at_20[0x10];
+ u8 op_mod[0x10];
+
+ u8 reserved_at_40[0x10];
+ u8 function_id[0x10];
+
+ u8 reserved_at_60[0x20];
+};
+
+struct mlx5_ifc_manage_pages_out_bits {
+ u8 status[0x8];
+ u8 reserved_at_8[0x18];
+
+ u8 syndrome[0x20];
+
+ u8 output_num_entries[0x20];
+
+ u8 reserved_at_60[0x20];
+
+ u8 pas[][0x40];
+};
+
+struct mlx5_ifc_manage_pages_in_bits {
+ u8 opcode[0x10];
+ u8 reserved_at_10[0x10];
+
+ u8 reserved_at_20[0x10];
+ u8 op_mod[0x10];
+
+ u8 embedded_cpu_function[0x1];
+ u8 reserved_at_41[0xf];
+ u8 function_id[0x10];
+
+ u8 input_num_entries[0x20];
+
+ u8 pas[][0x40];
+};
+
+struct mlx5_ifc_teardown_hca_out_bits {
+ u8 status[0x8];
+ u8 reserved_at_8[0x18];
+
+ u8 syndrome[0x20];
+
+ u8 reserved_at_40[0x3f];
+
+ u8 state[0x1];
+};
+
+enum {
+ MLX5_TEARDOWN_HCA_IN_PROFILE_GRACEFUL_CLOSE = 0x0,
+};
+
+struct mlx5_ifc_teardown_hca_in_bits {
+ u8 opcode[0x10];
+ u8 reserved_at_10[0x10];
+
+ u8 reserved_at_20[0x10];
+ u8 op_mod[0x10];
+
+ u8 reserved_at_40[0x10];
+ u8 profile[0x10];
+
+ u8 reserved_at_60[0x20];
+};
+
+struct mlx5_ifc_init_hca_out_bits {
+ u8 status[0x8];
+ u8 reserved_at_8[0x18];
+
+ u8 syndrome[0x20];
+
+ u8 reserved_at_40[0x40];
+};
+
+struct mlx5_ifc_init_hca_in_bits {
+ u8 opcode[0x10];
+ u8 reserved_at_10[0x10];
+
+ u8 reserved_at_20[0x10];
+ u8 op_mod[0x10];
+
+ u8 reserved_at_40[0x40];
+};
+
+struct mlx5_ifc_access_register_out_bits {
+ u8 status[0x8];
+ u8 reserved_at_8[0x18];
+
+ u8 syndrome[0x20];
+
+ u8 reserved_at_40[0x40];
+
+ u8 register_data[][0x20];
+};
+
+struct mlx5_ifc_access_register_in_bits {
+ u8 opcode[0x10];
+ u8 reserved_at_10[0x10];
+
+ u8 reserved_at_20[0x10];
+ u8 op_mod[0x10];
+
+ u8 reserved_at_40[0x10];
+ u8 register_id[0x10];
+
+ u8 argument[0x20];
+
+ u8 register_data[][0x20];
+};
+
#endif /* MLX5_IFC_H */
@@ -988,6 +988,246 @@ close_cont:
return -1;
}
+static int mlx5_vfio_enable_hca(struct mlx5_vfio_context *ctx)
+{
+ uint32_t in[DEVX_ST_SZ_DW(enable_hca_in)] = {};
+ uint32_t out[DEVX_ST_SZ_DW(enable_hca_out)] = {};
+
+ DEVX_SET(enable_hca_in, in, opcode, MLX5_CMD_OP_ENABLE_HCA);
+ return mlx5_vfio_cmd_exec(ctx, in, sizeof(in), out, sizeof(out), 0);
+}
+
+static int mlx5_vfio_set_issi(struct mlx5_vfio_context *ctx)
+{
+ uint32_t query_in[DEVX_ST_SZ_DW(query_issi_in)] = {};
+ uint32_t query_out[DEVX_ST_SZ_DW(query_issi_out)] = {};
+ uint32_t set_in[DEVX_ST_SZ_DW(set_issi_in)] = {};
+ uint32_t set_out[DEVX_ST_SZ_DW(set_issi_out)] = {};
+ uint32_t sup_issi;
+ int err;
+
+ DEVX_SET(query_issi_in, query_in, opcode, MLX5_CMD_OP_QUERY_ISSI);
+ err = mlx5_vfio_cmd_exec(ctx, query_in, sizeof(query_in), query_out,
+ sizeof(query_out), 0);
+ if (err)
+ return err;
+
+ sup_issi = DEVX_GET(query_issi_out, query_out, supported_issi_dw0);
+
+ if (!(sup_issi & (1 << 1))) { /* bit 1 of the mask => ISSI version 1 supported */
+ errno = EOPNOTSUPP;
+ return errno;
+ }
+
+ DEVX_SET(set_issi_in, set_in, opcode, MLX5_CMD_OP_SET_ISSI);
+ DEVX_SET(set_issi_in, set_in, current_issi, 1); /* this driver only operates at ISSI 1 */
+ return mlx5_vfio_cmd_exec(ctx, set_in, sizeof(set_in), set_out,
+ sizeof(set_out), 0);
+}
+
+static int mlx5_vfio_give_pages(struct mlx5_vfio_context *ctx,
+ uint16_t func_id,
+ int32_t npages)
+{
+ uint32_t out[DEVX_ST_SZ_DW(manage_pages_out)] = {}; /* DEVX mailboxes are u32 words, not signed */
+ int inlen = DEVX_ST_SZ_BYTES(manage_pages_in);
+ int i, err;
+ uint32_t *in;
+ uint64_t iova;
+
+ inlen += npages * DEVX_FLD_SZ_BYTES(manage_pages_in, pas[0]);
+ in = calloc(1, inlen);
+ if (!in) {
+ errno = ENOMEM;
+ return errno;
+ }
+
+ for (i = 0; i < npages; i++) {
+ err = mlx5_vfio_alloc_page(ctx, &iova);
+ if (err)
+ goto err;
+
+ DEVX_ARRAY_SET64(manage_pages_in, in, pas, i, iova);
+ }
+
+ DEVX_SET(manage_pages_in, in, opcode, MLX5_CMD_OP_MANAGE_PAGES);
+ DEVX_SET(manage_pages_in, in, op_mod, MLX5_PAGES_GIVE);
+ DEVX_SET(manage_pages_in, in, function_id, func_id);
+ DEVX_SET(manage_pages_in, in, input_num_entries, npages);
+
+ err = mlx5_vfio_cmd_exec(ctx, in, inlen, out, sizeof(out),
+ MLX5_MAX_COMMANDS - 1);
+ if (!err)
+ goto end;
+err:
+ for (i--; i >= 0; i--)
+ mlx5_vfio_free_page(ctx, DEVX_GET64(manage_pages_in, in, pas[i]));
+end:
+ free(in);
+ return err;
+}
+
+static int mlx5_vfio_query_pages(struct mlx5_vfio_context *ctx, int boot,
+ uint16_t *func_id, int32_t *npages)
+{
+ uint32_t query_pages_in[DEVX_ST_SZ_DW(query_pages_in)] = {};
+ uint32_t query_pages_out[DEVX_ST_SZ_DW(query_pages_out)] = {};
+ int ret;
+
+ DEVX_SET(query_pages_in, query_pages_in, opcode, MLX5_CMD_OP_QUERY_PAGES);
+ DEVX_SET(query_pages_in, query_pages_in, op_mod, boot ? 0x01 : 0x02); /* op_mod 1: boot pages, 2: init pages */
+
+ ret = mlx5_vfio_cmd_exec(ctx, query_pages_in, sizeof(query_pages_in),
+ query_pages_out, sizeof(query_pages_out), 0);
+ if (ret)
+ return ret;
+
+ *npages = DEVX_GET(query_pages_out, query_pages_out, num_pages); /* NOTE(review): signed count; negative (FW reclaim) not handled by callers here — confirm */
+ *func_id = DEVX_GET(query_pages_out, query_pages_out, function_id);
+
+ return 0;
+}
+
+static int mlx5_vfio_satisfy_startup_pages(struct mlx5_vfio_context *ctx,
+ int boot)
+{
+ uint16_t function_id;
+ int32_t npages = 0;
+ int ret;
+
+ ret = mlx5_vfio_query_pages(ctx, boot, &function_id, &npages);
+ if (ret)
+ return ret;
+
+ return mlx5_vfio_give_pages(ctx, function_id, npages);
+}
+
+static int mlx5_vfio_access_reg(struct mlx5_vfio_context *ctx, void *data_in,
+ int size_in, void *data_out, int size_out,
+ uint16_t reg_id, int arg, int write)
+{
+ int outlen = DEVX_ST_SZ_BYTES(access_register_out) + size_out;
+ int inlen = DEVX_ST_SZ_BYTES(access_register_in) + size_in;
+ int err = ENOMEM;
+ uint32_t *out = NULL;
+ uint32_t *in = NULL;
+ void *data;
+
+ in = calloc(1, inlen);
+ out = calloc(1, outlen);
+ if (!in || !out) {
+ errno = ENOMEM;
+ goto out;
+ }
+
+ data = DEVX_ADDR_OF(access_register_in, in, register_data);
+ memcpy(data, data_in, size_in);
+
+ DEVX_SET(access_register_in, in, opcode, MLX5_CMD_OP_ACCESS_REG);
+ DEVX_SET(access_register_in, in, op_mod, !write);
+ DEVX_SET(access_register_in, in, argument, arg);
+ DEVX_SET(access_register_in, in, register_id, reg_id);
+
+ err = mlx5_vfio_cmd_exec(ctx, in, inlen, out, outlen, 0);
+ if (err)
+ goto out;
+
+ data = DEVX_ADDR_OF(access_register_out, out, register_data);
+ memcpy(data_out, data, size_out);
+
+out:
+ free(out);
+ free(in);
+ return err;
+}
+
+static int mlx5_vfio_set_hca_ctrl(struct mlx5_vfio_context *ctx)
+{
+ struct mlx5_reg_host_endianness he_in = {};
+ struct mlx5_reg_host_endianness he_out = {};
+
+ he_in.he = MLX5_SET_HOST_ENDIANNESS;
+ return mlx5_vfio_access_reg(ctx, &he_in, sizeof(he_in),
+ &he_out, sizeof(he_out),
+ MLX5_REG_HOST_ENDIANNESS, 0, 1);
+}
+
+static int mlx5_vfio_init_hca(struct mlx5_vfio_context *ctx)
+{
+ uint32_t in[DEVX_ST_SZ_DW(init_hca_in)] = {};
+ uint32_t out[DEVX_ST_SZ_DW(init_hca_out)] = {};
+
+ DEVX_SET(init_hca_in, in, opcode, MLX5_CMD_OP_INIT_HCA);
+ return mlx5_vfio_cmd_exec(ctx, in, sizeof(in), out, sizeof(out), 0);
+}
+
+static int fw_initializing(struct mlx5_init_seg *init_seg)
+{
+ return be32toh(init_seg->initializing) >> 31; /* MSB of the init segment dword is set while FW is still initializing */
+}
+
+static int wait_fw_init(struct mlx5_init_seg *init_seg, uint32_t max_wait_mili)
+{
+ int num_loops = max_wait_mili / FW_INIT_WAIT_MS;
+ int loop = 0;
+
+ while (fw_initializing(init_seg)) {
+ usleep(FW_INIT_WAIT_MS * 1000); /* poll the init bit every FW_INIT_WAIT_MS milliseconds */
+ loop++;
+ if (loop == num_loops) {
+ errno = EBUSY;
+ return errno; /* FW did not finish initializing within max_wait_mili ms */
+ }
+ }
+
+ return 0;
+}
+
+static int mlx5_vfio_teardown_hca(struct mlx5_vfio_context *ctx)
+{
+ uint32_t in[DEVX_ST_SZ_DW(teardown_hca_in)] = {};
+ uint32_t out[DEVX_ST_SZ_DW(teardown_hca_out)] = {};
+
+ DEVX_SET(teardown_hca_in, in, opcode, MLX5_CMD_OP_TEARDOWN_HCA);
+ DEVX_SET(teardown_hca_in, in, profile, MLX5_TEARDOWN_HCA_IN_PROFILE_GRACEFUL_CLOSE);
+ return mlx5_vfio_cmd_exec(ctx, in, sizeof(in), out, sizeof(out), 0);
+}
+
+static int mlx5_vfio_setup_function(struct mlx5_vfio_context *ctx)
+{
+ int err;
+
+ err = wait_fw_init(ctx->bar_map, FW_PRE_INIT_TIMEOUT_MILI);
+ if (err)
+ return err;
+
+ err = mlx5_vfio_enable_hca(ctx);
+ if (err)
+ return err;
+
+ err = mlx5_vfio_set_issi(ctx);
+ if (err)
+ return err;
+
+ err = mlx5_vfio_satisfy_startup_pages(ctx, 1);
+ if (err)
+ return err;
+
+ err = mlx5_vfio_set_hca_ctrl(ctx);
+ if (err)
+ return err;
+
+ err = mlx5_vfio_satisfy_startup_pages(ctx, 0);
+ if (err)
+ return err;
+
+ err = mlx5_vfio_init_hca(ctx);
+ if (err)
+ return err;
+
+ return 0;
+}
+
static void mlx5_vfio_uninit_context(struct mlx5_vfio_context *ctx)
{
mlx5_close_debug_file(ctx->dbg_fp);
@@ -1000,6 +1240,7 @@ static void mlx5_vfio_free_context(struct ibv_context *ibctx)
{
struct mlx5_vfio_context *ctx = to_mvfio_ctx(ibctx);
+ mlx5_vfio_teardown_hca(ctx);
mlx5_vfio_clean_cmd_interface(ctx);
mlx5_vfio_clean_device_dma(ctx);
mlx5_vfio_uninit_bar0(ctx);
@@ -1040,9 +1281,14 @@ mlx5_vfio_alloc_context(struct ibv_device *ibdev,
if (mlx5_vfio_init_cmd_interface(mctx))
goto err_dma;
+ if (mlx5_vfio_setup_function(mctx))
+ goto clean_cmd;
+
verbs_set_ops(&mctx->vctx, &mlx5_vfio_common_ops);
return &mctx->vctx;
+clean_cmd:
+ mlx5_vfio_clean_cmd_interface(mctx);
err_dma:
mlx5_vfio_clean_device_dma(mctx);
err_bar:
@@ -13,6 +13,9 @@
#include <infiniband/driver.h>
#include <util/interval_set.h>
+#define FW_INIT_WAIT_MS 2
+#define FW_PRE_INIT_TIMEOUT_MILI 120000
+
enum {
MLX5_MAX_COMMANDS = 32,
MLX5_CMD_DATA_BLOCK_SIZE = 512,
@@ -32,6 +35,19 @@ struct mlx5_vfio_device {
uint32_t flags;
};
+#if __BYTE_ORDER == __LITTLE_ENDIAN
+#define MLX5_SET_HOST_ENDIANNESS 0
+#elif __BYTE_ORDER == __BIG_ENDIAN
+#define MLX5_SET_HOST_ENDIANNESS 0x80
+#else
+#error Host endianness not defined
+#endif
+
+struct mlx5_reg_host_endianness {
+ uint8_t he;
+ uint8_t rsvd[15];
+};
+
struct health_buffer {
__be32 assert_var[5];
__be32 rsvd0[3];
@@ -1687,6 +1687,10 @@ static inline uint64_t _devx_get64(const void *p, size_t bit_off)
#define DEVX_GET64(typ, p, fld) _devx_get64(p, __devx_bit_off(typ, fld))
+#define DEVX_ARRAY_SET64(typ, p, fld, idx, v) do { \
+ DEVX_SET64(typ, p, fld[idx], v); \
+} while (0)
+
struct mlx5dv_dr_domain;
struct mlx5dv_dr_table;
struct mlx5dv_dr_matcher;
Setup device function support by following the required command sequence and steps based on the device specification. Signed-off-by: Yishai Hadas <yishaih@nvidia.com> --- providers/mlx5/mlx5_ifc.h | 215 +++++++++++++++++++++++++++++++++++++++ providers/mlx5/mlx5_vfio.c | 246 +++++++++++++++++++++++++++++++++++++++++++++ providers/mlx5/mlx5_vfio.h | 16 +++ providers/mlx5/mlx5dv.h | 4 + 4 files changed, 481 insertions(+)