@@ -28,6 +28,7 @@ libmlx5.so.1 ibverbs-providers #MINVER#
MLX5_1.18@MLX5_1.18 34
MLX5_1.19@MLX5_1.19 35
MLX5_1.20@MLX5_1.20 36
+ MLX5_1.21@MLX5_1.21 37
mlx5dv_init_obj@MLX5_1.0 13
mlx5dv_init_obj@MLX5_1.2 15
mlx5dv_query_device@MLX5_1.0 13
@@ -133,6 +134,7 @@ libmlx5.so.1 ibverbs-providers #MINVER#
mlx5dv_map_ah_to_qp@MLX5_1.20 36
mlx5dv_qp_cancel_posted_send_wrs@MLX5_1.20 36
_mlx5dv_mkey_check@MLX5_1.20 36
+ mlx5dv_get_vfio_device_list@MLX5_1.21 37
libefa.so.1 ibverbs-providers #MINVER#
* Build-Depends-Package: libibverbs-dev
EFA_1.0@EFA_1.0 24
@@ -11,7 +11,7 @@ if (MLX5_MW_DEBUG)
endif()
rdma_shared_provider(mlx5 libmlx5.map
- 1 1.20.${PACKAGE_VERSION}
+ 1 1.21.${PACKAGE_VERSION}
buf.c
cq.c
dbrec.c
@@ -30,6 +30,7 @@ rdma_shared_provider(mlx5 libmlx5.map
dr_table.c
dr_send.c
mlx5.c
+ mlx5_vfio.c
qp.c
srq.c
verbs.c
@@ -189,3 +189,8 @@ MLX5_1.20 {
mlx5dv_qp_cancel_posted_send_wrs;
_mlx5dv_mkey_check;
} MLX5_1.19;
+
+MLX5_1.21 {
+ global:
+ mlx5dv_get_vfio_device_list;
+} MLX5_1.20;
@@ -22,6 +22,7 @@ rdma_man_pages(
mlx5dv_dump.3.md
mlx5dv_flow_action_esp.3.md
mlx5dv_get_clock_info.3
+ mlx5dv_get_vfio_device_list.3.md
mlx5dv_init_obj.3
mlx5dv_is_supported.3.md
mlx5dv_map_ah_to_qp.3.md
new file mode 100644
@@ -0,0 +1,64 @@
+---
+layout: page
+title: mlx5dv_get_vfio_device_list
+section: 3
+tagline: Verbs
+---
+
+# NAME
+
+mlx5dv_get_vfio_device_list - Get list of available devices to be used over VFIO
+
+# SYNOPSIS
+
+```c
+#include <infiniband/mlx5dv.h>
+
+struct ibv_device **
+mlx5dv_get_vfio_device_list(struct mlx5dv_vfio_context_attr *attr);
+```
+
+# DESCRIPTION
+
+Returns a NULL-terminated array of the mlx5 devices that can be used over VFIO, based on the input *attr*.
+
+# ARGUMENTS
+
+*attr*
+: Describes the VFIO device to return in the list.
+
+## *attr* argument
+
+```c
+struct mlx5dv_vfio_context_attr {
+ const char *pci_name;
+ uint32_t flags;
+ uint64_t comp_mask;
+};
+```
+
+*pci_name*
+: The PCI name of the required device.
+
+*flags*
+: A bitwise OR of the various values described below.
+
+ *MLX5DV_VFIO_CTX_FLAGS_INIT_LINK_DOWN*:
+   Upon device initialization, the link should stay down.
+
+*comp_mask*
+: Bitmask specifying what fields in the structure are valid.
+
+# RETURN VALUE
+
+Returns a NULL-terminated array of the matching devices, or sets errno and returns NULL if the request fails.
+
+# NOTES
+
+Client code should open all the devices it intends to use with ibv_open_device()
+before calling ibv_free_device_list(). Once the array is freed with
+ibv_free_device_list(), only the devices that were opened remain usable;
+pointers to unopened devices are no longer valid.
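+
+A minimal usage sketch follows; the wrapper function name and the PCI address
+are placeholders, and error handling is trimmed:
+
+```c
+static struct ibv_context *open_vfio_device(void)
+{
+    struct mlx5dv_vfio_context_attr attr = {
+        .pci_name = "0000:03:00.0",   /* placeholder PCI address */
+    };
+    struct ibv_device **list;
+    struct ibv_context *ctx;
+
+    list = mlx5dv_get_vfio_device_list(&attr);
+    if (!list)
+        return NULL;   /* errno describes the failure */
+
+    ctx = ibv_open_device(list[0]);
+    ibv_free_device_list(list);
+    return ctx;
+}
+```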
+
+# SEE ALSO
+
+*ibv_open_device(3)*, *ibv_free_device_list(3)*
+
+# AUTHOR
+
+Yishai Hadas <yishaih@nvidia.com>
@@ -62,7 +62,7 @@ static void mlx5_free_context(struct ibv_context *ibctx);
#endif
#define HCA(v, d) VERBS_PCI_MATCH(PCI_VENDOR_ID_##v, d, NULL)
-static const struct verbs_match_ent hca_table[] = {
+const struct verbs_match_ent mlx5_hca_table[] = {
VERBS_DRIVER_ID(RDMA_DRIVER_MLX5),
HCA(MELLANOX, 0x1011), /* MT4113 Connect-IB */
HCA(MELLANOX, 0x1012), /* Connect-IB Virtual Function */
@@ -2410,7 +2410,7 @@ static const struct verbs_device_ops mlx5_dev_ops = {
.name = "mlx5",
.match_min_abi_version = MLX5_UVERBS_MIN_ABI_VERSION,
.match_max_abi_version = MLX5_UVERBS_MAX_ABI_VERSION,
- .match_table = hca_table,
+ .match_table = mlx5_hca_table,
.alloc_device = mlx5_device_alloc,
.uninit_device = mlx5_uninit_device,
.alloc_context = mlx5_alloc_context,
@@ -94,6 +94,7 @@ enum {
extern uint32_t mlx5_debug_mask;
extern int mlx5_freeze_on_error_cqe;
+extern const struct verbs_match_ent mlx5_hca_table[];
#ifdef MLX5_DEBUG
#define mlx5_dbg(fp, mask, format, arg...) \
new file mode 100644
@@ -0,0 +1,190 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved
+ */
+
+#define _GNU_SOURCE
+#include <config.h>
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <errno.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <sys/mman.h>
+#include <string.h>
+#include <sys/param.h>
+
+#include "mlx5dv.h"
+#include "mlx5_vfio.h"
+#include "mlx5.h"
+
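+/*
+ * Allocating a verbs context over VFIO is not implemented by this patch;
+ * the stub makes ibv_open_device() on such a device fail for now.
+ */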
+static struct verbs_context *
+mlx5_vfio_alloc_context(struct ibv_device *ibdev,
+ int cmd_fd, void *private_data)
+{
+ return NULL;
+}
+
+static void mlx5_vfio_uninit_device(struct verbs_device *verbs_device)
+{
+ struct mlx5_vfio_device *dev = to_mvfio_dev(&verbs_device->device);
+
+ free(dev->pci_name);
+ free(dev);
+}
+
+static const struct verbs_device_ops mlx5_vfio_dev_ops = {
+ .name = "mlx5_vfio",
+ .alloc_context = mlx5_vfio_alloc_context,
+ .uninit_device = mlx5_vfio_uninit_device,
+};
+
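+/*
+ * Check, via sysfs, that the PCI device at pci_path carries vendor/device
+ * IDs that appear in the shared mlx5 PCI match table (mlx5_hca_table).
+ */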
+static bool is_mlx5_pci(const char *pci_path)
+{
+ const struct verbs_match_ent *ent;
+ uint16_t vendor_id, device_id;
+ char pci_info_path[256];
+ char buff[128];
+ int fd;
+
+ snprintf(pci_info_path, sizeof(pci_info_path), "%s/vendor", pci_path);
+ fd = open(pci_info_path, O_RDONLY);
+ if (fd < 0)
+ return false;
+
+ if (read(fd, buff, sizeof(buff)) <= 0)
+ goto err;
+
+ vendor_id = strtoul(buff, NULL, 0);
+ close(fd);
+
+ snprintf(pci_info_path, sizeof(pci_info_path), "%s/device", pci_path);
+ fd = open(pci_info_path, O_RDONLY);
+ if (fd < 0)
+ return false;
+
+ if (read(fd, buff, sizeof(buff)) <= 0)
+ goto err;
+
+ device_id = strtoul(buff, NULL, 0);
+ close(fd);
+
+ for (ent = mlx5_hca_table; ent->kind != VERBS_MATCH_SENTINEL; ent++) {
+ if (ent->kind != VERBS_MATCH_PCI)
+ continue;
+ if (ent->device == device_id && ent->vendor == vendor_id)
+ return true;
+ }
+
+ return false;
+
+err:
+ close(fd);
+ return false;
+}
+
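+/*
+ * Resolve the IOMMU group of the given PCI device ("dddd:bb:ss.f") by
+ * following its sysfs iommu_group link, and verify that the matching
+ * /dev/vfio/<group> character device exists.
+ */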
+static int mlx5_vfio_get_iommu_group_id(const char *pci_name)
+{
+ int seg, bus, slot, func;
+ int ret, groupid;
+ char path[128], iommu_group_path[128], *group_name;
+ struct stat st;
+ ssize_t len;
+
+ ret = sscanf(pci_name, "%04x:%02x:%02x.%d", &seg, &bus, &slot, &func);
+ if (ret != 4)
+ return -1;
+
+ snprintf(path, sizeof(path),
+ "/sys/bus/pci/devices/%04x:%02x:%02x.%01x/",
+ seg, bus, slot, func);
+
+ ret = stat(path, &st);
+ if (ret < 0)
+ return -1;
+
+ if (!is_mlx5_pci(path))
+ return -1;
+
+ strncat(path, "iommu_group", sizeof(path) - strlen(path) - 1);
+
+ /* keep room for the terminating NUL written below */
+ len = readlink(path, iommu_group_path, sizeof(iommu_group_path) - 1);
+ if (len <= 0)
+ return -1;
+
+ iommu_group_path[len] = 0;
+ group_name = basename(iommu_group_path);
+
+ if (sscanf(group_name, "%d", &groupid) != 1)
+ return -1;
+
+ snprintf(path, sizeof(path), "/dev/vfio/%d", groupid);
+ ret = stat(path, &st);
+ if (ret < 0)
+ return -1;
+
+ return groupid;
+}
+
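+/* Record the /dev/vfio/<group> path and the PCI name of the requested device. */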
+static int mlx5_vfio_get_handle(struct mlx5_vfio_device *vfio_dev,
+ struct mlx5dv_vfio_context_attr *attr)
+{
+ int iommu_group;
+
+ iommu_group = mlx5_vfio_get_iommu_group_id(attr->pci_name);
+ if (iommu_group < 0)
+ return -1;
+
+ sprintf(vfio_dev->vfio_path, "/dev/vfio/%d", iommu_group);
+ vfio_dev->pci_name = strdup(attr->pci_name);
+
+ return 0;
+}
+
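+/*
+ * Build a NULL-terminated, single-entry device list for the PCI device
+ * described by attr.
+ */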
+struct ibv_device **
+mlx5dv_get_vfio_device_list(struct mlx5dv_vfio_context_attr *attr)
+{
+ struct mlx5_vfio_device *vfio_dev;
+ struct ibv_device **list = NULL;
+ int err;
+
+ if (!check_comp_mask(attr->comp_mask, 0) ||
+ !check_comp_mask(attr->flags, MLX5DV_VFIO_CTX_FLAGS_INIT_LINK_DOWN)) {
+ errno = EOPNOTSUPP;
+ return NULL;
+ }
+
+ /* room for the one matching device plus the NULL terminator */
+ list = calloc(2, sizeof(struct ibv_device *));
+ if (!list) {
+ errno = ENOMEM;
+ return NULL;
+ }
+
+ vfio_dev = calloc(1, sizeof(*vfio_dev));
+ if (!vfio_dev) {
+ errno = ENOMEM;
+ goto end;
+ }
+
+ vfio_dev->vdev.ops = &mlx5_vfio_dev_ops;
+ atomic_init(&vfio_dev->vdev.refcount, 1);
+
+ /* Find the vfio handle for attrs, store in mlx5_vfio_device */
+ err = mlx5_vfio_get_handle(vfio_dev, attr);
+ if (err)
+ goto err_get;
+
+ vfio_dev->flags = attr->flags;
+ vfio_dev->page_size = sysconf(_SC_PAGESIZE);
+
+ list[0] = &vfio_dev->vdev.device;
+ return list;
+
+err_get:
+ free(vfio_dev);
+end:
+ free(list);
+ return NULL;
+}
new file mode 100644
@@ -0,0 +1,27 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved
+ */
+
+#ifndef MLX5_VFIO_H
+#define MLX5_VFIO_H
+
+#include <stddef.h>
+#include <stdio.h>
+
+#include <infiniband/driver.h>
+
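+/*
+ * An mlx5 device that is accessed through VFIO rather than through the
+ * mlx5 kernel driver.
+ */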
+struct mlx5_vfio_device {
+ struct verbs_device vdev;
+ char *pci_name;
+ char vfio_path[IBV_SYSFS_PATH_MAX];
+ int page_size;
+ uint32_t flags;
+};
+
+static inline struct mlx5_vfio_device *to_mvfio_dev(struct ibv_device *ibdev)
+{
+ return container_of(ibdev, struct mlx5_vfio_device, vdev.device);
+}
+
+#endif
@@ -1474,6 +1474,19 @@ struct mlx5dv_context_attr {
bool mlx5dv_is_supported(struct ibv_device *device);
+enum mlx5dv_vfio_context_attr_flags {
+ MLX5DV_VFIO_CTX_FLAGS_INIT_LINK_DOWN = 1 << 0,
+};
+
+struct mlx5dv_vfio_context_attr {
+ const char *pci_name;
+ uint32_t flags; /* Use enum mlx5dv_vfio_context_attr_flags */
+ uint64_t comp_mask;
+};
+
+struct ibv_device **
+mlx5dv_get_vfio_device_list(struct mlx5dv_vfio_context_attr *attr);
+
struct ibv_context *
mlx5dv_open_device(struct ibv_device *device, struct mlx5dv_context_attr *attr);

Introduce mlx5dv_get_vfio_device_list() API for getting list of mlx5 devices
which can be used over VFIO. A man page with the expected usage was added.

Signed-off-by: Yishai Hadas <yishaih@nvidia.com>
---
 debian/ibverbs-providers.symbols                    |   2 +
 providers/mlx5/CMakeLists.txt                       |   3 +-
 providers/mlx5/libmlx5.map                          |   5 +
 providers/mlx5/man/CMakeLists.txt                   |   1 +
 .../mlx5/man/mlx5dv_get_vfio_device_list.3.md       |  64 +++++++
 providers/mlx5/mlx5.c                               |   4 +-
 providers/mlx5/mlx5.h                               |   1 +
 providers/mlx5/mlx5_vfio.c                          | 190 +++++++++++++++++++++
 providers/mlx5/mlx5_vfio.h                          |  27 +++
 providers/mlx5/mlx5dv.h                             |  13 ++
 10 files changed, 307 insertions(+), 3 deletions(-)
 create mode 100644 providers/mlx5/man/mlx5dv_get_vfio_device_list.3.md
 create mode 100644 providers/mlx5/mlx5_vfio.c
 create mode 100644 providers/mlx5/mlx5_vfio.h
Introduce mlx5dv_get_vfio_device_list() API for getting list of mlx5 devices which can be used over VFIO. A man page with the expected usage was added. Signed-off-by: Yishai Hadas <yishaih@nvidia.com> --- debian/ibverbs-providers.symbols | 2 + providers/mlx5/CMakeLists.txt | 3 +- providers/mlx5/libmlx5.map | 5 + providers/mlx5/man/CMakeLists.txt | 1 + .../mlx5/man/mlx5dv_get_vfio_device_list.3.md | 64 +++++++ providers/mlx5/mlx5.c | 4 +- providers/mlx5/mlx5.h | 1 + providers/mlx5/mlx5_vfio.c | 190 +++++++++++++++++++++ providers/mlx5/mlx5_vfio.h | 27 +++ providers/mlx5/mlx5dv.h | 13 ++ 10 files changed, 307 insertions(+), 3 deletions(-) create mode 100644 providers/mlx5/man/mlx5dv_get_vfio_device_list.3.md create mode 100644 providers/mlx5/mlx5_vfio.c create mode 100644 providers/mlx5/mlx5_vfio.h