diff mbox series

[rdma-core,02/27] mlx5: Introduce mlx5dv_get_vfio_device_list()

Message ID 20210720081647.1980-3-yishaih@nvidia.com (mailing list archive)
State Not Applicable
Headers show
Series Introduce mlx5 user space driver over VFIO | expand

Commit Message

Yishai Hadas July 20, 2021, 8:16 a.m. UTC
Introduce the mlx5dv_get_vfio_device_list() API for getting a list of mlx5
devices that can be used over VFIO.

A man page with the expected usage was added.

Signed-off-by: Yishai Hadas <yishaih@nvidia.com>
---
 debian/ibverbs-providers.symbols                   |   2 +
 providers/mlx5/CMakeLists.txt                      |   3 +-
 providers/mlx5/libmlx5.map                         |   5 +
 providers/mlx5/man/CMakeLists.txt                  |   1 +
 .../mlx5/man/mlx5dv_get_vfio_device_list.3.md      |  64 +++++++
 providers/mlx5/mlx5.c                              |   4 +-
 providers/mlx5/mlx5.h                              |   1 +
 providers/mlx5/mlx5_vfio.c                         | 190 +++++++++++++++++++++
 providers/mlx5/mlx5_vfio.h                         |  27 +++
 providers/mlx5/mlx5dv.h                            |  13 ++
 10 files changed, 307 insertions(+), 3 deletions(-)
 create mode 100644 providers/mlx5/man/mlx5dv_get_vfio_device_list.3.md
 create mode 100644 providers/mlx5/mlx5_vfio.c
 create mode 100644 providers/mlx5/mlx5_vfio.h
diff mbox series

Patch

diff --git a/debian/ibverbs-providers.symbols b/debian/ibverbs-providers.symbols
index 294832b..64e29b1 100644
--- a/debian/ibverbs-providers.symbols
+++ b/debian/ibverbs-providers.symbols
@@ -28,6 +28,7 @@  libmlx5.so.1 ibverbs-providers #MINVER#
  MLX5_1.18@MLX5_1.18 34
  MLX5_1.19@MLX5_1.19 35
  MLX5_1.20@MLX5_1.20 36
+ MLX5_1.21@MLX5_1.21 37
  mlx5dv_init_obj@MLX5_1.0 13
  mlx5dv_init_obj@MLX5_1.2 15
  mlx5dv_query_device@MLX5_1.0 13
@@ -133,6 +134,7 @@  libmlx5.so.1 ibverbs-providers #MINVER#
  mlx5dv_map_ah_to_qp@MLX5_1.20 36
  mlx5dv_qp_cancel_posted_send_wrs@MLX5_1.20 36
  _mlx5dv_mkey_check@MLX5_1.20 36
+ mlx5dv_get_vfio_device_list@MLX5_1.21 37
 libefa.so.1 ibverbs-providers #MINVER#
 * Build-Depends-Package: libibverbs-dev
  EFA_1.0@EFA_1.0 24
diff --git a/providers/mlx5/CMakeLists.txt b/providers/mlx5/CMakeLists.txt
index 69abdd1..45e397e 100644
--- a/providers/mlx5/CMakeLists.txt
+++ b/providers/mlx5/CMakeLists.txt
@@ -11,7 +11,7 @@  if (MLX5_MW_DEBUG)
 endif()
 
 rdma_shared_provider(mlx5 libmlx5.map
-  1 1.20.${PACKAGE_VERSION}
+  1 1.21.${PACKAGE_VERSION}
   buf.c
   cq.c
   dbrec.c
@@ -30,6 +30,7 @@  rdma_shared_provider(mlx5 libmlx5.map
   dr_table.c
   dr_send.c
   mlx5.c
+  mlx5_vfio.c
   qp.c
   srq.c
   verbs.c
diff --git a/providers/mlx5/libmlx5.map b/providers/mlx5/libmlx5.map
index af7541d..3e8a4d8 100644
--- a/providers/mlx5/libmlx5.map
+++ b/providers/mlx5/libmlx5.map
@@ -189,3 +189,8 @@  MLX5_1.20 {
 		mlx5dv_qp_cancel_posted_send_wrs;
 		_mlx5dv_mkey_check;
 } MLX5_1.19;
+
+MLX5_1.21 {
+        global:
+		mlx5dv_get_vfio_device_list;
+} MLX5_1.20;
diff --git a/providers/mlx5/man/CMakeLists.txt b/providers/mlx5/man/CMakeLists.txt
index bb6499d..91aebed 100644
--- a/providers/mlx5/man/CMakeLists.txt
+++ b/providers/mlx5/man/CMakeLists.txt
@@ -22,6 +22,7 @@  rdma_man_pages(
   mlx5dv_dump.3.md
   mlx5dv_flow_action_esp.3.md
   mlx5dv_get_clock_info.3
+  mlx5dv_get_vfio_device_list.3.md
   mlx5dv_init_obj.3
   mlx5dv_is_supported.3.md
   mlx5dv_map_ah_to_qp.3.md
diff --git a/providers/mlx5/man/mlx5dv_get_vfio_device_list.3.md b/providers/mlx5/man/mlx5dv_get_vfio_device_list.3.md
new file mode 100644
index 0000000..13c8e63
--- /dev/null
+++ b/providers/mlx5/man/mlx5dv_get_vfio_device_list.3.md
@@ -0,0 +1,64 @@ 
+---
+layout: page
+title: mlx5dv_get_vfio_device_list
+section: 3
+tagline: Verbs
+---
+
+# NAME
+
+mlx5dv_get_vfio_device_list - Get list of available devices to be used over VFIO
+
+# SYNOPSIS
+
+```c
+#include <infiniband/mlx5dv.h>
+
+struct ibv_device **
+mlx5dv_get_vfio_device_list(struct mlx5dv_vfio_context_attr *attr);
+```
+
+# DESCRIPTION
+
+Returns a NULL-terminated array of devices based on input *attr*.
+
+# ARGUMENTS
+
+*attr*
+:	Describes the VFIO devices to return in the list.
+
+## *attr* argument
+
+```c
+struct mlx5dv_vfio_context_attr {
+	const char *pci_name;
+	uint32_t flags;
+	uint64_t comp_mask;
+};
+```
+*pci_name*
+:      The PCI name of the required device.
+
+*flags*
+:       A bitwise OR of the various values described below.
+
+        *MLX5DV_VFIO_CTX_FLAGS_INIT_LINK_DOWN*:
+        Upon device initialization link should stay down.
+
+*comp_mask*
+:       Bitmask specifying what fields in the structure are valid.
+
+# RETURN VALUE
+Returns the array of the matching devices, or sets errno and returns NULL if the request fails.
+
+# NOTES
+Client code should open all the devices it intends to use with ibv_open_device() before calling ibv_free_device_list(). Once it frees the array with ibv_free_device_list(), it will be able to
+use only the open devices; pointers to unopened devices will no longer be valid.
+
+# SEE ALSO
+
+*ibv_open_device(3)* *ibv_free_device_list(3)*
+
+# AUTHOR
+
+Yishai Hadas <yishaih@nvidia.com>
diff --git a/providers/mlx5/mlx5.c b/providers/mlx5/mlx5.c
index e172b9d..46d7748 100644
--- a/providers/mlx5/mlx5.c
+++ b/providers/mlx5/mlx5.c
@@ -62,7 +62,7 @@  static void mlx5_free_context(struct ibv_context *ibctx);
 #endif
 
 #define HCA(v, d) VERBS_PCI_MATCH(PCI_VENDOR_ID_##v, d, NULL)
-static const struct verbs_match_ent hca_table[] = {
+const struct verbs_match_ent mlx5_hca_table[] = {
 	VERBS_DRIVER_ID(RDMA_DRIVER_MLX5),
 	HCA(MELLANOX, 0x1011),	/* MT4113 Connect-IB */
 	HCA(MELLANOX, 0x1012),	/* Connect-IB Virtual Function */
@@ -2410,7 +2410,7 @@  static const struct verbs_device_ops mlx5_dev_ops = {
 	.name = "mlx5",
 	.match_min_abi_version = MLX5_UVERBS_MIN_ABI_VERSION,
 	.match_max_abi_version = MLX5_UVERBS_MAX_ABI_VERSION,
-	.match_table = hca_table,
+	.match_table = mlx5_hca_table,
 	.alloc_device = mlx5_device_alloc,
 	.uninit_device = mlx5_uninit_device,
 	.alloc_context = mlx5_alloc_context,
diff --git a/providers/mlx5/mlx5.h b/providers/mlx5/mlx5.h
index ac2f88c..3862007 100644
--- a/providers/mlx5/mlx5.h
+++ b/providers/mlx5/mlx5.h
@@ -94,6 +94,7 @@  enum {
 
 extern uint32_t mlx5_debug_mask;
 extern int mlx5_freeze_on_error_cqe;
+extern const struct verbs_match_ent mlx5_hca_table[];
 
 #ifdef MLX5_DEBUG
 #define mlx5_dbg(fp, mask, format, arg...)				\
diff --git a/providers/mlx5/mlx5_vfio.c b/providers/mlx5/mlx5_vfio.c
new file mode 100644
index 0000000..69c7662
--- /dev/null
+++ b/providers/mlx5/mlx5_vfio.c
@@ -0,0 +1,190 @@ 
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved
+ */
+
+#define _GNU_SOURCE
+#include <config.h>
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <errno.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <sys/mman.h>
+#include <string.h>
+#include <sys/param.h>
+
+#include "mlx5dv.h"
+#include "mlx5_vfio.h"
+#include "mlx5.h"
+
+/*
+ * alloc_context callback for the mlx5_vfio device ops.
+ *
+ * Placeholder: no context is created here, the function ignores its
+ * arguments and always returns NULL.
+ */
+static struct verbs_context *
+mlx5_vfio_alloc_context(struct ibv_device *ibdev,
+			int cmd_fd, void *private_data)
+{
+	return NULL;
+}
+
+/*
+ * uninit_device callback: release the mlx5_vfio_device that embeds
+ * verbs_device, freeing its PCI name string first and then the device
+ * structure itself.
+ */
+static void mlx5_vfio_uninit_device(struct verbs_device *verbs_device)
+{
+	struct mlx5_vfio_device *vfio_dev;
+
+	vfio_dev = to_mvfio_dev(&verbs_device->device);
+
+	free(vfio_dev->pci_name);
+	free(vfio_dev);
+}
+
+/*
+ * Device ops installed on every device returned by
+ * mlx5dv_get_vfio_device_list(). Only the name and the context-allocation
+ * and uninit callbacks are provided; all other members are left zeroed.
+ */
+static const struct verbs_device_ops mlx5_vfio_dev_ops = {
+	.name = "mlx5_vfio",
+	.alloc_context = mlx5_vfio_alloc_context,
+	.uninit_device = mlx5_vfio_uninit_device,
+};
+
+/*
+ * Read one numeric sysfs attribute file (e.g. "vendor" or "device") located
+ * under pci_path and store its value in *id.
+ *
+ * The text is parsed with strtoul() base 0, so a "0x" prefix is accepted.
+ * Returns true on success, false if the file cannot be opened or read.
+ */
+static bool mlx5_vfio_read_pci_id(const char *pci_path, const char *attr_name,
+				  uint16_t *id)
+{
+	char pci_info_path[256];
+	char buff[128];
+	ssize_t len;
+	int fd;
+
+	snprintf(pci_info_path, sizeof(pci_info_path), "%s/%s", pci_path,
+		 attr_name);
+	fd = open(pci_info_path, O_RDONLY);
+	if (fd < 0)
+		return false;
+
+	/* read() does not terminate the data; keep one byte for the NUL. */
+	len = read(fd, buff, sizeof(buff) - 1);
+	close(fd);
+	if (len <= 0)
+		return false;
+
+	buff[len] = '\0';
+	*id = strtoul(buff, NULL, 0);
+
+	return true;
+}
+
+/*
+ * Check whether the PCI device whose sysfs directory is pci_path appears as
+ * a PCI entry in mlx5_hca_table.
+ *
+ * Returns true only when both the vendor and the device IDs could be read
+ * and match a table entry; any open/read failure yields false.
+ */
+static bool is_mlx5_pci(const char *pci_path)
+{
+	const struct verbs_match_ent *ent;
+	uint16_t vendor_id, device_id;
+
+	if (!mlx5_vfio_read_pci_id(pci_path, "vendor", &vendor_id) ||
+	    !mlx5_vfio_read_pci_id(pci_path, "device", &device_id))
+		return false;
+
+	for (ent = mlx5_hca_table; ent->kind != VERBS_MATCH_SENTINEL; ent++) {
+		/* The table also holds non-PCI entries (e.g. driver id). */
+		if (ent->kind != VERBS_MATCH_PCI)
+			continue;
+		if (ent->device == device_id && ent->vendor == vendor_id)
+			return true;
+	}
+
+	return false;
+}
+
+/*
+ * Resolve the IOMMU group number of the mlx5 PCI device named pci_name,
+ * given as "<segment>:<bus>:<slot>.<function>" (e.g. "0000:3b:00.0").
+ *
+ * The device must exist under /sys/bus/pci/devices, be listed in
+ * mlx5_hca_table, and its group node /dev/vfio/<group> must exist.
+ *
+ * Returns the group id on success, -1 on failure. errno is EINVAL for a
+ * malformed name, ENODEV when the device is not an mlx5 device or its
+ * iommu_group link has no numeric name; otherwise it is whatever the
+ * failing stat()/readlink() call left.
+ */
+static int mlx5_vfio_get_iommu_group_id(const char *pci_name)
+{
+	/* %x and %u conversions require unsigned int targets. */
+	unsigned int seg, bus, slot, func;
+	int ret, groupid;
+	char path[128], iommu_group_path[128], *group_name;
+	struct stat st;
+	ssize_t len;
+
+	ret = sscanf(pci_name, "%04x:%02x:%02x.%u", &seg, &bus, &slot, &func);
+	if (ret != 4) {
+		errno = EINVAL;
+		return -1;
+	}
+
+	snprintf(path, sizeof(path),
+		 "/sys/bus/pci/devices/%04x:%02x:%02x.%01x/",
+		 seg, bus, slot, func);
+
+	ret = stat(path, &st);
+	if (ret < 0)
+		return -1;
+
+	if (!is_mlx5_pci(path)) {
+		errno = ENODEV;
+		return -1;
+	}
+
+	strncat(path, "iommu_group", sizeof(path) - strlen(path) - 1);
+
+	/*
+	 * readlink() does not NUL-terminate and may fill the whole buffer,
+	 * so reserve the last byte for the terminator written below.
+	 */
+	len = readlink(path, iommu_group_path, sizeof(iommu_group_path) - 1);
+	if (len <= 0)
+		return -1;
+
+	iommu_group_path[len] = 0;
+	/* The link target's last path component is the group number. */
+	group_name = basename(iommu_group_path);
+
+	if (sscanf(group_name, "%d", &groupid) != 1) {
+		errno = ENODEV;
+		return -1;
+	}
+
+	snprintf(path, sizeof(path), "/dev/vfio/%d", groupid);
+	ret = stat(path, &st);
+	if (ret < 0)
+		return -1;
+
+	return groupid;
+}
+
+/*
+ * Fill in the VFIO identity of vfio_dev from attr->pci_name:
+ * vfio_dev->vfio_path becomes "/dev/vfio/<iommu group>" and
+ * vfio_dev->pci_name a heap copy of the name (released by
+ * mlx5_vfio_uninit_device()).
+ *
+ * Returns 0 on success. Returns -1 if the IOMMU group cannot be resolved
+ * or the name cannot be duplicated (errno = ENOMEM); in both cases
+ * vfio_dev is left unmodified.
+ */
+static int mlx5_vfio_get_handle(struct mlx5_vfio_device *vfio_dev,
+				struct mlx5dv_vfio_context_attr *attr)
+{
+	int iommu_group;
+	char *pci_name;
+
+	iommu_group = mlx5_vfio_get_iommu_group_id(attr->pci_name);
+	if (iommu_group < 0)
+		return -1;
+
+	/* Duplicate before touching vfio_dev so failure has no side effect. */
+	pci_name = strdup(attr->pci_name);
+	if (!pci_name) {
+		errno = ENOMEM;
+		return -1;
+	}
+
+	snprintf(vfio_dev->vfio_path, sizeof(vfio_dev->vfio_path),
+		 "/dev/vfio/%d", iommu_group);
+	vfio_dev->pci_name = pci_name;
+
+	return 0;
+}
+
+/*
+ * Build the list of mlx5 devices usable over VFIO that match attr.
+ *
+ * attr->pci_name selects the device; attr->flags may only contain
+ * MLX5DV_VFIO_CTX_FLAGS_INIT_LINK_DOWN and attr->comp_mask must be 0.
+ *
+ * Returns a NULL-terminated array holding a single device on success.
+ * On failure returns NULL with errno set: EINVAL for a NULL attr or
+ * pci_name, EOPNOTSUPP for unsupported flags/comp_mask bits, ENOMEM on
+ * allocation failure, and otherwise the error reported while resolving
+ * the device (ENODEV if none was reported).
+ */
+struct ibv_device **
+mlx5dv_get_vfio_device_list(struct mlx5dv_vfio_context_attr *attr)
+{
+	struct mlx5_vfio_device *vfio_dev;
+	struct ibv_device **list = NULL;
+	int saved_errno;
+	int err;
+
+	if (!attr || !attr->pci_name) {
+		errno = EINVAL;
+		return NULL;
+	}
+
+	if (!check_comp_mask(attr->comp_mask, 0) ||
+	    !check_comp_mask(attr->flags, MLX5DV_VFIO_CTX_FLAGS_INIT_LINK_DOWN)) {
+		errno = EOPNOTSUPP;
+		return NULL;
+	}
+
+	/* One slot for the device plus one for the NULL terminator. */
+	list = calloc(2, sizeof(struct ibv_device *));
+	if (!list) {
+		errno = ENOMEM;
+		return NULL;
+	}
+
+	vfio_dev = calloc(1, sizeof(*vfio_dev));
+	if (!vfio_dev) {
+		saved_errno = ENOMEM;
+		goto end;
+	}
+
+	vfio_dev->vdev.ops = &mlx5_vfio_dev_ops;
+	atomic_init(&vfio_dev->vdev.refcount, 1);
+
+	/* Find the vfio handle for attrs, store in mlx5_vfio_device */
+	errno = 0;
+	err = mlx5_vfio_get_handle(vfio_dev, attr);
+	if (err) {
+		/* The lookup may fail without setting errno; report ENODEV. */
+		saved_errno = errno ? errno : ENODEV;
+		goto err_get;
+	}
+
+	vfio_dev->flags = attr->flags;
+	vfio_dev->page_size = sysconf(_SC_PAGESIZE);
+
+	list[0] = &vfio_dev->vdev.device;
+	return list;
+
+err_get:
+	free(vfio_dev);
+end:
+	free(list);
+	/* Restore after free(), which is not guaranteed to preserve errno. */
+	errno = saved_errno;
+	return NULL;
+}
diff --git a/providers/mlx5/mlx5_vfio.h b/providers/mlx5/mlx5_vfio.h
new file mode 100644
index 0000000..6ba4254
--- /dev/null
+++ b/providers/mlx5/mlx5_vfio.h
@@ -0,0 +1,27 @@ 
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved
+ */
+
+#ifndef MLX5_VFIO_H
+#define MLX5_VFIO_H
+
+#include <stddef.h>
+#include <stdio.h>
+
+#include <infiniband/driver.h>
+
+/* Per-device state of an mlx5 device that is accessed over VFIO. */
+struct mlx5_vfio_device {
+	/* Embedded verbs device; to_mvfio_dev() maps vdev.device back here. */
+	struct verbs_device vdev;
+	/* strdup()'ed copy of the PCI name passed in the context attr. */
+	char *pci_name;
+	/* Path of the device's VFIO group node: "/dev/vfio/<iommu group>". */
+	char vfio_path[IBV_SYSFS_PATH_MAX];
+	/* System page size, as returned by sysconf(_SC_PAGESIZE). */
+	int page_size;
+	/* Copy of mlx5dv_vfio_context_attr.flags given at creation. */
+	uint32_t flags;
+};
+
+/*
+ * Convert a pointer to the ibv_device embedded in a mlx5_vfio_device
+ * (its vdev.device member) back to the containing mlx5_vfio_device.
+ * ibdev must point into a mlx5_vfio_device for the result to be valid.
+ */
+static inline struct mlx5_vfio_device *to_mvfio_dev(struct ibv_device *ibdev)
+{
+	return container_of(ibdev, struct mlx5_vfio_device, vdev.device);
+}
+
+#endif
diff --git a/providers/mlx5/mlx5dv.h b/providers/mlx5/mlx5dv.h
index 2eba232..e657527 100644
--- a/providers/mlx5/mlx5dv.h
+++ b/providers/mlx5/mlx5dv.h
@@ -1474,6 +1474,19 @@  struct mlx5dv_context_attr {
 
 bool mlx5dv_is_supported(struct ibv_device *device);
 
+/* Bit flags accepted in mlx5dv_vfio_context_attr.flags. */
+enum mlx5dv_vfio_context_attr_flags {
+	/* Upon device initialization the link should stay down. */
+	MLX5DV_VFIO_CTX_FLAGS_INIT_LINK_DOWN = 1 << 0,
+};
+
+/* Input of mlx5dv_get_vfio_device_list(): selects the VFIO device(s). */
+struct mlx5dv_vfio_context_attr {
+	/* PCI name of the required device. */
+	const char *pci_name;
+	uint32_t flags; /* Use enum mlx5dv_vfio_context_attr_flags */
+	/* Bitmask specifying which fields in the structure are valid. */
+	uint64_t comp_mask;
+};
+
+/*
+ * Get the list of mlx5 devices matching attr that can be used over VFIO.
+ * Returns an array of devices, or NULL with errno set if the request
+ * fails. The array is released with ibv_free_device_list().
+ */
+struct ibv_device **
+mlx5dv_get_vfio_device_list(struct mlx5dv_vfio_context_attr *attr);
+
 struct ibv_context *
 mlx5dv_open_device(struct ibv_device *device, struct mlx5dv_context_attr *attr);