@@ -3,3 +3,6 @@ ccflags-y += -I$(srctree)/$(src)/include
obj-$(CONFIG_NVIDIA_VGPU_MGR) += nvidia-vgpu-mgr.o
nvidia-vgpu-mgr-y := vgpu_mgr.o vgpu.o vgpu_types.o rpc.o
+
+obj-$(CONFIG_NVIDIA_VGPU_VFIO_PCI) += nvidia-vgpu-vfio-pci.o
+nvidia-vgpu-vfio-pci-y := vfio_main.o vfio_access.o
new file mode 100644
@@ -0,0 +1,43 @@
+/* SPDX-License-Identifier: GPL-2.0 OR MIT */
+/*
+ * Copyright © 2024 NVIDIA Corporation
+ */
+
+#ifndef __NVIDIA_VGPU_VFIO_H__
+#define __NVIDIA_VGPU_VFIO_H__
+
+#include <linux/vfio_pci_core.h>
+
+#include <nvrm/nvtypes.h>
+#include <nvrm/common/sdk/nvidia/inc/ctrl/ctrla081.h>
+#include <nvrm/common/sdk/nvidia/inc/ctrl/ctrl2080/ctrl2080vgpumgrinternal.h>
+
+#include "vgpu_mgr.h"
+
+#define VGPU_CONFIG_PARAMS_MAX_LENGTH 1024
+#define DEVICE_CLASS_LENGTH 5
+#define PCI_CONFIG_SPACE_LENGTH 4096
+
+#define CAP_LIST_NEXT_PTR_MSIX 0x7c
+#define MSIX_CAP_SIZE 0xc
+
+struct nvidia_vgpu_vfio {
+ struct vfio_pci_core_device core_dev;
+ u8 config_space[PCI_CONFIG_SPACE_LENGTH];
+
+ void __iomem *bar0_map;
+
+ u8 **vgpu_types;
+ NVA081_CTRL_VGPU_INFO *curr_vgpu_type;
+ u32 num_vgpu_types;
+
+ struct nvidia_vgpu_mgr *vgpu_mgr;
+ struct nvidia_vgpu *vgpu;
+};
+
+void nvidia_vgpu_vfio_setup_config(struct nvidia_vgpu_vfio *nvdev);
+ssize_t nvidia_vgpu_vfio_access(struct nvidia_vgpu_vfio *nvdev,
+				char *buf, size_t count,
+ loff_t ppos, bool iswrite);
+
+#endif /* __NVIDIA_VGPU_VFIO_H__ */
new file mode 100644
@@ -0,0 +1,297 @@
+// SPDX-License-Identifier: GPL-2.0 OR MIT
+/*
+ * Copyright © 2024 NVIDIA Corporation
+ */
+
+#include <linux/bitfield.h>
+#include <linux/bits.h>
+#include <linux/string.h>
+#include <linux/pci.h>
+#include <linux/pci_regs.h>
+
+#include "vfio.h"
+
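+/*
+ * Build the virtual PCI configuration space exposed to userspace: a
+ * type-0 header carrying the vGPU type's device ID, virtual BARs, and a
+ * capability list that points straight at the MSI-X capability, which is
+ * passed through to the physical VF.
+ */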
+void nvidia_vgpu_vfio_setup_config(struct nvidia_vgpu_vfio *nvdev)
+{
+ u8 *buffer = NULL;
+
+ memset(nvdev->config_space, 0, sizeof(nvdev->config_space));
+
+ /* Header type 0 (normal devices) */
+ *(u16 *)&nvdev->config_space[PCI_VENDOR_ID] = 0x10de;
+ *(u16 *)&nvdev->config_space[PCI_DEVICE_ID] =
+ FIELD_GET(GENMASK(31, 16), nvdev->curr_vgpu_type->vdevId);
+ *(u16 *)&nvdev->config_space[PCI_COMMAND] = 0x0000;
+ *(u16 *)&nvdev->config_space[PCI_STATUS] = 0x0010;
+
+ buffer = &nvdev->config_space[PCI_CLASS_REVISION];
+ pci_read_config_byte(nvdev->core_dev.pdev, PCI_CLASS_REVISION, buffer);
+
+ nvdev->config_space[PCI_CLASS_PROG] = 0; /* VGA-compatible */
+ nvdev->config_space[PCI_CLASS_DEVICE] = 0; /* VGA controller */
+ nvdev->config_space[PCI_CLASS_DEVICE + 1] = 3; /* display controller */
+
+ /* BAR0: 32-bit */
+ *(u32 *)&nvdev->config_space[PCI_BASE_ADDRESS_0] = 0x00000000;
+ /* BAR1: 64-bit, prefetchable */
+ *(u32 *)&nvdev->config_space[PCI_BASE_ADDRESS_1] = 0x0000000c;
+ /* BAR2: 64-bit, prefetchable */
+ *(u32 *)&nvdev->config_space[PCI_BASE_ADDRESS_3] = 0x0000000c;
+ /* Disable BAR3: I/O */
+ *(u32 *)&nvdev->config_space[PCI_BASE_ADDRESS_5] = 0x00000000;
+
+ *(u16 *)&nvdev->config_space[PCI_SUBSYSTEM_VENDOR_ID] = 0x10de;
+ *(u16 *)&nvdev->config_space[PCI_SUBSYSTEM_ID] =
+ FIELD_GET(GENMASK(15, 0), nvdev->curr_vgpu_type->vdevId);
+
+ nvdev->config_space[PCI_CAPABILITY_LIST] = CAP_LIST_NEXT_PTR_MSIX;
+ nvdev->config_space[CAP_LIST_NEXT_PTR_MSIX + 1] = 0x0;
+
+	/* INTx disabled */
+	nvdev->config_space[PCI_INTERRUPT_PIN] = 0;
+}
+
+static void read_hw_pci_config(struct pci_dev *pdev, char *buf,
+ size_t count, loff_t offset)
+{
+ switch (count) {
+ case 4:
+ pci_read_config_dword(pdev, offset, (u32 *)buf);
+ break;
+
+ case 2:
+ pci_read_config_word(pdev, offset, (u16 *)buf);
+ break;
+
+ case 1:
+ pci_read_config_byte(pdev, offset, (u8 *)buf);
+ break;
+ default:
+		WARN_ONCE(1, "unsupported access length\n");
+ break;
+ }
+}
+
+static void write_hw_pci_config(struct pci_dev *pdev, char *buf,
+ size_t count, loff_t offset)
+{
+ switch (count) {
+ case 4:
+ pci_write_config_dword(pdev, offset, *(u32 *)buf);
+ break;
+
+ case 2:
+ pci_write_config_word(pdev, offset, *(u16 *)buf);
+ break;
+
+ case 1:
+ pci_write_config_byte(pdev, offset, *(u8 *)buf);
+ break;
+ default:
+		WARN_ONCE(1, "unsupported access length\n");
+ break;
+ }
+}
+
+static void hw_pci_config_rw(struct pci_dev *pdev, char *buf,
+ size_t count, loff_t offset,
+ bool is_write)
+{
+	if (is_write)
+		write_hw_pci_config(pdev, buf, count, offset);
+	else
+		read_hw_pci_config(pdev, buf, count, offset);
+}
+
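+/*
+ * Pass BAR0 accesses through to the physical VF. The BAR is claimed and
+ * ioremap()ed lazily on first access and torn down in close_device().
+ */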
+static ssize_t bar0_rw(struct nvidia_vgpu_vfio *nvdev, char *buf,
+ size_t count, loff_t ppos, bool iswrite)
+{
+ struct pci_dev *pdev = nvdev->core_dev.pdev;
+ int index = VFIO_PCI_OFFSET_TO_INDEX(ppos);
+ loff_t offset = ppos;
+ void __iomem *map;
+ u32 val;
+ int ret;
+
+ if (index != VFIO_PCI_BAR0_REGION_INDEX)
+ return -EINVAL;
+
+ offset &= VFIO_PCI_OFFSET_MASK;
+
+	if (!nvdev->bar0_map) {
+ ret = pci_request_selected_regions(pdev, 1 << index, "nvidia-vgpu-vfio");
+ if (ret)
+ return ret;
+
+ if (!(pci_resource_flags(pdev, index) & IORESOURCE_MEM)) {
+ pci_release_selected_regions(pdev, 1 << index);
+ return -EIO;
+ }
+
+ map = ioremap(pci_resource_start(pdev, index), pci_resource_len(pdev, index));
+ if (!map) {
+ pci_err(pdev, "Can't map BAR0 MMIO space\n");
+ pci_release_selected_regions(pdev, 1 << index);
+ return -ENOMEM;
+ }
+ nvdev->bar0_map = map;
+	} else {
+		map = nvdev->bar0_map;
+	}
+
+ if (!iswrite) {
+ switch (count) {
+ case 4:
+ val = ioread32(map + offset);
+ break;
+ case 2:
+ val = ioread16(map + offset);
+ break;
+		case 1:
+			val = ioread8(map + offset);
+			break;
+		default:
+			WARN_ONCE(1, "unsupported access length\n");
+			val = 0;
+			break;
+		}
+ memcpy(buf, (u8 *)&val, count);
+ } else {
+ switch (count) {
+ case 4:
+ iowrite32(*(u32 *)buf, map + offset);
+ break;
+ case 2:
+ iowrite16(*(u16 *)buf, map + offset);
+ break;
+		case 1:
+			iowrite8(*(u8 *)buf, map + offset);
+			break;
+		default:
+			WARN_ONCE(1, "unsupported access length\n");
+			break;
+		}
+ }
+ return count;
+}
+
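+/*
+ * Serve a config space access from the virtual config space. The MSI-X
+ * capability window and selected PCI_COMMAND/PCI_STATUS bits are
+ * forwarded to the physical VF, and BAR writes emulate BAR sizing
+ * against the virtual BAR lengths.
+ */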
+static ssize_t pci_config_rw(struct nvidia_vgpu_vfio *nvdev, char *buf,
+ size_t count, loff_t ppos, bool iswrite)
+{
+ struct pci_dev *pdev = nvdev->core_dev.pdev;
+ int index = VFIO_PCI_OFFSET_TO_INDEX(ppos);
+ loff_t offset = ppos;
+ u32 bar_mask, cfg_addr;
+ u32 val = 0;
+
+ if (index != VFIO_PCI_CONFIG_REGION_INDEX)
+ return -EINVAL;
+
+ offset &= VFIO_PCI_OFFSET_MASK;
+
+	if (offset >= CAP_LIST_NEXT_PTR_MSIX &&
+	    offset < CAP_LIST_NEXT_PTR_MSIX + MSIX_CAP_SIZE) {
+ hw_pci_config_rw(pdev, buf, count, offset, iswrite);
+ return count;
+ }
+
+ if (!iswrite) {
+ memcpy(buf, (u8 *)&nvdev->config_space[offset], count);
+
+ switch (offset) {
+ case PCI_COMMAND:
+ hw_pci_config_rw(pdev, (char *)&val, count, offset, iswrite);
+
+ switch (count) {
+ case 4:
+ val = (u32)(val & 0xFFFF0000) | (val &
+ (PCI_COMMAND_PARITY | PCI_COMMAND_SERR));
+ break;
+ case 2:
+ val = (val & (PCI_COMMAND_PARITY | PCI_COMMAND_SERR));
+ break;
+ default:
+				WARN_ONCE(1, "unsupported access length\n");
+ break;
+ }
+ break;
+ case PCI_STATUS:
+ hw_pci_config_rw(pdev, (char *)&val, count, offset, iswrite);
+ break;
+
+ default:
+ break;
+ }
+ *(u32 *)buf = *(u32 *)buf | val;
+ } else {
+ switch (offset) {
+ case PCI_VENDOR_ID:
+ case PCI_DEVICE_ID:
+ case PCI_CAPABILITY_LIST:
+ break;
+
+ case PCI_STATUS:
+ hw_pci_config_rw(pdev, buf, count, offset, iswrite);
+ break;
+
+ case PCI_COMMAND:
+ if (count == 4) {
+ val = (u32)((*(u32 *)buf & 0xFFFF0000) >> 16);
+ hw_pci_config_rw(pdev, (char *)&val, 2, PCI_STATUS, iswrite);
+
+ val = (u32)(*(u32 *)buf & 0x0000FFFF);
+ *(u32 *)buf = val;
+ }
+
+ memcpy((u8 *)&nvdev->config_space[offset], buf, count);
+ break;
+
+ case PCI_BASE_ADDRESS_0:
+ case PCI_BASE_ADDRESS_1:
+ case PCI_BASE_ADDRESS_2:
+ case PCI_BASE_ADDRESS_3:
+ case PCI_BASE_ADDRESS_4:
+ cfg_addr = *(u32 *)buf;
+
+			switch (offset) {
+			case PCI_BASE_ADDRESS_0:
+				bar_mask = (u32)((~pci_resource_len(pdev, VFIO_PCI_BAR0_REGION_INDEX) + 1) & ~0xFul);
+				cfg_addr = (cfg_addr & bar_mask) |
+					   (nvdev->config_space[offset] & 0xFul);
+				break;
+
+			case PCI_BASE_ADDRESS_1:
+				bar_mask = (u32)((~((u64)nvdev->curr_vgpu_type->bar1Length * 1024 * 1024) + 1) & ~0xFul);
+				cfg_addr = (cfg_addr & bar_mask) |
+					   (nvdev->config_space[offset] & 0xFul);
+				break;
+
+			case PCI_BASE_ADDRESS_2:
+				bar_mask = (u32)(((~((u64)nvdev->curr_vgpu_type->bar1Length * 1024 * 1024) + 1) & ~0xFul) >> 32);
+				cfg_addr = cfg_addr & bar_mask;
+				break;
+
+			case PCI_BASE_ADDRESS_3:
+				bar_mask = (u32)((~pci_resource_len(pdev, VFIO_PCI_BAR3_REGION_INDEX) + 1) & ~0xFul);
+				cfg_addr = (cfg_addr & bar_mask) |
+					   (nvdev->config_space[offset] & 0xFul);
+				break;
+
+			case PCI_BASE_ADDRESS_4:
+				bar_mask = (u32)(((~pci_resource_len(pdev, VFIO_PCI_BAR3_REGION_INDEX) + 1) & ~0xFul) >> 32);
+				cfg_addr = cfg_addr & bar_mask;
+				break;
+			}
+ *(u32 *)&nvdev->config_space[offset] = cfg_addr;
+ break;
+ default:
+ break;
+		}
+ }
+ return count;
+}
+
+ssize_t nvidia_vgpu_vfio_access(struct nvidia_vgpu_vfio *nvdev, char *buf,
+ size_t count, loff_t ppos, bool iswrite)
+{
+ int index = VFIO_PCI_OFFSET_TO_INDEX(ppos);
+
+ if (index >= VFIO_PCI_NUM_REGIONS)
+ return -EINVAL;
+
+ switch (index) {
+ case VFIO_PCI_CONFIG_REGION_INDEX:
+		return pci_config_rw(nvdev, buf, count, ppos, iswrite);
+ case VFIO_PCI_BAR0_REGION_INDEX:
+ return bar0_rw(nvdev, buf, count, ppos, iswrite);
+ default:
+ return -EINVAL;
+ }
+}
new file mode 100644
@@ -0,0 +1,511 @@
+// SPDX-License-Identifier: GPL-2.0 OR MIT
+/*
+ * Copyright © 2024 NVIDIA Corporation
+ */
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/pci.h>
+#include <linux/dma-mapping.h>
+#include <linux/vfio_pci_core.h>
+#include <linux/types.h>
+
+#include "vfio.h"
+
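+/* GFIDs are 1-based (GFID 0 refers to the PF), so VF N maps to GFID N + 1. */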
+static int pdev_to_gfid(struct pci_dev *pdev)
+{
+ return pci_iov_vf_id(pdev) + 1;
+}
+
+static int destroy_vgpu(struct nvidia_vgpu_vfio *nvdev)
+{
+ int ret;
+
+ ret = nvidia_vgpu_mgr_destroy_vgpu(nvdev->vgpu);
+ if (ret)
+ return ret;
+
+ kfree(nvdev->vgpu);
+ nvdev->vgpu = NULL;
+ return 0;
+}
+
+static int create_vgpu(struct nvidia_vgpu_vfio *nvdev)
+{
+ struct nvidia_vgpu_mgr *vgpu_mgr = nvdev->vgpu_mgr;
+ struct pci_dev *pdev = nvdev->core_dev.pdev;
+ struct nvidia_vgpu *vgpu;
+ int ret;
+
+ vgpu = kzalloc(sizeof(*vgpu), GFP_KERNEL);
+ if (!vgpu)
+ return -ENOMEM;
+
+ vgpu->info.id = pci_iov_vf_id(pdev);
+ vgpu->info.dbdf = (0 << 16) | pci_dev_id(pdev);
+ vgpu->info.gfid = pdev_to_gfid(pdev);
+
+ vgpu->vgpu_mgr = vgpu_mgr;
+ vgpu->pdev = pdev;
+
+ ret = nvidia_vgpu_mgr_create_vgpu(vgpu,
+ (u8 *)nvdev->curr_vgpu_type);
+ if (ret) {
+ kfree(vgpu);
+ return ret;
+ }
+
+ pr_err("create_vgpu() called\n");
+ nvdev->vgpu = vgpu;
+ return 0;
+}
+
+static inline struct vfio_pci_core_device *
+vdev_to_core_dev(struct vfio_device *vdev)
+{
+ return container_of(vdev, struct vfio_pci_core_device, vdev);
+}
+
+static inline struct nvidia_vgpu_vfio *
+core_dev_to_nvdev(struct vfio_pci_core_device *core_dev)
+{
+ return container_of(core_dev, struct nvidia_vgpu_vfio, core_dev);
+}
+
+static void detach_vgpu_mgr(struct nvidia_vgpu_vfio *nvdev)
+{
+ nvidia_vgpu_mgr_put(nvdev->vgpu_mgr);
+
+ nvdev->vgpu_mgr = NULL;
+ nvdev->vgpu_types = NULL;
+ nvdev->num_vgpu_types = 0;
+}
+
+static int attach_vgpu_mgr(struct nvidia_vgpu_vfio *nvdev,
+ struct pci_dev *pdev)
+{
+ struct nvidia_vgpu_mgr *vgpu_mgr;
+
+ vgpu_mgr = nvidia_vgpu_mgr_get(pdev);
+ if (IS_ERR(vgpu_mgr))
+ return PTR_ERR(vgpu_mgr);
+
+ nvdev->vgpu_mgr = vgpu_mgr;
+ nvdev->vgpu_types = nvdev->vgpu_mgr->vgpu_types;
+ nvdev->num_vgpu_types = nvdev->vgpu_mgr->num_vgpu_types;
+
+ return 0;
+}
+
+static NVA081_CTRL_VGPU_INFO *
+find_vgpu_type(struct nvidia_vgpu_vfio *nvdev, u32 type_id)
+{
+ NVA081_CTRL_VGPU_INFO *vgpu_type;
+ u32 i;
+
+ for (i = 0; i < nvdev->num_vgpu_types; i++) {
+ vgpu_type = (NVA081_CTRL_VGPU_INFO *)nvdev->vgpu_types[i];
+ if (vgpu_type->vgpuType == type_id)
+ return vgpu_type;
+ }
+
+ return NULL;
+}
+
+static int
+nvidia_vgpu_vfio_open_device(struct vfio_device *vdev)
+{
+ struct vfio_pci_core_device *core_dev = vdev_to_core_dev(vdev);
+ struct nvidia_vgpu_vfio *nvdev = core_dev_to_nvdev(core_dev);
+ struct pci_dev *pdev = core_dev->pdev;
+ u64 pf_dma_mask;
+ int ret;
+
+ if (!nvdev->curr_vgpu_type)
+ return -ENODEV;
+
+ if (!pdev->physfn)
+ return -EINVAL;
+
+ ret = create_vgpu(nvdev);
+ if (ret)
+ return ret;
+
+ ret = pci_enable_device(pdev);
+ if (ret)
+ goto err_enable_device;
+
+ pci_set_master(pdev);
+
+	pf_dma_mask = dma_get_mask(&pdev->physfn->dev);
+	ret = dma_set_mask_and_coherent(&pdev->dev, pf_dma_mask);
+	if (ret)
+		goto err_set_dma_mask;
+
+ ret = pci_try_reset_function(pdev);
+ if (ret)
+ goto err_reset_function;
+
+ ret = nvidia_vgpu_mgr_enable_bme(nvdev->vgpu);
+ if (ret)
+ goto err_enable_bme;
+
+ return 0;
+
+err_enable_bme:
+err_reset_function:
+err_set_dma_mask:
+ pci_clear_master(pdev);
+ pci_disable_device(pdev);
+err_enable_device:
+ destroy_vgpu(nvdev);
+ return ret;
+}
+
+static void
+nvidia_vgpu_vfio_close_device(struct vfio_device *vdev)
+{
+ struct vfio_pci_core_device *core_dev = vdev_to_core_dev(vdev);
+ struct nvidia_vgpu_vfio *nvdev = core_dev_to_nvdev(core_dev);
+ struct pci_dev *pdev = core_dev->pdev;
+
+ WARN_ON(destroy_vgpu(nvdev));
+
+ if (nvdev->bar0_map) {
+ iounmap(nvdev->bar0_map);
+		pci_release_selected_regions(pdev, 1 << VFIO_PCI_BAR0_REGION_INDEX);
+ nvdev->bar0_map = NULL;
+ }
+
+ pci_clear_master(pdev);
+ pci_disable_device(pdev);
+}
+
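+/*
+ * VFIO_DEVICE_GET_REGION_INFO handler. Note that BAR1 reports the vGPU
+ * type's virtual BAR1 size instead of the physical VF resource length.
+ */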
+static int
+get_region_info(struct vfio_pci_core_device *core_dev, unsigned long arg)
+{
+ struct nvidia_vgpu_vfio *nvdev = core_dev_to_nvdev(core_dev);
+ struct pci_dev *pdev = core_dev->pdev;
+ struct vfio_region_info info;
+ unsigned long minsz;
+ int ret = 0;
+
+ minsz = offsetofend(struct vfio_region_info, offset);
+ if (copy_from_user(&info, (void __user *)arg, minsz))
+		return -EFAULT;
+
+ if (info.argsz < minsz)
+ return -EINVAL;
+
+ switch (info.index) {
+ case VFIO_PCI_CONFIG_REGION_INDEX:
+ info.offset = VFIO_PCI_INDEX_TO_OFFSET(info.index);
+ info.size = PCI_CONFIG_SPACE_LENGTH;
+ info.flags = VFIO_REGION_INFO_FLAG_READ |
+ VFIO_REGION_INFO_FLAG_WRITE;
+ break;
+
+	case VFIO_PCI_BAR0_REGION_INDEX ... VFIO_PCI_BAR4_REGION_INDEX: {
+		struct vfio_info_cap caps = { .buf = NULL, .size = 0 };
+
+ info.offset = VFIO_PCI_INDEX_TO_OFFSET(info.index);
+ info.size = pci_resource_len(pdev, info.index);
+
+		if (info.index == VFIO_PCI_BAR1_REGION_INDEX)
+			info.size = (u64)nvdev->curr_vgpu_type->bar1Length * 1024 * 1024;
+
+ if (!info.size) {
+ info.flags = 0;
+ break;
+ }
+ info.flags = VFIO_REGION_INFO_FLAG_READ |
+ VFIO_REGION_INFO_FLAG_WRITE |
+ VFIO_REGION_INFO_FLAG_MMAP;
+
+ if (caps.size) {
+ info.flags |= VFIO_REGION_INFO_FLAG_CAPS;
+ if (info.argsz < sizeof(info) + caps.size) {
+ info.argsz = sizeof(info) + caps.size;
+ info.cap_offset = 0;
+ } else {
+ vfio_info_cap_shift(&caps, sizeof(info));
+ if (copy_to_user((void __user *)arg +
+ sizeof(info), caps.buf,
+ caps.size)) {
+ kfree(caps.buf);
+ ret = -EFAULT;
+ break;
+ }
+ info.cap_offset = sizeof(info);
+ }
+ kfree(caps.buf);
+ }
+		break;
+	}
+ case VFIO_PCI_BAR5_REGION_INDEX:
+ case VFIO_PCI_ROM_REGION_INDEX:
+ case VFIO_PCI_VGA_REGION_INDEX:
+ info.size = 0;
+ break;
+
+ default:
+ if (info.index >= VFIO_PCI_NUM_REGIONS)
+ ret = -EINVAL;
+ break;
+ }
+
+ if (!ret)
+ ret = copy_to_user((void __user *)arg, &info, minsz) ? -EFAULT : 0;
+
+ return ret;
+}
+
+static long nvidia_vgpu_vfio_ioctl(struct vfio_device *vdev,
+ unsigned int cmd,
+ unsigned long arg)
+{
+ struct vfio_pci_core_device *core_dev = vdev_to_core_dev(vdev);
+ struct nvidia_vgpu_vfio *nvdev = core_dev_to_nvdev(core_dev);
+ int ret = 0;
+
+ if (!nvdev->curr_vgpu_type)
+ return -ENODEV;
+
+ switch (cmd) {
+ case VFIO_DEVICE_GET_REGION_INFO:
+ ret = get_region_info(core_dev, arg);
+ break;
+ case VFIO_DEVICE_GET_PCI_HOT_RESET_INFO:
+ case VFIO_DEVICE_PCI_HOT_RESET:
+ case VFIO_DEVICE_RESET:
+ break;
+
+ default:
+ ret = vfio_pci_core_ioctl(vdev, cmd, arg);
+ break;
+ }
+
+ return ret;
+}
+
+static ssize_t nvidia_vgpu_vfio_read(struct vfio_device *vdev,
+ char __user *buf, size_t count,
+ loff_t *ppos)
+{
+ struct vfio_pci_core_device *core_dev = vdev_to_core_dev(vdev);
+ struct nvidia_vgpu_vfio *nvdev = core_dev_to_nvdev(core_dev);
+ u64 val;
+ size_t done = 0;
+ int ret = 0, size;
+
+ if (!nvdev->curr_vgpu_type)
+ return -ENODEV;
+
+ while (count) {
+ if (count >= 4 && !(*ppos % 4))
+ size = 4;
+ else if (count >= 2 && !(*ppos % 2))
+ size = 2;
+ else
+ size = 1;
+
+ ret = nvidia_vgpu_vfio_access(nvdev, (char *)&val, size, *ppos, false);
+
+ if (ret <= 0)
+ return ret;
+
+ if (copy_to_user(buf, &val, size) != 0)
+ return -EFAULT;
+
+ *ppos += size;
+ buf += size;
+ count -= size;
+ done += size;
+ }
+
+ return done;
+}
+
+static ssize_t nvidia_vgpu_vfio_write(struct vfio_device *vdev,
+ const char __user *buf, size_t count,
+ loff_t *ppos)
+{
+ struct vfio_pci_core_device *core_dev = vdev_to_core_dev(vdev);
+ struct nvidia_vgpu_vfio *nvdev = core_dev_to_nvdev(core_dev);
+ u64 val;
+ size_t done = 0;
+ int ret = 0, size;
+
+ if (!nvdev->curr_vgpu_type)
+ return -ENODEV;
+
+ while (count) {
+ if (count >= 4 && !(*ppos % 4))
+ size = 4;
+ else if (count >= 2 && !(*ppos % 2))
+ size = 2;
+ else
+ size = 1;
+
+ if (copy_from_user(&val, buf, size) != 0)
+ return -EFAULT;
+
+ ret = nvidia_vgpu_vfio_access(nvdev, (char *)&val, size, *ppos, true);
+
+ if (ret <= 0)
+ return ret;
+
+ *ppos += size;
+ buf += size;
+ count -= size;
+ done += size;
+ }
+
+ return done;
+}
+
+static int nvidia_vgpu_vfio_mmap(struct vfio_device *vdev,
+ struct vm_area_struct *vma)
+{
+ struct vfio_pci_core_device *core_dev = vdev_to_core_dev(vdev);
+ struct nvidia_vgpu_vfio *nvdev = core_dev_to_nvdev(core_dev);
+ struct pci_dev *pdev = core_dev->pdev;
+ u64 phys_len, req_len, pgoff, req_start;
+ unsigned int index;
+
+ if (!nvdev->curr_vgpu_type)
+ return -ENODEV;
+
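+	/* The region index lives in the upper bits of the fixed VFIO offset. */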
+ index = vma->vm_pgoff >> (VFIO_PCI_OFFSET_SHIFT - PAGE_SHIFT);
+
+ if (index >= VFIO_PCI_BAR5_REGION_INDEX)
+ return -EINVAL;
+ if (vma->vm_end < vma->vm_start)
+ return -EINVAL;
+ if ((vma->vm_flags & VM_SHARED) == 0)
+ return -EINVAL;
+
+ phys_len = PAGE_ALIGN(pci_resource_len(pdev, index));
+ req_len = vma->vm_end - vma->vm_start;
+ pgoff = vma->vm_pgoff &
+ ((1U << (VFIO_PCI_OFFSET_SHIFT - PAGE_SHIFT)) - 1);
+ req_start = pgoff << PAGE_SHIFT;
+
+ if (req_len == 0)
+ return -EINVAL;
+
+ if ((req_start + req_len > phys_len) || (phys_len == 0))
+ return -EINVAL;
+
+ vma->vm_private_data = vdev;
+ vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
+ vma->vm_pgoff = (pci_resource_start(pdev, index) >> PAGE_SHIFT) + pgoff;
+ vm_flags_set(vma, VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP);
+
+ return remap_pfn_range(vma, vma->vm_start, vma->vm_pgoff, req_len, vma->vm_page_prot);
+}
+
+static const struct vfio_device_ops nvidia_vgpu_vfio_ops = {
+ .name = "nvidia-vgpu-vfio-pci",
+ .init = vfio_pci_core_init_dev,
+ .release = vfio_pci_core_release_dev,
+ .open_device = nvidia_vgpu_vfio_open_device,
+ .close_device = nvidia_vgpu_vfio_close_device,
+ .ioctl = nvidia_vgpu_vfio_ioctl,
+ .device_feature = vfio_pci_core_ioctl_feature,
+ .read = nvidia_vgpu_vfio_read,
+ .write = nvidia_vgpu_vfio_write,
+ .mmap = nvidia_vgpu_vfio_mmap,
+ .request = vfio_pci_core_request,
+ .match = vfio_pci_core_match,
+ .bind_iommufd = vfio_iommufd_physical_bind,
+ .unbind_iommufd = vfio_iommufd_physical_unbind,
+ .attach_ioas = vfio_iommufd_physical_attach_ioas,
+ .detach_ioas = vfio_iommufd_physical_detach_ioas,
+};
+
+static int setup_vgpu_type(struct nvidia_vgpu_vfio *nvdev)
+{
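+	/* Only a single, hardcoded vGPU type is supported for now. */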
+ nvdev->curr_vgpu_type = find_vgpu_type(nvdev, 869);
+ if (!nvdev->curr_vgpu_type)
+ return -ENODEV;
+ return 0;
+}
+
+static int nvidia_vgpu_vfio_probe(struct pci_dev *pdev,
+ const struct pci_device_id *id_table)
+{
+ struct nvidia_vgpu_vfio *nvdev;
+ int ret;
+
+ if (!pdev->is_virtfn)
+ return -EINVAL;
+
+ nvdev = vfio_alloc_device(nvidia_vgpu_vfio, core_dev.vdev,
+ &pdev->dev, &nvidia_vgpu_vfio_ops);
+ if (IS_ERR(nvdev))
+ return PTR_ERR(nvdev);
+
+ ret = attach_vgpu_mgr(nvdev, pdev);
+ if (ret)
+ goto err_attach_vgpu_mgr;
+
+ ret = setup_vgpu_type(nvdev);
+ if (ret)
+ goto err_setup_vgpu_type;
+
+ nvidia_vgpu_vfio_setup_config(nvdev);
+
+ dev_set_drvdata(&pdev->dev, &nvdev->core_dev);
+
+ ret = vfio_pci_core_register_device(&nvdev->core_dev);
+ if (ret)
+ goto err_setup_vgpu_type;
+
+ return 0;
+
+err_setup_vgpu_type:
+ detach_vgpu_mgr(nvdev);
+
+err_attach_vgpu_mgr:
+ vfio_put_device(&nvdev->core_dev.vdev);
+
+ pci_err(pdev, "VF probe failed with ret: %d\n", ret);
+ return ret;
+}
+
+static void nvidia_vgpu_vfio_remove(struct pci_dev *pdev)
+{
+ struct vfio_pci_core_device *core_dev = dev_get_drvdata(&pdev->dev);
+ struct nvidia_vgpu_vfio *nvdev = core_dev_to_nvdev(core_dev);
+
+ vfio_pci_core_unregister_device(core_dev);
+ detach_vgpu_mgr(nvdev);
+ vfio_put_device(&core_dev->vdev);
+}
+
+static const struct pci_device_id nvidia_vgpu_vfio_table[] = {
+ {
+ .vendor = PCI_VENDOR_ID_NVIDIA,
+ .device = PCI_ANY_ID,
+ .subvendor = PCI_ANY_ID,
+ .subdevice = PCI_ANY_ID,
+ .class = (PCI_CLASS_DISPLAY_3D << 8),
+ .class_mask = ~0,
+ },
+ { }
+};
+MODULE_DEVICE_TABLE(pci, nvidia_vgpu_vfio_table);
+
+static struct pci_driver nvidia_vgpu_vfio_driver = {
+ .name = "nvidia-vgpu-vfio",
+ .id_table = nvidia_vgpu_vfio_table,
+ .probe = nvidia_vgpu_vfio_probe,
+ .remove = nvidia_vgpu_vfio_remove,
+ .driver_managed_dma = true,
+};
+
+module_pci_driver(nvidia_vgpu_vfio_driver);
+
+MODULE_LICENSE("Dual MIT/GPL");
+MODULE_AUTHOR("Vinay Kabra <vkabra@nvidia.com>");
+MODULE_AUTHOR("Kirti Wankhede <kwankhede@nvidia.com>");
+MODULE_AUTHOR("Zhi Wang <zhiw@nvidia.com>");
+MODULE_DESCRIPTION("NVIDIA vGPU VFIO Variant Driver - User Level driver for NVIDIA vGPU");
@@ -328,3 +328,25 @@ int nvidia_vgpu_mgr_create_vgpu(struct nvidia_vgpu *vgpu, u8 *vgpu_type)
return ret;
}
EXPORT_SYMBOL(nvidia_vgpu_mgr_create_vgpu);
+
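+/* Propagate the vGPU's Bus Master Enable state to the device firmware via RPC. */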
+static int update_bme_state(struct nvidia_vgpu *vgpu)
+{
+ NV_VGPU_CPU_RPC_DATA_UPDATE_BME_STATE params = {0};
+
+ params.enable = true;
+
+	return nvidia_vgpu_rpc_call(vgpu, NV_VGPU_CPU_RPC_MSG_UPDATE_BME_STATE,
+				    &params, sizeof(params));
+}
+
+/**
+ * nvidia_vgpu_mgr_enable_bme - handle the BME (Bus Master Enable) sequence
+ * @vgpu: the vGPU instance
+ *
+ * Returns: 0 on success, negative error code on failure.
+ */
+int nvidia_vgpu_mgr_enable_bme(struct nvidia_vgpu *vgpu)
+{
+ return update_bme_state(vgpu);
+}
+EXPORT_SYMBOL(nvidia_vgpu_mgr_enable_bme);
@@ -84,6 +84,6 @@ int nvidia_vgpu_rpc_call(struct nvidia_vgpu *vgpu, u32 msg_type,
void nvidia_vgpu_clean_rpc(struct nvidia_vgpu *vgpu);
int nvidia_vgpu_setup_rpc(struct nvidia_vgpu *vgpu);
-int nvidia_vgpu_mgr_reset_vgpu(struct nvidia_vgpu *vgpu);
+int nvidia_vgpu_mgr_enable_bme(struct nvidia_vgpu *vgpu);
#endif
A VFIO variant driver module is designed to extend the capabilities of
the existing VFIO (Virtual Function I/O) framework, offering device
management interfaces to userspace and advanced feature support.

For userspace to use an NVIDIA vGPU, a new vGPU VFIO variant driver is
introduced to provide vGPU management, such as selecting and creating a
vGPU instance, and to support advanced features such as live migration.

Introduce the NVIDIA vGPU VFIO variant driver to support the vGPU
lifecycle management UABI and future advanced features.

Cc: Neo Jia <cjia@nvidia.com>
Cc: Surath Mitra <smitra@nvidia.com>
Cc: Kirti Wankhede <kwankhede@nvidia.com>
Cc: Vinay Kabra <vkabra@nvidia.com>
Cc: Ankit Agrawal <ankita@nvidia.com>
Signed-off-by: Zhi Wang <zhiw@nvidia.com>
---
 drivers/vfio/pci/nvidia-vgpu/Makefile      |   3 +
 drivers/vfio/pci/nvidia-vgpu/vfio.h        |  43 ++
 drivers/vfio/pci/nvidia-vgpu/vfio_access.c | 297 ++++++++++++
 drivers/vfio/pci/nvidia-vgpu/vfio_main.c   | 511 +++++++++++++++++++++
 drivers/vfio/pci/nvidia-vgpu/vgpu.c        |  22 +
 drivers/vfio/pci/nvidia-vgpu/vgpu_mgr.h    |   2 +-
 6 files changed, 877 insertions(+), 1 deletion(-)
 create mode 100644 drivers/vfio/pci/nvidia-vgpu/vfio.h
 create mode 100644 drivers/vfio/pci/nvidia-vgpu/vfio_access.c
 create mode 100644 drivers/vfio/pci/nvidia-vgpu/vfio_main.c
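
For reviewers who want to poke at the result, here is a minimal
userspace sketch (not part of this patch) that opens a VF through the
classic VFIO group interface and reads the virtual vendor/device ID out
of the emulated config space. It assumes the VF is already bound to
nvidia-vgpu-vfio; the IOMMU group number ("42") and the VF address
("0000:3b:00.4") are placeholders for whatever sysfs reports on the
target system, and all error handling is elided.

  /* vgpu-open.c: minimal sketch of the UABI exercised by this driver. */
  #include <fcntl.h>
  #include <stdio.h>
  #include <sys/ioctl.h>
  #include <unistd.h>
  #include <linux/vfio.h>

  int main(void)
  {
  	int container = open("/dev/vfio/vfio", O_RDWR);
  	int group = open("/dev/vfio/42", O_RDWR);
  	unsigned int id = 0;
  	int device;

  	ioctl(group, VFIO_GROUP_SET_CONTAINER, &container);
  	ioctl(container, VFIO_SET_IOMMU, VFIO_TYPE1_IOMMU);

  	/* Returns the fd backed by nvidia_vgpu_vfio_ops. */
  	device = ioctl(group, VFIO_GROUP_GET_DEVICE_FD, "0000:3b:00.4");

  	/* Served by get_region_info() in vfio_main.c. */
  	struct vfio_region_info info = {
  		.argsz = sizeof(info),
  		.index = VFIO_PCI_CONFIG_REGION_INDEX,
  	};
  	ioctl(device, VFIO_DEVICE_GET_REGION_INFO, &info);

  	/* Routed through nvidia_vgpu_vfio_read() -> pci_config_rw(). */
  	pread(device, &id, sizeof(id), info.offset);
  	printf("vendor/device: %08x\n", id);
  	return 0;
  }

This is the same path QEMU's vfio-pci takes, so a successful read here
means the config space emulation above is wired up end to end.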