@@ -21954,6 +21954,12 @@ L: kvm@vger.kernel.org
S: Maintained
F: drivers/vfio/pci/mlx5/
+VFIO NVIDIA PCI DRIVER
+M: Ankit Agrawal <ankita@nvidia.com>
+L: kvm@vger.kernel.org
+S: Maintained
+F: drivers/vfio/pci/nvgpu/
+
VGA_SWITCHEROO
R: Lukas Wunner <lukas@wunner.de>
S: Maintained
@@ -59,4 +59,6 @@ source "drivers/vfio/pci/mlx5/Kconfig"
source "drivers/vfio/pci/hisilicon/Kconfig"
+source "drivers/vfio/pci/nvgpu/Kconfig"
+
endif
@@ -11,3 +11,5 @@ obj-$(CONFIG_VFIO_PCI) += vfio-pci.o
obj-$(CONFIG_MLX5_VFIO_PCI) += mlx5/
obj-$(CONFIG_HISI_ACC_VFIO_PCI) += hisilicon/
+
+obj-$(CONFIG_NVGPU_VFIO_PCI) += nvgpu/
new file mode 100644
@@ -0,0 +1,10 @@
+# SPDX-License-Identifier: GPL-2.0-only
+config NVGPU_VFIO_PCI
+ tristate "VFIO support for the GPU in the NVIDIA Grace Hopper Superchip"
+ depends on ARM64 || (COMPILE_TEST && 64BIT)
+ select VFIO_PCI_CORE
+ help
+ VFIO support for the GPU in the NVIDIA Grace Hopper Superchip is
+ required to assign the GPU device to a VM using KVM/qemu/etc.
+
+ If you don't know what to do here, say N.
new file mode 100644
@@ -0,0 +1,3 @@
+# SPDX-License-Identifier: GPL-2.0-only
+obj-$(CONFIG_NVGPU_VFIO_PCI) += nvgpu-vfio-pci.o
+nvgpu-vfio-pci-y := main.o
new file mode 100644
@@ -0,0 +1,255 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved
+ */
+
+#include <linux/pci.h>
+#include <linux/vfio_pci_core.h>
+
+#define DUMMY_PFN \
+ (((nvdev->mem_prop.hpa + nvdev->mem_prop.mem_length) >> PAGE_SHIFT) - 1)
+
+struct dev_mem_properties {
+ uint64_t hpa;
+ uint64_t mem_length;
+ int bar1_start_offset;
+};
+
+struct nvgpu_vfio_pci_core_device {
+ struct vfio_pci_core_device core_device;
+ struct dev_mem_properties mem_prop;
+};
+
+static int vfio_get_bar1_start_offset(struct vfio_pci_core_device *vdev)
+{
+ u8 val = 0;
+
+ pci_read_config_byte(vdev->pdev, 0x10, &val);
+ /*
+ * The BAR1 start offset in the PCI config space depends on the BAR0size.
+ * Check if the BAR0 is 64b and return the approproiate BAR1 offset.
+ */
+ if (val & PCI_BASE_ADDRESS_MEM_TYPE_64)
+ return VFIO_PCI_BAR2_REGION_INDEX;
+
+ return VFIO_PCI_BAR1_REGION_INDEX;
+}
+
+static int nvgpu_vfio_pci_open_device(struct vfio_device *core_vdev)
+{
+ struct nvgpu_vfio_pci_core_device *nvdev = container_of(
+ core_vdev, struct nvgpu_vfio_pci_core_device, core_device.vdev);
+ struct vfio_pci_core_device *vdev =
+ container_of(core_vdev, struct vfio_pci_core_device, vdev);
+ int ret;
+
+ ret = vfio_pci_core_enable(vdev);
+ if (ret)
+ return ret;
+
+ vfio_pci_core_finish_enable(vdev);
+
+ nvdev->mem_prop.bar1_start_offset = vfio_get_bar1_start_offset(vdev);
+
+ return ret;
+}
+
+int nvgpu_vfio_pci_mmap(struct vfio_device *core_vdev,
+ struct vm_area_struct *vma)
+{
+ struct nvgpu_vfio_pci_core_device *nvdev = container_of(
+ core_vdev, struct nvgpu_vfio_pci_core_device, core_device.vdev);
+
+ unsigned long start_pfn;
+ unsigned int index;
+ u64 req_len, pgoff;
+ int ret = 0;
+
+ index = vma->vm_pgoff >> (VFIO_PCI_OFFSET_SHIFT - PAGE_SHIFT);
+ if (index != nvdev->mem_prop.bar1_start_offset)
+ return vfio_pci_core_mmap(core_vdev, vma);
+
+ /*
+ * Request to mmap the BAR1. Map to the CPU accessible memory on the
+ * GPU using the memory information gathered from the system ACPI
+ * tables.
+ */
+ start_pfn = nvdev->mem_prop.hpa >> PAGE_SHIFT;
+ req_len = vma->vm_end - vma->vm_start;
+ pgoff = vma->vm_pgoff &
+ ((1U << (VFIO_PCI_OFFSET_SHIFT - PAGE_SHIFT)) - 1);
+ if (pgoff >= (nvdev->mem_prop.mem_length >> PAGE_SHIFT))
+ return -EINVAL;
+
+ /*
+ * Perform a PFN map to the memory. The device BAR1 is backed by the
+ * GPU memory now. Check that the mapping does not overflow out of
+ * the GPU memory size.
+ */
+ ret = remap_pfn_range(vma, vma->vm_start, start_pfn + pgoff,
+ min(req_len, nvdev->mem_prop.mem_length - pgoff),
+ vma->vm_page_prot);
+ if (ret)
+ return ret;
+
+ vma->vm_pgoff = start_pfn + pgoff;
+
+ return 0;
+}
+
+long nvgpu_vfio_pci_ioctl(struct vfio_device *core_vdev, unsigned int cmd,
+ unsigned long arg)
+{
+ struct nvgpu_vfio_pci_core_device *nvdev = container_of(
+ core_vdev, struct nvgpu_vfio_pci_core_device, core_device.vdev);
+
+ unsigned long minsz = offsetofend(struct vfio_region_info, offset);
+ struct vfio_region_info info;
+
+ switch (cmd) {
+ case VFIO_DEVICE_GET_REGION_INFO:
+ if (copy_from_user(&info, (void __user *)arg, minsz))
+ return -EFAULT;
+
+ if (info.argsz < minsz)
+ return -EINVAL;
+
+ if (info.index == nvdev->mem_prop.bar1_start_offset) {
+ /*
+ * Request to determine the BAR1 region information. Send the
+ * GPU memory information.
+ */
+ info.offset = VFIO_PCI_INDEX_TO_OFFSET(info.index);
+ info.size = nvdev->mem_prop.mem_length;
+ info.flags = VFIO_REGION_INFO_FLAG_READ |
+ VFIO_REGION_INFO_FLAG_WRITE |
+ VFIO_REGION_INFO_FLAG_MMAP;
+ return copy_to_user((void __user *)arg, &info, minsz) ?
+ -EFAULT : 0;
+ }
+
+ if (info.index == nvdev->mem_prop.bar1_start_offset + 1) {
+ /*
+ * The BAR1 region is 64b. Ignore this access.
+ */
+ info.offset = VFIO_PCI_INDEX_TO_OFFSET(info.index);
+ info.size = 0;
+ info.flags = 0;
+ return copy_to_user((void __user *)arg, &info, minsz) ?
+ -EFAULT : 0;
+ }
+
+ return vfio_pci_core_ioctl(core_vdev, cmd, arg);
+
+ default:
+ return vfio_pci_core_ioctl(core_vdev, cmd, arg);
+ }
+}
+
+static const struct vfio_device_ops nvgpu_vfio_pci_ops = {
+ .name = "nvgpu-vfio-pci",
+ .init = vfio_pci_core_init_dev,
+ .release = vfio_pci_core_release_dev,
+ .open_device = nvgpu_vfio_pci_open_device,
+ .close_device = vfio_pci_core_close_device,
+ .ioctl = nvgpu_vfio_pci_ioctl,
+ .read = vfio_pci_core_read,
+ .write = vfio_pci_core_write,
+ .mmap = nvgpu_vfio_pci_mmap,
+ .request = vfio_pci_core_request,
+ .match = vfio_pci_core_match,
+ .bind_iommufd = vfio_iommufd_physical_bind,
+ .unbind_iommufd = vfio_iommufd_physical_unbind,
+ .attach_ioas = vfio_iommufd_physical_attach_ioas,
+};
+
+static struct nvgpu_vfio_pci_core_device *nvgpu_drvdata(struct pci_dev *pdev)
+{
+ struct vfio_pci_core_device *core_device = dev_get_drvdata(&pdev->dev);
+
+ return container_of(core_device, struct nvgpu_vfio_pci_core_device,
+ core_device);
+}
+
+static int
+nvgpu_vfio_pci_fetch_memory_property(struct pci_dev *pdev,
+ struct nvgpu_vfio_pci_core_device *nvdev)
+{
+ int ret = 0;
+
+ /*
+ * The memory information is present in the system ACPI tables as DSD
+ * properties nvidia,gpu-mem-base-pa and nvidia,gpu-mem-size.
+ */
+ ret = device_property_read_u64(&(pdev->dev), "nvidia,gpu-mem-base-pa",
+ &(nvdev->mem_prop.hpa));
+ if (ret)
+ return ret;
+
+ ret = device_property_read_u64(&(pdev->dev), "nvidia,gpu-mem-size",
+ &(nvdev->mem_prop.mem_length));
+ return ret;
+}
+
+static int nvgpu_vfio_pci_probe(struct pci_dev *pdev,
+ const struct pci_device_id *id)
+{
+ struct nvgpu_vfio_pci_core_device *nvdev;
+ int ret;
+
+ nvdev = vfio_alloc_device(nvgpu_vfio_pci_core_device, core_device.vdev,
+ &pdev->dev, &nvgpu_vfio_pci_ops);
+ if (IS_ERR(nvdev))
+ return PTR_ERR(nvdev);
+
+ dev_set_drvdata(&pdev->dev, nvdev);
+
+ ret = nvgpu_vfio_pci_fetch_memory_property(pdev, nvdev);
+ if (ret)
+ goto out_put_vdev;
+
+ ret = vfio_pci_core_register_device(&nvdev->core_device);
+ if (ret)
+ goto out_put_vdev;
+
+ return ret;
+
+out_put_vdev:
+ vfio_put_device(&nvdev->core_device.vdev);
+ return ret;
+}
+
+static void nvgpu_vfio_pci_remove(struct pci_dev *pdev)
+{
+ struct nvgpu_vfio_pci_core_device *nvdev = nvgpu_drvdata(pdev);
+ struct vfio_pci_core_device *vdev = &nvdev->core_device;
+
+ vfio_pci_core_unregister_device(vdev);
+ vfio_put_device(&vdev->vdev);
+}
+
+static const struct pci_device_id nvgpu_vfio_pci_table[] = {
+ { PCI_DRIVER_OVERRIDE_DEVICE_VFIO(PCI_VENDOR_ID_NVIDIA, 0x2342) },
+ { PCI_DRIVER_OVERRIDE_DEVICE_VFIO(PCI_VENDOR_ID_NVIDIA, 0x2343) },
+ { PCI_DRIVER_OVERRIDE_DEVICE_VFIO(PCI_VENDOR_ID_NVIDIA, 0x2345) },
+ {}
+};
+
+MODULE_DEVICE_TABLE(pci, nvgpu_vfio_pci_table);
+
+static struct pci_driver nvgpu_vfio_pci_driver = {
+ .name = KBUILD_MODNAME,
+ .id_table = nvgpu_vfio_pci_table,
+ .probe = nvgpu_vfio_pci_probe,
+ .remove = nvgpu_vfio_pci_remove,
+ .err_handler = &vfio_pci_core_err_handlers,
+ .driver_managed_dma = true,
+};
+
+module_pci_driver(nvgpu_vfio_pci_driver);
+
+MODULE_LICENSE("GPL v2");
+MODULE_AUTHOR("Ankit Agrawal <ankita@nvidia.com>");
+MODULE_AUTHOR("Aniket Agashe <aniketa@nvidia.com>");
+MODULE_DESCRIPTION(
+ "VFIO NVGPU PF - User Level driver for NVIDIA devices with CPU coherently accessible device memory");