diff mbox series

[4/4] vfio/pci: Allow export dmabuf without move_notify from importer

Message ID 20241216095429.210792-5-wguay@fb.com (mailing list archive)
State Superseded
Headers show
Series [1/4] vfio: Add vfio_device_get() | expand

Commit Message

Wei Lin Guay Dec. 16, 2024, 9:54 a.m. UTC
From: Wei Lin Guay <wguay@meta.com>

Summary:
Allow vfio to export a dmabuf to importers, such as RDMA NICs, that do
not support the move_notify callback, since not all RDMA drivers support
on-demand paging (ODP).

There are use cases, such as a vfio-bound accelerator, that always pin
the device memory via vfio and export it to RDMA NICs such as EFA, BNXT_RE
or IRDMA, which do not support ODP.

Signed-off-by: Wei Lin Guay <wguay@meta.com>
Reviewed-by: Dag Moxnes <dagmoxnes@meta.com>
Reviewed-by: Keith Busch <kbusch@kernel.org>
Reviewed-by: Nic Viljoen <nviljoen@meta.com>
---
 drivers/vfio/pci/dma_buf.c       | 32 +++++++++++++++++++++++++++-----
 drivers/vfio/pci/vfio_pci_core.c | 16 ++++++++++++++++
 drivers/vfio/pci/vfio_pci_priv.h |  7 +++++++
 3 files changed, 50 insertions(+), 5 deletions(-)

--
2.43.5
diff mbox series

Patch

diff --git a/drivers/vfio/pci/dma_buf.c b/drivers/vfio/pci/dma_buf.c
index fd772b520cd7..8017f48296cb 100644
--- a/drivers/vfio/pci/dma_buf.c
+++ b/drivers/vfio/pci/dma_buf.c
@@ -17,6 +17,7 @@  struct vfio_pci_dma_buf {
 	unsigned int orig_nents;
 	size_t offset;
 	bool revoked;
+	bool pinned;
 };

 static int vfio_pci_dma_buf_attach(struct dma_buf *dmabuf,
@@ -32,17 +33,38 @@  static int vfio_pci_dma_buf_attach(struct dma_buf *dmabuf,
 	return 0;
 }

+/* Report whether any dma-buf on vdev->dmabufs is currently marked pinned. */
+bool vfio_pci_dma_buf_pinned(struct vfio_pci_core_device *vdev)
+{
+	struct vfio_pci_dma_buf *priv, *tmp;
+	bool pinned = false;
+
+	down_write(&vdev->memory_lock);
+	list_for_each_entry_safe(priv, tmp, &vdev->dmabufs, dmabufs_elm) {
+		if (!dma_buf_try_get(priv->dmabuf))
+			continue;
+		pinned = priv->pinned;
+		dma_buf_put(priv->dmabuf);	/* pairs with dma_buf_try_get() */
+		if (pinned)
+			break;
+	}
+	up_write(&vdev->memory_lock);
+	return pinned;
+}
+
 static void vfio_pci_dma_buf_unpin(struct dma_buf_attachment *attachment)
 {
+	struct vfio_pci_dma_buf *priv = attachment->dmabuf->priv;
+
+	priv->pinned = false;	/* NOTE(review): flag is per-dmabuf, not per-attachment - confirm */
 }

 static int vfio_pci_dma_buf_pin(struct dma_buf_attachment *attachment)
 {
-	/*
-	 * Uses the dynamic interface but must always allow for
-	 * dma_buf_move_notify() to do revoke
-	 */
-	return -EINVAL;
+	struct vfio_pci_dma_buf *priv = attachment->dmabuf->priv;
+
+	priv->pinned = true;	/* recorded on the dma-buf's priv, not per attachment */
+	return 0;
 }

 static struct sg_table *
diff --git a/drivers/vfio/pci/vfio_pci_core.c b/drivers/vfio/pci/vfio_pci_core.c
index bb97b4d94eb7..db28fa2cc9a8 100644
--- a/drivers/vfio/pci/vfio_pci_core.c
+++ b/drivers/vfio/pci/vfio_pci_core.c
@@ -1246,6 +1246,13 @@  static int vfio_pci_ioctl_reset(struct vfio_pci_core_device *vdev,
 	 */
 	vfio_pci_set_power_state(vdev, PCI_D0);

+	/*
+	 * Prevent reset while a dma_buf is pinned, to avoid exposing a
+	 * stale pinned buffer to the dmabuf exporter.
+	 */
+	if (vfio_pci_dma_buf_pinned(vdev))
+		return -EINVAL;
+
 	vfio_pci_dma_buf_move(vdev, true);
 	ret = pci_try_reset_function(vdev->pdev);
 	if (__vfio_pci_memory_enabled(vdev))
@@ -2444,6 +2451,15 @@  static int vfio_pci_dev_set_hot_reset(struct vfio_device_set *dev_set,
 			break;
 		}

+		/*
+		 * Prevent reset while a dma_buf is pinned, to avoid exposing a
+		 * stale pinned buffer to the dmabuf exporter.
+		 */
+		if (vfio_pci_dma_buf_pinned(vdev)) {
+			ret = -EINVAL;
+			break;
+		}
+
 		/*
 		 * Take the memory write lock for each device and zap BAR
 		 * mappings to prevent the user accessing the device while in
diff --git a/drivers/vfio/pci/vfio_pci_priv.h b/drivers/vfio/pci/vfio_pci_priv.h
index 09d3c300918c..43c40dc4751c 100644
--- a/drivers/vfio/pci/vfio_pci_priv.h
+++ b/drivers/vfio/pci/vfio_pci_priv.h
@@ -107,6 +107,7 @@  int vfio_pci_core_feature_dma_buf(struct vfio_pci_core_device *vdev, u32 flags,
 				  size_t argsz);
 void vfio_pci_dma_buf_cleanup(struct vfio_pci_core_device *vdev);
 void vfio_pci_dma_buf_move(struct vfio_pci_core_device *vdev, bool revoked);
+bool vfio_pci_dma_buf_pinned(struct vfio_pci_core_device *vdev);
 #else
 static int
 vfio_pci_core_feature_dma_buf(struct vfio_pci_core_device *vdev, u32 flags,
@@ -115,6 +116,12 @@  vfio_pci_core_feature_dma_buf(struct vfio_pci_core_device *vdev, u32 flags,
 {
 	return -ENOTTY;
 }
+
+static inline bool vfio_pci_dma_buf_pinned(struct vfio_pci_core_device *vdev)
+{
+	return false;	/* stub for the #else configuration: never reports a pin */
+}
+
 static inline void vfio_pci_dma_buf_cleanup(struct vfio_pci_core_device *vdev)
 {
 }