diff mbox series

[RFC,7/7] udmabuf: Implement UDMABUF_CREATE_LIST_FOR_PCIDEV ioctl

Message ID 20240328083615.2662516-8-vivek.kasireddy@intel.com (mailing list archive)
State New, archived
Headers show
Series drm/virtio: Import scanout buffers from other devices | expand

Commit Message

Kasireddy, Vivek March 28, 2024, 8:33 a.m. UTC
By implementing this request, the udmabuf driver would be able
to support creating a dmabuf from a PCI device's bar region. This
would facilitate P2P DMA operations between any two PCI devices
as long as they are compatible.

Based on the information (devid, bar) provided by the VMM, once
the PCI device known as the provider is identified, we create a
page pool associated with the requested bar region by calling
pci_p2pdma_add_resource(). We then populate the ubuf->pages[]
array with the pages from the pool that would eventually be
included in a sgt which would be shared with the importers.

Cc: Gerd Hoffmann <kraxel@redhat.com>
Signed-off-by: Vivek Kasireddy <vivek.kasireddy@intel.com>
---
 drivers/dma-buf/udmabuf.c | 122 +++++++++++++++++++++++++++++++++++---
 1 file changed, 114 insertions(+), 8 deletions(-)
diff mbox series

Patch

diff --git a/drivers/dma-buf/udmabuf.c b/drivers/dma-buf/udmabuf.c
index 274defd3fa3e..7355451ed337 100644
--- a/drivers/dma-buf/udmabuf.c
+++ b/drivers/dma-buf/udmabuf.c
@@ -9,6 +9,7 @@ 
 #include <linux/memfd.h>
 #include <linux/miscdevice.h>
 #include <linux/module.h>
+#include <linux/pci-p2pdma.h>
 #include <linux/shmem_fs.h>
 #include <linux/hugetlb.h>
 #include <linux/slab.h>
@@ -27,6 +28,7 @@  MODULE_PARM_DESC(size_limit_mb, "Max size of a dmabuf, in megabytes. Default is
 struct udmabuf {
 	pgoff_t pagecount;
 	struct page **pages;
+	struct pci_dev *pdev;
 	struct sg_table *sg;
 	struct miscdevice *device;
 	pgoff_t *offsets;
@@ -129,9 +131,28 @@  static void put_sg_table(struct device *dev, struct sg_table *sg,
 	kfree(sg);
 }
 
+static int check_p2p_support(struct dma_buf_attachment *attach)
+{
+	struct udmabuf *ubuf = attach->dmabuf->priv;
+	struct pci_dev *provider = ubuf->pdev;
+	struct device *client = attach->dev;
+	int ret = -1;
+
+	if (!provider)
+		return 0;
+
+	if (attach->peer2peer)
+		ret = pci_p2pdma_distance(provider, client, true);
+
+	return ret < 0 ? ret : 0;
+}
+
 static struct sg_table *map_udmabuf(struct dma_buf_attachment *at,
 				    enum dma_data_direction direction)
 {
+	if (check_p2p_support(at) < 0)
+		return ERR_PTR(-EOPNOTSUPP);
+
 	return get_sg_table(at->dev, at->dmabuf, direction);
 }
 
@@ -151,8 +172,15 @@  static void release_udmabuf(struct dma_buf *buf)
 	if (ubuf->sg)
 		put_sg_table(dev, ubuf->sg, DMA_BIDIRECTIONAL);
 
-	for (pg = 0; pg < ubuf->pagecount; pg++)
-		put_page(ubuf->pages[pg]);
+	for (pg = 0; pg < ubuf->pagecount; pg++) {
+		if (ubuf->pdev)
+			pci_free_p2pmem(ubuf->pdev,
+					page_to_virt(ubuf->pages[pg]),
+					PAGE_SIZE);
+		else
+			put_page(ubuf->pages[pg]);
+	}
+
 	kfree(ubuf->offsets);
 	kfree(ubuf->pages);
 	kfree(ubuf);
@@ -269,9 +297,74 @@  static int handle_shmem_pages(struct udmabuf *ubuf, struct file *memfd,
 	return 0;
 }
 
+static int handle_pcidev_pages(struct udmabuf *ubuf,
+			       struct udmabuf_create_list *head,
+			       struct udmabuf_create_item *list)
+{
+	struct pci_dev *pdev = NULL;
+	resource_size_t bar_size;
+	pgoff_t pgbuf = 0;
+	struct page *page;
+	int i, ret;
+	size_t size;
+	void *addr;
+
+	for (i = 0; i < head->count; i++) {
+		if (!ubuf->pdev) {
+			pdev = pci_get_domain_bus_and_slot(0,
+						PCI_BUS_NUM(list[i].devid),
+						list[i].devid & 0xff);
+			if (!pdev) {
+				ret = -ENODEV;
+				goto err;
+			}
+
+			ubuf->pdev = pdev;
+		}
+
+		bar_size = pci_resource_len(pdev, list[i].bar);
+		if (list[i].offset > bar_size ||
+		    list[i].offset + list[i].size > bar_size) {
+			ret = -EINVAL;
+			goto err;
+		}
+
+		ret = pci_p2pdma_add_resource(pdev,
+					      list[i].bar,
+					      list[i].size,
+					      list[i].offset);
+		if (ret)
+			goto err;
+
+		addr = pci_alloc_p2pmem(pdev, list[i].size);
+		if (!addr) {
+			ret = -EINVAL;
+			goto err;
+		}
+
+		size = 0;
+		while (size < list[i].size) {
+			page = virt_to_page((unsigned long)addr + size);
+			ubuf->pages[pgbuf++] = page;
+
+			size += PAGE_SIZE;
+		}
+	}
+
+err:
+	while (pgbuf > 0 && ubuf->pages[--pgbuf])
+		pci_free_p2pmem(pdev,
+				page_to_virt(ubuf->pages[pgbuf]),
+				PAGE_SIZE);
+	if (pdev)
+		pci_dev_put(pdev);
+	return ret;
+}
+
 static long udmabuf_create(struct miscdevice *device,
 			   struct udmabuf_create_list *head,
-			   struct udmabuf_create_item *list)
+			   struct udmabuf_create_item *list,
+			   bool for_pcidev)
 {
 	DEFINE_DMA_BUF_EXPORT_INFO(exp_info);
 	struct file *memfd = NULL;
@@ -312,6 +405,14 @@  static long udmabuf_create(struct miscdevice *device,
 		goto err;
 	}
 
+	if (for_pcidev) {
+		ret = handle_pcidev_pages(ubuf, head, list);
+		if (ret)
+			goto err;
+
+		goto create_dmabuf;
+	}
+
 	pgbuf = 0;
 	for (i = 0; i < head->count; i++) {
 		ret = -EBADFD;
@@ -344,6 +445,7 @@  static long udmabuf_create(struct miscdevice *device,
 		memfd = NULL;
 	}
 
+create_dmabuf:
 	exp_info.ops  = &udmabuf_ops;
 	exp_info.size = ubuf->pagecount << PAGE_SHIFT;
 	exp_info.priv = ubuf;
@@ -362,7 +464,7 @@  static long udmabuf_create(struct miscdevice *device,
 	return dma_buf_fd(buf, flags);
 
 err:
-	while (pgbuf > 0)
+	while (pgbuf > 0 && !ubuf->pdev)
 		put_page(ubuf->pages[--pgbuf]);
 	if (memfd)
 		fput(memfd);
@@ -388,10 +490,11 @@  static long udmabuf_ioctl_create(struct file *filp, unsigned long arg)
 	list.offset = create.offset;
 	list.size   = create.size;
 
-	return udmabuf_create(filp->private_data, &head, &list);
+	return udmabuf_create(filp->private_data, &head, &list, false);
 }
 
-static long udmabuf_ioctl_create_list(struct file *filp, unsigned long arg)
+static long udmabuf_ioctl_create_list(struct file *filp, unsigned long arg,
+				      bool for_pcidev)
 {
 	struct udmabuf_create_list head;
 	struct udmabuf_create_item *list;
@@ -407,7 +510,7 @@  static long udmabuf_ioctl_create_list(struct file *filp, unsigned long arg)
 	if (IS_ERR(list))
 		return PTR_ERR(list);
 
-	ret = udmabuf_create(filp->private_data, &head, list);
+	ret = udmabuf_create(filp->private_data, &head, list, for_pcidev);
 	kfree(list);
 	return ret;
 }
@@ -422,7 +525,10 @@  static long udmabuf_ioctl(struct file *filp, unsigned int ioctl,
 		ret = udmabuf_ioctl_create(filp, arg);
 		break;
 	case UDMABUF_CREATE_LIST:
-		ret = udmabuf_ioctl_create_list(filp, arg);
+		ret = udmabuf_ioctl_create_list(filp, arg, false);
+		break;
+	case UDMABUF_CREATE_LIST_FOR_PCIDEV:
+		ret = udmabuf_ioctl_create_list(filp, arg, true);
 		break;
 	default:
 		ret = -ENOTTY;