
[v1,2/2] udmabuf: Add back support for mapping hugetlb pages

Message ID 20230622072710.3707315-3-vivek.kasireddy@intel.com (mailing list archive)
State New, archived
Series udmabuf: Add back support for mapping hugetlb pages

Commit Message

Kasireddy, Vivek June 22, 2023, 7:27 a.m. UTC
A user or admin can configure a VMM (Qemu) Guest's memory to be
backed by hugetlb pages for various reasons. However, a Guest OS
would still allocate (and pin) buffers that are backed by regular
4K-sized pages. To map these buffers and create dma-bufs for them
on the Host, we first need to find the hugetlb pages backing the
buffer allocations, then determine the offsets of the individual
4K chunks within those pages, and use this information to populate
a scatterlist.
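
For illustration, the per-chunk lookup boils down to the arithmetic
below (a minimal sketch, not the patch itself; in the actual code the
huge page size comes from huge_page_size(hstate_file(memfd)) and the
names here are hypothetical):

/*
 * Sketch: given a byte offset into a hugetlb-backed memfd, find the
 * huge page it falls in and the 4K chunk within that huge page.
 */
static void offset_to_chunk(unsigned long offset, unsigned long hpage_size,
			    unsigned long *hpage_idx, unsigned long *chunk_idx)
{
	*hpage_idx = offset / hpage_size;          /* which huge page */
	*chunk_idx = (offset % hpage_size) >> 12;  /* which 4K chunk (PAGE_SHIFT) */
}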

Testcase: default_hugepagesz=2M hugepagesz=2M hugepages=2500 options
were passed to the Host kernel and Qemu was launched with these
relevant options: qemu-system-x86_64 -m 4096m....
-device virtio-gpu-pci,max_outputs=1,blob=true,xres=1920,yres=1080
-display gtk,gl=on
-object memory-backend-memfd,hugetlb=on,id=mem1,size=4096M
-machine memory-backend=mem1

Replacing -display gtk,gl=on with -display gtk,gl=off above would
exercise the mmap handler.
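
For reference, the hugetlb path can also be exercised without a VM by
handing a hugetlb-backed memfd to the udmabuf ioctl directly. A minimal
user-space sketch (assuming a 2M default huge page size and a kernel
with this patch applied; error handling trimmed):

/* Sketch: export a dma-buf from one 2M hugetlb page via /dev/udmabuf */
#define _GNU_SOURCE
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <unistd.h>
#include <linux/udmabuf.h>

int main(void)
{
	size_t size = 2UL * 1024 * 1024;	/* one 2M huge page */
	struct udmabuf_create create = { 0 };
	int memfd, devfd, buffd;
	void *addr;

	memfd = memfd_create("udmabuf-test", MFD_HUGETLB | MFD_ALLOW_SEALING);
	if (memfd < 0 || ftruncate(memfd, size) < 0)
		return 1;
	/* udmabuf requires F_SEAL_SHRINK (and no F_SEAL_WRITE) on the memfd */
	if (fcntl(memfd, F_ADD_SEALS, F_SEAL_SHRINK) < 0)
		return 1;

	/* fault the huge page in; the hugetlb path only looks up present pages */
	addr = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, memfd, 0);
	if (addr == MAP_FAILED)
		return 1;
	memset(addr, 0xaa, size);

	devfd = open("/dev/udmabuf", O_RDWR);
	if (devfd < 0)
		return 1;

	create.memfd  = memfd;
	create.flags  = UDMABUF_FLAGS_CLOEXEC;
	create.offset = 0;
	create.size   = size;
	buffd = ioctl(devfd, UDMABUF_CREATE, &create);
	if (buffd < 0)
		perror("UDMABUF_CREATE");

	return buffd < 0 ? 1 : 0;
}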

Cc: Mike Kravetz <mike.kravetz@oracle.com>
Cc: David Hildenbrand <david@redhat.com>
Cc: Gerd Hoffmann <kraxel@redhat.com>
Cc: Dongwon Kim <dongwon.kim@intel.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: James Houghton <jthoughton@google.com>
Cc: Jerome Marchand <jmarchan@redhat.com>
Cc: Junxiao Chang <junxiao.chang@intel.com>
Cc: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Muchun Song <muchun.song@linux.dev>
Signed-off-by: Vivek Kasireddy <vivek.kasireddy@intel.com>
---
 drivers/dma-buf/udmabuf.c | 97 +++++++++++++++++++++++++++++++++------
 1 file changed, 83 insertions(+), 14 deletions(-)

Comments

kernel test robot June 22, 2023, 10:10 p.m. UTC | #1
Hi Vivek,

kernel test robot noticed the following build warnings:

[auto build test WARNING on akpm-mm/mm-everything]
[also build test WARNING on linus/master next-20230622]
[cannot apply to drm-misc/drm-misc-next drm-tip/drm-tip v6.4-rc7]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting the patch, we suggest using '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]

url:    https://github.com/intel-lab-lkp/linux/commits/Vivek-Kasireddy/udmabuf-Use-vmf_insert_pfn-and-VM_PFNMAP-for-handling-mmap/20230622-155402
base:   https://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm.git mm-everything
patch link:    https://lore.kernel.org/r/20230622072710.3707315-3-vivek.kasireddy%40intel.com
patch subject: [PATCH v1 2/2] udmabuf: Add back support for mapping hugetlb pages
config: openrisc-randconfig-c044-20230622 (https://download.01.org/0day-ci/archive/20230623/202306230534.TIq82gef-lkp@intel.com/config)
compiler: or1k-linux-gcc (GCC) 12.3.0
reproduce: (https://download.01.org/0day-ci/archive/20230623/202306230534.TIq82gef-lkp@intel.com/reproduce)

If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add the following tags
| Reported-by: kernel test robot <lkp@intel.com>
| Closes: https://lore.kernel.org/oe-kbuild-all/202306230534.TIq82gef-lkp@intel.com/

cocci warnings: (new ones prefixed by >>)
>> drivers/dma-buf/udmabuf.c:172:2-7: WARNING: NULL check before some freeing functions is not needed.
   drivers/dma-buf/udmabuf.c:360:2-7: WARNING: NULL check before some freeing functions is not needed.
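
Since kfree(NULL) is a no-op, warnings like these are normally resolved
by simply dropping the NULL check before the free; a sketch of what that
cleanup would look like in release_udmabuf() (not part of the posted
patch):

	/* kfree(NULL) is a no-op, so no need to test ubuf->offsets first */
	kfree(ubuf->offsets);
	kfree(ubuf->pages);
	kfree(ubuf);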

Patch

diff --git a/drivers/dma-buf/udmabuf.c b/drivers/dma-buf/udmabuf.c
index 6de40c51d895..0ae44bce01e7 100644
--- a/drivers/dma-buf/udmabuf.c
+++ b/drivers/dma-buf/udmabuf.c
@@ -10,6 +10,7 @@ 
 #include <linux/miscdevice.h>
 #include <linux/module.h>
 #include <linux/shmem_fs.h>
+#include <linux/hugetlb.h>
 #include <linux/slab.h>
 #include <linux/udmabuf.h>
 #include <linux/vmalloc.h>
@@ -28,6 +29,7 @@  struct udmabuf {
 	struct page **pages;
 	struct sg_table *sg;
 	struct miscdevice *device;
+	pgoff_t *offsets;
 };
 
 static vm_fault_t udmabuf_vm_fault(struct vm_fault *vmf)
@@ -41,6 +43,10 @@  static vm_fault_t udmabuf_vm_fault(struct vm_fault *vmf)
 		return VM_FAULT_SIGBUS;
 
 	pfn = page_to_pfn(ubuf->pages[pgoff]);
+	if (ubuf->offsets) {
+		pfn += ubuf->offsets[pgoff] >> PAGE_SHIFT;
+	}
+
 	return vmf_insert_pfn(vma, vmf->address, pfn);
 }
 
@@ -92,23 +98,40 @@  static struct sg_table *get_sg_table(struct device *dev, struct dma_buf *buf,
 {
 	struct udmabuf *ubuf = buf->priv;
 	struct sg_table *sg;
+	struct scatterlist *sgl;
+	unsigned long i = 0;
 	int ret;
 
 	sg = kzalloc(sizeof(*sg), GFP_KERNEL);
 	if (!sg)
 		return ERR_PTR(-ENOMEM);
-	ret = sg_alloc_table_from_pages(sg, ubuf->pages, ubuf->pagecount,
-					0, ubuf->pagecount << PAGE_SHIFT,
-					GFP_KERNEL);
-	if (ret < 0)
-		goto err;
+
+	if (ubuf->offsets) {
+		ret = sg_alloc_table(sg, ubuf->pagecount, GFP_KERNEL);
+		if (ret < 0)
+			goto err_alloc;
+
+		for_each_sg(sg->sgl, sgl, ubuf->pagecount, i) {
+			sg_set_page(sgl, ubuf->pages[i], PAGE_SIZE,
+				    ubuf->offsets[i]);
+		}
+	} else {
+		ret = sg_alloc_table_from_pages(sg, ubuf->pages,
+						ubuf->pagecount, 0,
+						ubuf->pagecount << PAGE_SHIFT,
+						GFP_KERNEL);
+		if (ret < 0)
+			goto err_alloc;
+
+	}
 	ret = dma_map_sgtable(dev, sg, direction, 0);
 	if (ret < 0)
-		goto err;
+		goto err_map;
 	return sg;
 
-err:
+err_map:
 	sg_free_table(sg);
+err_alloc:
 	kfree(sg);
 	return ERR_PTR(ret);
 }
@@ -145,6 +168,8 @@  static void release_udmabuf(struct dma_buf *buf)
 
 	for (pg = 0; pg < ubuf->pagecount; pg++)
 		put_page(ubuf->pages[pg]);
+	if (ubuf->offsets)
+		kfree(ubuf->offsets);
 	kfree(ubuf->pages);
 	kfree(ubuf);
 }
@@ -208,7 +233,9 @@  static long udmabuf_create(struct miscdevice *device,
 	struct udmabuf *ubuf;
 	struct dma_buf *buf;
 	pgoff_t pgoff, pgcnt, pgidx, pgbuf = 0, pglimit;
-	struct page *page;
+	struct page *page, *hpage = NULL;
+	pgoff_t hpoff, chunkoff, maxchunks;
+	struct hstate *hpstate;
 	int seals, ret = -EINVAL;
 	u32 i, flags;
 
@@ -244,7 +271,7 @@  static long udmabuf_create(struct miscdevice *device,
 		if (!memfd)
 			goto err;
 		mapping = memfd->f_mapping;
-		if (!shmem_mapping(mapping))
+		if (!shmem_mapping(mapping) && !is_file_hugepages(memfd))
 			goto err;
 		seals = memfd_fcntl(memfd, F_GET_SEALS, 0);
 		if (seals == -EINVAL)
@@ -255,16 +282,56 @@  static long udmabuf_create(struct miscdevice *device,
 			goto err;
 		pgoff = list[i].offset >> PAGE_SHIFT;
 		pgcnt = list[i].size   >> PAGE_SHIFT;
+		if (is_file_hugepages(memfd)) {
+			if (!ubuf->offsets) {
+				ubuf->offsets = kmalloc_array(ubuf->pagecount,
+							      sizeof(*ubuf->offsets),
+							      GFP_KERNEL);
+				if (!ubuf->offsets) {
+					ret = -ENOMEM;
+					goto err;
+				}
+			}
+			hpstate = hstate_file(memfd);
+			hpoff = list[i].offset >> huge_page_shift(hpstate);
+			chunkoff = (list[i].offset &
+				    ~huge_page_mask(hpstate)) >> PAGE_SHIFT;
+			maxchunks = huge_page_size(hpstate) >> PAGE_SHIFT;
+		}
 		for (pgidx = 0; pgidx < pgcnt; pgidx++) {
-			page = shmem_read_mapping_page(mapping, pgoff + pgidx);
-			if (IS_ERR(page)) {
-				ret = PTR_ERR(page);
-				goto err;
+			if (is_file_hugepages(memfd)) {
+				if (!hpage) {
+					hpage = find_get_page_flags(mapping, hpoff,
+								    FGP_ACCESSED);
+					if (!hpage) {
+						ret = -EINVAL;
+						goto err;
+					}
+				}
+				get_page(hpage);
+				ubuf->pages[pgbuf] = hpage;
+				ubuf->offsets[pgbuf++] = chunkoff << PAGE_SHIFT;
+				if (++chunkoff == maxchunks) {
+					put_page(hpage);
+					hpage = NULL;
+					chunkoff = 0;
+					hpoff++;
+				}
+			} else {
+				page = shmem_read_mapping_page(mapping, pgoff + pgidx);
+				if (IS_ERR(page)) {
+					ret = PTR_ERR(page);
+					goto err;
+				}
+				ubuf->pages[pgbuf++] = page;
 			}
-			ubuf->pages[pgbuf++] = page;
 		}
 		fput(memfd);
 		memfd = NULL;
+		if (hpage) {
+			put_page(hpage);
+			hpage = NULL;
+		}
 	}
 
 	exp_info.ops  = &udmabuf_ops;
@@ -289,6 +356,8 @@  static long udmabuf_create(struct miscdevice *device,
 		put_page(ubuf->pages[--pgbuf]);
 	if (memfd)
 		fput(memfd);
+	if (ubuf->offsets)
+		kfree(ubuf->offsets);
 	kfree(ubuf->pages);
 	kfree(ubuf);
 	return ret;