[v7,20/21] PCI/P2PDMA: Introduce pci_mmap_p2pmem()

Message ID	20220615161233.17527-21-logang@deltatee.com (mailing list archive)
State	New
Headers	show Return-Path: <owner-linux-mm@kvack.org> From: Logan Gunthorpe <logang@deltatee.com> To: linux-kernel@vger.kernel.org, linux-nvme@lists.infradead.org, linux-block@vger.kernel.org, linux-pci@vger.kernel.org, linux-mm@kvack.org, iommu@lists.linux-foundation.org Cc: Stephen Bates <sbates@raithlin.com>, Christoph Hellwig <hch@lst.de>, Dan Williams <dan.j.williams@intel.com>, Jason Gunthorpe <jgg@ziepe.ca>, =?utf-8?q?Christian_K=C3=B6nig?= <christian.koenig@amd.com>, John Hubbard <jhubbard@nvidia.com>, Don Dutile <ddutile@redhat.com>, Matthew Wilcox <willy@infradead.org>, Daniel Vetter <daniel.vetter@ffwll.ch>, Minturn Dave B <dave.b.minturn@intel.com>, Jason Ekstrand <jason@jlekstrand.net>, Dave Hansen <dave.hansen@linux.intel.com>, Xiong Jianxin <jianxin.xiong@intel.com>, Bjorn Helgaas <helgaas@kernel.org>, Ira Weiny <ira.weiny@intel.com>, Robin Murphy <robin.murphy@arm.com>, Martin Oliveira <martin.oliveira@eideticom.com>, Chaitanya Kulkarni <ckulkarnilinux@gmail.com>, Ralph Campbell <rcampbell@nvidia.com>, Logan Gunthorpe <logang@deltatee.com>, Bjorn Helgaas <bhelgaas@google.com> Date: Wed, 15 Jun 2022 10:12:32 -0600 Message-Id: <20220615161233.17527-21-logang@deltatee.com> In-Reply-To: <20220615161233.17527-1-logang@deltatee.com> References: <20220615161233.17527-1-logang@deltatee.com> MIME-Version: 1.0 Content-Transfer-Encoding: 8bit Subject: [PATCH v7 20/21] PCI/P2PDMA: Introduce pci_mmap_p2pmem() Sender: owner-linux-mm@kvack.org Precedence: bulk
Series	Userspace P2PDMA with O_DIRECT NVMe devices \| expand [v7,00/21] Userspace P2PDMA with O_DIRECT NVMe devices [v7,01/21] lib/scatterlist: add flag for indicating P2PDMA segments in an SGL [v7,02/21] PCI/P2PDMA: Attempt to set map_type if it has not been set [v7,03/21] PCI/P2PDMA: Expose pci_p2pdma_map_type() [v7,04/21] PCI/P2PDMA: Introduce helpers for dma_map_sg implementations [v7,05/21] dma-mapping: allow EREMOTEIO return code for P2PDMA transfers [v7,06/21] dma-direct: support PCI P2PDMA pages in dma-direct map_sg [v7,07/21] dma-mapping: add flags to dma_map_ops to indicate PCI P2PDMA support [v7,08/21] iommu/dma: support PCI P2PDMA pages in dma-iommu map_sg [v7,09/21] nvme-pci: check DMA ops when indicating support for PCI P2PDMA [v7,10/21] nvme-pci: convert to using dma_map_sgtable() [v7,11/21] RDMA/core: introduce ib_dma_pci_p2p_dma_supported() [v7,12/21] RDMA/rw: drop pci_p2pdma_[un]map_sg() [v7,13/21] PCI/P2PDMA: Remove pci_p2pdma_[un]map_sg() [v7,14/21] mm: introduce FOLL_PCI_P2PDMA to gate getting PCI P2PDMA pages [v7,15/21] iov_iter: introduce iov_iter_get_pages_[alloc_]flags() [v7,16/21] block: add check when merging zone device pages [v7,17/21] lib/scatterlist: add check when merging zone device pages [v7,18/21] block: set FOLL_PCI_P2PDMA in __bio_iov_iter_get_pages() [v7,19/21] block: set FOLL_PCI_P2PDMA in bio_map_user_iov() [v7,20/21] PCI/P2PDMA: Introduce pci_mmap_p2pmem() [v7,21/21] nvme-pci: allow mmaping the CMB in userspace

diff --git a/drivers/pci/p2pdma.c b/drivers/pci/p2pdma.c index d4e635012ffe..a6572069008b 100644 --- a/drivers/pci/p2pdma.c +++ b/drivers/pci/p2pdma.c @@ -17,14 +17,19 @@ #include <linux/genalloc.h> #include <linux/memremap.h> #include <linux/percpu-refcount.h> +#include <linux/pfn_t.h> +#include <linux/pseudo_fs.h> #include <linux/random.h> #include <linux/seq_buf.h> #include <linux/xarray.h> +#include <uapi/linux/magic.h> struct pci_p2pdma { struct gen_pool *pool; bool p2pmem_published; struct xarray map_types; + struct inode *inode; + bool active; }; struct pci_p2pdma_pagemap { @@ -101,6 +106,41 @@ static const struct attribute_group p2pmem_group = { .name = "p2pmem", }; +/* + * P2PDMA internal mount + * Fake an internal VFS mount-point in order to allocate struct address_space + * mappings to remove VMAs on unbind events. + */ +static int pci_p2pdma_fs_cnt; +static struct vfsmount *pci_p2pdma_fs_mnt; + +static int pci_p2pdma_fs_init_fs_context(struct fs_context *fc) +{ + return init_pseudo(fc, P2PDMA_MAGIC) ? 
0 : -ENOMEM; +} + +static struct file_system_type pci_p2pdma_fs_type = { + .name = "p2dma", + .owner = THIS_MODULE, + .init_fs_context = pci_p2pdma_fs_init_fs_context, + .kill_sb = kill_anon_super, +}; + +static void p2pdma_page_free(struct page *page) +{ + struct pci_p2pdma_pagemap *pgmap = to_p2p_pgmap(page->pgmap); + struct percpu_ref *ref; + + gen_pool_free_owner(pgmap->provider->p2pdma->pool, + (uintptr_t)page_to_virt(page), PAGE_SIZE, + (void **)&ref); + percpu_ref_put(ref); +} + +static const struct dev_pagemap_ops p2pdma_pgmap_ops = { + .page_free = p2pdma_page_free, +}; + static void pci_p2pdma_release(void *data) { struct pci_dev *pdev = data; @@ -117,6 +157,9 @@ static void pci_p2pdma_release(void *data) gen_pool_destroy(p2pdma->pool); sysfs_remove_group(&pdev->dev.kobj, &p2pmem_group); xa_destroy(&p2pdma->map_types); + + iput(p2pdma->inode); + simple_release_fs(&pci_p2pdma_fs_mnt, &pci_p2pdma_fs_cnt); } static int pci_p2pdma_setup(struct pci_dev *pdev) @@ -134,17 +177,32 @@ static int pci_p2pdma_setup(struct pci_dev *pdev) if (!p2p->pool) goto out; - error = devm_add_action_or_reset(&pdev->dev, pci_p2pdma_release, pdev); + error = simple_pin_fs(&pci_p2pdma_fs_type, &pci_p2pdma_fs_mnt, + &pci_p2pdma_fs_cnt); if (error) goto out_pool_destroy; + p2p->inode = alloc_anon_inode(pci_p2pdma_fs_mnt->mnt_sb); + if (IS_ERR(p2p->inode)) { + error = -ENOMEM; + goto out_unpin_fs; + } + + error = devm_add_action_or_reset(&pdev->dev, pci_p2pdma_release, pdev); + if (error) + goto out_put_inode; + error = sysfs_create_group(&pdev->dev.kobj, &p2pmem_group); if (error) - goto out_pool_destroy; + goto out_put_inode; rcu_assign_pointer(pdev->p2pdma, p2p); return 0; +out_put_inode: + iput(p2p->inode); +out_unpin_fs: + simple_release_fs(&pci_p2pdma_fs_mnt, &pci_p2pdma_fs_cnt); out_pool_destroy: gen_pool_destroy(p2p->pool); out: @@ -152,6 +210,18 @@ static int pci_p2pdma_setup(struct pci_dev *pdev) return error; } +static void pci_p2pdma_unmap_mappings(void *data) +{ + struct 
pci_dev *pdev = data; + struct pci_p2pdma *p2pdma = rcu_dereference_protected(pdev->p2pdma, 1); + + /* Ensure no new pages can be allocated in mappings */ + p2pdma->active = false; + synchronize_rcu(); + + unmap_mapping_range(p2pdma->inode->i_mapping, 0, 0, 1); +} + /** * pci_p2pdma_add_resource - add memory for use as p2p memory * @pdev: the device to add the memory to @@ -198,6 +268,7 @@ int pci_p2pdma_add_resource(struct pci_dev *pdev, int bar, size_t size, pgmap->range.end = pgmap->range.start + size - 1; pgmap->nr_range = 1; pgmap->type = MEMORY_DEVICE_PCI_P2PDMA; + pgmap->ops = &p2pdma_pgmap_ops; p2p_pgmap->provider = pdev; p2p_pgmap->bus_offset = pci_bus_address(pdev, bar) - @@ -209,6 +280,11 @@ int pci_p2pdma_add_resource(struct pci_dev *pdev, int bar, size_t size, goto pgmap_free; } + error = devm_add_action_or_reset(&pdev->dev, pci_p2pdma_unmap_mappings, + pdev); + if (error) + goto pages_free; + p2pdma = rcu_dereference_protected(pdev->p2pdma, 1); error = gen_pool_add_owner(p2pdma->pool, (unsigned long)addr, pci_bus_address(pdev, bar) + offset, @@ -217,6 +293,7 @@ int pci_p2pdma_add_resource(struct pci_dev *pdev, int bar, size_t size, if (error) goto pages_free; + p2pdma->active = true; pci_info(pdev, "added peer-to-peer DMA memory %#llx-%#llx\n", pgmap->range.start, pgmap->range.end); @@ -1023,3 +1100,132 @@ ssize_t pci_p2pdma_enable_show(char *page, struct pci_dev *p2p_dev, return sprintf(page, "%s\n", pci_name(p2p_dev)); } EXPORT_SYMBOL_GPL(pci_p2pdma_enable_show); + +/** + * pci_p2pdma_file_open - setup file mapping to store P2PMEM VMAs + * @pdev: the device to allocate memory from + * @file: the file to open + * + * Set f_mapping of the file to the p2pdma inode so that mappings + * can be torn down on device unbind. 
+ */ +int pci_p2pdma_file_open(struct pci_dev *pdev, struct file *file) +{ + struct pci_p2pdma *p2pdma; + int ret; + + ret = simple_pin_fs(&pci_p2pdma_fs_type, &pci_p2pdma_fs_mnt, + &pci_p2pdma_fs_cnt); + if (ret) + return ret; + + rcu_read_lock(); + p2pdma = rcu_dereference(pdev->p2pdma); + if (p2pdma) { + ihold(p2pdma->inode); + file->f_mapping = p2pdma->inode->i_mapping; + rcu_read_unlock(); + } else { + rcu_read_unlock(); + simple_release_fs(&pci_p2pdma_fs_mnt, &pci_p2pdma_fs_cnt); + } + + return 0; +} +EXPORT_SYMBOL_GPL(pci_p2pdma_file_open); + +/** + * pci_p2pdma_file_release - release a file opened with pci_p2pdma_file_open() + * @file: the file being released + * + * Release the reference to f_mapping set by pci_p2pdma_file_open() + */ +void pci_p2pdma_file_release(struct file *file) +{ + if (file->f_mapping->host != file->f_inode) { + iput(file->f_mapping->host); + simple_release_fs(&pci_p2pdma_fs_mnt, &pci_p2pdma_fs_cnt); + } +} +EXPORT_SYMBOL_GPL(pci_p2pdma_file_release); + +/** + * pci_mmap_p2pmem - setup an mmap region to be backed with P2PDMA memory + * that was registered with pci_p2pdma_add_resource() + * @pdev: the device to allocate memory from + * @vma: the userspace vma to map the memory to + * + * The file must call pci_p2pdma_file_open() in its open() operation. 
+ * + * Returns 0 on success, or a negative error code on failure + */ +int pci_mmap_p2pmem(struct pci_dev *pdev, struct vm_area_struct *vma) +{ + size_t len = vma->vm_end - vma->vm_start; + struct pci_p2pdma *p2pdma; + struct percpu_ref *ref; + unsigned long vaddr; + void *kaddr; + int ret; + + /* prevent private mappings from being established */ + if ((vma->vm_flags & VM_MAYSHARE) != VM_MAYSHARE) { + pci_info_ratelimited(pdev, + "%s: fail, attempted private mapping\n", + current->comm); + return -EINVAL; + } + + if (vma->vm_pgoff) { + pci_info_ratelimited(pdev, + "%s: fail, attempted mapping with non-zero offset\n", + current->comm); + return -EINVAL; + } + + rcu_read_lock(); + p2pdma = rcu_dereference(pdev->p2pdma); + if (!p2pdma || !p2pdma->active) { + ret = -ENODEV; + goto out; + } + + kaddr = (void *)gen_pool_alloc_owner(p2pdma->pool, len, (void **)&ref); + if (!kaddr) { + ret = -ENOMEM; + goto out; + } + + /* + * vm_insert_page() can sleep, so a reference is taken to mapping + * such that rcu_read_unlock() can be done before inserting the + * pages + */ + if (unlikely(!percpu_ref_tryget_live_rcu(ref))) { + ret = -ENODEV; + goto out_free_mem; + } + rcu_read_unlock(); + + for (vaddr = vma->vm_start; vaddr < vma->vm_end; vaddr += PAGE_SIZE) { + ret = vm_insert_page(vma, vaddr, virt_to_page(kaddr)); + if (ret) { + gen_pool_free(p2pdma->pool, (uintptr_t)kaddr, len); + return ret; + } + percpu_ref_get(ref); + put_page(virt_to_page(kaddr)); + kaddr += PAGE_SIZE; + len -= PAGE_SIZE; + } + + percpu_ref_put(ref); + + return 0; +out_free_mem: + gen_pool_free(p2pdma->pool, (uintptr_t)kaddr, len); +out: + rcu_read_unlock(); + return ret; +} +EXPORT_SYMBOL_GPL(pci_mmap_p2pmem); diff --git a/include/linux/pci-p2pdma.h b/include/linux/pci-p2pdma.h index 2c07aa6b7665..0ffe782940da 100644 --- a/include/linux/pci-p2pdma.h +++ b/include/linux/pci-p2pdma.h @@ -34,6 +34,9 @@ int pci_p2pdma_enable_store(const char *page, struct pci_dev **p2p_dev, bool *use_p2pdma); ssize_t 
pci_p2pdma_enable_show(char *page, struct pci_dev *p2p_dev, bool use_p2pdma); +int pci_p2pdma_file_open(struct pci_dev *pdev, struct file *file); +void pci_p2pdma_file_release(struct file *file); +int pci_mmap_p2pmem(struct pci_dev *pdev, struct vm_area_struct *vma); #else /* CONFIG_PCI_P2PDMA */ static inline int pci_p2pdma_add_resource(struct pci_dev *pdev, int bar, size_t size, u64 offset) @@ -90,6 +93,19 @@ static inline ssize_t pci_p2pdma_enable_show(char *page, { return sprintf(page, "none\n"); } +static inline int pci_p2pdma_file_open(struct pci_dev *pdev, + struct file *file) +{ + return 0; +} +static inline void pci_p2pdma_file_release(struct file *file) +{ +} +static inline int pci_mmap_p2pmem(struct pci_dev *pdev, + struct vm_area_struct *vma) +{ + return -EOPNOTSUPP; +} #endif /* CONFIG_PCI_P2PDMA */ diff --git a/include/uapi/linux/magic.h b/include/uapi/linux/magic.h index f724129c0425..59ba2e60dc03 100644 --- a/include/uapi/linux/magic.h +++ b/include/uapi/linux/magic.h @@ -95,6 +95,7 @@ #define BPF_FS_MAGIC 0xcafe4a11 #define AAFS_MAGIC 0x5a3c69f0 #define ZONEFS_MAGIC 0x5a4f4653 +#define P2PDMA_MAGIC 0x70327064 /* Since UDF 2.01 is ISO 13346 based... */ #define UDF_SUPER_MAGIC 0x15013346

[v7,20/21] PCI/P2PDMA: Introduce pci_mmap_p2pmem()

Commit Message

Comments

Patch