[v11,3/8] mm/gup: Introduce memfd_pin_folios() for pinning memfd folios

Message ID	20240113065223.1532987-4-vivek.kasireddy@intel.com (mailing list archive)
State	New, archived
Headers	show Return-Path: <dri-devel-bounces@lists.freedesktop.org> From: Vivek Kasireddy <vivek.kasireddy@intel.com> To: dri-devel@lists.freedesktop.org, linux-mm@kvack.org Subject: [PATCH v11 3/8] mm/gup: Introduce memfd_pin_folios() for pinning memfd folios Date: Fri, 12 Jan 2024 22:52:18 -0800 Message-Id: <20240113065223.1532987-4-vivek.kasireddy@intel.com> In-Reply-To: <20240113065223.1532987-1-vivek.kasireddy@intel.com> References: <20240113065223.1532987-1-vivek.kasireddy@intel.com> MIME-Version: 1.0 Content-Transfer-Encoding: 8bit Precedence: list Cc: Gerd Hoffmann <kraxel@redhat.com>, Dongwon Kim <dongwon.kim@intel.com>, David Hildenbrand <david@redhat.com>, Daniel Vetter <daniel.vetter@ffwll.ch>, Hugh Dickins <hughd@google.com>, Vivek Kasireddy <vivek.kasireddy@intel.com>, Matthew Wilcox <willy@infradead.org>, Christoph Hellwig <hch@infradead.org>, Peter Xu <peterx@redhat.com>, Jason Gunthorpe <jgg@nvidia.com>, Junxiao Chang <junxiao.chang@intel.com>, Christoph Hellwig <hch@lst.de>, Mike Kravetz <mike.kravetz@oracle.com> Errors-To: dri-devel-bounces@lists.freedesktop.org Sender: "dri-devel" <dri-devel-bounces@lists.freedesktop.org>
Series	mm/gup: Introduce memfd_pin_folios() for pinning memfd folios \| expand [v11,0/8] mm/gup: Introduce memfd_pin_folios() for pinning memfd folios [v11,1/8] mm/gup: Introduce unpin_folio/unpin_folios helpers [v11,2/8] mm/gup: Introduce check_and_migrate_movable_folios() [v11,3/8] mm/gup: Introduce memfd_pin_folios() for pinning memfd folios [v11,4/8] udmabuf: Use vmf_insert_pfn and VM_PFNMAP for handling mmap [v11,5/8] udmabuf: Add back support for mapping hugetlb pages [v11,6/8] udmabuf: Convert udmabuf driver to use folios [v11,7/8] udmabuf: Pin the pages using memfd_pin_folios() API [v11,8/8] selftests/udmabuf: Add tests to verify data after page migration

diff --git a/include/linux/memfd.h b/include/linux/memfd.h index e7abf6fa4c52..3f2cf339ceaf 100644 --- a/include/linux/memfd.h +++ b/include/linux/memfd.h @@ -6,11 +6,16 @@ #ifdef CONFIG_MEMFD_CREATE extern long memfd_fcntl(struct file *file, unsigned int cmd, unsigned int arg); +struct folio *memfd_alloc_folio(struct file *memfd, pgoff_t idx); #else static inline long memfd_fcntl(struct file *f, unsigned int c, unsigned int a) { return -EINVAL; } +static inline struct folio *memfd_alloc_folio(struct file *memfd, pgoff_t idx) +{ + return ERR_PTR(-EINVAL); +} #endif #endif /* __LINUX_MEMFD_H */ diff --git a/include/linux/mm.h b/include/linux/mm.h index a0321adaefad..33ec267afc63 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -2474,6 +2474,9 @@ long get_user_pages_unlocked(unsigned long start, unsigned long nr_pages, struct page **pages, unsigned int gup_flags); long pin_user_pages_unlocked(unsigned long start, unsigned long nr_pages, struct page **pages, unsigned int gup_flags); +long memfd_pin_folios(struct file *memfd, loff_t start, loff_t end, + struct folio **folios, unsigned int max_folios, + pgoff_t *offset); int get_user_pages_fast(unsigned long start, int nr_pages, unsigned int gup_flags, struct page **pages); diff --git a/mm/gup.c b/mm/gup.c index 00b24a429ba8..5b2b55d87998 100644 --- a/mm/gup.c +++ b/mm/gup.c @@ -5,6 +5,7 @@ #include <linux/spinlock.h> #include <linux/mm.h> +#include <linux/memfd.h> #include <linux/memremap.h> #include <linux/pagemap.h> #include <linux/rmap.h> @@ -17,6 +18,7 @@ #include <linux/hugetlb.h> #include <linux/migrate.h> #include <linux/mm_inline.h> +#include <linux/pagevec.h> #include <linux/sched/mm.h> #include <linux/shmem_fs.h> @@ -3528,3 +3530,137 @@ long pin_user_pages_unlocked(unsigned long start, unsigned long nr_pages, &locked, gup_flags); } EXPORT_SYMBOL(pin_user_pages_unlocked); + +/** + * memfd_pin_folios() - pin folios associated with a memfd + * @memfd: the memfd whose folios are to be pinned + * @start: the first memfd offset + * @end: the last memfd offset (inclusive) + * @folios: array that receives pointers to the folios pinned + * @max_folios: maximum number of entries in @folios + * @offset: the offset into the first folio + * + * Attempt to pin folios associated with a memfd in the contiguous range + * [start, end]. Given that a memfd is either backed by shmem or hugetlb, + * the folios can either be found in the page cache or need to be allocated + * if necessary. Once the folios are located, they are all pinned via + * FOLL_PIN and @offset is populatedwith the offset into the first folio. + * And, eventually, these pinned folios must be released either using + * unpin_folios() or unpin_folio(). + * + * It must be noted that the folios may be pinned for an indefinite amount + * of time. And, in most cases, the duration of time they may stay pinned + * would be controlled by the userspace. This behavior is effectively the + * same as using FOLL_LONGTERM with other GUP APIs. + * + * Returns number of folios pinned, which could be less than @max_folios + * as it depends on the folio sizes that cover the range [start, end]. + * If no folios were pinned, it returns -errno. + */ +long memfd_pin_folios(struct file *memfd, loff_t start, loff_t end, + struct folio **folios, unsigned int max_folios, + pgoff_t *offset) +{ + unsigned int flags, nr_folios, nr_found; + unsigned int i, pgshift = PAGE_SHIFT; + pgoff_t start_idx, end_idx, next_idx; + struct folio *folio = NULL; + struct folio_batch fbatch; + struct hstate *h; + long ret; + + if (start > end || !max_folios) + return -EINVAL; + + if (!memfd) + return -EINVAL; + + if (!shmem_file(memfd) && !is_file_hugepages(memfd)) + return -EINVAL; + + if (is_file_hugepages(memfd)) { + h = hstate_file(memfd); + pgshift = huge_page_shift(h); + } + + flags = memalloc_pin_save(); + do { + nr_folios = 0; + start_idx = start >> pgshift; + end_idx = end >> pgshift; + if (is_file_hugepages(memfd)) { + start_idx <<= huge_page_order(h); + end_idx <<= huge_page_order(h); + } + + folio_batch_init(&fbatch); + while (start_idx <= end_idx && nr_folios < max_folios) { + /* + * In most cases, we should be able to find the folios + * in the page cache. If we cannot find them for some + * reason, we try to allocate them and add them to the + * page cache. + */ + nr_found = filemap_get_folios_contig(memfd->f_mapping, + &start_idx, + end_idx, + &fbatch); + if (folio) { + folio_put(folio); + folio = NULL; + } + + next_idx = 0; + for (i = 0; i < nr_found; i++) { + /* + * As there can be multiple entries for a + * given folio in the batch returned by + * filemap_get_folios_contig(), the below + * check is to ensure that we pin and return a + * unique set of folios between start and end. + */ + if (next_idx && + next_idx != folio_index(fbatch.folios[i])) + continue; + + folio = try_grab_folio(&fbatch.folios[i]->page, + 1, FOLL_PIN); + if (!folio) { + folio_batch_release(&fbatch); + goto err; + } + + if (nr_folios == 0) + *offset = offset_in_folio(folio, start); + + folios[nr_folios] = folio; + next_idx = folio_next_index(folio); + if (++nr_folios == max_folios) + break; + } + + folio = NULL; + folio_batch_release(&fbatch); + if (!nr_found) { + folio = memfd_alloc_folio(memfd, start_idx); + if (IS_ERR(folio)) { + ret = PTR_ERR(folio); + if (ret != -EEXIST) + goto err; + } + } + } + + ret = check_and_migrate_movable_folios(nr_folios, folios); + } while (ret == -EAGAIN); + + memalloc_pin_restore(flags); + return ret ? ret : nr_folios; +err: + memalloc_pin_restore(flags); + unpin_folios(folios, nr_folios); + + return ret; +} +EXPORT_SYMBOL_GPL(memfd_pin_folios); + diff --git a/mm/memfd.c b/mm/memfd.c index d3a1ba4208c9..36a75e8249f8 100644 --- a/mm/memfd.c +++ b/mm/memfd.c @@ -63,6 +63,40 @@ static void memfd_tag_pins(struct xa_state *xas) xas_unlock_irq(xas); } +/* + * This is a helper function used by memfd_pin_user_pages() in GUP (gup.c). + * It is mainly called to allocate a page in a memfd when the caller + * (memfd_pin_user_pages()) cannot find a page in the page cache at a given + * index in the mapping. + */ +struct folio *memfd_alloc_folio(struct file *memfd, pgoff_t idx) +{ +#ifdef CONFIG_HUGETLB_PAGE + struct folio *folio; + int err; + + if (is_file_hugepages(memfd)) { + folio = alloc_hugetlb_folio_nodemask(hstate_file(memfd), + NUMA_NO_NODE, + NULL, + GFP_USER); + if (folio && folio_try_get(folio)) { + err = hugetlb_add_to_page_cache(folio, + memfd->f_mapping, + idx); + if (err) { + folio_put(folio); + free_huge_folio(folio); + return ERR_PTR(err); + } + return folio; + } + return ERR_PTR(-ENOMEM); + } +#endif + return shmem_read_folio(memfd->f_mapping, idx); +} + /* * Setting SEAL_WRITE requires us to verify there's no pending writer. However, * via get_user_pages(), drivers might have some pending I/O without any active

[v11,3/8] mm/gup: Introduce memfd_pin_folios() for pinning memfd folios

Commit Message

Patch