Message ID | 20210405203711.1095940-3-rick.p.edgecombe@intel.com
---|---
State | RFC
Series | Group pages on the direct map for permissioned vmallocs
On 4/5/21 1:37 PM, Rick Edgecombe wrote:
> +static void __dispose_pages(struct list_head *head)
> +{
> +	struct list_head *cur, *next;
> +
> +	list_for_each_safe(cur, next, head) {
> +		list_del(cur);
> +
> +		/* The list head is stored at the start of the page */
> +		free_page((unsigned long)cur);
> +	}
> +}

This is interesting.

While the page is in the allocator, you're using the page contents
themselves to store the list_head. It took me a minute to figure out
what you were doing here because "start of the page" is a bit
ambiguous. It could mean:

 * the first 16 bytes in 'struct page'
or
 * the first 16 bytes in the page itself, aka *page_address(page)

The fact that this doesn't work on highmem systems makes this an OK
thing to do, but it is a bit weird. It's also doubly susceptible to
bugs where there's a page_to_virt() or virt_to_page() screwup.

I was *hoping* there was still sufficient space in 'struct page' for
this second list_head in addition to page->lru. I think there *should*
be. That would at least make this allocator a bit more "normal" in not
caring about page contents while the page is free in the allocator. If
you were able to do that you could do things like kmemcheck or page
alloc debugging while the page is in the allocator.

Anyway, I think I'd prefer that you *try* to use 'struct page' alone.
But, if that doesn't work out, please comment the snot out of this
thing because it _is_ weird.
On Mon, Apr 05, 2021 at 02:01:58PM -0700, Dave Hansen wrote:
> On 4/5/21 1:37 PM, Rick Edgecombe wrote:
> > +static void __dispose_pages(struct list_head *head)
> > +{
> > +	struct list_head *cur, *next;
> > +
> > +	list_for_each_safe(cur, next, head) {
> > +		list_del(cur);
> > +
> > +		/* The list head is stored at the start of the page */
> > +		free_page((unsigned long)cur);
> > +	}
> > +}
>
> This is interesting.
>
> While the page is in the allocator, you're using the page contents
> themselves to store the list_head. It took me a minute to figure out
> what you were doing here because "start of the page" is a bit
> ambiguous. It could mean:
>
>  * the first 16 bytes in 'struct page'
> or
>  * the first 16 bytes in the page itself, aka *page_address(page)
>
> The fact that this doesn't work on highmem systems makes this an OK
> thing to do, but it is a bit weird. It's also doubly susceptible to
> bugs where there's a page_to_virt() or virt_to_page() screwup.
>
> I was *hoping* there was still sufficient space in 'struct page' for
> this second list_head in addition to page->lru. I think there *should*
> be. That would at least make this allocator a bit more "normal" in not
> caring about page contents while the page is free in the allocator. If
> you were able to do that you could do things like kmemcheck or page
> alloc debugging while the page is in the allocator.
>
> Anyway, I think I'd prefer that you *try* to use 'struct page' alone.
> But, if that doesn't work out, please comment the snot out of this
> thing because it _is_ weird.

Hi! Current closest-thing-we-have-to-an-expert-on-struct-page here!

I haven't read over these patches yet. If these pages are in use by
vmalloc, they can't use mapping+index because get_user_pages() will
call page_mapping() and the list_head will confuse it. I think it
could use index+private for a list_head.

If the pages are in the buddy, I _think_ mapping+index are free.
private is in use for buddy order. But I haven't read through the
buddy code in a while.

Does it need to be a doubly linked list? Can it be an hlist?
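For illustration only (this sketch is not from the series), the
struct-page-based bookkeeping suggested above might look roughly like
the following, linking free cache pages through page->lru so the page
contents are never written while a page sits in the cache. Whether
page->lru or the index+private words are actually available here
depends on how the rest of the series uses the pages, and it sidesteps
the list_lru question raised in the next reply:

#include <linux/list.h>
#include <linux/mm_types.h>

/* Hypothetical sketch: track free cache pages via struct page itself */
static void cache_add_page(struct list_head *cache, struct page *page)
{
	/* Only the struct page is touched, never the page contents */
	list_add(&page->lru, cache);
}

static struct page *cache_take_page(struct list_head *cache)
{
	struct page *page;

	if (list_empty(cache))
		return NULL;

	page = list_first_entry(cache, struct page, lru);
	list_del(&page->lru);
	return page;
}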
On Mon, 2021-04-05 at 14:01 -0700, Dave Hansen wrote:
> On 4/5/21 1:37 PM, Rick Edgecombe wrote:
> > +static void __dispose_pages(struct list_head *head)
> > +{
> > +	struct list_head *cur, *next;
> > +
> > +	list_for_each_safe(cur, next, head) {
> > +		list_del(cur);
> > +
> > +		/* The list head is stored at the start of the page */
> > +		free_page((unsigned long)cur);
> > +	}
> > +}
>
> This is interesting.
>
> While the page is in the allocator, you're using the page contents
> themselves to store the list_head. It took me a minute to figure out
> what you were doing here because "start of the page" is a bit
> ambiguous. It could mean:
>
>  * the first 16 bytes in 'struct page'
> or
>  * the first 16 bytes in the page itself, aka *page_address(page)
>
> The fact that this doesn't work on highmem systems makes this an OK
> thing to do, but it is a bit weird. It's also doubly susceptible to
> bugs where there's a page_to_virt() or virt_to_page() screwup.
>
> I was *hoping* there was still sufficient space in 'struct page' for
> this second list_head in addition to page->lru. I think there *should*
> be. That would at least make this allocator a bit more "normal" in not
> caring about page contents while the page is free in the allocator. If
> you were able to do that you could do things like kmemcheck or page
> alloc debugging while the page is in the allocator.
>
> Anyway, I think I'd prefer that you *try* to use 'struct page' alone.
> But, if that doesn't work out, please comment the snot out of this
> thing because it _is_ weird.

Yes sorry, that deserved more explanation. I tried putting it in
struct page actually. The problem was that list_lru automatically
determines the node id from the list_head provided to it via
page_to_nid(virt_to_page(head)). I guess it assumes the list_head is
on the actual item. I started adding another list_lru function that
let the node id be passed in separately, but then I remembered this
trick from the deferred free list in vmalloc.

If this ever expands to handle direct map unmapped pages (which would
probably be the next step), the list_head will have to be moved out of
the actual page anyway. But in the meantime this resulted in the
smallest change. I can try the other way if it's still too weird.
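To make that constraint concrete, this is roughly what list_lru_add()
looked like in mm/list_lru.c around this kernel version, simplified
here with the memcg handling omitted. The node id is derived from the
address of the list_head itself, which is why the cached list_head has
to live inside the page being tracked rather than in its struct page:

/* Simplified from mm/list_lru.c (memcg handling omitted) */
bool list_lru_add(struct list_lru *lru, struct list_head *item)
{
	/* The nid comes from the page that contains the list_head */
	int nid = page_to_nid(virt_to_page(item));
	struct list_lru_node *nlru = &lru->node[nid];

	spin_lock(&nlru->lock);
	if (list_empty(item)) {
		list_add_tail(item, &nlru->lru.list);
		nlru->nr_items++;
		spin_unlock(&nlru->lock);
		return true;
	}
	spin_unlock(&nlru->lock);
	return false;
}

With &page->lru as the item, virt_to_page(item) would be applied to the
struct page's own address rather than to the cached page, so the node
lookup would be wrong.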
On Mon, 2021-04-05 at 22:32 +0100, Matthew Wilcox wrote:
> On Mon, Apr 05, 2021 at 02:01:58PM -0700, Dave Hansen wrote:
> > On 4/5/21 1:37 PM, Rick Edgecombe wrote:
> > > +static void __dispose_pages(struct list_head *head)
> > > +{
> > > +	struct list_head *cur, *next;
> > > +
> > > +	list_for_each_safe(cur, next, head) {
> > > +		list_del(cur);
> > > +
> > > +		/* The list head is stored at the start of the page */
> > > +		free_page((unsigned long)cur);
> > > +	}
> > > +}
> >
> > This is interesting.
> >
> > While the page is in the allocator, you're using the page contents
> > themselves to store the list_head. It took me a minute to figure out
> > what you were doing here because "start of the page" is a bit
> > ambiguous. It could mean:
> >
> >  * the first 16 bytes in 'struct page'
> > or
> >  * the first 16 bytes in the page itself, aka *page_address(page)
> >
> > The fact that this doesn't work on highmem systems makes this an OK
> > thing to do, but it is a bit weird. It's also doubly susceptible to
> > bugs where there's a page_to_virt() or virt_to_page() screwup.
> >
> > I was *hoping* there was still sufficient space in 'struct page' for
> > this second list_head in addition to page->lru. I think there
> > *should* be. That would at least make this allocator a bit more
> > "normal" in not caring about page contents while the page is free in
> > the allocator. If you were able to do that you could do things like
> > kmemcheck or page alloc debugging while the page is in the allocator.
> >
> > Anyway, I think I'd prefer that you *try* to use 'struct page' alone.
> > But, if that doesn't work out, please comment the snot out of this
> > thing because it _is_ weird.
>
> Hi! Current closest-thing-we-have-to-an-expert-on-struct-page here!
>
> I haven't read over these patches yet. If these pages are in use by
> vmalloc, they can't use mapping+index because get_user_pages() will
> call page_mapping() and the list_head will confuse it. I think it
> could use index+private for a list_head.
>
> If the pages are in the buddy, I _think_ mapping+index are free.
> private is in use for buddy order. But I haven't read through the
> buddy code in a while.
>
> Does it need to be a doubly linked list? Can it be an hlist?

It does need to be a doubly linked list. I think they should never be
mapped to userspace. As far as the page allocator is concerned these
pages are not free, and they are not compound. Originally I was just
using the lru member. Would it be ok in that case?
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 21f851179ff0..c0dafa7e6e73 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -80,6 +80,7 @@ config X86
 	select ARCH_HAS_COPY_MC			if X86_64
 	select ARCH_HAS_SET_MEMORY
 	select ARCH_HAS_SET_DIRECT_MAP
+	select ARCH_HAS_HPAGE_DIRECT_MAP_PERMS	if !HIGHMEM
 	select ARCH_HAS_STRICT_KERNEL_RWX
 	select ARCH_HAS_STRICT_MODULE_RWX
 	select ARCH_HAS_SYNC_CORE_BEFORE_USERMODE
diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h
index cedcda6593f6..6a27af32f3a1 100644
--- a/include/linux/vmalloc.h
+++ b/include/linux/vmalloc.h
@@ -26,6 +26,7 @@ struct notifier_block;		/* in notifier.h */
 #define VM_KASAN		0x00000080	/* has allocated kasan shadow memory */
 #define VM_FLUSH_RESET_PERMS	0x00000100	/* reset direct map and flush TLB on unmap, can't be freed in atomic context */
 #define VM_MAP_PUT_PAGES	0x00000200	/* put pages and free array in vfree */
+#define VM_GROUP_PAGES		0x00000400	/* try to group the pages on the direct map */
 
 /*
  * VM_KASAN is used slighly differently depending on CONFIG_KASAN_VMALLOC.
diff --git a/mm/Kconfig b/mm/Kconfig
index f730605b8dcf..ecb04d9b6227 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -869,6 +869,15 @@ config ARCH_HAS_PTE_SPECIAL
 config ARCH_HAS_HUGEPD
 	bool
 
+#
+# Select if arch tries to preserve large pages on the direct map while also
+# setting permissions on the direct map. When set, generic mm code will make
+# an effort to centralize the breakage of the direct map that results from
+# setting kernel memory permissions.
+#
+config ARCH_HAS_HPAGE_DIRECT_MAP_PERMS
+	bool
+
 config MAPPING_DIRTY_HELPERS
 	bool
 
diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index e6f352bf0498..d478dccda3a7 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -2194,6 +2194,203 @@ struct vm_struct *remove_vm_area(const void *addr)
 	return NULL;
 }
 
+static struct page *__vm_alloc_page_order(int node, gfp_t gfp_mask, int order)
+{
+	if (node == NUMA_NO_NODE)
+		return alloc_pages(gfp_mask, order);
+
+	return alloc_pages_node(node, gfp_mask, order);
+}
+
+#ifdef CONFIG_ARCH_HAS_HPAGE_DIRECT_MAP_PERMS
+#define GFP_GROUPED (__GFP_NOWARN | __GFP_HIGHMEM | GFP_KERNEL)
+
+/*
+ * The list_lru contains per-node lists of the cache of grouped pages. Huge
+ * page size blocks of pages are added whenever a request cannot be fulfilled
+ * by pages in the cache. For NUMA_NO_NODE, nid_round_robin is used as the
+ * start of a search through each node looking for a page. The shrinker will
+ * lock the per-node list.
+ */
+static struct list_lru grouped_pages_lru;
+static bool grouped_page_en;
+static atomic_t nid_round_robin;
+
+static unsigned long grouped_shrink_count(struct shrinker *shrinker,
+					  struct shrink_control *sc)
+{
+	unsigned long page_cnt = list_lru_shrink_count(&grouped_pages_lru, sc);
+
+	return page_cnt ? page_cnt : SHRINK_EMPTY;
+}
+
+static enum lru_status grouped_isolate(struct list_head *item,
+				       struct list_lru_one *list,
+				       spinlock_t *lock, void *cb_arg)
+{
+	struct list_head *dispose = cb_arg;
+
+	list_lru_isolate_move(list, item, dispose);
+
+	return LRU_REMOVED;
+}
+
+static void __dispose_pages(struct list_head *head)
+{
+	struct list_head *cur, *next;
+
+	list_for_each_safe(cur, next, head) {
+		list_del(cur);
+
+		/* The list head is stored at the start of the page */
+		free_page((unsigned long)cur);
+	}
+}
+
+static unsigned long grouped_shrink_scan(struct shrinker *shrinker,
+					 struct shrink_control *sc)
+{
+	unsigned long isolated;
+	LIST_HEAD(freeable);
+
+	if (!(sc->gfp_mask & GFP_GROUPED))
+		return SHRINK_STOP;
+
+	isolated = list_lru_shrink_walk(&grouped_pages_lru, sc, grouped_isolate,
+					&freeable);
+	__dispose_pages(&freeable);
+
+	/* Every item walked gets isolated */
+	sc->nr_scanned += isolated;
+
+	return isolated;
+}
+
+static struct shrinker grouped_page_shrinker = {
+	.count_objects = grouped_shrink_count,
+	.scan_objects = grouped_shrink_scan,
+	.seeks = DEFAULT_SEEKS,
+	.flags = SHRINKER_NUMA_AWARE
+};
+
+static int __init grouped_page_init(void)
+{
+	if (list_lru_init(&grouped_pages_lru))
+		goto out;
+
+	grouped_page_en = !register_shrinker(&grouped_page_shrinker);
+	if (!grouped_page_en)
+		list_lru_destroy(&grouped_pages_lru);
+
+out:
+	return !grouped_page_en;
+}
+
+device_initcall(grouped_page_init);
+
+static struct page *__remove_first_page(int node)
+{
+	unsigned int start_nid, i;
+	struct list_head *head;
+
+	if (node != NUMA_NO_NODE) {
+		head = list_lru_get_mru(&grouped_pages_lru, node);
+		if (head)
+			return virt_to_page(head);
+		return NULL;
+	}
+
+	/* If NUMA_NO_NODE, search the nodes in round robin for a page */
+	start_nid = (unsigned int)atomic_fetch_inc(&nid_round_robin) % nr_node_ids;
+	for (i = 0; i < nr_node_ids; i++) {
+		int cur_nid = (start_nid + i) % nr_node_ids;
+
+		head = list_lru_get_mru(&grouped_pages_lru, cur_nid);
+		if (head)
+			return virt_to_page(head);
+	}
+
+	return NULL;
+}
+
+static inline bool vmalloc_grouped_page_enabled(unsigned long vm_flags)
+{
+	return grouped_page_en && vm_flags & VM_GROUP_PAGES;
+}
+
+/* Return a page to the cache used by VM_GROUP_PAGES */
+static void __free_grouped_page(struct page *page)
+{
+	struct list_head *item;
+
+	item = (struct list_head *)page_address(page);
+	INIT_LIST_HEAD(item);
+	list_lru_add(&grouped_pages_lru, item);
+}
+
+/* Get and add some new pages to the cache to be used by VM_GROUP_PAGES */
+static struct page *__replenish_grouped_pages(int node)
+{
+	const unsigned int hpage_cnt = HPAGE_SIZE >> PAGE_SHIFT;
+	struct page *page;
+	int i;
+
+	page = __vm_alloc_page_order(node, GFP_GROUPED, HUGETLB_PAGE_ORDER);
+	if (!page)
+		return __vm_alloc_page_order(node, GFP_GROUPED, 0);
+
+	split_page(page, HUGETLB_PAGE_ORDER);
+
+	for (i = 1; i < hpage_cnt; i++)
+		__free_grouped_page(&page[i]);
+
+	return &page[0];
+}
+
+/* Get a page from the cache for VM_GROUP_PAGES */
+static struct page *__get_grouped_page(int node, gfp_t gfp_mask)
+{
+	struct page *page;
+
+	if (gfp_mask != GFP_GROUPED) {
+		WARN(1, "Unsupported GFP flags for VM_GROUP_PAGES\n");
+		return NULL;
+	}
+
+	page = __remove_first_page(node);
+
+	if (page)
+		return page;
+
+	return __replenish_grouped_pages(node);
+}
+#else /* CONFIG_ARCH_HAS_HPAGE_DIRECT_MAP_PERMS */
+static inline bool vmalloc_grouped_page_enabled(unsigned long vm_flags)
+{
+	return false;
+}
+
+extern void __free_grouped_page(struct page *page);
+extern struct page *__get_grouped_page(int node, gfp_t gfp_mask);
+#endif /* CONFIG_ARCH_HAS_HPAGE_DIRECT_MAP_PERMS */
+
+static struct page *vm_alloc_page(int node, unsigned long vm_flags,
+				  gfp_t gfp_mask)
+{
+	if (vmalloc_grouped_page_enabled(vm_flags))
+		return __get_grouped_page(node, gfp_mask);
+
+	return __vm_alloc_page_order(node, gfp_mask, 0);
+}
+
+static void vm_free_page(struct page *page, unsigned long vm_flags)
+{
+	if (vmalloc_grouped_page_enabled(vm_flags))
+		__free_grouped_page(page);
+	else
+		__free_pages(page, 0);
+}
+
 static inline void set_area_direct_map(const struct vm_struct *area,
 				       int (*set_direct_map)(struct page *page))
 {
@@ -2283,7 +2480,7 @@ static void __vunmap(const void *addr, int deallocate_pages)
 			struct page *page = area->pages[i];
 
 			BUG_ON(!page);
-			__free_pages(page, 0);
+			vm_free_page(page, area->flags);
 		}
 		atomic_long_sub(area->nr_pages, &nr_vmalloc_pages);
 
@@ -2474,7 +2671,7 @@ EXPORT_SYMBOL_GPL(vmap_pfn);
 #endif /* CONFIG_VMAP_PFN */
 
 static void *__vmalloc_area_node(struct vm_struct *area, gfp_t gfp_mask,
-				 pgprot_t prot, int node)
+				 pgprot_t prot, int node, unsigned long vm_flags)
 {
 	const gfp_t nested_gfp = (gfp_mask & GFP_RECLAIM_MASK) | __GFP_ZERO;
 	unsigned int nr_pages = get_vm_area_size(area) >> PAGE_SHIFT;
@@ -2504,12 +2701,7 @@ static void *__vmalloc_area_node(struct vm_struct *area, gfp_t gfp_mask,
 	area->nr_pages = nr_pages;
 
 	for (i = 0; i < area->nr_pages; i++) {
-		struct page *page;
-
-		if (node == NUMA_NO_NODE)
-			page = alloc_page(gfp_mask);
-		else
-			page = alloc_pages_node(node, gfp_mask, 0);
+		struct page *page = vm_alloc_page(node, vm_flags, gfp_mask);
 
 		if (unlikely(!page)) {
 			/* Successfully allocated i pages, free them in __vfree() */
@@ -2560,6 +2752,7 @@ void *__vmalloc_node_range(unsigned long size, unsigned long align,
 			pgprot_t prot, unsigned long vm_flags, int node,
 			const void *caller)
 {
+	unsigned long flags = VM_ALLOC | VM_UNINITIALIZED | vm_flags;
 	struct vm_struct *area;
 	void *addr;
 	unsigned long real_size = size;
@@ -2568,12 +2761,12 @@ void *__vmalloc_node_range(unsigned long size, unsigned long align,
 	if (!size || (size >> PAGE_SHIFT) > totalram_pages())
 		goto fail;
 
-	area = __get_vm_area_node(real_size, align, VM_ALLOC | VM_UNINITIALIZED |
-				vm_flags, start, end, node, gfp_mask, caller);
+	area = __get_vm_area_node(real_size, align, flags, start, end, node,
+				  gfp_mask, caller);
 	if (!area)
 		goto fail;
 
-	addr = __vmalloc_area_node(area, gfp_mask, prot, node);
+	addr = __vmalloc_area_node(area, gfp_mask, prot, node, flags);
 	if (!addr)
 		return NULL;
For x86, setting memory permissions on vmalloc allocations results in
fracturing large pages on the direct map. Direct map fracturing can be
reduced by locating pages that will have their permissions set close
together.

Create a simple page cache in vmalloc that allocates pages from huge
page size blocks. Define a new flag, VM_GROUP_PAGES, that vmalloc
callers can use to ask vmalloc to try to use pages from this new
cache. A page is not guaranteed to come from a huge page grouping;
instead, fall back to non-grouped pages to fulfill the allocation if
needed. Also, register a shrinker so that the system can ask for the
pages back if needed. Since this is only needed for cases where there
are direct map permissions and there might be large pages on the
direct map, compile it out for non-x86 and highmem using a new arch
CONFIG.

Free pages in the cache are tracked in per-node lists inside a
list_lru. NUMA_NO_NODE requests are serviced by checking each per-node
list in a round robin fashion. If pages are requested for a certain
node but the cache is empty for that node, an additional huge page
size block is allocated and split into the cache.

When replenishing the cache, use a specific GFP mask that matches the
intended user of this vm_flag (module_alloc()). If the vm and GFP
flags mismatch, fail the page allocation. If a huge page size block is
not available, fall back to the normal page allocator logic and use
non-grouped pages.

Signed-off-by: Rick Edgecombe <rick.p.edgecombe@intel.com>
---
 arch/x86/Kconfig        |   1 +
 include/linux/vmalloc.h |   1 +
 mm/Kconfig              |   9 ++
 mm/vmalloc.c            | 215 ++++++++++++++++++++++++++++++++++++++--
 4 files changed, 215 insertions(+), 11 deletions(-)
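As a usage sketch (not part of this patch; the function name is
hypothetical), a module_alloc()-style caller would request grouped
pages by passing the new flag down through __vmalloc_node_range(). The
plain GFP_KERNEL request here is assumed, per the description above,
to reach the per-page allocation as the GFP_GROUPED mask:

#include <linux/vmalloc.h>
#include <linux/moduleloader.h>

/*
 * Hypothetical caller: allocate from the grouped cache so that later
 * permission changes fracture fewer direct map large pages.
 */
void *alloc_grouped_region(unsigned long size)
{
	return __vmalloc_node_range(size, MODULE_ALIGN,
				    MODULES_VADDR, MODULES_END,
				    GFP_KERNEL, PAGE_KERNEL,
				    VM_GROUP_PAGES | VM_FLUSH_RESET_PERMS,
				    NUMA_NO_NODE,
				    __builtin_return_address(0));
}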