@@ -1321,6 +1321,13 @@ config DYNAMIC_SIGFRAME
# Select, if arch has a named attribute group bound to NUMA device nodes.
config HAVE_ARCH_NODE_DEV_GROUP
bool
+#
+# Select if the architecture wants to minimize fragmentation of its
+# direct/linear map caused by set_memory and set_direct_map operations
+#
+config ARCH_WANTS_GFP_UNMAPPED
+ bool
+ depends on ARCH_HAS_SET_MEMORY || ARCH_HAS_SET_DIRECT_MAP
source "kernel/gcov/Kconfig"
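As a concrete illustration of the fragmentation the help text above refers to (not part of the patch; a sketch that assumes an architecture implementing the set_direct_map API): unmapping a single 4K page splits the PMD-sized entry covering it in the direct map, and each caller currently does this by hand. The helpers alloc_unmapped_page_by_hand() and free_unmapped_page_by_hand() below are hypothetical names for that manual pattern, which __GFP_UNMAPPED is meant to replace.

/*
 * Illustration only: manually dropping a page from the direct map with
 * the set_direct_map API. Each such call may split a PMD-mapped region
 * of the direct map into 4K PTEs, which is the fragmentation that
 * ARCH_WANTS_GFP_UNMAPPED tries to limit by grouping unmapped
 * allocations into dedicated pageblocks.
 */
#include <linux/gfp.h>
#include <linux/mm.h>
#include <linux/set_memory.h>
#include <asm/tlbflush.h>

static struct page *alloc_unmapped_page_by_hand(void)
{
	struct page *page = alloc_page(GFP_KERNEL);
	unsigned long addr;

	if (!page)
		return NULL;

	addr = (unsigned long)page_address(page);

	/* May split the PMD covering this page in the direct map. */
	if (set_direct_map_invalid_noflush(page)) {
		__free_page(page);
		return NULL;
	}
	flush_tlb_kernel_range(addr, addr + PAGE_SIZE);

	return page;
}

static void free_unmapped_page_by_hand(struct page *page)
{
	/* The direct map entry must be restored before the page is freed. */
	set_direct_map_default_noflush(page);
	__free_page(page);
}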
@@ -120,6 +120,7 @@ config X86
select ARCH_WANTS_NO_INSTR
select ARCH_WANT_HUGE_PMD_SHARE
select ARCH_WANT_LD_ORPHAN_WARN
+ select ARCH_WANTS_GFP_UNMAPPED
select ARCH_WANTS_THP_SWAP if X86_64
select ARCH_HAS_PARANOID_L1D_FLUSH
select BUILDTIME_TABLE_SORT
@@ -55,8 +55,9 @@ struct vm_area_struct;
#define ___GFP_ACCOUNT 0x400000u
#define ___GFP_ZEROTAGS 0x800000u
#define ___GFP_SKIP_KASAN_POISON 0x1000000u
+#define ___GFP_UNMAPPED 0x2000000u
#ifdef CONFIG_LOCKDEP
-#define ___GFP_NOLOCKDEP 0x2000000u
+#define ___GFP_NOLOCKDEP 0x4000000u
#else
#define ___GFP_NOLOCKDEP 0
#endif
@@ -101,12 +102,15 @@ struct vm_area_struct;
* node with no fallbacks or placement policy enforcements.
*
* %__GFP_ACCOUNT causes the allocation to be accounted to kmemcg.
+ *
+ * %__GFP_UNMAPPED removes the allocated pages from the direct map.
*/
#define __GFP_RECLAIMABLE ((__force gfp_t)___GFP_RECLAIMABLE)
#define __GFP_WRITE ((__force gfp_t)___GFP_WRITE)
#define __GFP_HARDWALL ((__force gfp_t)___GFP_HARDWALL)
#define __GFP_THISNODE ((__force gfp_t)___GFP_THISNODE)
#define __GFP_ACCOUNT ((__force gfp_t)___GFP_ACCOUNT)
+#define __GFP_UNMAPPED ((__force gfp_t)___GFP_UNMAPPED)
/**
* DOC: Watermark modifiers
@@ -249,7 +253,7 @@ struct vm_area_struct;
#define __GFP_NOLOCKDEP ((__force gfp_t)___GFP_NOLOCKDEP)
/* Room for N __GFP_FOO bits */
-#define __GFP_BITS_SHIFT (25 + IS_ENABLED(CONFIG_LOCKDEP))
+#define __GFP_BITS_SHIFT (26 + IS_ENABLED(CONFIG_LOCKDEP))
#define __GFP_BITS_MASK ((__force gfp_t)((1 << __GFP_BITS_SHIFT) - 1))
/**
@@ -348,6 +352,11 @@ static inline int gfp_migratetype(const gfp_t gfp_flags)
BUILD_BUG_ON((1UL << GFP_MOVABLE_SHIFT) != ___GFP_MOVABLE);
BUILD_BUG_ON((___GFP_MOVABLE >> GFP_MOVABLE_SHIFT) != MIGRATE_MOVABLE);
+#ifdef CONFIG_ARCH_WANTS_GFP_UNMAPPED
+ if (unlikely(gfp_flags & __GFP_UNMAPPED))
+ return MIGRATE_UNMAPPED;
+#endif
+
if (unlikely(page_group_by_mobility_disabled))
return MIGRATE_UNMOVABLE;
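A minimal sketch of how a caller might use the new flag once this patch is applied (illustration only; alloc_unmapped_buffer() is a hypothetical name, and accessing the pages through vmap() is an assumption about the intended usage rather than something the patch mandates):

/*
 * Hypothetical caller of __GFP_UNMAPPED: the returned page has no
 * direct map entry, so it must be reached through a separate mapping,
 * here a vmap() area.
 */
#include <linux/gfp.h>
#include <linux/mm.h>
#include <linux/vmalloc.h>

static void *alloc_unmapped_buffer(struct page **pagep)
{
	struct page *page;
	void *vaddr;

	page = alloc_pages(GFP_KERNEL | __GFP_UNMAPPED, 0);
	if (!page)
		return NULL;

	vaddr = vmap(&page, 1, VM_MAP, PAGE_KERNEL);
	if (!vaddr) {
		/* Freeing restores the direct map entry in free_pages_prepare(). */
		__free_page(page);
		return NULL;
	}

	*pagep = page;
	return vaddr;
}

The caller remains responsible for vunmap() and for freeing the page when it is done; the allocator only guarantees that the direct map entry is restored when the page eventually goes back to the buddy lists.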
@@ -43,6 +43,9 @@ enum migratetype {
MIGRATE_UNMOVABLE,
MIGRATE_MOVABLE,
MIGRATE_RECLAIMABLE,
+#ifdef CONFIG_ARCH_WANTS_GFP_UNMAPPED
+ MIGRATE_UNMAPPED,
+#endif
MIGRATE_PCPTYPES, /* the number of types on the pcp lists */
MIGRATE_HIGHATOMIC = MIGRATE_PCPTYPES,
#ifdef CONFIG_CMA
@@ -78,6 +81,14 @@ extern const char * const migratetype_names[MIGRATE_TYPES];
# define is_migrate_cma_page(_page) false
#endif
+#ifdef CONFIG_ARCH_WANTS_GFP_UNMAPPED
+# define is_migrate_unmapped(migratetype) unlikely((migratetype) == MIGRATE_UNMAPPED)
+# define is_migrate_unmapped_page(_page) (get_pageblock_migratetype(_page) == MIGRATE_UNMAPPED)
+#else
+# define is_migrate_unmapped(migratetype) false
+# define is_migrate_unmapped_page(_page) false
+#endif
+
static inline bool is_migrate_movable(int mt)
{
return is_migrate_cma(mt) || mt == MIGRATE_MOVABLE;
@@ -50,7 +50,8 @@
{(unsigned long)__GFP_DIRECT_RECLAIM, "__GFP_DIRECT_RECLAIM"},\
{(unsigned long)__GFP_KSWAPD_RECLAIM, "__GFP_KSWAPD_RECLAIM"},\
{(unsigned long)__GFP_ZEROTAGS, "__GFP_ZEROTAGS"}, \
- {(unsigned long)__GFP_SKIP_KASAN_POISON,"__GFP_SKIP_KASAN_POISON"}\
+ {(unsigned long)__GFP_SKIP_KASAN_POISON,"__GFP_SKIP_KASAN_POISON"},\
+ {(unsigned long)__GFP_UNMAPPED, "__GFP_UNMAPPED"} \
#define show_gfp_flags(flags) \
(flags) ? __print_flags(flags, "|", \
@@ -32,7 +32,7 @@ struct folio_batch;
#define GFP_CONSTRAINT_MASK (__GFP_HARDWALL|__GFP_THISNODE)
/* Do not use these with a slab allocator */
-#define GFP_SLAB_BUG_MASK (__GFP_DMA32|__GFP_HIGHMEM|~__GFP_BITS_MASK)
+#define GFP_SLAB_BUG_MASK (__GFP_DMA32|__GFP_HIGHMEM|__GFP_UNMAPPED|~__GFP_BITS_MASK)
void page_writeback_init(void);
@@ -75,6 +75,7 @@
#include <linux/khugepaged.h>
#include <linux/buffer_head.h>
#include <linux/delayacct.h>
+#include <linux/set_memory.h>
#include <asm/sections.h>
#include <asm/tlbflush.h>
#include <asm/div64.h>
@@ -82,6 +83,12 @@
#include "shuffle.h"
#include "page_reporting.h"
+/*
+ * FIXME: add a proper definition in include/linux/mm.h once remaining
+ * definitions of PMD_ORDER in arch/ are updated
+ */
+#define PMD_ORDER (PMD_SHIFT - PAGE_SHIFT)
+
/* Free Page Internal flags: for internal, non-pcp variants of free_pages(). */
typedef int __bitwise fpi_t;
@@ -319,6 +326,9 @@ const char * const migratetype_names[MIGRATE_TYPES] = {
"Unmovable",
"Movable",
"Reclaimable",
+#ifdef CONFIG_ARCH_WANTS_GFP_UNMAPPED
+ "Unmapped",
+#endif
"HighAtomic",
#ifdef CONFIG_CMA
"CMA",
@@ -938,9 +948,10 @@ compaction_capture(struct capture_control *capc, struct page *page,
if (!capc || order != capc->cc->order)
return false;
- /* Do not accidentally pollute CMA or isolated regions*/
+	/* Do not accidentally pollute CMA, isolated or unmapped regions */
if (is_migrate_cma(migratetype) ||
- is_migrate_isolate(migratetype))
+ is_migrate_isolate(migratetype) ||
+ is_migrate_unmapped(migratetype))
return false;
/*
@@ -1143,6 +1154,17 @@ static inline void __free_one_page(struct page *page,
done_merging:
set_buddy_order(page, order);
+#if 0
+ /*
+ * FIXME: collapse PMD-size page in the direct map and move the
+ * pageblock from MIGRATE_UNMAPPED to another migrate type.
+ */
+ if ((order == PMD_ORDER) && is_migrate_unmapped_page(page)) {
+ set_direct_map_PMD(page);
+ set_pageblock_migratetype(page, MIGRATE_MOVABLE);
+ }
+#endif
+
if (fpi_flags & FPI_TO_TAIL)
to_tail = true;
else if (is_shuffle_order(order))
@@ -1271,6 +1293,40 @@ static int free_tail_pages_check(struct page *head_page, struct page *page)
return ret;
}
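+/*
+ * Restore the direct map entries of pages that come from a
+ * MIGRATE_UNMAPPED pageblock before they are freed back to the buddy
+ * allocator.
+ */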
+static void migrate_unmapped_map_pages(struct page *page, unsigned int nr)
+{
+#ifdef CONFIG_ARCH_WANTS_GFP_UNMAPPED
+ int i;
+
+ if (!is_migrate_unmapped_page(page))
+ return;
+
+ for (i = 0; i < nr; i++)
+ set_direct_map_default_noflush(page + i);
+#endif
+}
+
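+/*
+ * Drop freshly allocated pages from the direct map when the caller
+ * passed __GFP_UNMAPPED and the pages come from a MIGRATE_UNMAPPED
+ * pageblock; called from post_alloc_hook().
+ */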
+static void migrate_unmapped_unmap_pages(struct page *page, unsigned int nr,
+ gfp_t gfp)
+{
+#ifdef CONFIG_ARCH_WANTS_GFP_UNMAPPED
+ unsigned long start = (unsigned long)page_address(page);
+ unsigned long end = start + nr * PAGE_SIZE;
+ int i;
+
+ if (!(gfp & __GFP_UNMAPPED))
+ return;
+
+ if (!is_migrate_unmapped_page(page))
+ return;
+
+ for (i = 0; i < nr; i++)
+ set_direct_map_invalid_noflush(page + i);
+
+ flush_tlb_kernel_range(start, end);
+#endif
+}
+
static void kernel_init_free_pages(struct page *page, int numpages, bool zero_tags)
{
int i;
@@ -1359,6 +1415,7 @@ static __always_inline bool free_pages_prepare(struct page *page,
PAGE_SIZE << order);
}
+ migrate_unmapped_map_pages(page, 1 << order);
kernel_poison_pages(page, 1 << order);
/*
@@ -2426,6 +2483,7 @@ inline void post_alloc_hook(struct page *page, unsigned int order,
set_page_owner(page, order, gfp_flags);
page_table_check_alloc(page, order);
+ migrate_unmapped_unmap_pages(page, 1 << order, gfp_flags);
}
static void prep_new_page(struct page *page, unsigned int order, gfp_t gfp_flags,
@@ -2480,6 +2538,7 @@ struct page *__rmqueue_smallest(struct zone *zone, unsigned int order,
* This array describes the order lists are fallen back to when
* the free lists for the desirable migrate type are depleted
*/
+#ifndef CONFIG_ARCH_WANTS_GFP_UNMAPPED
static int fallbacks[MIGRATE_TYPES][3] = {
[MIGRATE_UNMOVABLE] = { MIGRATE_RECLAIMABLE, MIGRATE_MOVABLE, MIGRATE_TYPES },
[MIGRATE_MOVABLE] = { MIGRATE_RECLAIMABLE, MIGRATE_UNMOVABLE, MIGRATE_TYPES },
@@ -2492,6 +2551,22 @@ static int fallbacks[MIGRATE_TYPES][3] = {
#endif
};
+#else /* CONFIG_ARCH_WANTS_GFP_UNMAPPED */
+
+static int fallbacks[MIGRATE_TYPES][4] = {
+ [MIGRATE_UNMOVABLE] = { MIGRATE_RECLAIMABLE, MIGRATE_MOVABLE, MIGRATE_UNMAPPED, MIGRATE_TYPES },
+ [MIGRATE_MOVABLE] = { MIGRATE_RECLAIMABLE, MIGRATE_UNMOVABLE, MIGRATE_UNMAPPED, MIGRATE_TYPES },
+ [MIGRATE_RECLAIMABLE] = { MIGRATE_UNMOVABLE, MIGRATE_MOVABLE, MIGRATE_UNMAPPED, MIGRATE_TYPES },
+ [MIGRATE_UNMAPPED] = { MIGRATE_UNMOVABLE, MIGRATE_RECLAIMABLE, MIGRATE_MOVABLE, MIGRATE_TYPES },
+#ifdef CONFIG_CMA
+ [MIGRATE_CMA] = { MIGRATE_TYPES }, /* Never used */
+#endif
+#ifdef CONFIG_MEMORY_ISOLATION
+ [MIGRATE_ISOLATE] = { MIGRATE_TYPES }, /* Never used */
+#endif
+};
+#endif /* CONFIG_ARCH_WANTS_GFP_UNMAPPED */
+
#ifdef CONFIG_CMA
static __always_inline struct page *__rmqueue_cma_fallback(struct zone *zone,
unsigned int order)
@@ -2567,6 +2642,39 @@ int move_freepages_block(struct zone *zone, struct page *page,
num_movable);
}
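+/*
+ * Convert the pageblock containing @page to MIGRATE_UNMAPPED so that
+ * further __GFP_UNMAPPED allocations are grouped in it. Returns 0 on
+ * success; on failure the error from set_direct_map is returned and
+ * @page is put back on the free list.
+ */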
+static int set_pageblock_unmapped(struct zone *zone, struct page *page,
+ unsigned int order)
+{
+#ifdef CONFIG_ARCH_WANTS_GFP_UNMAPPED
+ int migratetype = get_pageblock_migratetype(page);
+ unsigned long err;
+
+ BUILD_BUG_ON(pageblock_order != PMD_ORDER);
+
+ if (is_migrate_unmapped_page(page))
+ return 0;
+
+	/*
+	 * Calling set_direct_map_invalid_noflush() for any page in a
+	 * pageblock will split the PMD entry in the direct map, and the
+	 * split may fail because a page table page must be allocated.
+	 * Subsequent calls to the set_direct_map APIs within the same
+	 * pageblock only update the PTEs, so they cannot fail.
+	 */
+ err = set_direct_map_invalid_noflush(page);
+ if (err) {
+ move_to_free_list(page, zone, order, migratetype);
+ return err;
+ }
+
+ set_direct_map_default_noflush(page);
+ set_pageblock_migratetype(page, MIGRATE_UNMAPPED);
+ move_freepages_block(zone, page, MIGRATE_UNMAPPED, NULL);
+#endif
+
+ return 0;
+}
+
static void change_pageblock_range(struct page *pageblock_page,
int start_order, int migratetype)
{
@@ -2605,6 +2713,7 @@ static bool can_steal_fallback(unsigned int order, int start_mt)
if (order >= pageblock_order / 2 ||
start_mt == MIGRATE_RECLAIMABLE ||
start_mt == MIGRATE_UNMOVABLE ||
+ is_migrate_unmapped(start_mt) ||
page_group_by_mobility_disabled)
return true;
@@ -2672,6 +2781,14 @@ static void steal_suitable_fallback(struct zone *zone, struct page *page,
if (is_migrate_highatomic(old_block_type))
goto single_page;
+	/*
+	 * If the new migrate type is MIGRATE_UNMAPPED, the entire
+	 * pageblock will be moved, but the move is deferred to
+	 * get_page_from_freelist() to allow error handling and recovery.
+	 */
+ if (is_migrate_unmapped(start_type))
+ goto single_page;
+
/* Take ownership for orders >= pageblock_order */
if (current_order >= pageblock_order) {
change_pageblock_range(page, current_order, start_type);
@@ -4162,6 +4279,10 @@ get_page_from_freelist(gfp_t gfp_mask, unsigned int order, int alloc_flags,
page = rmqueue(ac->preferred_zoneref->zone, zone, order,
gfp_mask, alloc_flags, ac->migratetype);
if (page) {
+ if ((gfp_mask & __GFP_UNMAPPED) &&
+ set_pageblock_unmapped(zone, page, order))
+ return NULL;
+
prep_new_page(page, order, gfp_mask, alloc_flags);
/*
@@ -5241,6 +5362,10 @@ unsigned long __alloc_pages_bulk(gfp_t gfp, int preferred_nid,
if (nr_pages - nr_populated == 1)
goto failed;
+ /* Bulk allocator does not support __GFP_UNMAPPED */
+ if (gfp & __GFP_UNMAPPED)
+ goto failed;
+
#ifdef CONFIG_PAGE_OWNER
/*
* PAGE_OWNER may recurse into the allocator to allocate space to