--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -1471,6 +1471,9 @@ config DYNAMIC_SIGFRAME
config HAVE_ARCH_NODE_DEV_GROUP
bool
+config LRU_TASK_PAGE_AGING
+ bool
+
config ARCH_HAS_NONLEAF_PMD_YOUNG
bool
help
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -233,6 +233,7 @@ config ARM64
select IRQ_FORCED_THREADING
select KASAN_VMALLOC if KASAN
select LOCK_MM_AND_FIND_VMA
+ select LRU_TASK_PAGE_AGING if LRU_GEN
select MODULES_USE_ELF_RELA
select NEED_DMA_MAP_STATE
select NEED_SG_DMA_LENGTH
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -281,6 +281,7 @@ config X86
select HOTPLUG_SPLIT_STARTUP if SMP && X86_32
select IRQ_FORCED_THREADING
select LOCK_MM_AND_FIND_VMA
+ select LRU_TASK_PAGE_AGING if LRU_GEN
select NEED_PER_CPU_EMBED_FIRST_CHUNK
select NEED_PER_CPU_PAGE_FIRST_CHUNK
select NEED_SG_DMA_LENGTH
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -321,7 +321,7 @@ struct mem_cgroup {
struct deferred_split deferred_split_queue;
#endif
-#ifdef CONFIG_LRU_GEN
+#ifdef CONFIG_LRU_TASK_PAGE_AGING
/* per-memcg mm_struct list */
struct lru_gen_mm_list mm_list;
#endif
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -793,7 +793,7 @@ struct mm_struct {
*/
unsigned long ksm_rmap_items;
#endif
-#ifdef CONFIG_LRU_GEN
+#ifdef CONFIG_LRU_TASK_PAGE_AGING
struct {
/* this mm_struct is on lru_gen_mm_list */
struct list_head list;
@@ -808,7 +808,7 @@ struct mm_struct {
struct mem_cgroup *memcg;
#endif
} lru_gen;
-#endif /* CONFIG_LRU_GEN */
+#endif /* CONFIG_LRU_TASK_PAGE_AGING */
} __randomize_layout;
/*
@@ -837,7 +837,7 @@ static inline cpumask_t *mm_cpumask(struct mm_struct *mm)
return (struct cpumask *)&mm->cpu_bitmap;
}
-#ifdef CONFIG_LRU_GEN
+#ifdef CONFIG_LRU_TASK_PAGE_AGING
struct lru_gen_mm_list {
/* mm_struct list for page table walkers */
@@ -871,7 +871,7 @@ static inline void lru_gen_use_mm(struct mm_struct *mm)
WRITE_ONCE(mm->lru_gen.bitmap, -1);
}
-#else /* !CONFIG_LRU_GEN */
+#else /* !CONFIG_LRU_TASK_PAGE_AGING */
static inline void lru_gen_add_mm(struct mm_struct *mm)
{
@@ -895,7 +895,7 @@ static inline void lru_gen_use_mm(struct mm_struct *mm)
{
}
-#endif /* CONFIG_LRU_GEN */
+#endif /* CONFIG_LRU_TASK_PAGE_AGING */
struct vma_iterator {
struct ma_state mas;
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -461,6 +461,7 @@ enum {
struct lru_gen_mm_state {
/* set to max_seq after each iteration */
unsigned long seq;
+#ifdef CONFIG_LRU_TASK_PAGE_AGING
/* where the current iteration continues after */
struct list_head *head;
/* where the last iteration ended before */
@@ -469,6 +470,11 @@ struct lru_gen_mm_state {
unsigned long *filters[NR_BLOOM_FILTERS];
/* the mm stats for debugging */
unsigned long stats[NR_HIST_GENS][NR_MM_STATS];
+#else
+ /* protect the seq update above */
+ /* Maybe we can use lruvec->lock instead? */
+ spinlock_t lock;
+#endif
};
struct lru_gen_mm_walk {
@@ -546,9 +552,13 @@ struct lru_gen_memcg {
};
void lru_gen_init_pgdat(struct pglist_data *pgdat);
-
+#ifdef CONFIG_LRU_TASK_PAGE_AGING
void lru_gen_init_memcg(struct mem_cgroup *memcg);
void lru_gen_exit_memcg(struct mem_cgroup *memcg);
+#else
+static inline void lru_gen_init_memcg(struct mem_cgroup *memcg) {}
+static inline void lru_gen_exit_memcg(struct mem_cgroup *memcg) {}
+#endif
void lru_gen_online_memcg(struct mem_cgroup *memcg);
void lru_gen_offline_memcg(struct mem_cgroup *memcg);
void lru_gen_release_memcg(struct mem_cgroup *memcg);
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -2932,7 +2932,7 @@ pid_t kernel_clone(struct kernel_clone_args *args)
get_task_struct(p);
}
- if (IS_ENABLED(CONFIG_LRU_GEN) && !(clone_flags & CLONE_VM)) {
+ if (IS_ENABLED(CONFIG_LRU_TASK_PAGE_AGING) && !(clone_flags & CLONE_VM)) {
/* lock the task to synchronize with memcg migration */
task_lock(p);
lru_gen_add_mm(p->mm);
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -6357,7 +6357,7 @@ static void mem_cgroup_move_task(void)
}
#endif
-#ifdef CONFIG_LRU_GEN
+#ifdef CONFIG_LRU_TASK_PAGE_AGING
static void mem_cgroup_attach(struct cgroup_taskset *tset)
{
struct task_struct *task;
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -3244,10 +3244,17 @@ DEFINE_STATIC_KEY_ARRAY_FALSE(lru_gen_caps, NR_LRU_GEN_CAPS);
#define get_cap(cap) static_branch_unlikely(&lru_gen_caps[cap])
#endif
+#ifdef CONFIG_LRU_TASK_PAGE_AGING
static bool should_walk_mmu(void)
{
return arch_has_hw_pte_young() && get_cap(LRU_GEN_MM_WALK);
}
+#else
+static bool should_walk_mmu(void)
+{
+ return false;
+}
+#endif
static bool should_clear_pmd_young(void)
{
@@ -3588,6 +3595,8 @@ static void clear_mm_walk(void)
kfree(walk);
}
+#ifdef CONFIG_LRU_TASK_PAGE_AGING
+
/******************************************************************************
* Bloom filters
******************************************************************************/
@@ -4382,6 +4391,33 @@ static bool iterate_mm_list_walk(struct lruvec *lruvec, unsigned long max_seq,
return success;
}
+#else
+
+static bool iterate_mm_list_nowalk(struct lruvec *lruvec, unsigned long max_seq)
+{
+ bool success = false;
+ struct lru_gen_mm_state *mm_state = &lruvec->mm_state;
+
+ spin_lock(&mm_state->lock);
+
+ VM_WARN_ON_ONCE(mm_state->seq + 1 < max_seq);
+
+ if (max_seq > mm_state->seq) {
+ WRITE_ONCE(mm_state->seq, mm_state->seq + 1);
+ success = true;
+ }
+
+ spin_unlock(&mm_state->lock);
+
+ return success;
+}
+
+static bool iterate_mm_list_walk(struct lruvec *lruvec, unsigned long max_seq,
+ bool can_swap, bool force_scan)
+{
+ return false;
+}
+#endif
static bool inc_min_seq(struct lruvec *lruvec, int type, bool can_swap)
{
@@ -4744,9 +4780,11 @@ void lru_gen_look_around(struct page_vma_mapped_walk *pvmw)
arch_leave_lazy_mmu_mode();
mem_cgroup_unlock_pages();
+#ifdef CONFIG_LRU_TASK_PAGE_AGING
/* feedback from rmap walkers to page table walkers */
if (suitable_to_scan(i, young))
update_bloom_filter(lruvec, max_seq, pvmw->pmd);
+#endif
}
/******************************************************************************
@@ -5896,6 +5934,7 @@ static void lru_gen_seq_show_full(struct seq_file *m, struct lruvec *lruvec,
seq_putc(m, '\n');
}
+#ifdef CONFIG_LRU_TASK_PAGE_AGING
seq_puts(m, " ");
for (i = 0; i < NR_MM_STATS; i++) {
const char *s = " ";
@@ -5912,6 +5951,7 @@ static void lru_gen_seq_show_full(struct seq_file *m, struct lruvec *lruvec,
seq_printf(m, " %10lu%c", n, s[i]);
}
seq_putc(m, '\n');
+#endif
}
/* see Documentation/admin-guide/mm/multigen_lru.rst for details */
@@ -6186,6 +6226,9 @@ void lru_gen_init_lruvec(struct lruvec *lruvec)
INIT_LIST_HEAD(&lrugen->folios[gen][type][zone]);
lruvec->mm_state.seq = MIN_NR_GENS;
+#ifndef CONFIG_LRU_TASK_PAGE_AGING
+ spin_lock_init(&lruvec->mm_state.lock);
+#endif
}
#ifdef CONFIG_MEMCG
@@ -6202,6 +6245,7 @@ void lru_gen_init_pgdat(struct pglist_data *pgdat)
}
}
+#ifdef CONFIG_LRU_TASK_PAGE_AGING
void lru_gen_init_memcg(struct mem_cgroup *memcg)
{
INIT_LIST_HEAD(&memcg->mm_list.fifo);
@@ -6229,6 +6273,7 @@ void lru_gen_exit_memcg(struct mem_cgroup *memcg)
}
}
}
+#endif
#endif /* CONFIG_MEMCG */
Not all architectures support hardware atomic updates of access bits. On
such architectures, we don't use a page table walk to classify pages into
generations. Add a kernel config option and avoid building all the page
table walk code on such architectures.

This avoids calling lru_gen related code (lru_gen_add/remove/migrate_mm)
in fork/exit/context switch. With this change we also don't build
components like the Bloom filter and the page table walk code (walk_mm
and related code) on unsupported architectures.

No performance change observed with the mongodb ycsb test:

 Patch details        Throughput(Ops/sec)
 without patch              91252
 With patch                 91488

Without patch:
$ size mm/vmscan.o
   text    data     bss     dec     hex filename
 116016   36857      40  152913   25551 mm/vmscan.o

With patch:
$ size mm/vmscan.o
   text    data     bss     dec     hex filename
 112864   36437      40  149341   2475d mm/vmscan.o

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
---
 arch/Kconfig               |  3 +++
 arch/arm64/Kconfig         |  1 +
 arch/x86/Kconfig           |  1 +
 include/linux/memcontrol.h |  2 +-
 include/linux/mm_types.h   | 10 ++++-----
 include/linux/mmzone.h     | 12 +++++++++-
 kernel/fork.c              |  2 +-
 mm/memcontrol.c            |  2 +-
 mm/vmscan.c                | 45 ++++++++++++++++++++++++++++++++++++++
 9 files changed, 69 insertions(+), 9 deletions(-)
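
Note (illustration only, not part of the patch): any other architecture
whose hardware updates the PTE accessed bit can opt in to the walk-based
aging the same way the arm64 and x86 hunks above do, by selecting the new
symbol from its own Kconfig entry. A minimal sketch, using a made-up
architecture symbol ARCH_FOO:

config ARCH_FOO
	def_bool y
	# Hypothetical opt-in mirroring the arm64/x86 changes in this
	# patch; presumably only useful when the architecture also
	# implements arch_has_hw_pte_young(), since should_walk_mmu()
	# checks it at runtime.
	select LRU_TASK_PAGE_AGING if LRU_GEN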