diff mbox series

[RFC,V1,01/13] mm: Add kmmscand kernel daemon

Message ID 20250319193028.29514-2-raghavendra.kt@amd.com (mailing list archive)
State New
Headers show
Series mm: slowtier page promotion based on PTE A bit | expand

Commit Message

Raghavendra K T March 19, 2025, 7:30 p.m. UTC
Add a skeleton to support scanning and migration.
Also add a config option for the same.

High level design:

While (1):
  scan the slowtier pages belonging to VMAs of a task.
  Add to migation list

Separate thread:
  migrate scanned pages to a toptier node based on heuristics

The overall code is heavily influenced by khugepaged design.

Signed-off-by: Raghavendra K T <raghavendra.kt@amd.com>
---
 mm/Kconfig    |   8 +++
 mm/Makefile   |   1 +
 mm/kmmscand.c | 176 ++++++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 185 insertions(+)
 create mode 100644 mm/kmmscand.c
diff mbox series

Patch

diff --git a/mm/Kconfig b/mm/Kconfig
index 1b501db06417..5a4931633e15 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -783,6 +783,14 @@  config KSM
 	  until a program has madvised that an area is MADV_MERGEABLE, and
 	  root has set /sys/kernel/mm/ksm/run to 1 (if CONFIG_SYSFS is set).
 
+config KMMSCAND
+	bool "Enable PTE A bit scanning and Migration"
+	depends on NUMA_BALANCING
+	help
+	  Enable PTE A bit scanning of page. CXL pages accessed are migrated to
+	  a regular NUMA node. The option creates a separate kthread for
+	  scanning and migration.
+
 config DEFAULT_MMAP_MIN_ADDR
 	int "Low address space to protect from user allocation"
 	depends on MMU
diff --git a/mm/Makefile b/mm/Makefile
index 850386a67b3e..45e2f8cc8fd6 100644
--- a/mm/Makefile
+++ b/mm/Makefile
@@ -94,6 +94,7 @@  obj-$(CONFIG_FAIL_PAGE_ALLOC) += fail_page_alloc.o
 obj-$(CONFIG_MEMTEST)		+= memtest.o
 obj-$(CONFIG_MIGRATION) += migrate.o
 obj-$(CONFIG_NUMA) += memory-tiers.o
+obj-$(CONFIG_KMMSCAND) += kmmscand.o
 obj-$(CONFIG_DEVICE_MIGRATION) += migrate_device.o
 obj-$(CONFIG_TRANSPARENT_HUGEPAGE) += huge_memory.o khugepaged.o
 obj-$(CONFIG_PAGE_COUNTER) += page_counter.o
diff --git a/mm/kmmscand.c b/mm/kmmscand.c
new file mode 100644
index 000000000000..6c55250b5cfb
--- /dev/null
+++ b/mm/kmmscand.c
@@ -0,0 +1,176 @@ 
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/mm.h>
+#include <linux/mm_types.h>
+#include <linux/sched.h>
+#include <linux/sched/mm.h>
+#include <linux/mmu_notifier.h>
+#include <linux/swap.h>
+#include <linux/mm_inline.h>
+#include <linux/kthread.h>
+#include <linux/string.h>
+#include <linux/delay.h>
+#include <linux/cleanup.h>
+
+#include <asm/pgalloc.h>
+#include "internal.h"
+
+
+static struct task_struct *kmmscand_thread __read_mostly;
+static DEFINE_MUTEX(kmmscand_mutex);
+
+/* How long to pause between two scan and migration cycle */
+static unsigned int kmmscand_scan_sleep_ms __read_mostly = 16;
+
+/* Max number of mms to scan in one scan and migration cycle */
+#define KMMSCAND_MMS_TO_SCAN	(4 * 1024UL)
+static unsigned long kmmscand_mms_to_scan __read_mostly = KMMSCAND_MMS_TO_SCAN;
+
+bool kmmscand_scan_enabled = true;
+static bool need_wakeup;
+
+static unsigned long kmmscand_sleep_expire;
+
+static DECLARE_WAIT_QUEUE_HEAD(kmmscand_wait);
+
+struct kmmscand_scan {
+	struct list_head mm_head;
+};
+
+struct kmmscand_scan kmmscand_scan = {
+	.mm_head = LIST_HEAD_INIT(kmmscand_scan.mm_head),
+};
+
+static int kmmscand_has_work(void)
+{
+	return !list_empty(&kmmscand_scan.mm_head);
+}
+
+static bool kmmscand_should_wakeup(void)
+{
+	bool wakeup =  kthread_should_stop() || need_wakeup ||
+	       time_after_eq(jiffies, kmmscand_sleep_expire);
+	if (need_wakeup)
+		need_wakeup = false;
+
+	return wakeup;
+}
+
+static void kmmscand_wait_work(void)
+{
+	const unsigned long scan_sleep_jiffies =
+		msecs_to_jiffies(kmmscand_scan_sleep_ms);
+
+	if (!scan_sleep_jiffies)
+		return;
+
+	kmmscand_sleep_expire = jiffies + scan_sleep_jiffies;
+	wait_event_timeout(kmmscand_wait,
+			kmmscand_should_wakeup(),
+			scan_sleep_jiffies);
+	return;
+}
+
+static unsigned long kmmscand_scan_mm_slot(void)
+{
+	/* placeholder for scanning */
+	msleep(100);
+	return 0;
+}
+
+static void kmmscand_do_scan(void)
+{
+	unsigned long iter = 0, mms_to_scan;
+
+	mms_to_scan = READ_ONCE(kmmscand_mms_to_scan);
+
+	while (true) {
+		cond_resched();
+
+		if (unlikely(kthread_should_stop()) ||
+			!READ_ONCE(kmmscand_scan_enabled))
+			break;
+
+		if (kmmscand_has_work())
+			kmmscand_scan_mm_slot();
+
+		iter++;
+		if (iter >= mms_to_scan)
+			break;
+	}
+}
+
+static int kmmscand(void *none)
+{
+	for (;;) {
+		if (unlikely(kthread_should_stop()))
+			break;
+
+		kmmscand_do_scan();
+
+		while (!READ_ONCE(kmmscand_scan_enabled)) {
+			cpu_relax();
+			kmmscand_wait_work();
+		}
+
+		kmmscand_wait_work();
+	}
+	return 0;
+}
+
+static int start_kmmscand(void)
+{
+	int err = 0;
+
+	guard(mutex)(&kmmscand_mutex);
+
+	/* Some one already succeeded in starting daemon */
+	if (kmmscand_thread)
+		goto end;
+
+	kmmscand_thread = kthread_run(kmmscand, NULL, "kmmscand");
+	if (IS_ERR(kmmscand_thread)) {
+		pr_err("kmmscand: kthread_run(kmmscand) failed\n");
+		err = PTR_ERR(kmmscand_thread);
+		kmmscand_thread = NULL;
+		goto end;
+	} else {
+		pr_info("kmmscand: Successfully started kmmscand");
+	}
+
+	if (!list_empty(&kmmscand_scan.mm_head))
+		wake_up_interruptible(&kmmscand_wait);
+
+end:
+	return err;
+}
+
+static int stop_kmmscand(void)
+{
+	int err = 0;
+
+	guard(mutex)(&kmmscand_mutex);
+
+	if (kmmscand_thread) {
+		kthread_stop(kmmscand_thread);
+		kmmscand_thread = NULL;
+	}
+
+	return err;
+}
+
+static int __init kmmscand_init(void)
+{
+	int err;
+
+	err = start_kmmscand();
+	if (err)
+		goto err_kmmscand;
+
+	return 0;
+
+err_kmmscand:
+	stop_kmmscand();
+
+	return err;
+}
+subsys_initcall(kmmscand_init);