diff mbox series

[1/4] mm/mempolicy: Support memory hotplug in weighted interleave

Message ID 20250307063534.540-2-rakie.kim@sk.com (mailing list archive)
State New
Headers show
Series mm/mempolicy: Add memory hotplug support in weighted interleave | expand

Commit Message

Rakie Kim March 7, 2025, 6:35 a.m. UTC
The weighted interleave policy distributes page allocations across multiple
NUMA nodes based on their performance weight, thereby optimizing memory
bandwidth utilization. The weight values for each node are configured
through sysfs.

Previously, the sysfs entries for configuring weighted interleave were only
created during initialization. This approach had several limitations:
- Sysfs entries were generated for all possible nodes at boot time,
  including nodes without memory, leading to unnecessary sysfs creation.
- Some memory devices transition to an online state after initialization,
  but the existing implementation failed to create sysfs entries for
  these dynamically added nodes. As a result, memory hotplugged nodes
  were not properly recognized by the weighted interleave mechanism.

To resolve these issues, this patch introduces two key improvements:
1) At initialization, only nodes that are online and have memory are
   recognized, preventing the creation of unnecessary sysfs entries.
2) Nodes that become available after initialization are dynamically
   detected and integrated through the memory hotplug mechanism.

With this enhancement, the weighted interleave policy now properly supports
memory hotplug, ensuring that newly added nodes are recognized and sysfs
entries are created accordingly.

Signed-off-by: Rakie Kim <rakie.kim@sk.com>
---
 mm/mempolicy.c | 44 +++++++++++++++++++++++++++++++++++++++-----
 1 file changed, 39 insertions(+), 5 deletions(-)
diff mbox series

Patch

diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index bbaadbeeb291..385607179ebd 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -113,6 +113,7 @@ 
 #include <asm/tlbflush.h>
 #include <asm/tlb.h>
 #include <linux/uaccess.h>
+#include <linux/memory.h>
 
 #include "internal.h"
 
@@ -3489,9 +3490,35 @@  static int add_weight_node(int nid, struct kobject *wi_kobj)
 	return 0;
 }
 
+static struct kobject *wi_kobj;	/* handed to add_weight_node() */
+
+/* Memory hotplug callback: on MEM_ONLINE, add sysfs for status_change_nid. */
+static int wi_node_notifier(struct notifier_block *nb,
+			       unsigned long action, void *data)
+{
+	int err;
+	struct memory_notify *arg = data;
+	int nid = arg->status_change_nid;
+
+	if (nid < 0)
+		return NOTIFY_OK;
+
+	switch (action) {
+	case MEM_ONLINE:
+		err = add_weight_node(nid, wi_kobj);
+		if (err) {
+			pr_err("failed to add sysfs [node%d]\n", nid);
+			/* wi_kobj is shared by all nodes; keep it */
+			return NOTIFY_BAD;
+		}
+		break;
+	}
+
+	return NOTIFY_OK;
+}
+
 static int add_weighted_interleave_group(struct kobject *root_kobj)
 {
-	struct kobject *wi_kobj;
 	int nid, err;
 
 	wi_kobj = kzalloc(sizeof(struct kobject), GFP_KERNEL);
@@ -3505,16 +3532,23 @@  static int add_weighted_interleave_group(struct kobject *root_kobj)
 		return err;
 	}
 
-	for_each_node_state(nid, N_POSSIBLE) {
+	for_each_online_node(nid) {
+		if (!node_state(nid, N_MEMORY))
+			continue;
+
 		err = add_weight_node(nid, wi_kobj);
 		if (err) {
 			pr_err("failed to add sysfs [node%d]\n", nid);
-			break;
+			goto err_out;
 		}
 	}
-	if (err)
-		kobject_put(wi_kobj);
+
+	hotplug_memory_notifier(wi_node_notifier, DEFAULT_CALLBACK_PRI);
 	return 0;
+
+err_out:
+	kobject_put(wi_kobj);
+	return err;
 }
 
 static void mempolicy_kobj_release(struct kobject *kobj)