
[v5,3/3] mm/mempolicy: Support memory hotplug in weighted interleave

Message ID 20250402014906.1086-4-rakie.kim@sk.com
State New
Series Enhance sysfs handling for memory hotplug in weighted interleave

Commit Message

Rakie Kim April 2, 2025, 1:49 a.m. UTC
The weighted interleave policy distributes page allocations across multiple
NUMA nodes based on their performance weight, thereby improving memory
bandwidth utilization. The weight values for each node are configured
through sysfs.

Previously, sysfs entries for configuring weighted interleave were created
for all possible nodes (N_POSSIBLE) at initialization, including nodes that
might not have memory. However, not all nodes in N_POSSIBLE are usable at
runtime, as some may remain memoryless or offline.
This led to sysfs entries being created for unusable nodes, causing
potential misconfiguration issues.

To address this issue, this patch modifies the sysfs creation logic to:
1) Limit sysfs entries to nodes that are online and have memory, avoiding
   the creation of sysfs entries for nodes that cannot be used.
2) Support memory hotplug by dynamically adding and removing sysfs entries
   based on whether a node transitions into or out of the N_MEMORY state.

Additionally, the patch ensures that sysfs attributes are properly managed
when nodes go offline, preventing stale or redundant entries from persisting
in the system.

By making these changes, the weighted interleave policy now manages its
sysfs entries more efficiently, ensuring that only relevant nodes are
considered for interleaving, and dynamically adapting to memory hotplug
events.
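
In condensed form, the change boils down to the node filter at
group-creation time plus a memory hotplug notifier that keeps the entries
in sync.  A sketch distilled from the diff below (the _sketch suffix marks
it as an illustration only; allocation, kobject setup and error unwinding
are omitted):

/*
 * Condensed sketch of add_weighted_interleave_group() as modified by this
 * patch: create sysfs entries only for nodes in N_MEMORY, then register a
 * hotplug notifier so the entries follow node memory-state transitions.
 */
static int add_weighted_interleave_group_sketch(struct kobject *mempolicy_kobj)
{
	int nid, err;

	/* ... wi_group allocation and kobject_init_and_add() as in the diff ... */

	for_each_online_node(nid) {
		if (!node_state(nid, N_MEMORY))
			continue;	/* skip memoryless or offline nodes */

		err = sysfs_wi_node_add(nid);
		if (err)
			return err;	/* the real code unwinds the kobject */
	}

	/* Track nodes gaining or losing memory at runtime. */
	hotplug_memory_notifier(wi_node_notifier, DEFAULT_CALLBACK_PRI);
	return 0;
}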

Signed-off-by: Rakie Kim <rakie.kim@sk.com>
Signed-off-by: Honggyu Kim <honggyu.kim@sk.com>
Signed-off-by: Yunjeong Mun <yunjeong.mun@sk.com>
---
 mm/mempolicy.c | 110 ++++++++++++++++++++++++++++++++++++++-----------
 1 file changed, 87 insertions(+), 23 deletions(-)

Comments

Honggyu Kim April 2, 2025, 4:18 a.m. UTC | #1
Hi Rakie,

This is to fix the breakage reported here:
https://lore.kernel.org/linux-mm/b8ac8654-92bd-4c08-a3fc-e28a7be5e0e6@sk.com

So we must add the following tag for this patch.
Fixes: fa3bea4e1f82 ("mm/mempolicy: introduce MPOL_WEIGHTED_INTERLEAVE for weighted interleaving")


Hi Gregory,

This patch is already in Andrew's tree. Is the current version okay for you?


Hi Andrew,

I'm not sure if this is going to be the final version, but could you please
add this patch to stable with Cc: <stable@vger.kernel.org>?
We might need to bring the whole series to avoid conflicts in the stable tree.

Thanks,
Honggyu

On 4/2/2025 10:49 AM, Rakie Kim wrote:
> The weighted interleave policy distributes page allocations across multiple
> NUMA nodes based on their performance weight, thereby improving memory
> bandwidth utilization. The weight values for each node are configured
> through sysfs.
> 
> Previously, sysfs entries for configuring weighted interleave were created
> for all possible nodes (N_POSSIBLE) at initialization, including nodes that
> might not have memory. However, not all nodes in N_POSSIBLE are usable at
> runtime, as some may remain memoryless or offline.
> This led to sysfs entries being created for unusable nodes, causing
> potential misconfiguration issues.
> 
> To address this issue, this patch modifies the sysfs creation logic to:
> 1) Limit sysfs entries to nodes that are online and have memory, avoiding
>     the creation of sysfs entries for nodes that cannot be used.
> 2) Support memory hotplug by dynamically adding and removing sysfs entries
>     based on whether a node transitions into or out of the N_MEMORY state.
> 
> Additionally, the patch ensures that sysfs attributes are properly managed
> when nodes go offline, preventing stale or redundant entries from persisting
> in the system.
> 
> By making these changes, the weighted interleave policy now manages its
> sysfs entries more efficiently, ensuring that only relevant nodes are
> considered for interleaving, and dynamically adapting to memory hotplug
> events.
> 
> Signed-off-by: Rakie Kim <rakie.kim@sk.com>
> Signed-off-by: Honggyu Kim <honggyu.kim@sk.com>
> Signed-off-by: Yunjeong Mun <yunjeong.mun@sk.com>
> ---
>   mm/mempolicy.c | 110 ++++++++++++++++++++++++++++++++++++++-----------
>   1 file changed, 87 insertions(+), 23 deletions(-)
> 
> diff --git a/mm/mempolicy.c b/mm/mempolicy.c
> index 3092a792bd28..ea4f3f3f2cdd 100644
> --- a/mm/mempolicy.c
> +++ b/mm/mempolicy.c
> @@ -113,6 +113,7 @@
>   #include <asm/tlbflush.h>
>   #include <asm/tlb.h>
>   #include <linux/uaccess.h>
> +#include <linux/memory.h>
>   
>   #include "internal.h"
>   
> @@ -3390,6 +3391,7 @@ struct iw_node_attr {
>   
>   struct sysfs_wi_group {
>   	struct kobject wi_kobj;
> +	struct mutex kobj_lock;
>   	struct iw_node_attr *nattrs[];
>   };
>   
> @@ -3439,13 +3441,24 @@ static ssize_t node_store(struct kobject *kobj, struct kobj_attribute *attr,
>   
>   static void sysfs_wi_node_release(int nid)
>   {
> -	if (!wi_group->nattrs[nid])
> +	struct iw_node_attr *attr;
> +
> +	if (nid < 0 || nid >= nr_node_ids)
> +		return;
> +
> +	mutex_lock(&wi_group->kobj_lock);
> +	attr = wi_group->nattrs[nid];
> +	if (!attr) {
> +		mutex_unlock(&wi_group->kobj_lock);
>   		return;
> +	}
> +
> +	wi_group->nattrs[nid] = NULL;
> +	mutex_unlock(&wi_group->kobj_lock);
>   
> -	sysfs_remove_file(&wi_group->wi_kobj,
> -			  &wi_group->nattrs[nid]->kobj_attr.attr);
> -	kfree(wi_group->nattrs[nid]->kobj_attr.attr.name);
> -	kfree(wi_group->nattrs[nid]);
> +	sysfs_remove_file(&wi_group->wi_kobj, &attr->kobj_attr.attr);
> +	kfree(attr->kobj_attr.attr.name);
> +	kfree(attr);
>   }
>   
>   static void sysfs_wi_release(struct kobject *wi_kobj)
> @@ -3464,35 +3477,81 @@ static const struct kobj_type wi_ktype = {
>   
>   static int sysfs_wi_node_add(int nid)
>   {
> -	struct iw_node_attr *node_attr;
> +	int ret = 0;
>   	char *name;
> +	struct iw_node_attr *new_attr = NULL;
>   
> -	node_attr = kzalloc(sizeof(*node_attr), GFP_KERNEL);
> -	if (!node_attr)
> +	if (nid < 0 || nid >= nr_node_ids) {
> +		pr_err("Invalid node id: %d\n", nid);
> +		return -EINVAL;
> +	}
> +
> +	new_attr = kzalloc(sizeof(struct iw_node_attr), GFP_KERNEL);
> +	if (!new_attr)
>   		return -ENOMEM;
>   
>   	name = kasprintf(GFP_KERNEL, "node%d", nid);
>   	if (!name) {
> -		kfree(node_attr);
> +		kfree(new_attr);
>   		return -ENOMEM;
>   	}
>   
> -	sysfs_attr_init(&node_attr->kobj_attr.attr);
> -	node_attr->kobj_attr.attr.name = name;
> -	node_attr->kobj_attr.attr.mode = 0644;
> -	node_attr->kobj_attr.show = node_show;
> -	node_attr->kobj_attr.store = node_store;
> -	node_attr->nid = nid;
> +	mutex_lock(&wi_group->kobj_lock);
> +	if (wi_group->nattrs[nid]) {
> +		mutex_unlock(&wi_group->kobj_lock);
> +		pr_info("Node [%d] already exists\n", nid);
> +		kfree(new_attr);
> +		kfree(name);
> +		return 0;
> +	}
> +	wi_group->nattrs[nid] = new_attr;
>   
> -	if (sysfs_create_file(&wi_group->wi_kobj, &node_attr->kobj_attr.attr)) {
> -		kfree(node_attr->kobj_attr.attr.name);
> -		kfree(node_attr);
> -		pr_err("failed to add attribute to weighted_interleave\n");
> -		return -ENOMEM;
> +	sysfs_attr_init(&wi_group->nattrs[nid]->kobj_attr.attr);
> +	wi_group->nattrs[nid]->kobj_attr.attr.name = name;
> +	wi_group->nattrs[nid]->kobj_attr.attr.mode = 0644;
> +	wi_group->nattrs[nid]->kobj_attr.show = node_show;
> +	wi_group->nattrs[nid]->kobj_attr.store = node_store;
> +	wi_group->nattrs[nid]->nid = nid;
> +
> +	ret = sysfs_create_file(&wi_group->wi_kobj,
> +				&wi_group->nattrs[nid]->kobj_attr.attr);
> +	if (ret) {
> +		kfree(wi_group->nattrs[nid]->kobj_attr.attr.name);
> +		kfree(wi_group->nattrs[nid]);
> +		wi_group->nattrs[nid] = NULL;
> +		pr_err("Failed to add attribute to weighted_interleave: %d\n", ret);
>   	}
> +	mutex_unlock(&wi_group->kobj_lock);
>   
> -	wi_group->nattrs[nid] = node_attr;
> -	return 0;
> +	return ret;
> +}
> +
> +static int wi_node_notifier(struct notifier_block *nb,
> +			       unsigned long action, void *data)
> +{
> +	int err;
> +	struct memory_notify *arg = data;
> +	int nid = arg->status_change_nid;
> +
> +	if (nid < 0)
> +		goto notifier_end;
> +
> +	switch(action) {
> +	case MEM_ONLINE:
> +		err = sysfs_wi_node_add(nid);
> +		if (err) {
> +			pr_err("failed to add sysfs [node%d]\n", nid);
> +			return NOTIFY_BAD;
> +		}
> +		break;
> +	case MEM_OFFLINE:
> +		if (!node_state(nid, N_MEMORY))
> +			sysfs_wi_node_release(nid);
> +		break;
> +	}
> +
> +notifier_end:
> +	return NOTIFY_OK;
>   }
>   
>   static int add_weighted_interleave_group(struct kobject *mempolicy_kobj)
> @@ -3504,13 +3563,17 @@ static int add_weighted_interleave_group(struct kobject *mempolicy_kobj)
>   		       GFP_KERNEL);
>   	if (!wi_group)
>   		return -ENOMEM;
> +	mutex_init(&wi_group->kobj_lock);
>   
>   	err = kobject_init_and_add(&wi_group->wi_kobj, &wi_ktype, mempolicy_kobj,
>   				   "weighted_interleave");
>   	if (err)
>   		goto err_out;
>   
> -	for_each_node_state(nid, N_POSSIBLE) {
> +	for_each_online_node(nid) {
> +		if (!node_state(nid, N_MEMORY))
> +			continue;
> +
>   		err = sysfs_wi_node_add(nid);
>   		if (err) {
>   			pr_err("failed to add sysfs [node%d]\n", nid);
> @@ -3518,6 +3581,7 @@ static int add_weighted_interleave_group(struct kobject *mempolicy_kobj)
>   		}
>   	}
>   
> +	hotplug_memory_notifier(wi_node_notifier, DEFAULT_CALLBACK_PRI);
>   	return 0;
>   
>   err_out:
Andrew Morton April 2, 2025, 4:34 a.m. UTC | #2
On Wed, 2 Apr 2025 13:18:51 +0900 Honggyu Kim <honggyu.kim@sk.com> wrote:

> This is to fix the following broken status.
> https://lore.kernel.org/linux-mm/b8ac8654-92bd-4c08-a3fc-e28a7be5e0e6@sk.com
> 
> So we must add the following tag for this patch.
> Fixes: fa3bea4e1f82 ("mm/mempolicy: introduce MPOL_WEIGHTED_INTERLEAVE for 
> weighted interleaving")
> 
> 
> Hi Gregory,
> 
> This patch is already in Andrew's tree. Is the current version okay for you?
> 
> 
> Hi Andrew,
> 
> I'm not sure if this is going to the final version but could you please add this
> patch to stable with Cc: <stable@vger.kernel.org>?
> We might need to bring the whole series to avoid conflicts to stable tree.

This is all rather confused.

Do we need to backport all three patches into -stable?  If so, all three
should have Fixes:.  Preferably they all have the same Fixes: so we
aren't backporting untested patch combinations.

I presently have:

Subject: mm/mempolicy: fix memory leaks in weighted interleave sysfs
Fixes: dce41f5ae253 ("mm/mempolicy: implement the sysfs-based weighted_interleave interface")
Cc: <stable@vger.kernel.org>

Subject: mm/mempolicy: support dynamic sysfs updates for weighted interleave
Cc: <stable@vger.kernel.org>

Subject: mm/mempolicy: support memory hotplug in weighted interleave
Fixes: fa3bea4e1f82 ("mm/mempolicy: introduce MPOL_WEIGHTED_INTERLEAVE for weighted interleaving")
Cc: <stable@vger.kernel.org>


[2/3] doesn't look like a fix.

Perhaps the whole thing should be redone: a two-patch bugfix series,
each with a Fixes: and a cc:stable and feature patch with neither a
Fixes: nor a cc:stable.
Gregory Price April 2, 2025, 5:15 a.m. UTC | #3
On Tue, Apr 01, 2025 at 09:34:39PM -0700, Andrew Morton wrote:
> > I'm not sure if this is going to the final version but could you please add this
> > patch to stable with Cc: <stable@vger.kernel.org>?
> > We might need to bring the whole series to avoid conflicts to stable tree.
> 
> This is all rather confused.
> 
> Do we need to backport all three patches into -stable?  If so, all three
> should have Fixes:.  Preferably they all have the same Fixes: so we
> aren't backporting untested patch combinations.
>

I'd just as soon leave the entire thing out of stable backports.

Only the first patch actually fixes a bug (very small memory leak
during system tear down, so it's not even a critical bug). This could
be added to stable.

Patches 2 and 3 aren't fixes, they're desired behavioral changes
to the feature. An interface being confusing doesn't mean it's broken.

~Gregory
Gregory Price April 2, 2025, 5:23 a.m. UTC | #4
On Wed, Apr 02, 2025 at 01:15:48AM -0400, Gregory Price wrote:
> On Tue, Apr 01, 2025 at 09:34:39PM -0700, Andrew Morton wrote:
> > 
> > Do we need to backport all three patches into -stable?  If so, all three
> > should have Fixes:.  Preferably they all have the same Fixes: so we
> > aren't backporting untested patch combinations.
> >
> 
> Patches 2 and 3 aren't fixes, they're desired behavioral changes
> to the feature. An interface being confusing doesn't mean it's broken.
>

Just some added clarity:

The behavioral change in sysfs goes from N_POSSIBLE nodes always being
exposed to sysfs entries coming and going as nodes transition between
having memory and not having memory.

I can see the argument for wanting this backported as a fix to ensure
userspace software is consistent across LTS.

So for backporting: all or nothing - and all the fixes tags should be
the same.

~Gregory
Honggyu Kim April 2, 2025, 5:32 a.m. UTC | #5
Hi Gregory,

On 4/2/2025 2:15 PM, Gregory Price wrote:
> On Tue, Apr 01, 2025 at 09:34:39PM -0700, Andrew Morton wrote:
>>> I'm not sure if this is going to the final version but could you please add this
>>> patch to stable with Cc: <stable@vger.kernel.org>?
>>> We might need to bring the whole series to avoid conflicts to stable tree.
>>
>> This is all rather confused.
>>
>> Do we need to backport all three patches into -stable?  If so, all three
>> should have Fixes:.  Preferably they all have the same Fixes: so we
>> aren't backporting untested patch combinations.
>>
> 
> I'd just as soon leave the entire thing out of stable backports.
> 
> Only the first patch actually fixes a bug (very small memory leak
> during system tear down, so it's not even a critical bug). This could
> be added to stable.
> 
> Patches 2 and 3 aren't fixes, they're desired behavioral changes
> to the feature. An interface being confusing doesn't mean it's broken.

I think those are fixes, but I also agree that they are behavioral changes as
well.  So I'm fine with leaving these out of stable backports.


Hi Andrew,

I won't ask for changes, so please keep the current status as is.  Sorry for
the confusion.

Thanks,
Honggyu

> 
> ~Gregory
Oscar Salvador April 2, 2025, 9:51 a.m. UTC | #6
On Wed, Apr 02, 2025 at 10:49:04AM +0900, Rakie Kim wrote:
> The weighted interleave policy distributes page allocations across multiple
> NUMA nodes based on their performance weight, thereby improving memory
> bandwidth utilization. The weight values for each node are configured
> through sysfs.
> 
> Previously, sysfs entries for configuring weighted interleave were created
> for all possible nodes (N_POSSIBLE) at initialization, including nodes that
> might not have memory. However, not all nodes in N_POSSIBLE are usable at
> runtime, as some may remain memoryless or offline.
> This led to sysfs entries being created for unusable nodes, causing
> potential misconfiguration issues.
> 
> To address this issue, this patch modifies the sysfs creation logic to:
> 1) Limit sysfs entries to nodes that are online and have memory, avoiding
>    the creation of sysfs entries for nodes that cannot be used.
> 2) Support memory hotplug by dynamically adding and removing sysfs entries
>    based on whether a node transitions into or out of the N_MEMORY state.
> 
> Additionally, the patch ensures that sysfs attributes are properly managed
> when nodes go offline, preventing stale or redundant entries from persisting
> in the system.
> 
> By making these changes, the weighted interleave policy now manages its
> sysfs entries more efficiently, ensuring that only relevant nodes are
> considered for interleaving, and dynamically adapting to memory hotplug
> events.
> 
> Signed-off-by: Rakie Kim <rakie.kim@sk.com>
> Signed-off-by: Honggyu Kim <honggyu.kim@sk.com>
> Signed-off-by: Yunjeong Mun <yunjeong.mun@sk.com>

Sorry to jump in at v5, and maybe this was already discussed, but:

> +static int wi_node_notifier(struct notifier_block *nb,
> +			       unsigned long action, void *data)
> +{
> +	int err;
> +	struct memory_notify *arg = data;
> +	int nid = arg->status_change_nid;
> +
> +	if (nid < 0)
> +		goto notifier_end;
> +
> +	switch(action) {
> +	case MEM_ONLINE:
> +		err = sysfs_wi_node_add(nid);
> +		if (err) {
> +			pr_err("failed to add sysfs [node%d]\n", nid);
> +			return NOTIFY_BAD;
> +		}
> +		break;
> +	case MEM_OFFLINE:
> +		if (!node_state(nid, N_MEMORY))
> +			sysfs_wi_node_release(nid);

This check is not needed.

If status_change_nid is different than NUMA_NO_NODE, it means that the memory
state of the numa node was effectively changed.
So doing:
 
  case MEM_OFFLINE:
          sysfs_wi_node_release(nid)
 
is enough.
Gregory Price April 2, 2025, 2:14 p.m. UTC | #7
On Wed, Apr 02, 2025 at 11:51:17AM +0200, Oscar Salvador wrote:
> > +	case MEM_OFFLINE:
> > +		if (!node_state(nid, N_MEMORY))
> > +			sysfs_wi_node_release(nid);
> 
> This check is not needed.
> 
> If status_change_nid is different than NUMA_NO_NODE, it means that the memory
> state of the numa node was effectively changed.
> So doing:
>  
>   case MEM_OFFLINE:
>           sysfs_wi_node_release(nid)
>  
> is enough.

offline_pages() will call this callback unconditionally any time it is
called (and succeeds).  If two DAX devices are onlined into the same node,
it is possible to offline one chunk of blocks without offlining the other
chunks, which is why we added the additional check.

~Gregory
Oscar Salvador April 2, 2025, 3:41 p.m. UTC | #8
On Wed, Apr 02, 2025 at 10:14:42AM -0400, Gregory Price wrote:
> On Wed, Apr 02, 2025 at 11:51:17AM +0200, Oscar Salvador wrote:
> > > +	case MEM_OFFLINE:
> > > +		if (!node_state(nid, N_MEMORY))
> > > +			sysfs_wi_node_release(nid);
> > 
> > This check is not needed.
> > 
> > If status_change_nid is different than NUMA_NO_NODE, it means that the memory
> > state of the numa node was effectively changed.
> > So doing:
> >  
> >   case MEM_OFFLINE:
> >           sysfs_wi_node_release(nid)
> >  
> > is enough.
> 
> offline_pages will call this callback unconditionally any time it's
> called (and succeeds).  If 2 dax devices online into the same node, it's

Yes, this callback will be called whenever {online,offline}_pages succeeds, but
status_change_nid will be != NUMA_NO_NODE IFF the node state changes.
And you already have the check

 if (nid < 0)
    goto out

at the beginning, which means that all {offline,online}_pages operations that
do not carry a NUMA node state change will be filtered out there.

Makes sense, or am I missing something?
Gregory Price April 2, 2025, 4:36 p.m. UTC | #9
On Wed, Apr 02, 2025 at 05:41:57PM +0200, Oscar Salvador wrote:
> 
> Yes, this callback will be called whenever {online,offline}_pages succeeds, but
> status_change_nid will be != NUMA_NO_NODE IFF the node state changes.
> And you already have the check
> 
>  if (nid < 0)
>     goto out
> 
> at the beginning, which means that all {offline,online}_pages operation that
> do not carry a numa node state change will be filtered out there.
> 
> Makes sense, or am I missing something?
>

Ah, you're quite right.  That was difficult to see on the surface, so
the check is in fact superfluous.  No need for an extra version; we can
just add a squash patch that drops it.

~Gregory
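
Once the check is dropped as agreed above, the handler reduces to the
following minimal sketch (an illustration of the agreed simplification,
not the code currently in Andrew's tree):

/*
 * wi_node_notifier() with the MEM_OFFLINE node_state() check removed.
 * status_change_nid is NUMA_NO_NODE (< 0) unless the node-level memory
 * state actually changed, so the early return already filters offline
 * events that leave the node with memory.
 */
static int wi_node_notifier(struct notifier_block *nb,
			    unsigned long action, void *data)
{
	struct memory_notify *arg = data;
	int nid = arg->status_change_nid;

	if (nid < 0)
		return NOTIFY_OK;

	switch (action) {
	case MEM_ONLINE:
		if (sysfs_wi_node_add(nid)) {
			pr_err("failed to add sysfs [node%d]\n", nid);
			return NOTIFY_BAD;
		}
		break;
	case MEM_OFFLINE:
		sysfs_wi_node_release(nid);	/* node lost its last memory */
		break;
	}

	return NOTIFY_OK;
}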

Patch

diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index 3092a792bd28..ea4f3f3f2cdd 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -113,6 +113,7 @@ 
 #include <asm/tlbflush.h>
 #include <asm/tlb.h>
 #include <linux/uaccess.h>
+#include <linux/memory.h>
 
 #include "internal.h"
 
@@ -3390,6 +3391,7 @@  struct iw_node_attr {
 
 struct sysfs_wi_group {
 	struct kobject wi_kobj;
+	struct mutex kobj_lock;
 	struct iw_node_attr *nattrs[];
 };
 
@@ -3439,13 +3441,24 @@  static ssize_t node_store(struct kobject *kobj, struct kobj_attribute *attr,
 
 static void sysfs_wi_node_release(int nid)
 {
-	if (!wi_group->nattrs[nid])
+	struct iw_node_attr *attr;
+
+	if (nid < 0 || nid >= nr_node_ids)
+		return;
+
+	mutex_lock(&wi_group->kobj_lock);
+	attr = wi_group->nattrs[nid];
+	if (!attr) {
+		mutex_unlock(&wi_group->kobj_lock);
 		return;
+	}
+
+	wi_group->nattrs[nid] = NULL;
+	mutex_unlock(&wi_group->kobj_lock);
 
-	sysfs_remove_file(&wi_group->wi_kobj,
-			  &wi_group->nattrs[nid]->kobj_attr.attr);
-	kfree(wi_group->nattrs[nid]->kobj_attr.attr.name);
-	kfree(wi_group->nattrs[nid]);
+	sysfs_remove_file(&wi_group->wi_kobj, &attr->kobj_attr.attr);
+	kfree(attr->kobj_attr.attr.name);
+	kfree(attr);
 }
 
 static void sysfs_wi_release(struct kobject *wi_kobj)
@@ -3464,35 +3477,81 @@  static const struct kobj_type wi_ktype = {
 
 static int sysfs_wi_node_add(int nid)
 {
-	struct iw_node_attr *node_attr;
+	int ret = 0;
 	char *name;
+	struct iw_node_attr *new_attr = NULL;
 
-	node_attr = kzalloc(sizeof(*node_attr), GFP_KERNEL);
-	if (!node_attr)
+	if (nid < 0 || nid >= nr_node_ids) {
+		pr_err("Invalid node id: %d\n", nid);
+		return -EINVAL;
+	}
+
+	new_attr = kzalloc(sizeof(struct iw_node_attr), GFP_KERNEL);
+	if (!new_attr)
 		return -ENOMEM;
 
 	name = kasprintf(GFP_KERNEL, "node%d", nid);
 	if (!name) {
-		kfree(node_attr);
+		kfree(new_attr);
 		return -ENOMEM;
 	}
 
-	sysfs_attr_init(&node_attr->kobj_attr.attr);
-	node_attr->kobj_attr.attr.name = name;
-	node_attr->kobj_attr.attr.mode = 0644;
-	node_attr->kobj_attr.show = node_show;
-	node_attr->kobj_attr.store = node_store;
-	node_attr->nid = nid;
+	mutex_lock(&wi_group->kobj_lock);
+	if (wi_group->nattrs[nid]) {
+		mutex_unlock(&wi_group->kobj_lock);
+		pr_info("Node [%d] already exists\n", nid);
+		kfree(new_attr);
+		kfree(name);
+		return 0;
+	}
+	wi_group->nattrs[nid] = new_attr;
 
-	if (sysfs_create_file(&wi_group->wi_kobj, &node_attr->kobj_attr.attr)) {
-		kfree(node_attr->kobj_attr.attr.name);
-		kfree(node_attr);
-		pr_err("failed to add attribute to weighted_interleave\n");
-		return -ENOMEM;
+	sysfs_attr_init(&wi_group->nattrs[nid]->kobj_attr.attr);
+	wi_group->nattrs[nid]->kobj_attr.attr.name = name;
+	wi_group->nattrs[nid]->kobj_attr.attr.mode = 0644;
+	wi_group->nattrs[nid]->kobj_attr.show = node_show;
+	wi_group->nattrs[nid]->kobj_attr.store = node_store;
+	wi_group->nattrs[nid]->nid = nid;
+
+	ret = sysfs_create_file(&wi_group->wi_kobj,
+				&wi_group->nattrs[nid]->kobj_attr.attr);
+	if (ret) {
+		kfree(wi_group->nattrs[nid]->kobj_attr.attr.name);
+		kfree(wi_group->nattrs[nid]);
+		wi_group->nattrs[nid] = NULL;
+		pr_err("Failed to add attribute to weighted_interleave: %d\n", ret);
 	}
+	mutex_unlock(&wi_group->kobj_lock);
 
-	wi_group->nattrs[nid] = node_attr;
-	return 0;
+	return ret;
+}
+
+static int wi_node_notifier(struct notifier_block *nb,
+			       unsigned long action, void *data)
+{
+	int err;
+	struct memory_notify *arg = data;
+	int nid = arg->status_change_nid;
+
+	if (nid < 0)
+		goto notifier_end;
+
+	switch(action) {
+	case MEM_ONLINE:
+		err = sysfs_wi_node_add(nid);
+		if (err) {
+			pr_err("failed to add sysfs [node%d]\n", nid);
+			return NOTIFY_BAD;
+		}
+		break;
+	case MEM_OFFLINE:
+		if (!node_state(nid, N_MEMORY))
+			sysfs_wi_node_release(nid);
+		break;
+	}
+
+notifier_end:
+	return NOTIFY_OK;
 }
 
 static int add_weighted_interleave_group(struct kobject *mempolicy_kobj)
@@ -3504,13 +3563,17 @@  static int add_weighted_interleave_group(struct kobject *mempolicy_kobj)
 		       GFP_KERNEL);
 	if (!wi_group)
 		return -ENOMEM;
+	mutex_init(&wi_group->kobj_lock);
 
 	err = kobject_init_and_add(&wi_group->wi_kobj, &wi_ktype, mempolicy_kobj,
 				   "weighted_interleave");
 	if (err)
 		goto err_out;
 
-	for_each_node_state(nid, N_POSSIBLE) {
+	for_each_online_node(nid) {
+		if (!node_state(nid, N_MEMORY))
+			continue;
+
 		err = sysfs_wi_node_add(nid);
 		if (err) {
 			pr_err("failed to add sysfs [node%d]\n", nid);
@@ -3518,6 +3581,7 @@  static int add_weighted_interleave_group(struct kobject *mempolicy_kobj)
 		}
 	}
 
+	hotplug_memory_notifier(wi_node_notifier, DEFAULT_CALLBACK_PRI);
 	return 0;
 
 err_out: