Message ID | 20250401090901.1050-4-rakie.kim@sk.com |
---|---|
State | Superseded |
Headers | show |
Series | Enhance sysfs handling for memory hotplug in weighted interleave | expand |
On Tue, Apr 01, 2025 at 06:08:59PM +0900, Rakie Kim wrote: > static void sysfs_wi_release(struct kobject *wi_kobj) > @@ -3464,35 +3477,84 @@ static const struct kobj_type wi_ktype = { > > static int sysfs_wi_node_add(int nid) > { ... snip .. > + mutex_lock(&wi_group->kobj_lock); > + if (wi_group->nattrs[nid]) { > + mutex_unlock(&wi_group->kobj_lock); > + pr_info("Node [%d] already exists\n", nid); > + kfree(new_attr); > + kfree(name); > + return 0; > + } > > - if (sysfs_create_file(&wi_group->wi_kobj, &node_attr->kobj_attr.attr)) { > - kfree(node_attr->kobj_attr.attr.name); > - kfree(node_attr); > - pr_err("failed to add attribute to weighted_interleave\n"); > - return -ENOMEM; > + wi_group->nattrs[nid] = new_attr; > + mutex_unlock(&wi_group->kobj_lock); > + Shouldn't all of this vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv > + sysfs_attr_init(&wi_group->nattrs[nid]->kobj_attr.attr); > + wi_group->nattrs[nid]->kobj_attr.attr.name = name; > + wi_group->nattrs[nid]->kobj_attr.attr.mode = 0644; > + wi_group->nattrs[nid]->kobj_attr.show = node_show; > + wi_group->nattrs[nid]->kobj_attr.store = node_store; > + wi_group->nattrs[nid]->nid = nid; > + > + ret = sysfs_create_file(&wi_group->wi_kobj, > + &wi_group->nattrs[nid]->kobj_attr.attr); > + if (ret) { > + kfree(wi_group->nattrs[nid]->kobj_attr.attr.name); > + kfree(wi_group->nattrs[nid]); > + wi_group->nattrs[nid] = NULL; > + pr_err("Failed to add attribute to weighted_interleave: %d\n", ret); > } ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ Be happening inside the lock as well? > + > + switch(action) { > + case MEM_ONLINE: > + if (node_state(nid, N_MEMORY)) { Hm, I see the issue here, ok, this node_state check isn't needed, as it will always be true. So this function needs to handle duplicates still. vvv > + err = sysfs_wi_node_add(nid); > + if (err) { > + pr_err("failed to add sysfs [node%d]\n", nid); > + return NOTIFY_BAD; > + } > + } > + break; > + case MEM_OFFLINE: > + if (!node_state(nid, N_MEMORY)) This check is good for the time being. > + sysfs_wi_node_release(nid); > + break; > + } > + > +notifier_end: > + return NOTIFY_OK; > } > > But really I think we probably just want to change to build on top of this: https://lore.kernel.org/all/20250401092716.537512-2-osalvador@suse.de/ And use register_node_notifier with NODE_BECAME_MEMORYLESS and NODE_BECAME_MEM_AWARE ~Gregory
On Tue, 1 Apr 2025 16:32:42 -0400 Gregory Price <gourry@gourry.net> wrote: > On Tue, Apr 01, 2025 at 06:08:59PM +0900, Rakie Kim wrote: > > static void sysfs_wi_release(struct kobject *wi_kobj) > > @@ -3464,35 +3477,84 @@ static const struct kobj_type wi_ktype = { > > > > static int sysfs_wi_node_add(int nid) > > { > ... snip .. > > + mutex_lock(&wi_group->kobj_lock); > > + if (wi_group->nattrs[nid]) { > > + mutex_unlock(&wi_group->kobj_lock); > > + pr_info("Node [%d] already exists\n", nid); > > + kfree(new_attr); > > + kfree(name); > > + return 0; > > + } > > > > - if (sysfs_create_file(&wi_group->wi_kobj, &node_attr->kobj_attr.attr)) { > > - kfree(node_attr->kobj_attr.attr.name); > > - kfree(node_attr); > > - pr_err("failed to add attribute to weighted_interleave\n"); > > - return -ENOMEM; > > + wi_group->nattrs[nid] = new_attr; > > + mutex_unlock(&wi_group->kobj_lock); > > + > > Shouldn't all of this > vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv > > + sysfs_attr_init(&wi_group->nattrs[nid]->kobj_attr.attr); > > + wi_group->nattrs[nid]->kobj_attr.attr.name = name; > > + wi_group->nattrs[nid]->kobj_attr.attr.mode = 0644; > > + wi_group->nattrs[nid]->kobj_attr.show = node_show; > > + wi_group->nattrs[nid]->kobj_attr.store = node_store; > > + wi_group->nattrs[nid]->nid = nid; > > + > > + ret = sysfs_create_file(&wi_group->wi_kobj, > > + &wi_group->nattrs[nid]->kobj_attr.attr); > > + if (ret) { > > + kfree(wi_group->nattrs[nid]->kobj_attr.attr.name); > > + kfree(wi_group->nattrs[nid]); > > + wi_group->nattrs[nid] = NULL; > > + pr_err("Failed to add attribute to weighted_interleave: %d\n", ret); > > } > ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ > > Be happening inside the lock as well? I agree that applying your suggestion would make the code more robust. I will update the patch to follow your recommendation. > > > + > > + switch(action) { > > + case MEM_ONLINE: > > + if (node_state(nid, N_MEMORY)) { > > Hm, I see the issue here, ok, this node_state check isn't needed, as it > will always be true. So this function needs to handle duplicates still. Yes, you're right. The `node_state(nid, N_MEMORY)` check I added here is redundant because it will always be true in this context. I will remove it to avoid unnecessary duplication. > vvv > > + err = sysfs_wi_node_add(nid); > > + if (err) { > > + pr_err("failed to add sysfs [node%d]\n", nid); > > + return NOTIFY_BAD; > > + } > > + } > > + break; > > + case MEM_OFFLINE: > > + if (!node_state(nid, N_MEMORY)) > > This check is good for the time being. This check looks appropriate for now and I'll keep it as-is. > > > + sysfs_wi_node_release(nid); > > + break; > > + } > > + > > +notifier_end: > > + return NOTIFY_OK; > > } > > > > > > But really I think we probably just want to change to build on top of this: > https://lore.kernel.org/all/20250401092716.537512-2-osalvador@suse.de/ > > And use register_node_notifier with NODE_BECAME_MEMORYLESS and NODE_BECAME_MEM_AWARE > > ~Gregory Thank you for sharing the link regarding `node_notify`. I agree that the mechanism you pointed out would be a better fit for this patch. By using `register_node_notifier` with `NODE_BECAME_MEMORYLESS` and `NODE_BECAME_MEM_AWARE`, we can avoid unnecessary callbacks and implement this functionality more efficiently. However, I think it would be better to apply the current patch first and then update it to use `node_notify` once that support is finalized and available upstream. Rakie
diff --git a/mm/mempolicy.c b/mm/mempolicy.c index 3092a792bd28..fa755d20780c 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c @@ -113,6 +113,7 @@ #include <asm/tlbflush.h> #include <asm/tlb.h> #include <linux/uaccess.h> +#include <linux/memory.h> #include "internal.h" @@ -3390,6 +3391,7 @@ struct iw_node_attr { struct sysfs_wi_group { struct kobject wi_kobj; + struct mutex kobj_lock; struct iw_node_attr *nattrs[]; }; @@ -3439,13 +3441,24 @@ static ssize_t node_store(struct kobject *kobj, struct kobj_attribute *attr, static void sysfs_wi_node_release(int nid) { - if (!wi_group->nattrs[nid]) + struct iw_node_attr *attr; + + if (nid < 0 || nid >= nr_node_ids) + return; + + mutex_lock(&wi_group->kobj_lock); + attr = wi_group->nattrs[nid]; + if (!attr) { + mutex_unlock(&wi_group->kobj_lock); return; + } - sysfs_remove_file(&wi_group->wi_kobj, - &wi_group->nattrs[nid]->kobj_attr.attr); - kfree(wi_group->nattrs[nid]->kobj_attr.attr.name); - kfree(wi_group->nattrs[nid]); + wi_group->nattrs[nid] = NULL; + mutex_unlock(&wi_group->kobj_lock); + + sysfs_remove_file(&wi_group->wi_kobj, &attr->kobj_attr.attr); + kfree(attr->kobj_attr.attr.name); + kfree(attr); } static void sysfs_wi_release(struct kobject *wi_kobj) @@ -3464,35 +3477,84 @@ static const struct kobj_type wi_ktype = { static int sysfs_wi_node_add(int nid) { - struct iw_node_attr *node_attr; + int ret = 0; char *name; + struct iw_node_attr *new_attr = NULL; - node_attr = kzalloc(sizeof(*node_attr), GFP_KERNEL); - if (!node_attr) + if (nid < 0 || nid >= nr_node_ids) { + pr_err("Invalid node id: %d\n", nid); + return -EINVAL; + } + + new_attr = kzalloc(sizeof(struct iw_node_attr), GFP_KERNEL); + if (!new_attr) return -ENOMEM; name = kasprintf(GFP_KERNEL, "node%d", nid); if (!name) { - kfree(node_attr); + kfree(new_attr); return -ENOMEM; } - sysfs_attr_init(&node_attr->kobj_attr.attr); - node_attr->kobj_attr.attr.name = name; - node_attr->kobj_attr.attr.mode = 0644; - node_attr->kobj_attr.show = node_show; - node_attr->kobj_attr.store = node_store; - node_attr->nid = nid; + mutex_lock(&wi_group->kobj_lock); + if (wi_group->nattrs[nid]) { + mutex_unlock(&wi_group->kobj_lock); + pr_info("Node [%d] already exists\n", nid); + kfree(new_attr); + kfree(name); + return 0; + } - if (sysfs_create_file(&wi_group->wi_kobj, &node_attr->kobj_attr.attr)) { - kfree(node_attr->kobj_attr.attr.name); - kfree(node_attr); - pr_err("failed to add attribute to weighted_interleave\n"); - return -ENOMEM; + wi_group->nattrs[nid] = new_attr; + mutex_unlock(&wi_group->kobj_lock); + + sysfs_attr_init(&wi_group->nattrs[nid]->kobj_attr.attr); + wi_group->nattrs[nid]->kobj_attr.attr.name = name; + wi_group->nattrs[nid]->kobj_attr.attr.mode = 0644; + wi_group->nattrs[nid]->kobj_attr.show = node_show; + wi_group->nattrs[nid]->kobj_attr.store = node_store; + wi_group->nattrs[nid]->nid = nid; + + ret = sysfs_create_file(&wi_group->wi_kobj, + &wi_group->nattrs[nid]->kobj_attr.attr); + if (ret) { + kfree(wi_group->nattrs[nid]->kobj_attr.attr.name); + kfree(wi_group->nattrs[nid]); + wi_group->nattrs[nid] = NULL; + pr_err("Failed to add attribute to weighted_interleave: %d\n", ret); } - wi_group->nattrs[nid] = node_attr; - return 0; + return ret; +} + +static int wi_node_notifier(struct notifier_block *nb, + unsigned long action, void *data) +{ + int err; + struct memory_notify *arg = data; + int nid = arg->status_change_nid; + + if (nid < 0) + goto notifier_end; + + switch(action) { + case MEM_ONLINE: + if (node_state(nid, N_MEMORY)) { + err = sysfs_wi_node_add(nid); + if (err) { + pr_err("failed to add sysfs [node%d]\n", nid); + return NOTIFY_BAD; + } + } + break; + case MEM_OFFLINE: + if (!node_state(nid, N_MEMORY)) + sysfs_wi_node_release(nid); + break; + } + +notifier_end: + return NOTIFY_OK; } static int add_weighted_interleave_group(struct kobject *mempolicy_kobj) @@ -3504,13 +3566,17 @@ static int add_weighted_interleave_group(struct kobject *mempolicy_kobj) GFP_KERNEL); if (!wi_group) return -ENOMEM; + mutex_init(&wi_group->kobj_lock); err = kobject_init_and_add(&wi_group->wi_kobj, &wi_ktype, mempolicy_kobj, "weighted_interleave"); if (err) goto err_out; - for_each_node_state(nid, N_POSSIBLE) { + for_each_online_node(nid) { + if (!node_state(nid, N_MEMORY)) + continue; + err = sysfs_wi_node_add(nid); if (err) { pr_err("failed to add sysfs [node%d]\n", nid); @@ -3518,6 +3584,7 @@ static int add_weighted_interleave_group(struct kobject *mempolicy_kobj) } } + hotplug_memory_notifier(wi_node_notifier, DEFAULT_CALLBACK_PRI); return 0; err_out: