diff mbox series

mm: hugetlb: eliminate memory-less nodes handling

Message ID 20220901083023.42319-1-songmuchun@bytedance.com (mailing list archive)
State New
Headers show
Series mm: hugetlb: eliminate memory-less nodes handling | expand

Commit Message

Muchun Song Sept. 1, 2022, 8:30 a.m. UTC
The memory-notify-based approach aims to handle meory-less nodes, however, it just adds
the complexity of code as pointed by David in thread [1].  The handling of memory-less
nodes is introduced by commit 4faf8d950ec4 ("hugetlb: handle memory hot-plug events").
From its commit message, we cannot find any necessity of handling this case. So, we can
simply register/unregister sysfs entries in register_node/unregister_node to simlify the
code.

https://lore.kernel.org/linux-mm/60933ffc-b850-976c-78a0-0ee6e0ea9ef0@redhat.com/ [1]
Suggested-by: David Hildenbrand <david@redhat.com>
Signed-off-by: Muchun Song <songmuchun@bytedance.com>
---
 drivers/base/node.c  |  7 +++++--
 include/linux/node.h |  5 +++++
 mm/hugetlb.c         | 37 ++++++++++---------------------------
 3 files changed, 20 insertions(+), 29 deletions(-)

Comments

David Hildenbrand Sept. 1, 2022, 8:50 a.m. UTC | #1
On 01.09.22 10:30, Muchun Song wrote:
> The memory-notify-based approach aims to handle meory-less nodes, however, it just adds
> the complexity of code as pointed by David in thread [1].  The handling of memory-less
> nodes is introduced by commit 4faf8d950ec4 ("hugetlb: handle memory hot-plug events").
> From its commit message, we cannot find any necessity of handling this case. So, we can
> simply register/unregister sysfs entries in register_node/unregister_node to simlify the
> code.
> 
> https://lore.kernel.org/linux-mm/60933ffc-b850-976c-78a0-0ee6e0ea9ef0@redhat.com/ [1]
> Suggested-by: David Hildenbrand <david@redhat.com>
> Signed-off-by: Muchun Song <songmuchun@bytedance.com>
> ---
>  drivers/base/node.c  |  7 +++++--
>  include/linux/node.h |  5 +++++
>  mm/hugetlb.c         | 37 ++++++++++---------------------------
>  3 files changed, 20 insertions(+), 29 deletions(-)
> 
> diff --git a/drivers/base/node.c b/drivers/base/node.c
> index ed391cb09999..cf115d5a9b8a 100644
> --- a/drivers/base/node.c
> +++ b/drivers/base/node.c
> @@ -608,10 +608,12 @@ static int register_node(struct node *node, int num)
>  	node->dev.groups = node_dev_groups;
>  	error = device_register(&node->dev);
>  
> -	if (error)
> +	if (error) {
>  		put_device(&node->dev);
> -	else
> +	} else {
> +		hugetlb_register_node(node);
>  		compaction_register_node(node);
> +	}

Good, so this matches what other code does.

>  
>  	return error;
>  }
> @@ -625,6 +627,7 @@ static int register_node(struct node *node, int num)
>   */
>  void unregister_node(struct node *node)
>  {
> +	hugetlb_unregister_node(node);
>  	compaction_unregister_node(node);
>  	node_remove_accesses(node);
>  	node_remove_caches(node);
> diff --git a/include/linux/node.h b/include/linux/node.h
> index 427a5975cf40..f5d41498c2bf 100644
> --- a/include/linux/node.h
> +++ b/include/linux/node.h
> @@ -138,6 +138,11 @@ extern void unregister_memory_block_under_nodes(struct memory_block *mem_blk);
>  extern int register_memory_node_under_compute_node(unsigned int mem_nid,
>  						   unsigned int cpu_nid,
>  						   unsigned access);
> +
> +#ifdef CONFIG_HUGETLBFS
> +void hugetlb_register_node(struct node *node);
> +void hugetlb_unregister_node(struct node *node);
> +#endif

compaction_register_node() resides in include/linux/compaction.h, so I
wonder if this should go into hugetlb.h (unless it causes trouble)

>  #else
>  static inline void node_dev_init(void)
>  {
> diff --git a/mm/hugetlb.c b/mm/hugetlb.c
> index d0617d64d718..722e862bb6be 100644
> --- a/mm/hugetlb.c
> +++ b/mm/hugetlb.c
> @@ -3898,6 +3898,7 @@ static void __init hugetlb_sysfs_init(void)
>  }
>  
>  #ifdef CONFIG_NUMA
> +static bool hugetlb_initialized __ro_after_init;

We set it out of hugetlb_register_all_nodes(), so it conceptually not
correct. We either need a better name here or set it from generic init code.

You could call it hugetlb_sysfs_initialized() and set that from
hugetlb_sysfs_init(), which is called just before
hugetlb_register_all_nodes().

[ shouldn't hugetlb_register_all_nodes() get called from
hugetlb_sysfs_init() ? it's all about sysfs as well ... ]

>  
>  /*
>   * node_hstate/s - associate per node hstate attributes, via their kobjects,
> @@ -3953,7 +3954,7 @@ static struct hstate *kobj_to_node_hstate(struct kobject *kobj, int *nidp)
>   * Unregister hstate attributes from a single node device.
>   * No-op if no hstate attributes attached.
>   */
> -static void hugetlb_unregister_node(struct node *node)
> +void hugetlb_unregister_node(struct node *node)
>  {
>  	struct hstate *h;
>  	struct node_hstate *nhs = &node_hstates[node->dev.id];
> @@ -3983,19 +3984,22 @@ static void hugetlb_unregister_node(struct node *node)
>   * Register hstate attributes for a single node device.
>   * No-op if attributes already registered.
>   */
> -static int hugetlb_register_node(struct node *node)
> +void hugetlb_register_node(struct node *node)
>  {
>  	struct hstate *h;
>  	struct node_hstate *nhs = &node_hstates[node->dev.id];
>  	int err;
>  
> +	if (!hugetlb_initialized)
> +		return;
> +
>  	if (nhs->hugepages_kobj)
> -		return 0;		/* already allocated */
> +		return;		/* already allocated */
>  
>  	nhs->hugepages_kobj = kobject_create_and_add("hugepages",
>  							&node->dev.kobj);
>  	if (!nhs->hugepages_kobj)
> -		return -ENOMEM;
> +		return;
>  
>  	for_each_hstate(h) {
>  		err = hugetlb_sysfs_add_hstate(h, nhs->hugepages_kobj,
> @@ -4005,28 +4009,9 @@ static int hugetlb_register_node(struct node *node)
>  			pr_err("HugeTLB: Unable to add hstate %s for node %d\n",
>  				h->name, node->dev.id);
>  			hugetlb_unregister_node(node);
> -			return -ENOMEM;
> +			break;
>  		}
>  	}
> -	return 0;
> -}
> -
> -static int __meminit hugetlb_memory_callback(struct notifier_block *self,
> -					     unsigned long action, void *arg)
> -{
> -	int ret = 0;
> -	struct memory_notify *mnb = arg;
> -	int nid = mnb->status_change_nid;
> -
> -	if (nid == NUMA_NO_NODE)
> -		return NOTIFY_DONE;
> -
> -	if (action == MEM_GOING_ONLINE)
> -		ret = hugetlb_register_node(node_devices[nid]);
> -	else if (action == MEM_CANCEL_ONLINE || action == MEM_OFFLINE)
> -		hugetlb_unregister_node(node_devices[nid]);
> -
> -	return notifier_from_errno(ret);
>  }
>  
>  /*
> @@ -4038,11 +4023,9 @@ static void __init hugetlb_register_all_nodes(void)
>  {
>  	int nid;
>  
> -	get_online_mems();
> -	hotplug_memory_notifier(hugetlb_memory_callback, 0);
> +	hugetlb_initialized = true;
>  	for_each_node_state(nid, N_MEMORY)
>  		hugetlb_register_node(node_devices[nid]);
> -	put_online_mems();
>  }
>  #else	/* !CONFIG_NUMA */
>  

Apart from the comments, looks good and clean to me. Thanks!
Aneesh Kumar K.V Sept. 1, 2022, 9 a.m. UTC | #2
On 9/1/22 2:00 PM, Muchun Song wrote:
> The memory-notify-based approach aims to handle meory-less nodes, however, it just adds
> the complexity of code as pointed by David in thread [1].  The handling of memory-less
> nodes is introduced by commit 4faf8d950ec4 ("hugetlb: handle memory hot-plug events").
> From its commit message, we cannot find any necessity of handling this case. So, we can
> simply register/unregister sysfs entries in register_node/unregister_node to simlify the
> code.

Isn't that hotplug callback added because in hugetlb_register_all_nodes() we register
sysfs nodes only for N_MEMORY nodes? 


> 
> https://lore.kernel.org/linux-mm/60933ffc-b850-976c-78a0-0ee6e0ea9ef0@redhat.com/ [1]
> Suggested-by: David Hildenbrand <david@redhat.com>
> Signed-off-by: Muchun Song <songmuchun@bytedance.com>
> ---
>  drivers/base/node.c  |  7 +++++--
>  include/linux/node.h |  5 +++++
>  mm/hugetlb.c         | 37 ++++++++++---------------------------
>  3 files changed, 20 insertions(+), 29 deletions(-)
> 
> diff --git a/drivers/base/node.c b/drivers/base/node.c
> index ed391cb09999..cf115d5a9b8a 100644
> --- a/drivers/base/node.c
> +++ b/drivers/base/node.c
> @@ -608,10 +608,12 @@ static int register_node(struct node *node, int num)
>  	node->dev.groups = node_dev_groups;
>  	error = device_register(&node->dev);
>  
> -	if (error)
> +	if (error) {
>  		put_device(&node->dev);
> -	else
> +	} else {
> +		hugetlb_register_node(node);
>  		compaction_register_node(node);
> +	}
>  


I guess this will handle register of sysfs hugetlb files for new NUMA nodes
after hugetlb_initialized = true;

But what about N_CPU that can get memory added later. Do we need to update
hugetlb_register_all_nodes() to handle N_ONLINE? 


>  	return error;
>  }
> @@ -625,6 +627,7 @@ static int register_node(struct node *node, int num)
>   */
>  void unregister_node(struct node *node)
>  {
> +	hugetlb_unregister_node(node);
>  	compaction_unregister_node(node);
>  	node_remove_accesses(node);
>  	node_remove_caches(node);
> diff --git a/include/linux/node.h b/include/linux/node.h
> index 427a5975cf40..f5d41498c2bf 100644
> --- a/include/linux/node.h
> +++ b/include/linux/node.h
> @@ -138,6 +138,11 @@ extern void unregister_memory_block_under_nodes(struct memory_block *mem_blk);
>  extern int register_memory_node_under_compute_node(unsigned int mem_nid,
>  						   unsigned int cpu_nid,
>  						   unsigned access);
> +
> +#ifdef CONFIG_HUGETLBFS
> +void hugetlb_register_node(struct node *node);
> +void hugetlb_unregister_node(struct node *node);
> +#endif
>  #else
>  static inline void node_dev_init(void)
>  {
> diff --git a/mm/hugetlb.c b/mm/hugetlb.c
> index d0617d64d718..722e862bb6be 100644
> --- a/mm/hugetlb.c
> +++ b/mm/hugetlb.c
> @@ -3898,6 +3898,7 @@ static void __init hugetlb_sysfs_init(void)
>  }
>  
>  #ifdef CONFIG_NUMA
> +static bool hugetlb_initialized __ro_after_init;
>  
>  /*
>   * node_hstate/s - associate per node hstate attributes, via their kobjects,
> @@ -3953,7 +3954,7 @@ static struct hstate *kobj_to_node_hstate(struct kobject *kobj, int *nidp)
>   * Unregister hstate attributes from a single node device.
>   * No-op if no hstate attributes attached.
>   */
> -static void hugetlb_unregister_node(struct node *node)
> +void hugetlb_unregister_node(struct node *node)
>  {
>  	struct hstate *h;
>  	struct node_hstate *nhs = &node_hstates[node->dev.id];
> @@ -3983,19 +3984,22 @@ static void hugetlb_unregister_node(struct node *node)
>   * Register hstate attributes for a single node device.
>   * No-op if attributes already registered.
>   */
> -static int hugetlb_register_node(struct node *node)
> +void hugetlb_register_node(struct node *node)
>  {
>  	struct hstate *h;
>  	struct node_hstate *nhs = &node_hstates[node->dev.id];
>  	int err;
>  
> +	if (!hugetlb_initialized)
> +		return;
> +
>  	if (nhs->hugepages_kobj)
> -		return 0;		/* already allocated */
> +		return;		/* already allocated */
>  
>  	nhs->hugepages_kobj = kobject_create_and_add("hugepages",
>  							&node->dev.kobj);
>  	if (!nhs->hugepages_kobj)
> -		return -ENOMEM;
> +		return;
>  
>  	for_each_hstate(h) {
>  		err = hugetlb_sysfs_add_hstate(h, nhs->hugepages_kobj,
> @@ -4005,28 +4009,9 @@ static int hugetlb_register_node(struct node *node)
>  			pr_err("HugeTLB: Unable to add hstate %s for node %d\n",
>  				h->name, node->dev.id);
>  			hugetlb_unregister_node(node);
> -			return -ENOMEM;
> +			break;
>  		}
>  	}
> -	return 0;
> -}
> -
> -static int __meminit hugetlb_memory_callback(struct notifier_block *self,
> -					     unsigned long action, void *arg)
> -{
> -	int ret = 0;
> -	struct memory_notify *mnb = arg;
> -	int nid = mnb->status_change_nid;
> -
> -	if (nid == NUMA_NO_NODE)
> -		return NOTIFY_DONE;
> -
> -	if (action == MEM_GOING_ONLINE)
> -		ret = hugetlb_register_node(node_devices[nid]);
> -	else if (action == MEM_CANCEL_ONLINE || action == MEM_OFFLINE)
> -		hugetlb_unregister_node(node_devices[nid]);
> -
> -	return notifier_from_errno(ret);
>  }
>  
>  /*
> @@ -4038,11 +4023,9 @@ static void __init hugetlb_register_all_nodes(void)
>  {
>  	int nid;
>  
> -	get_online_mems();
> -	hotplug_memory_notifier(hugetlb_memory_callback, 0);
> +	hugetlb_initialized = true;
>  	for_each_node_state(nid, N_MEMORY)


Should this be for_each_online_node() ?

>  		hugetlb_register_node(node_devices[nid]);
> -	put_online_mems();
>  }
>  #else	/* !CONFIG_NUMA */
>
David Hildenbrand Sept. 1, 2022, 9:03 a.m. UTC | #3
On 01.09.22 11:00, Aneesh Kumar K V wrote:
> On 9/1/22 2:00 PM, Muchun Song wrote:
>> The memory-notify-based approach aims to handle meory-less nodes, however, it just adds
>> the complexity of code as pointed by David in thread [1].  The handling of memory-less
>> nodes is introduced by commit 4faf8d950ec4 ("hugetlb: handle memory hot-plug events").
>> From its commit message, we cannot find any necessity of handling this case. So, we can
>> simply register/unregister sysfs entries in register_node/unregister_node to simlify the
>> code.
> 
> Isn't that hotplug callback added because in hugetlb_register_all_nodes() we register
> sysfs nodes only for N_MEMORY nodes? 
> 

Right, that needs adjustment as well.
Muchun Song Sept. 2, 2022, 2:39 a.m. UTC | #4
> On Sep 1, 2022, at 16:50, David Hildenbrand <david@redhat.com> wrote:
> 
> On 01.09.22 10:30, Muchun Song wrote:
>> The memory-notify-based approach aims to handle meory-less nodes, however, it just adds
>> the complexity of code as pointed by David in thread [1].  The handling of memory-less
>> nodes is introduced by commit 4faf8d950ec4 ("hugetlb: handle memory hot-plug events").
>> From its commit message, we cannot find any necessity of handling this case. So, we can
>> simply register/unregister sysfs entries in register_node/unregister_node to simlify the
>> code.
>> 
>> https://lore.kernel.org/linux-mm/60933ffc-b850-976c-78a0-0ee6e0ea9ef0@redhat.com/ [1]
>> Suggested-by: David Hildenbrand <david@redhat.com>
>> Signed-off-by: Muchun Song <songmuchun@bytedance.com>
>> ---
>> drivers/base/node.c  |  7 +++++--
>> include/linux/node.h |  5 +++++
>> mm/hugetlb.c         | 37 ++++++++++---------------------------
>> 3 files changed, 20 insertions(+), 29 deletions(-)
>> 
>> diff --git a/drivers/base/node.c b/drivers/base/node.c
>> index ed391cb09999..cf115d5a9b8a 100644
>> --- a/drivers/base/node.c
>> +++ b/drivers/base/node.c
>> @@ -608,10 +608,12 @@ static int register_node(struct node *node, int num)
>> 	node->dev.groups = node_dev_groups;
>> 	error = device_register(&node->dev);
>> 
>> -	if (error)
>> +	if (error) {
>> 		put_device(&node->dev);
>> -	else
>> +	} else {
>> +		hugetlb_register_node(node);
>> 		compaction_register_node(node);
>> +	}
> 
> Good, so this matches what other code does.
> 
>> 
>> 	return error;
>> }
>> @@ -625,6 +627,7 @@ static int register_node(struct node *node, int num)
>>  */
>> void unregister_node(struct node *node)
>> {
>> +	hugetlb_unregister_node(node);
>> 	compaction_unregister_node(node);
>> 	node_remove_accesses(node);
>> 	node_remove_caches(node);
>> diff --git a/include/linux/node.h b/include/linux/node.h
>> index 427a5975cf40..f5d41498c2bf 100644
>> --- a/include/linux/node.h
>> +++ b/include/linux/node.h
>> @@ -138,6 +138,11 @@ extern void unregister_memory_block_under_nodes(struct memory_block *mem_blk);
>> extern int register_memory_node_under_compute_node(unsigned int mem_nid,
>> 						   unsigned int cpu_nid,
>> 						   unsigned access);
>> +
>> +#ifdef CONFIG_HUGETLBFS
>> +void hugetlb_register_node(struct node *node);
>> +void hugetlb_unregister_node(struct node *node);
>> +#endif
> 
> compaction_register_node() resides in include/linux/compaction.h, so I
> wonder if this should go into hugetlb.h (unless it causes trouble)

I think yes. Will update in next version.

> 
>> #else
>> static inline void node_dev_init(void)
>> {
>> diff --git a/mm/hugetlb.c b/mm/hugetlb.c
>> index d0617d64d718..722e862bb6be 100644
>> --- a/mm/hugetlb.c
>> +++ b/mm/hugetlb.c
>> @@ -3898,6 +3898,7 @@ static void __init hugetlb_sysfs_init(void)
>> }
>> 
>> #ifdef CONFIG_NUMA
>> +static bool hugetlb_initialized __ro_after_init;
> 
> We set it out of hugetlb_register_all_nodes(), so it conceptually not
> correct. We either need a better name here or set it from generic init code.
> 
> You could call it hugetlb_sysfs_initialized() and set that from
> hugetlb_sysfs_init(), which is called just before
> hugetlb_register_all_nodes().

Make sense.

> 
> [ shouldn't hugetlb_register_all_nodes() get called from
> hugetlb_sysfs_init() ? it's all about sysfs as well ... ]

Yep, we can call hugetlb_register_all_nodes() in hugetlb_sysfs_init().

> 
>> 
>> /*
>>  * node_hstate/s - associate per node hstate attributes, via their kobjects,
>> @@ -3953,7 +3954,7 @@ static struct hstate *kobj_to_node_hstate(struct kobject *kobj, int *nidp)
>>  * Unregister hstate attributes from a single node device.
>>  * No-op if no hstate attributes attached.
>>  */
>> -static void hugetlb_unregister_node(struct node *node)
>> +void hugetlb_unregister_node(struct node *node)
>> {
>> 	struct hstate *h;
>> 	struct node_hstate *nhs = &node_hstates[node->dev.id];
>> @@ -3983,19 +3984,22 @@ static void hugetlb_unregister_node(struct node *node)
>>  * Register hstate attributes for a single node device.
>>  * No-op if attributes already registered.
>>  */
>> -static int hugetlb_register_node(struct node *node)
>> +void hugetlb_register_node(struct node *node)
>> {
>> 	struct hstate *h;
>> 	struct node_hstate *nhs = &node_hstates[node->dev.id];
>> 	int err;
>> 
>> +	if (!hugetlb_initialized)
>> +		return;
>> +
>> 	if (nhs->hugepages_kobj)
>> -		return 0;		/* already allocated */
>> +		return;		/* already allocated */
>> 
>> 	nhs->hugepages_kobj = kobject_create_and_add("hugepages",
>> 							&node->dev.kobj);
>> 	if (!nhs->hugepages_kobj)
>> -		return -ENOMEM;
>> +		return;
>> 
>> 	for_each_hstate(h) {
>> 		err = hugetlb_sysfs_add_hstate(h, nhs->hugepages_kobj,
>> @@ -4005,28 +4009,9 @@ static int hugetlb_register_node(struct node *node)
>> 			pr_err("HugeTLB: Unable to add hstate %s for node %d\n",
>> 				h->name, node->dev.id);
>> 			hugetlb_unregister_node(node);
>> -			return -ENOMEM;
>> +			break;
>> 		}
>> 	}
>> -	return 0;
>> -}
>> -
>> -static int __meminit hugetlb_memory_callback(struct notifier_block *self,
>> -					     unsigned long action, void *arg)
>> -{
>> -	int ret = 0;
>> -	struct memory_notify *mnb = arg;
>> -	int nid = mnb->status_change_nid;
>> -
>> -	if (nid == NUMA_NO_NODE)
>> -		return NOTIFY_DONE;
>> -
>> -	if (action == MEM_GOING_ONLINE)
>> -		ret = hugetlb_register_node(node_devices[nid]);
>> -	else if (action == MEM_CANCEL_ONLINE || action == MEM_OFFLINE)
>> -		hugetlb_unregister_node(node_devices[nid]);
>> -
>> -	return notifier_from_errno(ret);
>> }
>> 
>> /*
>> @@ -4038,11 +4023,9 @@ static void __init hugetlb_register_all_nodes(void)
>> {
>> 	int nid;
>> 
>> -	get_online_mems();
>> -	hotplug_memory_notifier(hugetlb_memory_callback, 0);
>> +	hugetlb_initialized = true;
>> 	for_each_node_state(nid, N_MEMORY)
>> 		hugetlb_register_node(node_devices[nid]);
>> -	put_online_mems();
>> }
>> #else	/* !CONFIG_NUMA */
>> 
> 
> Apart from the comments, looks good and clean to me. Thanks!

Thanks for your suggestions and review.

Muchun

> 
> -- 
> Thanks,
> 
> David / dhildenb
Muchun Song Sept. 2, 2022, 3:42 a.m. UTC | #5
> On Sep 1, 2022, at 17:00, Aneesh Kumar K V <aneesh.kumar@linux.ibm.com> wrote:
> 
> On 9/1/22 2:00 PM, Muchun Song wrote:
>> The memory-notify-based approach aims to handle meory-less nodes, however, it just adds
>> the complexity of code as pointed by David in thread [1].  The handling of memory-less
>> nodes is introduced by commit 4faf8d950ec4 ("hugetlb: handle memory hot-plug events").
>> From its commit message, we cannot find any necessity of handling this case. So, we can
>> simply register/unregister sysfs entries in register_node/unregister_node to simlify the
>> code.
> 
> Isn't that hotplug callback added because in hugetlb_register_all_nodes() we register
> sysfs nodes only for N_MEMORY nodes? 

I think you might right. I have looked at the commit 9a30523066cd which introduces the sysfs
creation. I saw it create the sysfs for every possible node.

	for (nid = 0; nid < nr_node_ids; nid++)
		hugetlb_register_node(node);

And then I checked the commit 9b5e5d0fdc91, which said it was a preparation for handling
memory-less nodes via memory hotplug.

> 
> 
>> 
>> https://lore.kernel.org/linux-mm/60933ffc-b850-976c-78a0-0ee6e0ea9ef0@redhat.com/ [1]
>> Suggested-by: David Hildenbrand <david@redhat.com>
>> Signed-off-by: Muchun Song <songmuchun@bytedance.com>
>> ---
>> drivers/base/node.c  |  7 +++++--
>> include/linux/node.h |  5 +++++
>> mm/hugetlb.c         | 37 ++++++++++---------------------------
>> 3 files changed, 20 insertions(+), 29 deletions(-)
>> 
>> diff --git a/drivers/base/node.c b/drivers/base/node.c
>> index ed391cb09999..cf115d5a9b8a 100644
>> --- a/drivers/base/node.c
>> +++ b/drivers/base/node.c
>> @@ -608,10 +608,12 @@ static int register_node(struct node *node, int num)
>> 	node->dev.groups = node_dev_groups;
>> 	error = device_register(&node->dev);
>> 
>> -	if (error)
>> +	if (error) {
>> 		put_device(&node->dev);
>> -	else
>> +	} else {
>> +		hugetlb_register_node(node);
>> 		compaction_register_node(node);
>> +	}
>> 
> 
> 
> I guess this will handle register of sysfs hugetlb files for new NUMA nodes
> after hugetlb_initialized = true;

Yes.

> 
> But what about N_CPU that can get memory added later. Do we need to update
> hugetlb_register_all_nodes() to handle N_ONLINE? 

I think we should.

> 
> 
>> 	return error;
>> }
>> @@ -625,6 +627,7 @@ static int register_node(struct node *node, int num)
>>  */
>> void unregister_node(struct node *node)
>> {
>> +	hugetlb_unregister_node(node);
>> 	compaction_unregister_node(node);
>> 	node_remove_accesses(node);
>> 	node_remove_caches(node);
>> diff --git a/include/linux/node.h b/include/linux/node.h
>> index 427a5975cf40..f5d41498c2bf 100644
>> --- a/include/linux/node.h
>> +++ b/include/linux/node.h
>> @@ -138,6 +138,11 @@ extern void unregister_memory_block_under_nodes(struct memory_block *mem_blk);
>> extern int register_memory_node_under_compute_node(unsigned int mem_nid,
>> 						   unsigned int cpu_nid,
>> 						   unsigned access);
>> +
>> +#ifdef CONFIG_HUGETLBFS
>> +void hugetlb_register_node(struct node *node);
>> +void hugetlb_unregister_node(struct node *node);
>> +#endif
>> #else
>> static inline void node_dev_init(void)
>> {
>> diff --git a/mm/hugetlb.c b/mm/hugetlb.c
>> index d0617d64d718..722e862bb6be 100644
>> --- a/mm/hugetlb.c
>> +++ b/mm/hugetlb.c
>> @@ -3898,6 +3898,7 @@ static void __init hugetlb_sysfs_init(void)
>> }
>> 
>> #ifdef CONFIG_NUMA
>> +static bool hugetlb_initialized __ro_after_init;
>> 
>> /*
>>  * node_hstate/s - associate per node hstate attributes, via their kobjects,
>> @@ -3953,7 +3954,7 @@ static struct hstate *kobj_to_node_hstate(struct kobject *kobj, int *nidp)
>>  * Unregister hstate attributes from a single node device.
>>  * No-op if no hstate attributes attached.
>>  */
>> -static void hugetlb_unregister_node(struct node *node)
>> +void hugetlb_unregister_node(struct node *node)
>> {
>> 	struct hstate *h;
>> 	struct node_hstate *nhs = &node_hstates[node->dev.id];
>> @@ -3983,19 +3984,22 @@ static void hugetlb_unregister_node(struct node *node)
>>  * Register hstate attributes for a single node device.
>>  * No-op if attributes already registered.
>>  */
>> -static int hugetlb_register_node(struct node *node)
>> +void hugetlb_register_node(struct node *node)
>> {
>> 	struct hstate *h;
>> 	struct node_hstate *nhs = &node_hstates[node->dev.id];
>> 	int err;
>> 
>> +	if (!hugetlb_initialized)
>> +		return;
>> +
>> 	if (nhs->hugepages_kobj)
>> -		return 0;		/* already allocated */
>> +		return;		/* already allocated */
>> 
>> 	nhs->hugepages_kobj = kobject_create_and_add("hugepages",
>> 							&node->dev.kobj);
>> 	if (!nhs->hugepages_kobj)
>> -		return -ENOMEM;
>> +		return;
>> 
>> 	for_each_hstate(h) {
>> 		err = hugetlb_sysfs_add_hstate(h, nhs->hugepages_kobj,
>> @@ -4005,28 +4009,9 @@ static int hugetlb_register_node(struct node *node)
>> 			pr_err("HugeTLB: Unable to add hstate %s for node %d\n",
>> 				h->name, node->dev.id);
>> 			hugetlb_unregister_node(node);
>> -			return -ENOMEM;
>> +			break;
>> 		}
>> 	}
>> -	return 0;
>> -}
>> -
>> -static int __meminit hugetlb_memory_callback(struct notifier_block *self,
>> -					     unsigned long action, void *arg)
>> -{
>> -	int ret = 0;
>> -	struct memory_notify *mnb = arg;
>> -	int nid = mnb->status_change_nid;
>> -
>> -	if (nid == NUMA_NO_NODE)
>> -		return NOTIFY_DONE;
>> -
>> -	if (action == MEM_GOING_ONLINE)
>> -		ret = hugetlb_register_node(node_devices[nid]);
>> -	else if (action == MEM_CANCEL_ONLINE || action == MEM_OFFLINE)
>> -		hugetlb_unregister_node(node_devices[nid]);
>> -
>> -	return notifier_from_errno(ret);
>> }
>> 
>> /*
>> @@ -4038,11 +4023,9 @@ static void __init hugetlb_register_all_nodes(void)
>> {
>> 	int nid;
>> 
>> -	get_online_mems();
>> -	hotplug_memory_notifier(hugetlb_memory_callback, 0);
>> +	hugetlb_initialized = true;
>> 	for_each_node_state(nid, N_MEMORY)
> 
> 
> Should this be for_each_online_node() ?

So, yes.

Thanks for your review.

Muchun.

> 
>> 		hugetlb_register_node(node_devices[nid]);
>> -	put_online_mems();
>> }
>> #else	/* !CONFIG_NUMA */
kernel test robot Sept. 2, 2022, 4:48 a.m. UTC | #6
Hi Muchun,

I love your patch! Yet something to improve:

[auto build test ERROR on akpm-mm/mm-everything]

url:    https://github.com/intel-lab-lkp/linux/commits/Muchun-Song/mm-hugetlb-eliminate-memory-less-nodes-handling/20220901-163132
base:   https://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm.git mm-everything
config: loongarch-buildonly-randconfig-r004-20220901 (https://download.01.org/0day-ci/archive/20220902/202209021255.1ChhyCsl-lkp@intel.com/config)
compiler: loongarch64-linux-gcc (GCC) 12.1.0
reproduce (this is a W=1 build):
        wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
        chmod +x ~/bin/make.cross
        # https://github.com/intel-lab-lkp/linux/commit/8e3d203cb06be81565d117b3f6d5e279a833174c
        git remote add linux-review https://github.com/intel-lab-lkp/linux
        git fetch --no-tags linux-review Muchun-Song/mm-hugetlb-eliminate-memory-less-nodes-handling/20220901-163132
        git checkout 8e3d203cb06be81565d117b3f6d5e279a833174c
        # save the config file
        COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-12.1.0 make.cross W=1 ARCH=loongarch 

If you fix the issue, kindly add following tag where applicable
Reported-by: kernel test robot <lkp@intel.com>

All errors (new ones prefixed by >>):

   drivers/base/node.c: In function 'register_node':
>> drivers/base/node.c:614:17: error: implicit declaration of function 'hugetlb_register_node'; did you mean 'unregister_node'? [-Werror=implicit-function-declaration]
     614 |                 hugetlb_register_node(node);
         |                 ^~~~~~~~~~~~~~~~~~~~~
         |                 unregister_node
   drivers/base/node.c: In function 'unregister_node':
>> drivers/base/node.c:630:9: error: implicit declaration of function 'hugetlb_unregister_node'; did you mean 'unregister_node'? [-Werror=implicit-function-declaration]
     630 |         hugetlb_unregister_node(node);
         |         ^~~~~~~~~~~~~~~~~~~~~~~
         |         unregister_node
   cc1: some warnings being treated as errors


vim +614 drivers/base/node.c

   594	
   595	/*
   596	 * register_node - Setup a sysfs device for a node.
   597	 * @num - Node number to use when creating the device.
   598	 *
   599	 * Initialize and register the node device.
   600	 */
   601	static int register_node(struct node *node, int num)
   602	{
   603		int error;
   604	
   605		node->dev.id = num;
   606		node->dev.bus = &node_subsys;
   607		node->dev.release = node_device_release;
   608		node->dev.groups = node_dev_groups;
   609		error = device_register(&node->dev);
   610	
   611		if (error) {
   612			put_device(&node->dev);
   613		} else {
 > 614			hugetlb_register_node(node);
   615			compaction_register_node(node);
   616		}
   617	
   618		return error;
   619	}
   620	
   621	/**
   622	 * unregister_node - unregister a node device
   623	 * @node: node going away
   624	 *
   625	 * Unregisters a node device @node.  All the devices on the node must be
   626	 * unregistered before calling this function.
   627	 */
   628	void unregister_node(struct node *node)
   629	{
 > 630		hugetlb_unregister_node(node);
   631		compaction_unregister_node(node);
   632		node_remove_accesses(node);
   633		node_remove_caches(node);
   634		device_unregister(&node->dev);
   635	}
   636
diff mbox series

Patch

diff --git a/drivers/base/node.c b/drivers/base/node.c
index ed391cb09999..cf115d5a9b8a 100644
--- a/drivers/base/node.c
+++ b/drivers/base/node.c
@@ -608,10 +608,12 @@  static int register_node(struct node *node, int num)
 	node->dev.groups = node_dev_groups;
 	error = device_register(&node->dev);
 
-	if (error)
+	if (error) {
 		put_device(&node->dev);
-	else
+	} else {
+		hugetlb_register_node(node);
 		compaction_register_node(node);
+	}
 
 	return error;
 }
@@ -625,6 +627,7 @@  static int register_node(struct node *node, int num)
  */
 void unregister_node(struct node *node)
 {
+	hugetlb_unregister_node(node);
 	compaction_unregister_node(node);
 	node_remove_accesses(node);
 	node_remove_caches(node);
diff --git a/include/linux/node.h b/include/linux/node.h
index 427a5975cf40..f5d41498c2bf 100644
--- a/include/linux/node.h
+++ b/include/linux/node.h
@@ -138,6 +138,11 @@  extern void unregister_memory_block_under_nodes(struct memory_block *mem_blk);
 extern int register_memory_node_under_compute_node(unsigned int mem_nid,
 						   unsigned int cpu_nid,
 						   unsigned access);
+
+#ifdef CONFIG_HUGETLBFS
+void hugetlb_register_node(struct node *node);
+void hugetlb_unregister_node(struct node *node);
+#endif
 #else
 static inline void node_dev_init(void)
 {
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index d0617d64d718..722e862bb6be 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -3898,6 +3898,7 @@  static void __init hugetlb_sysfs_init(void)
 }
 
 #ifdef CONFIG_NUMA
+static bool hugetlb_initialized __ro_after_init;
 
 /*
  * node_hstate/s - associate per node hstate attributes, via their kobjects,
@@ -3953,7 +3954,7 @@  static struct hstate *kobj_to_node_hstate(struct kobject *kobj, int *nidp)
  * Unregister hstate attributes from a single node device.
  * No-op if no hstate attributes attached.
  */
-static void hugetlb_unregister_node(struct node *node)
+void hugetlb_unregister_node(struct node *node)
 {
 	struct hstate *h;
 	struct node_hstate *nhs = &node_hstates[node->dev.id];
@@ -3983,19 +3984,22 @@  static void hugetlb_unregister_node(struct node *node)
  * Register hstate attributes for a single node device.
  * No-op if attributes already registered.
  */
-static int hugetlb_register_node(struct node *node)
+void hugetlb_register_node(struct node *node)
 {
 	struct hstate *h;
 	struct node_hstate *nhs = &node_hstates[node->dev.id];
 	int err;
 
+	if (!hugetlb_initialized)
+		return;
+
 	if (nhs->hugepages_kobj)
-		return 0;		/* already allocated */
+		return;		/* already allocated */
 
 	nhs->hugepages_kobj = kobject_create_and_add("hugepages",
 							&node->dev.kobj);
 	if (!nhs->hugepages_kobj)
-		return -ENOMEM;
+		return;
 
 	for_each_hstate(h) {
 		err = hugetlb_sysfs_add_hstate(h, nhs->hugepages_kobj,
@@ -4005,28 +4009,9 @@  static int hugetlb_register_node(struct node *node)
 			pr_err("HugeTLB: Unable to add hstate %s for node %d\n",
 				h->name, node->dev.id);
 			hugetlb_unregister_node(node);
-			return -ENOMEM;
+			break;
 		}
 	}
-	return 0;
-}
-
-static int __meminit hugetlb_memory_callback(struct notifier_block *self,
-					     unsigned long action, void *arg)
-{
-	int ret = 0;
-	struct memory_notify *mnb = arg;
-	int nid = mnb->status_change_nid;
-
-	if (nid == NUMA_NO_NODE)
-		return NOTIFY_DONE;
-
-	if (action == MEM_GOING_ONLINE)
-		ret = hugetlb_register_node(node_devices[nid]);
-	else if (action == MEM_CANCEL_ONLINE || action == MEM_OFFLINE)
-		hugetlb_unregister_node(node_devices[nid]);
-
-	return notifier_from_errno(ret);
 }
 
 /*
@@ -4038,11 +4023,9 @@  static void __init hugetlb_register_all_nodes(void)
 {
 	int nid;
 
-	get_online_mems();
-	hotplug_memory_notifier(hugetlb_memory_callback, 0);
+	hugetlb_initialized = true;
 	for_each_node_state(nid, N_MEMORY)
 		hugetlb_register_node(node_devices[nid]);
-	put_online_mems();
 }
 #else	/* !CONFIG_NUMA */