diff mbox series

[v2,3/4] dax: Update hmem resource/device registration

Message ID 9a41f91ed487ea64e187d84fab3bd69a4a5862f9.1737046620.git.nathan.fontenot@amd.com
State New
Headers show
Series Add managed SOFT RESERVE resource handling | expand

Commit Message

Nathan Fontenot Jan. 16, 2025, 5:42 p.m. UTC
In order to handle registering hmem devices for SOFT RESERVE reources
that are added late in boot update the hmem_register_resource(),
hmem_register_device(), and walk_hmem_resources() interfaces.

Remove the target_nid arg to hmem_register_resource(). The target nid
value is calculated from the resource start address and not used until
registering a device for the resource. Move the target nid calculation
to hmem_register_device().

To allow for registering hmem devices outside of the hmem dax driver
probe routine save the dax hmem platform driver during probe. The
hmem_register_device() interface can then drop the host and target
nid parameters.

There should be no functional changes.

Signed-off-by: Nathan Fontenot <nathan.fontenot@amd.com>
---
 drivers/acpi/numa/hmat.c  |  7 ++-----
 drivers/dax/hmem/device.c | 14 ++++++--------
 drivers/dax/hmem/hmem.c   | 12 ++++++++----
 include/linux/dax.h       |  9 ++++-----
 4 files changed, 20 insertions(+), 22 deletions(-)

Comments

Ira Weiny Jan. 16, 2025, 10:28 p.m. UTC | #1
Nathan Fontenot wrote:
> In order to handle registering hmem devices for SOFT RESERVE reources
                                                              ^^^^^^^^^
                                                              resources

> that are added late in boot update the hmem_register_resource(),
> hmem_register_device(), and walk_hmem_resources() interfaces.
> 
> Remove the target_nid arg to hmem_register_resource(). The target nid
> value is calculated from the resource start address and not used until
> registering a device for the resource. Move the target nid calculation
> to hmem_register_device().
> 
> To allow for registering hmem devices outside of the hmem dax driver
> probe routine save the dax hmem platform driver during probe. The
> hmem_register_device() interface can then drop the host and target
> nid parameters.
> 
> There should be no functional changes.
> 
> Signed-off-by: Nathan Fontenot <nathan.fontenot@amd.com>
> ---
>  drivers/acpi/numa/hmat.c  |  7 ++-----
>  drivers/dax/hmem/device.c | 14 ++++++--------
>  drivers/dax/hmem/hmem.c   | 12 ++++++++----
>  include/linux/dax.h       |  9 ++++-----
>  4 files changed, 20 insertions(+), 22 deletions(-)
> 
> diff --git a/drivers/acpi/numa/hmat.c b/drivers/acpi/numa/hmat.c
> index 1a902a02390f..23d4b3ad6d88 100644
> --- a/drivers/acpi/numa/hmat.c
> +++ b/drivers/acpi/numa/hmat.c
> @@ -857,11 +857,8 @@ static void hmat_register_target_devices(struct memory_target *target)
>  	if (!IS_ENABLED(CONFIG_DEV_DAX_HMEM))
>  		return;
>  
> -	for (res = target->memregions.child; res; res = res->sibling) {
> -		int target_nid = pxm_to_node(target->memory_pxm);
> -
> -		hmem_register_resource(target_nid, res);
> -	}
> +	for (res = target->memregions.child; res; res = res->sibling)
> +		hmem_register_resource(res);
>  }
>  
>  static void hmat_register_target(struct memory_target *target)
> diff --git a/drivers/dax/hmem/device.c b/drivers/dax/hmem/device.c
> index f9e1a76a04a9..ae25e08a636f 100644
> --- a/drivers/dax/hmem/device.c
> +++ b/drivers/dax/hmem/device.c
> @@ -17,14 +17,14 @@ static struct resource hmem_active = {
>  	.flags = IORESOURCE_MEM,
>  };
>  
> -int walk_hmem_resources(struct device *host, walk_hmem_fn fn)
> +int walk_hmem_resources(walk_hmem_fn fn)
>  {
>  	struct resource *res;
>  	int rc = 0;
>  
>  	mutex_lock(&hmem_resource_lock);
>  	for (res = hmem_active.child; res; res = res->sibling) {
> -		rc = fn(host, (int) res->desc, res);
> +		rc = fn(res);
>  		if (rc)
>  			break;
>  	}
> @@ -33,7 +33,7 @@ int walk_hmem_resources(struct device *host, walk_hmem_fn fn)
>  }
>  EXPORT_SYMBOL_GPL(walk_hmem_resources);
>  
> -static void __hmem_register_resource(int target_nid, struct resource *res)
> +static void __hmem_register_resource(struct resource *res)
>  {
>  	struct platform_device *pdev;
>  	struct resource *new;
> @@ -46,8 +46,6 @@ static void __hmem_register_resource(int target_nid, struct resource *res)
>  		return;
>  	}
>  
> -	new->desc = target_nid;
> -
>  	if (platform_initialized)
>  		return;
>  
> @@ -64,19 +62,19 @@ static void __hmem_register_resource(int target_nid, struct resource *res)
>  		platform_initialized = true;
>  }
>  
> -void hmem_register_resource(int target_nid, struct resource *res)
> +void hmem_register_resource(struct resource *res)
>  {
>  	if (nohmem)
>  		return;
>  
>  	mutex_lock(&hmem_resource_lock);
> -	__hmem_register_resource(target_nid, res);
> +	__hmem_register_resource(res);
>  	mutex_unlock(&hmem_resource_lock);
>  }
>  
>  static __init int hmem_register_one(struct resource *res, void *data)
>  {
> -	hmem_register_resource(phys_to_target_node(res->start), res);
> +	hmem_register_resource(res);
>  
>  	return 0;
>  }
> diff --git a/drivers/dax/hmem/hmem.c b/drivers/dax/hmem/hmem.c
> index 5e7c53f18491..088f4060d4d5 100644
> --- a/drivers/dax/hmem/hmem.c
> +++ b/drivers/dax/hmem/hmem.c
> @@ -9,6 +9,8 @@
>  static bool region_idle;
>  module_param_named(region_idle, region_idle, bool, 0644);
>  
> +static struct platform_device *dax_hmem_pdev;

I don't think you can assume there is only ever 1 hmem platform device.

hmat_register_target_devices() in particular iterates multiple memory
regions and will create more than one.

What am I missing?
Ira

> +
>  static int dax_hmem_probe(struct platform_device *pdev)
>  {
>  	unsigned long flags = IORESOURCE_DAX_KMEM;
> @@ -59,13 +61,13 @@ static void release_hmem(void *pdev)
>  	platform_device_unregister(pdev);
>  }
>  
> -static int hmem_register_device(struct device *host, int target_nid,
> -				const struct resource *res)
> +static int hmem_register_device(const struct resource *res)
>  {
> +	struct device *host = &dax_hmem_pdev->dev;
>  	struct platform_device *pdev;
>  	struct memregion_info info;
> +	int target_nid, rc;
>  	long id;
> -	int rc;
>  
>  	if (IS_ENABLED(CONFIG_CXL_REGION) &&
>  	    region_intersects(res->start, resource_size(res), IORESOURCE_MEM,
> @@ -94,6 +96,7 @@ static int hmem_register_device(struct device *host, int target_nid,
>  		return -ENOMEM;
>  	}
>  
> +	target_nid = phys_to_target_node(res->start);
>  	pdev->dev.numa_node = numa_map_to_online_node(target_nid);
>  	info = (struct memregion_info) {
>  		.target_node = target_nid,
> @@ -125,7 +128,8 @@ static int hmem_register_device(struct device *host, int target_nid,
>  
>  static int dax_hmem_platform_probe(struct platform_device *pdev)
>  {
> -	return walk_hmem_resources(&pdev->dev, hmem_register_device);
> +	dax_hmem_pdev = pdev;
> +	return walk_hmem_resources(hmem_register_device);
>  }
>  
>  static struct platform_driver dax_hmem_platform_driver = {
> diff --git a/include/linux/dax.h b/include/linux/dax.h
> index 9d3e3327af4c..beaa4bcb515c 100644
> --- a/include/linux/dax.h
> +++ b/include/linux/dax.h
> @@ -276,14 +276,13 @@ static inline int dax_mem2blk_err(int err)
>  }
>  
>  #ifdef CONFIG_DEV_DAX_HMEM_DEVICES
> -void hmem_register_resource(int target_nid, struct resource *r);
> +void hmem_register_resource(struct resource *r);
>  #else
> -static inline void hmem_register_resource(int target_nid, struct resource *r)
> +static inline void hmem_register_resource(struct resource *r)
>  {
>  }
>  #endif
>  
> -typedef int (*walk_hmem_fn)(struct device *dev, int target_nid,
> -			    const struct resource *res);
> -int walk_hmem_resources(struct device *dev, walk_hmem_fn fn);
> +typedef int (*walk_hmem_fn)(const struct resource *res);
> +int walk_hmem_resources(walk_hmem_fn fn);
>  #endif
> -- 
> 2.43.0
> 
>
Fontenot, Nathan Jan. 21, 2025, 6:49 p.m. UTC | #2
On 1/16/2025 4:28 PM, Ira Weiny wrote:
> Nathan Fontenot wrote:
>> In order to handle registering hmem devices for SOFT RESERVE reources
>                                                               ^^^^^^^^^
>                                                               resources
> 
>> that are added late in boot update the hmem_register_resource(),
>> hmem_register_device(), and walk_hmem_resources() interfaces.
>>
>> Remove the target_nid arg to hmem_register_resource(). The target nid
>> value is calculated from the resource start address and not used until
>> registering a device for the resource. Move the target nid calculation
>> to hmem_register_device().
>>
>> To allow for registering hmem devices outside of the hmem dax driver
>> probe routine save the dax hmem platform driver during probe. The
>> hmem_register_device() interface can then drop the host and target
>> nid parameters.
>>
>> There should be no functional changes.
>>
>> Signed-off-by: Nathan Fontenot <nathan.fontenot@amd.com>
>> ---
>>  drivers/acpi/numa/hmat.c  |  7 ++-----
>>  drivers/dax/hmem/device.c | 14 ++++++--------
>>  drivers/dax/hmem/hmem.c   | 12 ++++++++----
>>  include/linux/dax.h       |  9 ++++-----
>>  4 files changed, 20 insertions(+), 22 deletions(-)
>>
>> diff --git a/drivers/acpi/numa/hmat.c b/drivers/acpi/numa/hmat.c
>> index 1a902a02390f..23d4b3ad6d88 100644
>> --- a/drivers/acpi/numa/hmat.c
>> +++ b/drivers/acpi/numa/hmat.c
>> @@ -857,11 +857,8 @@ static void hmat_register_target_devices(struct memory_target *target)
>>  	if (!IS_ENABLED(CONFIG_DEV_DAX_HMEM))
>>  		return;
>>  
>> -	for (res = target->memregions.child; res; res = res->sibling) {
>> -		int target_nid = pxm_to_node(target->memory_pxm);
>> -
>> -		hmem_register_resource(target_nid, res);
>> -	}
>> +	for (res = target->memregions.child; res; res = res->sibling)
>> +		hmem_register_resource(res);
>>  }
>>  
>>  static void hmat_register_target(struct memory_target *target)
>> diff --git a/drivers/dax/hmem/device.c b/drivers/dax/hmem/device.c
>> index f9e1a76a04a9..ae25e08a636f 100644
>> --- a/drivers/dax/hmem/device.c
>> +++ b/drivers/dax/hmem/device.c
>> @@ -17,14 +17,14 @@ static struct resource hmem_active = {
>>  	.flags = IORESOURCE_MEM,
>>  };
>>  
>> -int walk_hmem_resources(struct device *host, walk_hmem_fn fn)
>> +int walk_hmem_resources(walk_hmem_fn fn)
>>  {
>>  	struct resource *res;
>>  	int rc = 0;
>>  
>>  	mutex_lock(&hmem_resource_lock);
>>  	for (res = hmem_active.child; res; res = res->sibling) {
>> -		rc = fn(host, (int) res->desc, res);
>> +		rc = fn(res);
>>  		if (rc)
>>  			break;
>>  	}
>> @@ -33,7 +33,7 @@ int walk_hmem_resources(struct device *host, walk_hmem_fn fn)
>>  }
>>  EXPORT_SYMBOL_GPL(walk_hmem_resources);
>>  
>> -static void __hmem_register_resource(int target_nid, struct resource *res)
>> +static void __hmem_register_resource(struct resource *res)
>>  {
>>  	struct platform_device *pdev;
>>  	struct resource *new;
>> @@ -46,8 +46,6 @@ static void __hmem_register_resource(int target_nid, struct resource *res)
>>  		return;
>>  	}
>>  
>> -	new->desc = target_nid;
>> -
>>  	if (platform_initialized)
>>  		return;
>>  
>> @@ -64,19 +62,19 @@ static void __hmem_register_resource(int target_nid, struct resource *res)
>>  		platform_initialized = true;
>>  }
>>  
>> -void hmem_register_resource(int target_nid, struct resource *res)
>> +void hmem_register_resource(struct resource *res)
>>  {
>>  	if (nohmem)
>>  		return;
>>  
>>  	mutex_lock(&hmem_resource_lock);
>> -	__hmem_register_resource(target_nid, res);
>> +	__hmem_register_resource(res);
>>  	mutex_unlock(&hmem_resource_lock);
>>  }
>>  
>>  static __init int hmem_register_one(struct resource *res, void *data)
>>  {
>> -	hmem_register_resource(phys_to_target_node(res->start), res);
>> +	hmem_register_resource(res);
>>  
>>  	return 0;
>>  }
>> diff --git a/drivers/dax/hmem/hmem.c b/drivers/dax/hmem/hmem.c
>> index 5e7c53f18491..088f4060d4d5 100644
>> --- a/drivers/dax/hmem/hmem.c
>> +++ b/drivers/dax/hmem/hmem.c
>> @@ -9,6 +9,8 @@
>>  static bool region_idle;
>>  module_param_named(region_idle, region_idle, bool, 0644);
>>  
>> +static struct platform_device *dax_hmem_pdev;
> 
> I don't think you can assume there is only ever 1 hmem platform device.
> 
> hmat_register_target_devices() in particular iterates multiple memory
> regions and will create more than one.
> 
> What am I missing?

You may be correct that there can be more than one hmem platform device.
I was making this change based on a comment from Dan that it may not matter
which platform device these are created against.

I could be wrong in that assumption. If so we'll need to figure lout how to
determine which platform device a soft reserve resource would be created
against when they are added later in boot from a notification by the
srmem notification chain.

-Nathan

> Ira
> 
>> +
>>  static int dax_hmem_probe(struct platform_device *pdev)
>>  {
>>  	unsigned long flags = IORESOURCE_DAX_KMEM;
>> @@ -59,13 +61,13 @@ static void release_hmem(void *pdev)
>>  	platform_device_unregister(pdev);
>>  }
>>  
>> -static int hmem_register_device(struct device *host, int target_nid,
>> -				const struct resource *res)
>> +static int hmem_register_device(const struct resource *res)
>>  {
>> +	struct device *host = &dax_hmem_pdev->dev;
>>  	struct platform_device *pdev;
>>  	struct memregion_info info;
>> +	int target_nid, rc;
>>  	long id;
>> -	int rc;
>>  
>>  	if (IS_ENABLED(CONFIG_CXL_REGION) &&
>>  	    region_intersects(res->start, resource_size(res), IORESOURCE_MEM,
>> @@ -94,6 +96,7 @@ static int hmem_register_device(struct device *host, int target_nid,
>>  		return -ENOMEM;
>>  	}
>>  
>> +	target_nid = phys_to_target_node(res->start);
>>  	pdev->dev.numa_node = numa_map_to_online_node(target_nid);
>>  	info = (struct memregion_info) {
>>  		.target_node = target_nid,
>> @@ -125,7 +128,8 @@ static int hmem_register_device(struct device *host, int target_nid,
>>  
>>  static int dax_hmem_platform_probe(struct platform_device *pdev)
>>  {
>> -	return walk_hmem_resources(&pdev->dev, hmem_register_device);
>> +	dax_hmem_pdev = pdev;
>> +	return walk_hmem_resources(hmem_register_device);
>>  }
>>  
>>  static struct platform_driver dax_hmem_platform_driver = {
>> diff --git a/include/linux/dax.h b/include/linux/dax.h
>> index 9d3e3327af4c..beaa4bcb515c 100644
>> --- a/include/linux/dax.h
>> +++ b/include/linux/dax.h
>> @@ -276,14 +276,13 @@ static inline int dax_mem2blk_err(int err)
>>  }
>>  
>>  #ifdef CONFIG_DEV_DAX_HMEM_DEVICES
>> -void hmem_register_resource(int target_nid, struct resource *r);
>> +void hmem_register_resource(struct resource *r);
>>  #else
>> -static inline void hmem_register_resource(int target_nid, struct resource *r)
>> +static inline void hmem_register_resource(struct resource *r)
>>  {
>>  }
>>  #endif
>>  
>> -typedef int (*walk_hmem_fn)(struct device *dev, int target_nid,
>> -			    const struct resource *res);
>> -int walk_hmem_resources(struct device *dev, walk_hmem_fn fn);
>> +typedef int (*walk_hmem_fn)(const struct resource *res);
>> +int walk_hmem_resources(walk_hmem_fn fn);
>>  #endif
>> -- 
>> 2.43.0
>>
>>
> 
>
Ira Weiny Jan. 21, 2025, 11:14 p.m. UTC | #3
Fontenot, Nathan wrote:
> On 1/16/2025 4:28 PM, Ira Weiny wrote:
> > Nathan Fontenot wrote:
> >> In order to handle registering hmem devices for SOFT RESERVE reources
> >                                                               ^^^^^^^^^
> >                                                               resources
> > 
> >> that are added late in boot update the hmem_register_resource(),
> >> hmem_register_device(), and walk_hmem_resources() interfaces.
> >>
> >> Remove the target_nid arg to hmem_register_resource(). The target nid
> >> value is calculated from the resource start address and not used until
> >> registering a device for the resource. Move the target nid calculation
> >> to hmem_register_device().
> >>
> >> To allow for registering hmem devices outside of the hmem dax driver
> >> probe routine save the dax hmem platform driver during probe. The
> >> hmem_register_device() interface can then drop the host and target
> >> nid parameters.
> >>
> >> There should be no functional changes.
> >>
> >> Signed-off-by: Nathan Fontenot <nathan.fontenot@amd.com>
> >> ---
> >>  drivers/acpi/numa/hmat.c  |  7 ++-----
> >>  drivers/dax/hmem/device.c | 14 ++++++--------
> >>  drivers/dax/hmem/hmem.c   | 12 ++++++++----
> >>  include/linux/dax.h       |  9 ++++-----
> >>  4 files changed, 20 insertions(+), 22 deletions(-)
> >>
> >> diff --git a/drivers/acpi/numa/hmat.c b/drivers/acpi/numa/hmat.c
> >> index 1a902a02390f..23d4b3ad6d88 100644
> >> --- a/drivers/acpi/numa/hmat.c
> >> +++ b/drivers/acpi/numa/hmat.c
> >> @@ -857,11 +857,8 @@ static void hmat_register_target_devices(struct memory_target *target)
> >>  	if (!IS_ENABLED(CONFIG_DEV_DAX_HMEM))
> >>  		return;
> >>  
> >> -	for (res = target->memregions.child; res; res = res->sibling) {
> >> -		int target_nid = pxm_to_node(target->memory_pxm);
> >> -
> >> -		hmem_register_resource(target_nid, res);
> >> -	}
> >> +	for (res = target->memregions.child; res; res = res->sibling)
> >> +		hmem_register_resource(res);
> >>  }
> >>  
> >>  static void hmat_register_target(struct memory_target *target)
> >> diff --git a/drivers/dax/hmem/device.c b/drivers/dax/hmem/device.c
> >> index f9e1a76a04a9..ae25e08a636f 100644
> >> --- a/drivers/dax/hmem/device.c
> >> +++ b/drivers/dax/hmem/device.c
> >> @@ -17,14 +17,14 @@ static struct resource hmem_active = {
> >>  	.flags = IORESOURCE_MEM,
> >>  };
> >>  
> >> -int walk_hmem_resources(struct device *host, walk_hmem_fn fn)
> >> +int walk_hmem_resources(walk_hmem_fn fn)
> >>  {
> >>  	struct resource *res;
> >>  	int rc = 0;
> >>  
> >>  	mutex_lock(&hmem_resource_lock);
> >>  	for (res = hmem_active.child; res; res = res->sibling) {
> >> -		rc = fn(host, (int) res->desc, res);
> >> +		rc = fn(res);
> >>  		if (rc)
> >>  			break;
> >>  	}
> >> @@ -33,7 +33,7 @@ int walk_hmem_resources(struct device *host, walk_hmem_fn fn)
> >>  }
> >>  EXPORT_SYMBOL_GPL(walk_hmem_resources);
> >>  
> >> -static void __hmem_register_resource(int target_nid, struct resource *res)
> >> +static void __hmem_register_resource(struct resource *res)
> >>  {
> >>  	struct platform_device *pdev;
> >>  	struct resource *new;
> >> @@ -46,8 +46,6 @@ static void __hmem_register_resource(int target_nid, struct resource *res)
> >>  		return;
> >>  	}
> >>  
> >> -	new->desc = target_nid;
> >> -
> >>  	if (platform_initialized)
> >>  		return;
> >>  
> >> @@ -64,19 +62,19 @@ static void __hmem_register_resource(int target_nid, struct resource *res)
> >>  		platform_initialized = true;
> >>  }
> >>  
> >> -void hmem_register_resource(int target_nid, struct resource *res)
> >> +void hmem_register_resource(struct resource *res)
> >>  {
> >>  	if (nohmem)
> >>  		return;
> >>  
> >>  	mutex_lock(&hmem_resource_lock);
> >> -	__hmem_register_resource(target_nid, res);
> >> +	__hmem_register_resource(res);
> >>  	mutex_unlock(&hmem_resource_lock);
> >>  }
> >>  
> >>  static __init int hmem_register_one(struct resource *res, void *data)
> >>  {
> >> -	hmem_register_resource(phys_to_target_node(res->start), res);
> >> +	hmem_register_resource(res);
> >>  
> >>  	return 0;
> >>  }
> >> diff --git a/drivers/dax/hmem/hmem.c b/drivers/dax/hmem/hmem.c
> >> index 5e7c53f18491..088f4060d4d5 100644
> >> --- a/drivers/dax/hmem/hmem.c
> >> +++ b/drivers/dax/hmem/hmem.c
> >> @@ -9,6 +9,8 @@
> >>  static bool region_idle;
> >>  module_param_named(region_idle, region_idle, bool, 0644);
> >>  
> >> +static struct platform_device *dax_hmem_pdev;
> > 
> > I don't think you can assume there is only ever 1 hmem platform device.
> > 
> > hmat_register_target_devices() in particular iterates multiple memory
> > regions and will create more than one.
> > 
> > What am I missing?
> 
> You may be correct that there can be more than one hmem platform device.
> I was making this change based on a comment from Dan that it may not matter
> which platform device these are created against.

If that is true I think there should be a big comment around this code
explaining why it is ok to have the platform device being allocated in
this call unregistered when a different platform device (host) is
released.

IOW hmem_register_device() calls two devm_*() functions using host as the
device used to trigger an action.  It is not entirely clear to me why that
change is safe here.

> 
> I could be wrong in that assumption. If so we'll need to figure lout how to
> determine which platform device a soft reserve resource would be created
> against when they are added later in boot from a notification by the
> srmem notification chain.

I see that it would be more difficult to track.  And I'm ok if it really
does work.  But just looking at the commit message and code I don't see
how this does not at least introduce a functional change.

Ira

> 
> -Nathan
> 
> > Ira
> > 
> >> +
> >>  static int dax_hmem_probe(struct platform_device *pdev)
> >>  {
> >>  	unsigned long flags = IORESOURCE_DAX_KMEM;
> >> @@ -59,13 +61,13 @@ static void release_hmem(void *pdev)
> >>  	platform_device_unregister(pdev);
> >>  }
> >>  
> >> -static int hmem_register_device(struct device *host, int target_nid,
> >> -				const struct resource *res)
> >> +static int hmem_register_device(const struct resource *res)
> >>  {
> >> +	struct device *host = &dax_hmem_pdev->dev;
> >>  	struct platform_device *pdev;
> >>  	struct memregion_info info;
> >> +	int target_nid, rc;
> >>  	long id;
> >> -	int rc;
> >>  
> >>  	if (IS_ENABLED(CONFIG_CXL_REGION) &&
> >>  	    region_intersects(res->start, resource_size(res), IORESOURCE_MEM,
> >> @@ -94,6 +96,7 @@ static int hmem_register_device(struct device *host, int target_nid,
> >>  		return -ENOMEM;
> >>  	}
> >>  
> >> +	target_nid = phys_to_target_node(res->start);
> >>  	pdev->dev.numa_node = numa_map_to_online_node(target_nid);
> >>  	info = (struct memregion_info) {
> >>  		.target_node = target_nid,
> >> @@ -125,7 +128,8 @@ static int hmem_register_device(struct device *host, int target_nid,
> >>  
> >>  static int dax_hmem_platform_probe(struct platform_device *pdev)
> >>  {
> >> -	return walk_hmem_resources(&pdev->dev, hmem_register_device);
> >> +	dax_hmem_pdev = pdev;
> >> +	return walk_hmem_resources(hmem_register_device);
> >>  }
> >>  
> >>  static struct platform_driver dax_hmem_platform_driver = {
> >> diff --git a/include/linux/dax.h b/include/linux/dax.h
> >> index 9d3e3327af4c..beaa4bcb515c 100644
> >> --- a/include/linux/dax.h
> >> +++ b/include/linux/dax.h
> >> @@ -276,14 +276,13 @@ static inline int dax_mem2blk_err(int err)
> >>  }
> >>  
> >>  #ifdef CONFIG_DEV_DAX_HMEM_DEVICES
> >> -void hmem_register_resource(int target_nid, struct resource *r);
> >> +void hmem_register_resource(struct resource *r);
> >>  #else
> >> -static inline void hmem_register_resource(int target_nid, struct resource *r)
> >> +static inline void hmem_register_resource(struct resource *r)
> >>  {
> >>  }
> >>  #endif
> >>  
> >> -typedef int (*walk_hmem_fn)(struct device *dev, int target_nid,
> >> -			    const struct resource *res);
> >> -int walk_hmem_resources(struct device *dev, walk_hmem_fn fn);
> >> +typedef int (*walk_hmem_fn)(const struct resource *res);
> >> +int walk_hmem_resources(walk_hmem_fn fn);
> >>  #endif
> >> -- 
> >> 2.43.0
> >>
> >>
> > 
> > 
>
Fontenot, Nathan Jan. 23, 2025, 4:01 p.m. UTC | #4
On 1/21/2025 5:14 PM, Ira Weiny wrote:
> Fontenot, Nathan wrote:
>> On 1/16/2025 4:28 PM, Ira Weiny wrote:
>>> Nathan Fontenot wrote:
>>>> In order to handle registering hmem devices for SOFT RESERVE reources
>>>                                                               ^^^^^^^^^
>>>                                                               resources
>>>
>>>> that are added late in boot update the hmem_register_resource(),
>>>> hmem_register_device(), and walk_hmem_resources() interfaces.
>>>>
>>>> Remove the target_nid arg to hmem_register_resource(). The target nid
>>>> value is calculated from the resource start address and not used until
>>>> registering a device for the resource. Move the target nid calculation
>>>> to hmem_register_device().
>>>>
>>>> To allow for registering hmem devices outside of the hmem dax driver
>>>> probe routine save the dax hmem platform driver during probe. The
>>>> hmem_register_device() interface can then drop the host and target
>>>> nid parameters.
>>>>
>>>> There should be no functional changes.
>>>>
>>>> Signed-off-by: Nathan Fontenot <nathan.fontenot@amd.com>
>>>> ---
>>>>  drivers/acpi/numa/hmat.c  |  7 ++-----
>>>>  drivers/dax/hmem/device.c | 14 ++++++--------
>>>>  drivers/dax/hmem/hmem.c   | 12 ++++++++----
>>>>  include/linux/dax.h       |  9 ++++-----
>>>>  4 files changed, 20 insertions(+), 22 deletions(-)
>>>>
>>>> diff --git a/drivers/acpi/numa/hmat.c b/drivers/acpi/numa/hmat.c
>>>> index 1a902a02390f..23d4b3ad6d88 100644
>>>> --- a/drivers/acpi/numa/hmat.c
>>>> +++ b/drivers/acpi/numa/hmat.c
>>>> @@ -857,11 +857,8 @@ static void hmat_register_target_devices(struct memory_target *target)
>>>>  	if (!IS_ENABLED(CONFIG_DEV_DAX_HMEM))
>>>>  		return;
>>>>  
>>>> -	for (res = target->memregions.child; res; res = res->sibling) {
>>>> -		int target_nid = pxm_to_node(target->memory_pxm);
>>>> -
>>>> -		hmem_register_resource(target_nid, res);
>>>> -	}
>>>> +	for (res = target->memregions.child; res; res = res->sibling)
>>>> +		hmem_register_resource(res);
>>>>  }
>>>>  
>>>>  static void hmat_register_target(struct memory_target *target)
>>>> diff --git a/drivers/dax/hmem/device.c b/drivers/dax/hmem/device.c
>>>> index f9e1a76a04a9..ae25e08a636f 100644
>>>> --- a/drivers/dax/hmem/device.c
>>>> +++ b/drivers/dax/hmem/device.c
>>>> @@ -17,14 +17,14 @@ static struct resource hmem_active = {
>>>>  	.flags = IORESOURCE_MEM,
>>>>  };
>>>>  
>>>> -int walk_hmem_resources(struct device *host, walk_hmem_fn fn)
>>>> +int walk_hmem_resources(walk_hmem_fn fn)
>>>>  {
>>>>  	struct resource *res;
>>>>  	int rc = 0;
>>>>  
>>>>  	mutex_lock(&hmem_resource_lock);
>>>>  	for (res = hmem_active.child; res; res = res->sibling) {
>>>> -		rc = fn(host, (int) res->desc, res);
>>>> +		rc = fn(res);
>>>>  		if (rc)
>>>>  			break;
>>>>  	}
>>>> @@ -33,7 +33,7 @@ int walk_hmem_resources(struct device *host, walk_hmem_fn fn)
>>>>  }
>>>>  EXPORT_SYMBOL_GPL(walk_hmem_resources);
>>>>  
>>>> -static void __hmem_register_resource(int target_nid, struct resource *res)
>>>> +static void __hmem_register_resource(struct resource *res)
>>>>  {
>>>>  	struct platform_device *pdev;
>>>>  	struct resource *new;
>>>> @@ -46,8 +46,6 @@ static void __hmem_register_resource(int target_nid, struct resource *res)
>>>>  		return;
>>>>  	}
>>>>  
>>>> -	new->desc = target_nid;
>>>> -
>>>>  	if (platform_initialized)
>>>>  		return;
>>>>  
>>>> @@ -64,19 +62,19 @@ static void __hmem_register_resource(int target_nid, struct resource *res)
>>>>  		platform_initialized = true;
>>>>  }
>>>>  
>>>> -void hmem_register_resource(int target_nid, struct resource *res)
>>>> +void hmem_register_resource(struct resource *res)
>>>>  {
>>>>  	if (nohmem)
>>>>  		return;
>>>>  
>>>>  	mutex_lock(&hmem_resource_lock);
>>>> -	__hmem_register_resource(target_nid, res);
>>>> +	__hmem_register_resource(res);
>>>>  	mutex_unlock(&hmem_resource_lock);
>>>>  }
>>>>  
>>>>  static __init int hmem_register_one(struct resource *res, void *data)
>>>>  {
>>>> -	hmem_register_resource(phys_to_target_node(res->start), res);
>>>> +	hmem_register_resource(res);
>>>>  
>>>>  	return 0;
>>>>  }
>>>> diff --git a/drivers/dax/hmem/hmem.c b/drivers/dax/hmem/hmem.c
>>>> index 5e7c53f18491..088f4060d4d5 100644
>>>> --- a/drivers/dax/hmem/hmem.c
>>>> +++ b/drivers/dax/hmem/hmem.c
>>>> @@ -9,6 +9,8 @@
>>>>  static bool region_idle;
>>>>  module_param_named(region_idle, region_idle, bool, 0644);
>>>>  
>>>> +static struct platform_device *dax_hmem_pdev;
>>>
>>> I don't think you can assume there is only ever 1 hmem platform device.
>>>
>>> hmat_register_target_devices() in particular iterates multiple memory
>>> regions and will create more than one.
>>>
>>> What am I missing?
>>
>> You may be correct that there can be more than one hmem platform device.
>> I was making this change based on a comment from Dan that it may not matter
>> which platform device these are created against.
> 
> If that is true I think there should be a big comment around this code
> explaining why it is ok to have the platform device being allocated in
> this call unregistered when a different platform device (host) is
> released.
> 
> IOW hmem_register_device() calls two devm_*() functions using host as the
> device used to trigger an action.  It is not entirely clear to me why that
> change is safe here.
> 
>>
>> I could be wrong in that assumption. If so we'll need to figure lout how to
>> determine which platform device a soft reserve resource would be created
>> against when they are added later in boot from a notification by the
>> srmem notification chain.
> 
> I see that it would be more difficult to track.  And I'm ok if it really
> does work.  But just looking at the commit message and code I don't see
> how this does not at least introduce a functional change.

I'm going to go back and take a look at this again. I went this direction
using the approach of having the srmem notification chain. The dax driver
then adds soft reserves outside of a probe routine and don't have a
platform device associated with them.

-Nathan

> 
> Ira
> 
>>
>> -Nathan
>>
>>> Ira
>>>
>>>> +
>>>>  static int dax_hmem_probe(struct platform_device *pdev)
>>>>  {
>>>>  	unsigned long flags = IORESOURCE_DAX_KMEM;
>>>> @@ -59,13 +61,13 @@ static void release_hmem(void *pdev)
>>>>  	platform_device_unregister(pdev);
>>>>  }
>>>>  
>>>> -static int hmem_register_device(struct device *host, int target_nid,
>>>> -				const struct resource *res)
>>>> +static int hmem_register_device(const struct resource *res)
>>>>  {
>>>> +	struct device *host = &dax_hmem_pdev->dev;
>>>>  	struct platform_device *pdev;
>>>>  	struct memregion_info info;
>>>> +	int target_nid, rc;
>>>>  	long id;
>>>> -	int rc;
>>>>  
>>>>  	if (IS_ENABLED(CONFIG_CXL_REGION) &&
>>>>  	    region_intersects(res->start, resource_size(res), IORESOURCE_MEM,
>>>> @@ -94,6 +96,7 @@ static int hmem_register_device(struct device *host, int target_nid,
>>>>  		return -ENOMEM;
>>>>  	}
>>>>  
>>>> +	target_nid = phys_to_target_node(res->start);
>>>>  	pdev->dev.numa_node = numa_map_to_online_node(target_nid);
>>>>  	info = (struct memregion_info) {
>>>>  		.target_node = target_nid,
>>>> @@ -125,7 +128,8 @@ static int hmem_register_device(struct device *host, int target_nid,
>>>>  
>>>>  static int dax_hmem_platform_probe(struct platform_device *pdev)
>>>>  {
>>>> -	return walk_hmem_resources(&pdev->dev, hmem_register_device);
>>>> +	dax_hmem_pdev = pdev;
>>>> +	return walk_hmem_resources(hmem_register_device);
>>>>  }
>>>>  
>>>>  static struct platform_driver dax_hmem_platform_driver = {
>>>> diff --git a/include/linux/dax.h b/include/linux/dax.h
>>>> index 9d3e3327af4c..beaa4bcb515c 100644
>>>> --- a/include/linux/dax.h
>>>> +++ b/include/linux/dax.h
>>>> @@ -276,14 +276,13 @@ static inline int dax_mem2blk_err(int err)
>>>>  }
>>>>  
>>>>  #ifdef CONFIG_DEV_DAX_HMEM_DEVICES
>>>> -void hmem_register_resource(int target_nid, struct resource *r);
>>>> +void hmem_register_resource(struct resource *r);
>>>>  #else
>>>> -static inline void hmem_register_resource(int target_nid, struct resource *r)
>>>> +static inline void hmem_register_resource(struct resource *r)
>>>>  {
>>>>  }
>>>>  #endif
>>>>  
>>>> -typedef int (*walk_hmem_fn)(struct device *dev, int target_nid,
>>>> -			    const struct resource *res);
>>>> -int walk_hmem_resources(struct device *dev, walk_hmem_fn fn);
>>>> +typedef int (*walk_hmem_fn)(const struct resource *res);
>>>> +int walk_hmem_resources(walk_hmem_fn fn);
>>>>  #endif
>>>> -- 
>>>> 2.43.0
>>>>
>>>>
>>>
>>>
>>
> 
>
Fontenot, Nathan Jan. 27, 2025, 6:44 p.m. UTC | #5
On 1/23/2025 10:01 AM, Fontenot, Nathan wrote:
> On 1/21/2025 5:14 PM, Ira Weiny wrote:
>> Fontenot, Nathan wrote:
>>> On 1/16/2025 4:28 PM, Ira Weiny wrote:
>>>> Nathan Fontenot wrote:
>>>>> In order to handle registering hmem devices for SOFT RESERVE reources
>>>>                                                               ^^^^^^^^^
>>>>                                                               resources
>>>>
>>>>> that are added late in boot update the hmem_register_resource(),
>>>>> hmem_register_device(), and walk_hmem_resources() interfaces.
>>>>>
>>>>> Remove the target_nid arg to hmem_register_resource(). The target nid
>>>>> value is calculated from the resource start address and not used until
>>>>> registering a device for the resource. Move the target nid calculation
>>>>> to hmem_register_device().
>>>>>
>>>>> To allow for registering hmem devices outside of the hmem dax driver
>>>>> probe routine save the dax hmem platform driver during probe. The
>>>>> hmem_register_device() interface can then drop the host and target
>>>>> nid parameters.
>>>>>
>>>>> There should be no functional changes.
>>>>>
>>>>> Signed-off-by: Nathan Fontenot <nathan.fontenot@amd.com>

[ snip ]

>>>>> diff --git a/drivers/dax/hmem/hmem.c b/drivers/dax/hmem/hmem.c
>>>>> index 5e7c53f18491..088f4060d4d5 100644
>>>>> --- a/drivers/dax/hmem/hmem.c
>>>>> +++ b/drivers/dax/hmem/hmem.c
>>>>> @@ -9,6 +9,8 @@
>>>>>  static bool region_idle;
>>>>>  module_param_named(region_idle, region_idle, bool, 0644);
>>>>>  
>>>>> +static struct platform_device *dax_hmem_pdev;
>>>>
>>>> I don't think you can assume there is only ever 1 hmem platform device.
>>>>
>>>> hmat_register_target_devices() in particular iterates multiple memory
>>>> regions and will create more than one.
>>>>
>>>> What am I missing?
>>>
>>> You may be correct that there can be more than one hmem platform device.
>>> I was making this change based on a comment from Dan that it may not matter
>>> which platform device these are created against.
>>
>> If that is true I think there should be a big comment around this code
>> explaining why it is ok to have the platform device being allocated in
>> this call unregistered when a different platform device (host) is
>> released.
>>
>> IOW hmem_register_device() calls two devm_*() functions using host as the
>> device used to trigger an action.  It is not entirely clear to me why that
>> change is safe here.
>>
>>>
>>> I could be wrong in that assumption. If so we'll need to figure lout how to
>>> determine which platform device a soft reserve resource would be created
>>> against when they are added later in boot from a notification by the
>>> srmem notification chain.
>>
>> I see that it would be more difficult to track.  And I'm ok if it really
>> does work.  But just looking at the commit message and code I don't see
>> how this does not at least introduce a functional change.
> 
> I'm going to go back and take a look at this again. I went this direction
> using the approach of having the srmem notification chain. The dax driver
> then adds soft reserves outside of a probe routine and don't have a
> platform device associated with them.
> 

Digging back into this, the dax driver only creates one platform device.
During hmem_register_resource (which is what hmat_register_target_devices()
calls for each resource) the dax hmem driver will only create a platform
device when the first resource is registered.

Each resource that is passed to hmem_register_resource() is added to a
resource tree internal to the dax/hmem driver that is eventually walked
by the dax hmem driver probe routine.

Now that I understand this better I am confident that saving a pointer
to the dax hmem platform device is safe. I'll include this information
in the commit log for the next version of the patch.

-Nathan
diff mbox series

Patch

diff --git a/drivers/acpi/numa/hmat.c b/drivers/acpi/numa/hmat.c
index 1a902a02390f..23d4b3ad6d88 100644
--- a/drivers/acpi/numa/hmat.c
+++ b/drivers/acpi/numa/hmat.c
@@ -857,11 +857,8 @@  static void hmat_register_target_devices(struct memory_target *target)
 	if (!IS_ENABLED(CONFIG_DEV_DAX_HMEM))
 		return;
 
-	for (res = target->memregions.child; res; res = res->sibling) {
-		int target_nid = pxm_to_node(target->memory_pxm);
-
-		hmem_register_resource(target_nid, res);
-	}
+	for (res = target->memregions.child; res; res = res->sibling)
+		hmem_register_resource(res);
 }
 
 static void hmat_register_target(struct memory_target *target)
diff --git a/drivers/dax/hmem/device.c b/drivers/dax/hmem/device.c
index f9e1a76a04a9..ae25e08a636f 100644
--- a/drivers/dax/hmem/device.c
+++ b/drivers/dax/hmem/device.c
@@ -17,14 +17,14 @@  static struct resource hmem_active = {
 	.flags = IORESOURCE_MEM,
 };
 
-int walk_hmem_resources(struct device *host, walk_hmem_fn fn)
+int walk_hmem_resources(walk_hmem_fn fn)
 {
 	struct resource *res;
 	int rc = 0;
 
 	mutex_lock(&hmem_resource_lock);
 	for (res = hmem_active.child; res; res = res->sibling) {
-		rc = fn(host, (int) res->desc, res);
+		rc = fn(res);
 		if (rc)
 			break;
 	}
@@ -33,7 +33,7 @@  int walk_hmem_resources(struct device *host, walk_hmem_fn fn)
 }
 EXPORT_SYMBOL_GPL(walk_hmem_resources);
 
-static void __hmem_register_resource(int target_nid, struct resource *res)
+static void __hmem_register_resource(struct resource *res)
 {
 	struct platform_device *pdev;
 	struct resource *new;
@@ -46,8 +46,6 @@  static void __hmem_register_resource(int target_nid, struct resource *res)
 		return;
 	}
 
-	new->desc = target_nid;
-
 	if (platform_initialized)
 		return;
 
@@ -64,19 +62,19 @@  static void __hmem_register_resource(int target_nid, struct resource *res)
 		platform_initialized = true;
 }
 
-void hmem_register_resource(int target_nid, struct resource *res)
+void hmem_register_resource(struct resource *res)
 {
 	if (nohmem)
 		return;
 
 	mutex_lock(&hmem_resource_lock);
-	__hmem_register_resource(target_nid, res);
+	__hmem_register_resource(res);
 	mutex_unlock(&hmem_resource_lock);
 }
 
 static __init int hmem_register_one(struct resource *res, void *data)
 {
-	hmem_register_resource(phys_to_target_node(res->start), res);
+	hmem_register_resource(res);
 
 	return 0;
 }
diff --git a/drivers/dax/hmem/hmem.c b/drivers/dax/hmem/hmem.c
index 5e7c53f18491..088f4060d4d5 100644
--- a/drivers/dax/hmem/hmem.c
+++ b/drivers/dax/hmem/hmem.c
@@ -9,6 +9,8 @@ 
 static bool region_idle;
 module_param_named(region_idle, region_idle, bool, 0644);
 
+static struct platform_device *dax_hmem_pdev;
+
 static int dax_hmem_probe(struct platform_device *pdev)
 {
 	unsigned long flags = IORESOURCE_DAX_KMEM;
@@ -59,13 +61,13 @@  static void release_hmem(void *pdev)
 	platform_device_unregister(pdev);
 }
 
-static int hmem_register_device(struct device *host, int target_nid,
-				const struct resource *res)
+static int hmem_register_device(const struct resource *res)
 {
+	struct device *host = &dax_hmem_pdev->dev;
 	struct platform_device *pdev;
 	struct memregion_info info;
+	int target_nid, rc;
 	long id;
-	int rc;
 
 	if (IS_ENABLED(CONFIG_CXL_REGION) &&
 	    region_intersects(res->start, resource_size(res), IORESOURCE_MEM,
@@ -94,6 +96,7 @@  static int hmem_register_device(struct device *host, int target_nid,
 		return -ENOMEM;
 	}
 
+	target_nid = phys_to_target_node(res->start);
 	pdev->dev.numa_node = numa_map_to_online_node(target_nid);
 	info = (struct memregion_info) {
 		.target_node = target_nid,
@@ -125,7 +128,8 @@  static int hmem_register_device(struct device *host, int target_nid,
 
 static int dax_hmem_platform_probe(struct platform_device *pdev)
 {
-	return walk_hmem_resources(&pdev->dev, hmem_register_device);
+	dax_hmem_pdev = pdev;
+	return walk_hmem_resources(hmem_register_device);
 }
 
 static struct platform_driver dax_hmem_platform_driver = {
diff --git a/include/linux/dax.h b/include/linux/dax.h
index 9d3e3327af4c..beaa4bcb515c 100644
--- a/include/linux/dax.h
+++ b/include/linux/dax.h
@@ -276,14 +276,13 @@  static inline int dax_mem2blk_err(int err)
 }
 
 #ifdef CONFIG_DEV_DAX_HMEM_DEVICES
-void hmem_register_resource(int target_nid, struct resource *r);
+void hmem_register_resource(struct resource *r);
 #else
-static inline void hmem_register_resource(int target_nid, struct resource *r)
+static inline void hmem_register_resource(struct resource *r)
 {
 }
 #endif
 
-typedef int (*walk_hmem_fn)(struct device *dev, int target_nid,
-			    const struct resource *res);
-int walk_hmem_resources(struct device *dev, walk_hmem_fn fn);
+typedef int (*walk_hmem_fn)(const struct resource *res);
+int walk_hmem_resources(walk_hmem_fn fn);
 #endif