Message ID | 9a41f91ed487ea64e187d84fab3bd69a4a5862f9.1737046620.git.nathan.fontenot@amd.com |
---|---|
State | New |
Headers | show |
Series | Add managed SOFT RESERVE resource handling | expand |
Nathan Fontenot wrote: > In order to handle registering hmem devices for SOFT RESERVE reources ^^^^^^^^^ resources > that are added late in boot update the hmem_register_resource(), > hmem_register_device(), and walk_hmem_resources() interfaces. > > Remove the target_nid arg to hmem_register_resource(). The target nid > value is calculated from the resource start address and not used until > registering a device for the resource. Move the target nid calculation > to hmem_register_device(). > > To allow for registering hmem devices outside of the hmem dax driver > probe routine save the dax hmem platform driver during probe. The > hmem_register_device() interface can then drop the host and target > nid parameters. > > There should be no functional changes. > > Signed-off-by: Nathan Fontenot <nathan.fontenot@amd.com> > --- > drivers/acpi/numa/hmat.c | 7 ++----- > drivers/dax/hmem/device.c | 14 ++++++-------- > drivers/dax/hmem/hmem.c | 12 ++++++++---- > include/linux/dax.h | 9 ++++----- > 4 files changed, 20 insertions(+), 22 deletions(-) > > diff --git a/drivers/acpi/numa/hmat.c b/drivers/acpi/numa/hmat.c > index 1a902a02390f..23d4b3ad6d88 100644 > --- a/drivers/acpi/numa/hmat.c > +++ b/drivers/acpi/numa/hmat.c > @@ -857,11 +857,8 @@ static void hmat_register_target_devices(struct memory_target *target) > if (!IS_ENABLED(CONFIG_DEV_DAX_HMEM)) > return; > > - for (res = target->memregions.child; res; res = res->sibling) { > - int target_nid = pxm_to_node(target->memory_pxm); > - > - hmem_register_resource(target_nid, res); > - } > + for (res = target->memregions.child; res; res = res->sibling) > + hmem_register_resource(res); > } > > static void hmat_register_target(struct memory_target *target) > diff --git a/drivers/dax/hmem/device.c b/drivers/dax/hmem/device.c > index f9e1a76a04a9..ae25e08a636f 100644 > --- a/drivers/dax/hmem/device.c > +++ b/drivers/dax/hmem/device.c > @@ -17,14 +17,14 @@ static struct resource hmem_active = { > .flags = IORESOURCE_MEM, > }; > > -int walk_hmem_resources(struct device *host, walk_hmem_fn fn) > +int walk_hmem_resources(walk_hmem_fn fn) > { > struct resource *res; > int rc = 0; > > mutex_lock(&hmem_resource_lock); > for (res = hmem_active.child; res; res = res->sibling) { > - rc = fn(host, (int) res->desc, res); > + rc = fn(res); > if (rc) > break; > } > @@ -33,7 +33,7 @@ int walk_hmem_resources(struct device *host, walk_hmem_fn fn) > } > EXPORT_SYMBOL_GPL(walk_hmem_resources); > > -static void __hmem_register_resource(int target_nid, struct resource *res) > +static void __hmem_register_resource(struct resource *res) > { > struct platform_device *pdev; > struct resource *new; > @@ -46,8 +46,6 @@ static void __hmem_register_resource(int target_nid, struct resource *res) > return; > } > > - new->desc = target_nid; > - > if (platform_initialized) > return; > > @@ -64,19 +62,19 @@ static void __hmem_register_resource(int target_nid, struct resource *res) > platform_initialized = true; > } > > -void hmem_register_resource(int target_nid, struct resource *res) > +void hmem_register_resource(struct resource *res) > { > if (nohmem) > return; > > mutex_lock(&hmem_resource_lock); > - __hmem_register_resource(target_nid, res); > + __hmem_register_resource(res); > mutex_unlock(&hmem_resource_lock); > } > > static __init int hmem_register_one(struct resource *res, void *data) > { > - hmem_register_resource(phys_to_target_node(res->start), res); > + hmem_register_resource(res); > > return 0; > } > diff --git a/drivers/dax/hmem/hmem.c b/drivers/dax/hmem/hmem.c > index 5e7c53f18491..088f4060d4d5 100644 > --- a/drivers/dax/hmem/hmem.c > +++ b/drivers/dax/hmem/hmem.c > @@ -9,6 +9,8 @@ > static bool region_idle; > module_param_named(region_idle, region_idle, bool, 0644); > > +static struct platform_device *dax_hmem_pdev; I don't think you can assume there is only ever 1 hmem platform device. hmat_register_target_devices() in particular iterates multiple memory regions and will create more than one. What am I missing? Ira > + > static int dax_hmem_probe(struct platform_device *pdev) > { > unsigned long flags = IORESOURCE_DAX_KMEM; > @@ -59,13 +61,13 @@ static void release_hmem(void *pdev) > platform_device_unregister(pdev); > } > > -static int hmem_register_device(struct device *host, int target_nid, > - const struct resource *res) > +static int hmem_register_device(const struct resource *res) > { > + struct device *host = &dax_hmem_pdev->dev; > struct platform_device *pdev; > struct memregion_info info; > + int target_nid, rc; > long id; > - int rc; > > if (IS_ENABLED(CONFIG_CXL_REGION) && > region_intersects(res->start, resource_size(res), IORESOURCE_MEM, > @@ -94,6 +96,7 @@ static int hmem_register_device(struct device *host, int target_nid, > return -ENOMEM; > } > > + target_nid = phys_to_target_node(res->start); > pdev->dev.numa_node = numa_map_to_online_node(target_nid); > info = (struct memregion_info) { > .target_node = target_nid, > @@ -125,7 +128,8 @@ static int hmem_register_device(struct device *host, int target_nid, > > static int dax_hmem_platform_probe(struct platform_device *pdev) > { > - return walk_hmem_resources(&pdev->dev, hmem_register_device); > + dax_hmem_pdev = pdev; > + return walk_hmem_resources(hmem_register_device); > } > > static struct platform_driver dax_hmem_platform_driver = { > diff --git a/include/linux/dax.h b/include/linux/dax.h > index 9d3e3327af4c..beaa4bcb515c 100644 > --- a/include/linux/dax.h > +++ b/include/linux/dax.h > @@ -276,14 +276,13 @@ static inline int dax_mem2blk_err(int err) > } > > #ifdef CONFIG_DEV_DAX_HMEM_DEVICES > -void hmem_register_resource(int target_nid, struct resource *r); > +void hmem_register_resource(struct resource *r); > #else > -static inline void hmem_register_resource(int target_nid, struct resource *r) > +static inline void hmem_register_resource(struct resource *r) > { > } > #endif > > -typedef int (*walk_hmem_fn)(struct device *dev, int target_nid, > - const struct resource *res); > -int walk_hmem_resources(struct device *dev, walk_hmem_fn fn); > +typedef int (*walk_hmem_fn)(const struct resource *res); > +int walk_hmem_resources(walk_hmem_fn fn); > #endif > -- > 2.43.0 > >
On 1/16/2025 4:28 PM, Ira Weiny wrote: > Nathan Fontenot wrote: >> In order to handle registering hmem devices for SOFT RESERVE reources > ^^^^^^^^^ > resources > >> that are added late in boot update the hmem_register_resource(), >> hmem_register_device(), and walk_hmem_resources() interfaces. >> >> Remove the target_nid arg to hmem_register_resource(). The target nid >> value is calculated from the resource start address and not used until >> registering a device for the resource. Move the target nid calculation >> to hmem_register_device(). >> >> To allow for registering hmem devices outside of the hmem dax driver >> probe routine save the dax hmem platform driver during probe. The >> hmem_register_device() interface can then drop the host and target >> nid parameters. >> >> There should be no functional changes. >> >> Signed-off-by: Nathan Fontenot <nathan.fontenot@amd.com> >> --- >> drivers/acpi/numa/hmat.c | 7 ++----- >> drivers/dax/hmem/device.c | 14 ++++++-------- >> drivers/dax/hmem/hmem.c | 12 ++++++++---- >> include/linux/dax.h | 9 ++++----- >> 4 files changed, 20 insertions(+), 22 deletions(-) >> >> diff --git a/drivers/acpi/numa/hmat.c b/drivers/acpi/numa/hmat.c >> index 1a902a02390f..23d4b3ad6d88 100644 >> --- a/drivers/acpi/numa/hmat.c >> +++ b/drivers/acpi/numa/hmat.c >> @@ -857,11 +857,8 @@ static void hmat_register_target_devices(struct memory_target *target) >> if (!IS_ENABLED(CONFIG_DEV_DAX_HMEM)) >> return; >> >> - for (res = target->memregions.child; res; res = res->sibling) { >> - int target_nid = pxm_to_node(target->memory_pxm); >> - >> - hmem_register_resource(target_nid, res); >> - } >> + for (res = target->memregions.child; res; res = res->sibling) >> + hmem_register_resource(res); >> } >> >> static void hmat_register_target(struct memory_target *target) >> diff --git a/drivers/dax/hmem/device.c b/drivers/dax/hmem/device.c >> index f9e1a76a04a9..ae25e08a636f 100644 >> --- a/drivers/dax/hmem/device.c >> +++ b/drivers/dax/hmem/device.c >> @@ -17,14 +17,14 @@ static struct resource hmem_active = { >> .flags = IORESOURCE_MEM, >> }; >> >> -int walk_hmem_resources(struct device *host, walk_hmem_fn fn) >> +int walk_hmem_resources(walk_hmem_fn fn) >> { >> struct resource *res; >> int rc = 0; >> >> mutex_lock(&hmem_resource_lock); >> for (res = hmem_active.child; res; res = res->sibling) { >> - rc = fn(host, (int) res->desc, res); >> + rc = fn(res); >> if (rc) >> break; >> } >> @@ -33,7 +33,7 @@ int walk_hmem_resources(struct device *host, walk_hmem_fn fn) >> } >> EXPORT_SYMBOL_GPL(walk_hmem_resources); >> >> -static void __hmem_register_resource(int target_nid, struct resource *res) >> +static void __hmem_register_resource(struct resource *res) >> { >> struct platform_device *pdev; >> struct resource *new; >> @@ -46,8 +46,6 @@ static void __hmem_register_resource(int target_nid, struct resource *res) >> return; >> } >> >> - new->desc = target_nid; >> - >> if (platform_initialized) >> return; >> >> @@ -64,19 +62,19 @@ static void __hmem_register_resource(int target_nid, struct resource *res) >> platform_initialized = true; >> } >> >> -void hmem_register_resource(int target_nid, struct resource *res) >> +void hmem_register_resource(struct resource *res) >> { >> if (nohmem) >> return; >> >> mutex_lock(&hmem_resource_lock); >> - __hmem_register_resource(target_nid, res); >> + __hmem_register_resource(res); >> mutex_unlock(&hmem_resource_lock); >> } >> >> static __init int hmem_register_one(struct resource *res, void *data) >> { >> - hmem_register_resource(phys_to_target_node(res->start), res); >> + hmem_register_resource(res); >> >> return 0; >> } >> diff --git a/drivers/dax/hmem/hmem.c b/drivers/dax/hmem/hmem.c >> index 5e7c53f18491..088f4060d4d5 100644 >> --- a/drivers/dax/hmem/hmem.c >> +++ b/drivers/dax/hmem/hmem.c >> @@ -9,6 +9,8 @@ >> static bool region_idle; >> module_param_named(region_idle, region_idle, bool, 0644); >> >> +static struct platform_device *dax_hmem_pdev; > > I don't think you can assume there is only ever 1 hmem platform device. > > hmat_register_target_devices() in particular iterates multiple memory > regions and will create more than one. > > What am I missing? You may be correct that there can be more than one hmem platform device. I was making this change based on a comment from Dan that it may not matter which platform device these are created against. I could be wrong in that assumption. If so we'll need to figure lout how to determine which platform device a soft reserve resource would be created against when they are added later in boot from a notification by the srmem notification chain. -Nathan > Ira > >> + >> static int dax_hmem_probe(struct platform_device *pdev) >> { >> unsigned long flags = IORESOURCE_DAX_KMEM; >> @@ -59,13 +61,13 @@ static void release_hmem(void *pdev) >> platform_device_unregister(pdev); >> } >> >> -static int hmem_register_device(struct device *host, int target_nid, >> - const struct resource *res) >> +static int hmem_register_device(const struct resource *res) >> { >> + struct device *host = &dax_hmem_pdev->dev; >> struct platform_device *pdev; >> struct memregion_info info; >> + int target_nid, rc; >> long id; >> - int rc; >> >> if (IS_ENABLED(CONFIG_CXL_REGION) && >> region_intersects(res->start, resource_size(res), IORESOURCE_MEM, >> @@ -94,6 +96,7 @@ static int hmem_register_device(struct device *host, int target_nid, >> return -ENOMEM; >> } >> >> + target_nid = phys_to_target_node(res->start); >> pdev->dev.numa_node = numa_map_to_online_node(target_nid); >> info = (struct memregion_info) { >> .target_node = target_nid, >> @@ -125,7 +128,8 @@ static int hmem_register_device(struct device *host, int target_nid, >> >> static int dax_hmem_platform_probe(struct platform_device *pdev) >> { >> - return walk_hmem_resources(&pdev->dev, hmem_register_device); >> + dax_hmem_pdev = pdev; >> + return walk_hmem_resources(hmem_register_device); >> } >> >> static struct platform_driver dax_hmem_platform_driver = { >> diff --git a/include/linux/dax.h b/include/linux/dax.h >> index 9d3e3327af4c..beaa4bcb515c 100644 >> --- a/include/linux/dax.h >> +++ b/include/linux/dax.h >> @@ -276,14 +276,13 @@ static inline int dax_mem2blk_err(int err) >> } >> >> #ifdef CONFIG_DEV_DAX_HMEM_DEVICES >> -void hmem_register_resource(int target_nid, struct resource *r); >> +void hmem_register_resource(struct resource *r); >> #else >> -static inline void hmem_register_resource(int target_nid, struct resource *r) >> +static inline void hmem_register_resource(struct resource *r) >> { >> } >> #endif >> >> -typedef int (*walk_hmem_fn)(struct device *dev, int target_nid, >> - const struct resource *res); >> -int walk_hmem_resources(struct device *dev, walk_hmem_fn fn); >> +typedef int (*walk_hmem_fn)(const struct resource *res); >> +int walk_hmem_resources(walk_hmem_fn fn); >> #endif >> -- >> 2.43.0 >> >> > >
Fontenot, Nathan wrote: > On 1/16/2025 4:28 PM, Ira Weiny wrote: > > Nathan Fontenot wrote: > >> In order to handle registering hmem devices for SOFT RESERVE reources > > ^^^^^^^^^ > > resources > > > >> that are added late in boot update the hmem_register_resource(), > >> hmem_register_device(), and walk_hmem_resources() interfaces. > >> > >> Remove the target_nid arg to hmem_register_resource(). The target nid > >> value is calculated from the resource start address and not used until > >> registering a device for the resource. Move the target nid calculation > >> to hmem_register_device(). > >> > >> To allow for registering hmem devices outside of the hmem dax driver > >> probe routine save the dax hmem platform driver during probe. The > >> hmem_register_device() interface can then drop the host and target > >> nid parameters. > >> > >> There should be no functional changes. > >> > >> Signed-off-by: Nathan Fontenot <nathan.fontenot@amd.com> > >> --- > >> drivers/acpi/numa/hmat.c | 7 ++----- > >> drivers/dax/hmem/device.c | 14 ++++++-------- > >> drivers/dax/hmem/hmem.c | 12 ++++++++---- > >> include/linux/dax.h | 9 ++++----- > >> 4 files changed, 20 insertions(+), 22 deletions(-) > >> > >> diff --git a/drivers/acpi/numa/hmat.c b/drivers/acpi/numa/hmat.c > >> index 1a902a02390f..23d4b3ad6d88 100644 > >> --- a/drivers/acpi/numa/hmat.c > >> +++ b/drivers/acpi/numa/hmat.c > >> @@ -857,11 +857,8 @@ static void hmat_register_target_devices(struct memory_target *target) > >> if (!IS_ENABLED(CONFIG_DEV_DAX_HMEM)) > >> return; > >> > >> - for (res = target->memregions.child; res; res = res->sibling) { > >> - int target_nid = pxm_to_node(target->memory_pxm); > >> - > >> - hmem_register_resource(target_nid, res); > >> - } > >> + for (res = target->memregions.child; res; res = res->sibling) > >> + hmem_register_resource(res); > >> } > >> > >> static void hmat_register_target(struct memory_target *target) > >> diff --git a/drivers/dax/hmem/device.c b/drivers/dax/hmem/device.c > >> index f9e1a76a04a9..ae25e08a636f 100644 > >> --- a/drivers/dax/hmem/device.c > >> +++ b/drivers/dax/hmem/device.c > >> @@ -17,14 +17,14 @@ static struct resource hmem_active = { > >> .flags = IORESOURCE_MEM, > >> }; > >> > >> -int walk_hmem_resources(struct device *host, walk_hmem_fn fn) > >> +int walk_hmem_resources(walk_hmem_fn fn) > >> { > >> struct resource *res; > >> int rc = 0; > >> > >> mutex_lock(&hmem_resource_lock); > >> for (res = hmem_active.child; res; res = res->sibling) { > >> - rc = fn(host, (int) res->desc, res); > >> + rc = fn(res); > >> if (rc) > >> break; > >> } > >> @@ -33,7 +33,7 @@ int walk_hmem_resources(struct device *host, walk_hmem_fn fn) > >> } > >> EXPORT_SYMBOL_GPL(walk_hmem_resources); > >> > >> -static void __hmem_register_resource(int target_nid, struct resource *res) > >> +static void __hmem_register_resource(struct resource *res) > >> { > >> struct platform_device *pdev; > >> struct resource *new; > >> @@ -46,8 +46,6 @@ static void __hmem_register_resource(int target_nid, struct resource *res) > >> return; > >> } > >> > >> - new->desc = target_nid; > >> - > >> if (platform_initialized) > >> return; > >> > >> @@ -64,19 +62,19 @@ static void __hmem_register_resource(int target_nid, struct resource *res) > >> platform_initialized = true; > >> } > >> > >> -void hmem_register_resource(int target_nid, struct resource *res) > >> +void hmem_register_resource(struct resource *res) > >> { > >> if (nohmem) > >> return; > >> > >> mutex_lock(&hmem_resource_lock); > >> - __hmem_register_resource(target_nid, res); > >> + __hmem_register_resource(res); > >> mutex_unlock(&hmem_resource_lock); > >> } > >> > >> static __init int hmem_register_one(struct resource *res, void *data) > >> { > >> - hmem_register_resource(phys_to_target_node(res->start), res); > >> + hmem_register_resource(res); > >> > >> return 0; > >> } > >> diff --git a/drivers/dax/hmem/hmem.c b/drivers/dax/hmem/hmem.c > >> index 5e7c53f18491..088f4060d4d5 100644 > >> --- a/drivers/dax/hmem/hmem.c > >> +++ b/drivers/dax/hmem/hmem.c > >> @@ -9,6 +9,8 @@ > >> static bool region_idle; > >> module_param_named(region_idle, region_idle, bool, 0644); > >> > >> +static struct platform_device *dax_hmem_pdev; > > > > I don't think you can assume there is only ever 1 hmem platform device. > > > > hmat_register_target_devices() in particular iterates multiple memory > > regions and will create more than one. > > > > What am I missing? > > You may be correct that there can be more than one hmem platform device. > I was making this change based on a comment from Dan that it may not matter > which platform device these are created against. If that is true I think there should be a big comment around this code explaining why it is ok to have the platform device being allocated in this call unregistered when a different platform device (host) is released. IOW hmem_register_device() calls two devm_*() functions using host as the device used to trigger an action. It is not entirely clear to me why that change is safe here. > > I could be wrong in that assumption. If so we'll need to figure lout how to > determine which platform device a soft reserve resource would be created > against when they are added later in boot from a notification by the > srmem notification chain. I see that it would be more difficult to track. And I'm ok if it really does work. But just looking at the commit message and code I don't see how this does not at least introduce a functional change. Ira > > -Nathan > > > Ira > > > >> + > >> static int dax_hmem_probe(struct platform_device *pdev) > >> { > >> unsigned long flags = IORESOURCE_DAX_KMEM; > >> @@ -59,13 +61,13 @@ static void release_hmem(void *pdev) > >> platform_device_unregister(pdev); > >> } > >> > >> -static int hmem_register_device(struct device *host, int target_nid, > >> - const struct resource *res) > >> +static int hmem_register_device(const struct resource *res) > >> { > >> + struct device *host = &dax_hmem_pdev->dev; > >> struct platform_device *pdev; > >> struct memregion_info info; > >> + int target_nid, rc; > >> long id; > >> - int rc; > >> > >> if (IS_ENABLED(CONFIG_CXL_REGION) && > >> region_intersects(res->start, resource_size(res), IORESOURCE_MEM, > >> @@ -94,6 +96,7 @@ static int hmem_register_device(struct device *host, int target_nid, > >> return -ENOMEM; > >> } > >> > >> + target_nid = phys_to_target_node(res->start); > >> pdev->dev.numa_node = numa_map_to_online_node(target_nid); > >> info = (struct memregion_info) { > >> .target_node = target_nid, > >> @@ -125,7 +128,8 @@ static int hmem_register_device(struct device *host, int target_nid, > >> > >> static int dax_hmem_platform_probe(struct platform_device *pdev) > >> { > >> - return walk_hmem_resources(&pdev->dev, hmem_register_device); > >> + dax_hmem_pdev = pdev; > >> + return walk_hmem_resources(hmem_register_device); > >> } > >> > >> static struct platform_driver dax_hmem_platform_driver = { > >> diff --git a/include/linux/dax.h b/include/linux/dax.h > >> index 9d3e3327af4c..beaa4bcb515c 100644 > >> --- a/include/linux/dax.h > >> +++ b/include/linux/dax.h > >> @@ -276,14 +276,13 @@ static inline int dax_mem2blk_err(int err) > >> } > >> > >> #ifdef CONFIG_DEV_DAX_HMEM_DEVICES > >> -void hmem_register_resource(int target_nid, struct resource *r); > >> +void hmem_register_resource(struct resource *r); > >> #else > >> -static inline void hmem_register_resource(int target_nid, struct resource *r) > >> +static inline void hmem_register_resource(struct resource *r) > >> { > >> } > >> #endif > >> > >> -typedef int (*walk_hmem_fn)(struct device *dev, int target_nid, > >> - const struct resource *res); > >> -int walk_hmem_resources(struct device *dev, walk_hmem_fn fn); > >> +typedef int (*walk_hmem_fn)(const struct resource *res); > >> +int walk_hmem_resources(walk_hmem_fn fn); > >> #endif > >> -- > >> 2.43.0 > >> > >> > > > > >
On 1/21/2025 5:14 PM, Ira Weiny wrote: > Fontenot, Nathan wrote: >> On 1/16/2025 4:28 PM, Ira Weiny wrote: >>> Nathan Fontenot wrote: >>>> In order to handle registering hmem devices for SOFT RESERVE reources >>> ^^^^^^^^^ >>> resources >>> >>>> that are added late in boot update the hmem_register_resource(), >>>> hmem_register_device(), and walk_hmem_resources() interfaces. >>>> >>>> Remove the target_nid arg to hmem_register_resource(). The target nid >>>> value is calculated from the resource start address and not used until >>>> registering a device for the resource. Move the target nid calculation >>>> to hmem_register_device(). >>>> >>>> To allow for registering hmem devices outside of the hmem dax driver >>>> probe routine save the dax hmem platform driver during probe. The >>>> hmem_register_device() interface can then drop the host and target >>>> nid parameters. >>>> >>>> There should be no functional changes. >>>> >>>> Signed-off-by: Nathan Fontenot <nathan.fontenot@amd.com> >>>> --- >>>> drivers/acpi/numa/hmat.c | 7 ++----- >>>> drivers/dax/hmem/device.c | 14 ++++++-------- >>>> drivers/dax/hmem/hmem.c | 12 ++++++++---- >>>> include/linux/dax.h | 9 ++++----- >>>> 4 files changed, 20 insertions(+), 22 deletions(-) >>>> >>>> diff --git a/drivers/acpi/numa/hmat.c b/drivers/acpi/numa/hmat.c >>>> index 1a902a02390f..23d4b3ad6d88 100644 >>>> --- a/drivers/acpi/numa/hmat.c >>>> +++ b/drivers/acpi/numa/hmat.c >>>> @@ -857,11 +857,8 @@ static void hmat_register_target_devices(struct memory_target *target) >>>> if (!IS_ENABLED(CONFIG_DEV_DAX_HMEM)) >>>> return; >>>> >>>> - for (res = target->memregions.child; res; res = res->sibling) { >>>> - int target_nid = pxm_to_node(target->memory_pxm); >>>> - >>>> - hmem_register_resource(target_nid, res); >>>> - } >>>> + for (res = target->memregions.child; res; res = res->sibling) >>>> + hmem_register_resource(res); >>>> } >>>> >>>> static void hmat_register_target(struct memory_target *target) >>>> diff --git a/drivers/dax/hmem/device.c b/drivers/dax/hmem/device.c >>>> index f9e1a76a04a9..ae25e08a636f 100644 >>>> --- a/drivers/dax/hmem/device.c >>>> +++ b/drivers/dax/hmem/device.c >>>> @@ -17,14 +17,14 @@ static struct resource hmem_active = { >>>> .flags = IORESOURCE_MEM, >>>> }; >>>> >>>> -int walk_hmem_resources(struct device *host, walk_hmem_fn fn) >>>> +int walk_hmem_resources(walk_hmem_fn fn) >>>> { >>>> struct resource *res; >>>> int rc = 0; >>>> >>>> mutex_lock(&hmem_resource_lock); >>>> for (res = hmem_active.child; res; res = res->sibling) { >>>> - rc = fn(host, (int) res->desc, res); >>>> + rc = fn(res); >>>> if (rc) >>>> break; >>>> } >>>> @@ -33,7 +33,7 @@ int walk_hmem_resources(struct device *host, walk_hmem_fn fn) >>>> } >>>> EXPORT_SYMBOL_GPL(walk_hmem_resources); >>>> >>>> -static void __hmem_register_resource(int target_nid, struct resource *res) >>>> +static void __hmem_register_resource(struct resource *res) >>>> { >>>> struct platform_device *pdev; >>>> struct resource *new; >>>> @@ -46,8 +46,6 @@ static void __hmem_register_resource(int target_nid, struct resource *res) >>>> return; >>>> } >>>> >>>> - new->desc = target_nid; >>>> - >>>> if (platform_initialized) >>>> return; >>>> >>>> @@ -64,19 +62,19 @@ static void __hmem_register_resource(int target_nid, struct resource *res) >>>> platform_initialized = true; >>>> } >>>> >>>> -void hmem_register_resource(int target_nid, struct resource *res) >>>> +void hmem_register_resource(struct resource *res) >>>> { >>>> if (nohmem) >>>> return; >>>> >>>> mutex_lock(&hmem_resource_lock); >>>> - __hmem_register_resource(target_nid, res); >>>> + __hmem_register_resource(res); >>>> mutex_unlock(&hmem_resource_lock); >>>> } >>>> >>>> static __init int hmem_register_one(struct resource *res, void *data) >>>> { >>>> - hmem_register_resource(phys_to_target_node(res->start), res); >>>> + hmem_register_resource(res); >>>> >>>> return 0; >>>> } >>>> diff --git a/drivers/dax/hmem/hmem.c b/drivers/dax/hmem/hmem.c >>>> index 5e7c53f18491..088f4060d4d5 100644 >>>> --- a/drivers/dax/hmem/hmem.c >>>> +++ b/drivers/dax/hmem/hmem.c >>>> @@ -9,6 +9,8 @@ >>>> static bool region_idle; >>>> module_param_named(region_idle, region_idle, bool, 0644); >>>> >>>> +static struct platform_device *dax_hmem_pdev; >>> >>> I don't think you can assume there is only ever 1 hmem platform device. >>> >>> hmat_register_target_devices() in particular iterates multiple memory >>> regions and will create more than one. >>> >>> What am I missing? >> >> You may be correct that there can be more than one hmem platform device. >> I was making this change based on a comment from Dan that it may not matter >> which platform device these are created against. > > If that is true I think there should be a big comment around this code > explaining why it is ok to have the platform device being allocated in > this call unregistered when a different platform device (host) is > released. > > IOW hmem_register_device() calls two devm_*() functions using host as the > device used to trigger an action. It is not entirely clear to me why that > change is safe here. > >> >> I could be wrong in that assumption. If so we'll need to figure lout how to >> determine which platform device a soft reserve resource would be created >> against when they are added later in boot from a notification by the >> srmem notification chain. > > I see that it would be more difficult to track. And I'm ok if it really > does work. But just looking at the commit message and code I don't see > how this does not at least introduce a functional change. I'm going to go back and take a look at this again. I went this direction using the approach of having the srmem notification chain. The dax driver then adds soft reserves outside of a probe routine and don't have a platform device associated with them. -Nathan > > Ira > >> >> -Nathan >> >>> Ira >>> >>>> + >>>> static int dax_hmem_probe(struct platform_device *pdev) >>>> { >>>> unsigned long flags = IORESOURCE_DAX_KMEM; >>>> @@ -59,13 +61,13 @@ static void release_hmem(void *pdev) >>>> platform_device_unregister(pdev); >>>> } >>>> >>>> -static int hmem_register_device(struct device *host, int target_nid, >>>> - const struct resource *res) >>>> +static int hmem_register_device(const struct resource *res) >>>> { >>>> + struct device *host = &dax_hmem_pdev->dev; >>>> struct platform_device *pdev; >>>> struct memregion_info info; >>>> + int target_nid, rc; >>>> long id; >>>> - int rc; >>>> >>>> if (IS_ENABLED(CONFIG_CXL_REGION) && >>>> region_intersects(res->start, resource_size(res), IORESOURCE_MEM, >>>> @@ -94,6 +96,7 @@ static int hmem_register_device(struct device *host, int target_nid, >>>> return -ENOMEM; >>>> } >>>> >>>> + target_nid = phys_to_target_node(res->start); >>>> pdev->dev.numa_node = numa_map_to_online_node(target_nid); >>>> info = (struct memregion_info) { >>>> .target_node = target_nid, >>>> @@ -125,7 +128,8 @@ static int hmem_register_device(struct device *host, int target_nid, >>>> >>>> static int dax_hmem_platform_probe(struct platform_device *pdev) >>>> { >>>> - return walk_hmem_resources(&pdev->dev, hmem_register_device); >>>> + dax_hmem_pdev = pdev; >>>> + return walk_hmem_resources(hmem_register_device); >>>> } >>>> >>>> static struct platform_driver dax_hmem_platform_driver = { >>>> diff --git a/include/linux/dax.h b/include/linux/dax.h >>>> index 9d3e3327af4c..beaa4bcb515c 100644 >>>> --- a/include/linux/dax.h >>>> +++ b/include/linux/dax.h >>>> @@ -276,14 +276,13 @@ static inline int dax_mem2blk_err(int err) >>>> } >>>> >>>> #ifdef CONFIG_DEV_DAX_HMEM_DEVICES >>>> -void hmem_register_resource(int target_nid, struct resource *r); >>>> +void hmem_register_resource(struct resource *r); >>>> #else >>>> -static inline void hmem_register_resource(int target_nid, struct resource *r) >>>> +static inline void hmem_register_resource(struct resource *r) >>>> { >>>> } >>>> #endif >>>> >>>> -typedef int (*walk_hmem_fn)(struct device *dev, int target_nid, >>>> - const struct resource *res); >>>> -int walk_hmem_resources(struct device *dev, walk_hmem_fn fn); >>>> +typedef int (*walk_hmem_fn)(const struct resource *res); >>>> +int walk_hmem_resources(walk_hmem_fn fn); >>>> #endif >>>> -- >>>> 2.43.0 >>>> >>>> >>> >>> >> > >
On 1/23/2025 10:01 AM, Fontenot, Nathan wrote: > On 1/21/2025 5:14 PM, Ira Weiny wrote: >> Fontenot, Nathan wrote: >>> On 1/16/2025 4:28 PM, Ira Weiny wrote: >>>> Nathan Fontenot wrote: >>>>> In order to handle registering hmem devices for SOFT RESERVE reources >>>> ^^^^^^^^^ >>>> resources >>>> >>>>> that are added late in boot update the hmem_register_resource(), >>>>> hmem_register_device(), and walk_hmem_resources() interfaces. >>>>> >>>>> Remove the target_nid arg to hmem_register_resource(). The target nid >>>>> value is calculated from the resource start address and not used until >>>>> registering a device for the resource. Move the target nid calculation >>>>> to hmem_register_device(). >>>>> >>>>> To allow for registering hmem devices outside of the hmem dax driver >>>>> probe routine save the dax hmem platform driver during probe. The >>>>> hmem_register_device() interface can then drop the host and target >>>>> nid parameters. >>>>> >>>>> There should be no functional changes. >>>>> >>>>> Signed-off-by: Nathan Fontenot <nathan.fontenot@amd.com> [ snip ] >>>>> diff --git a/drivers/dax/hmem/hmem.c b/drivers/dax/hmem/hmem.c >>>>> index 5e7c53f18491..088f4060d4d5 100644 >>>>> --- a/drivers/dax/hmem/hmem.c >>>>> +++ b/drivers/dax/hmem/hmem.c >>>>> @@ -9,6 +9,8 @@ >>>>> static bool region_idle; >>>>> module_param_named(region_idle, region_idle, bool, 0644); >>>>> >>>>> +static struct platform_device *dax_hmem_pdev; >>>> >>>> I don't think you can assume there is only ever 1 hmem platform device. >>>> >>>> hmat_register_target_devices() in particular iterates multiple memory >>>> regions and will create more than one. >>>> >>>> What am I missing? >>> >>> You may be correct that there can be more than one hmem platform device. >>> I was making this change based on a comment from Dan that it may not matter >>> which platform device these are created against. >> >> If that is true I think there should be a big comment around this code >> explaining why it is ok to have the platform device being allocated in >> this call unregistered when a different platform device (host) is >> released. >> >> IOW hmem_register_device() calls two devm_*() functions using host as the >> device used to trigger an action. It is not entirely clear to me why that >> change is safe here. >> >>> >>> I could be wrong in that assumption. If so we'll need to figure lout how to >>> determine which platform device a soft reserve resource would be created >>> against when they are added later in boot from a notification by the >>> srmem notification chain. >> >> I see that it would be more difficult to track. And I'm ok if it really >> does work. But just looking at the commit message and code I don't see >> how this does not at least introduce a functional change. > > I'm going to go back and take a look at this again. I went this direction > using the approach of having the srmem notification chain. The dax driver > then adds soft reserves outside of a probe routine and don't have a > platform device associated with them. > Digging back into this, the dax driver only creates one platform device. During hmem_register_resource (which is what hmat_register_target_devices() calls for each resource) the dax hmem driver will only create a platform device when the first resource is registered. Each resource that is passed to hmem_register_resource() is added to a resource tree internal to the dax/hmem driver that is eventually walked by the dax hmem driver probe routine. Now that I understand this better I am confident that saving a pointer to the dax hmem platform device is safe. I'll include this information in the commit log for the next version of the patch. -Nathan
diff --git a/drivers/acpi/numa/hmat.c b/drivers/acpi/numa/hmat.c index 1a902a02390f..23d4b3ad6d88 100644 --- a/drivers/acpi/numa/hmat.c +++ b/drivers/acpi/numa/hmat.c @@ -857,11 +857,8 @@ static void hmat_register_target_devices(struct memory_target *target) if (!IS_ENABLED(CONFIG_DEV_DAX_HMEM)) return; - for (res = target->memregions.child; res; res = res->sibling) { - int target_nid = pxm_to_node(target->memory_pxm); - - hmem_register_resource(target_nid, res); - } + for (res = target->memregions.child; res; res = res->sibling) + hmem_register_resource(res); } static void hmat_register_target(struct memory_target *target) diff --git a/drivers/dax/hmem/device.c b/drivers/dax/hmem/device.c index f9e1a76a04a9..ae25e08a636f 100644 --- a/drivers/dax/hmem/device.c +++ b/drivers/dax/hmem/device.c @@ -17,14 +17,14 @@ static struct resource hmem_active = { .flags = IORESOURCE_MEM, }; -int walk_hmem_resources(struct device *host, walk_hmem_fn fn) +int walk_hmem_resources(walk_hmem_fn fn) { struct resource *res; int rc = 0; mutex_lock(&hmem_resource_lock); for (res = hmem_active.child; res; res = res->sibling) { - rc = fn(host, (int) res->desc, res); + rc = fn(res); if (rc) break; } @@ -33,7 +33,7 @@ int walk_hmem_resources(struct device *host, walk_hmem_fn fn) } EXPORT_SYMBOL_GPL(walk_hmem_resources); -static void __hmem_register_resource(int target_nid, struct resource *res) +static void __hmem_register_resource(struct resource *res) { struct platform_device *pdev; struct resource *new; @@ -46,8 +46,6 @@ static void __hmem_register_resource(int target_nid, struct resource *res) return; } - new->desc = target_nid; - if (platform_initialized) return; @@ -64,19 +62,19 @@ static void __hmem_register_resource(int target_nid, struct resource *res) platform_initialized = true; } -void hmem_register_resource(int target_nid, struct resource *res) +void hmem_register_resource(struct resource *res) { if (nohmem) return; mutex_lock(&hmem_resource_lock); - __hmem_register_resource(target_nid, res); + __hmem_register_resource(res); mutex_unlock(&hmem_resource_lock); } static __init int hmem_register_one(struct resource *res, void *data) { - hmem_register_resource(phys_to_target_node(res->start), res); + hmem_register_resource(res); return 0; } diff --git a/drivers/dax/hmem/hmem.c b/drivers/dax/hmem/hmem.c index 5e7c53f18491..088f4060d4d5 100644 --- a/drivers/dax/hmem/hmem.c +++ b/drivers/dax/hmem/hmem.c @@ -9,6 +9,8 @@ static bool region_idle; module_param_named(region_idle, region_idle, bool, 0644); +static struct platform_device *dax_hmem_pdev; + static int dax_hmem_probe(struct platform_device *pdev) { unsigned long flags = IORESOURCE_DAX_KMEM; @@ -59,13 +61,13 @@ static void release_hmem(void *pdev) platform_device_unregister(pdev); } -static int hmem_register_device(struct device *host, int target_nid, - const struct resource *res) +static int hmem_register_device(const struct resource *res) { + struct device *host = &dax_hmem_pdev->dev; struct platform_device *pdev; struct memregion_info info; + int target_nid, rc; long id; - int rc; if (IS_ENABLED(CONFIG_CXL_REGION) && region_intersects(res->start, resource_size(res), IORESOURCE_MEM, @@ -94,6 +96,7 @@ static int hmem_register_device(struct device *host, int target_nid, return -ENOMEM; } + target_nid = phys_to_target_node(res->start); pdev->dev.numa_node = numa_map_to_online_node(target_nid); info = (struct memregion_info) { .target_node = target_nid, @@ -125,7 +128,8 @@ static int hmem_register_device(struct device *host, int target_nid, static int dax_hmem_platform_probe(struct platform_device *pdev) { - return walk_hmem_resources(&pdev->dev, hmem_register_device); + dax_hmem_pdev = pdev; + return walk_hmem_resources(hmem_register_device); } static struct platform_driver dax_hmem_platform_driver = { diff --git a/include/linux/dax.h b/include/linux/dax.h index 9d3e3327af4c..beaa4bcb515c 100644 --- a/include/linux/dax.h +++ b/include/linux/dax.h @@ -276,14 +276,13 @@ static inline int dax_mem2blk_err(int err) } #ifdef CONFIG_DEV_DAX_HMEM_DEVICES -void hmem_register_resource(int target_nid, struct resource *r); +void hmem_register_resource(struct resource *r); #else -static inline void hmem_register_resource(int target_nid, struct resource *r) +static inline void hmem_register_resource(struct resource *r) { } #endif -typedef int (*walk_hmem_fn)(struct device *dev, int target_nid, - const struct resource *res); -int walk_hmem_resources(struct device *dev, walk_hmem_fn fn); +typedef int (*walk_hmem_fn)(const struct resource *res); +int walk_hmem_resources(walk_hmem_fn fn); #endif
In order to handle registering hmem devices for SOFT RESERVE reources that are added late in boot update the hmem_register_resource(), hmem_register_device(), and walk_hmem_resources() interfaces. Remove the target_nid arg to hmem_register_resource(). The target nid value is calculated from the resource start address and not used until registering a device for the resource. Move the target nid calculation to hmem_register_device(). To allow for registering hmem devices outside of the hmem dax driver probe routine save the dax hmem platform driver during probe. The hmem_register_device() interface can then drop the host and target nid parameters. There should be no functional changes. Signed-off-by: Nathan Fontenot <nathan.fontenot@amd.com> --- drivers/acpi/numa/hmat.c | 7 ++----- drivers/dax/hmem/device.c | 14 ++++++-------- drivers/dax/hmem/hmem.c | 12 ++++++++---- include/linux/dax.h | 9 ++++----- 4 files changed, 20 insertions(+), 22 deletions(-)