diff mbox series

[v4,06/14] device-dax: ensure dev_dax->pgmap is valid for dynamic devices

Message ID 20210827145819.16471-7-joao.m.martins@oracle.com (mailing list archive)
State New, archived
Headers show
Series mm, sparse-vmemmap: Introduce compound devmaps for device-dax | expand

Commit Message

Joao Martins Aug. 27, 2021, 2:58 p.m. UTC
Right now, only static dax regions have a valid @pgmap pointer in its
struct dev_dax. Dynamic dax case however, do not.

In preparation for device-dax compound devmap support, make sure that
dev_dax pgmap field is set after it has been allocated and initialized.

dynamic dax device have the @pgmap is allocated at probe() and it's
managed by devm (contrast to static dax region which a pgmap is provided
and dax core kfrees it). So in addition to ensure a valid @pgmap, clear
the pgmap when the dynamic dax device is released to avoid the same
pgmap ranges to be re-requested across multiple region device reconfigs.

Suggested-by: Dan Williams <dan.j.williams@intel.com>
Signed-off-by: Joao Martins <joao.m.martins@oracle.com>
---
 drivers/dax/bus.c    | 8 ++++++++
 drivers/dax/device.c | 2 ++
 2 files changed, 10 insertions(+)

Comments

Dan Williams Nov. 5, 2021, 12:31 a.m. UTC | #1
On Fri, Aug 27, 2021 at 7:59 AM Joao Martins <joao.m.martins@oracle.com> wrote:
>
> Right now, only static dax regions have a valid @pgmap pointer in its
> struct dev_dax. Dynamic dax case however, do not.
>
> In preparation for device-dax compound devmap support, make sure that
> dev_dax pgmap field is set after it has been allocated and initialized.
>
> dynamic dax device have the @pgmap is allocated at probe() and it's
> managed by devm (contrast to static dax region which a pgmap is provided
> and dax core kfrees it). So in addition to ensure a valid @pgmap, clear
> the pgmap when the dynamic dax device is released to avoid the same
> pgmap ranges to be re-requested across multiple region device reconfigs.
>
> Suggested-by: Dan Williams <dan.j.williams@intel.com>
> Signed-off-by: Joao Martins <joao.m.martins@oracle.com>
> ---
>  drivers/dax/bus.c    | 8 ++++++++
>  drivers/dax/device.c | 2 ++
>  2 files changed, 10 insertions(+)
>
> diff --git a/drivers/dax/bus.c b/drivers/dax/bus.c
> index 6cc4da4c713d..49dbff9ba609 100644
> --- a/drivers/dax/bus.c
> +++ b/drivers/dax/bus.c
> @@ -363,6 +363,14 @@ void kill_dev_dax(struct dev_dax *dev_dax)
>
>         kill_dax(dax_dev);
>         unmap_mapping_range(inode->i_mapping, 0, 0, 1);
> +
> +       /*
> +        * Dynamic dax region have the pgmap allocated via dev_kzalloc()
> +        * and thus freed by devm. Clear the pgmap to not have stale pgmap
> +        * ranges on probe() from previous reconfigurations of region devices.
> +        */
> +       if (!is_static(dev_dax->region))
> +               dev_dax->pgmap = NULL;
>  }
>  EXPORT_SYMBOL_GPL(kill_dev_dax);
>
> diff --git a/drivers/dax/device.c b/drivers/dax/device.c
> index 0b82159b3564..6e348b5f9d45 100644
> --- a/drivers/dax/device.c
> +++ b/drivers/dax/device.c
> @@ -426,6 +426,8 @@ int dev_dax_probe(struct dev_dax *dev_dax)
>         }
>
>         pgmap->type = MEMORY_DEVICE_GENERIC;
> +       dev_dax->pgmap = pgmap;

So I think I'd rather see a bigger patch that replaces some of the
implicit dev_dax->pgmap == NULL checks with explicit is_static()
checks. Something like the following only compile and boot tested...
Note the struct_size() change probably wants to be its own cleanup,
and the EXPORT_SYMBOL_NS_GPL(..., DAX) probably wants to be its own
patch converting over the entirety of drivers/dax/. Thoughts?


diff --git a/drivers/dax/bus.c b/drivers/dax/bus.c
index 6cc4da4c713d..67ab7e05b340 100644
--- a/drivers/dax/bus.c
+++ b/drivers/dax/bus.c
@@ -134,6 +134,12 @@ static bool is_static(struct dax_region *dax_region)
        return (dax_region->res.flags & IORESOURCE_DAX_STATIC) != 0;
 }

+bool static_dev_dax(struct dev_dax *dev_dax)
+{
+       return is_static(dev_dax->region);
+}
+EXPORT_SYMBOL_NS_GPL(static_dev_dax, DAX);
+
 static u64 dev_dax_size(struct dev_dax *dev_dax)
 {
        u64 size = 0;
@@ -363,6 +369,8 @@ void kill_dev_dax(struct dev_dax *dev_dax)

        kill_dax(dax_dev);
        unmap_mapping_range(inode->i_mapping, 0, 0, 1);
+       if (static_dev_dax(dev_dax))
+               dev_dax->pgmap = NULL;
 }
 EXPORT_SYMBOL_GPL(kill_dev_dax);

diff --git a/drivers/dax/bus.h b/drivers/dax/bus.h
index 1e946ad7780a..4acdfee7dd59 100644
--- a/drivers/dax/bus.h
+++ b/drivers/dax/bus.h
@@ -48,6 +48,7 @@ int __dax_driver_register(struct dax_device_driver *dax_drv,
        __dax_driver_register(driver, THIS_MODULE, KBUILD_MODNAME)
 void dax_driver_unregister(struct dax_device_driver *dax_drv);
 void kill_dev_dax(struct dev_dax *dev_dax);
+bool static_dev_dax(struct dev_dax *dev_dax);

 #if IS_ENABLED(CONFIG_DEV_DAX_PMEM_COMPAT)
 int dev_dax_probe(struct dev_dax *dev_dax);
diff --git a/drivers/dax/device.c b/drivers/dax/device.c
index dd8222a42808..87507aff2b10 100644
--- a/drivers/dax/device.c
+++ b/drivers/dax/device.c
@@ -398,31 +398,43 @@ int dev_dax_probe(struct dev_dax *dev_dax)
        void *addr;
        int rc, i;

-       pgmap = dev_dax->pgmap;
-       if (dev_WARN_ONCE(dev, pgmap && dev_dax->nr_range > 1,
-                       "static pgmap / multi-range device conflict\n"))
+       if (static_dev_dax(dev_dax) && dev_dax->nr_range > 1) {
+               dev_warn(dev, "static pgmap / multi-range device conflict\n");
                return -EINVAL;
+       }

-       if (!pgmap) {
-               pgmap = devm_kzalloc(dev, sizeof(*pgmap) + sizeof(struct range)
-                               * (dev_dax->nr_range - 1), GFP_KERNEL);
+       if (static_dev_dax(dev_dax)) {
+               pgmap = dev_dax->pgmap;
+       } else {
+               if (dev_dax->pgmap) {
+                       dev_warn(dev,
+                                "dynamic-dax with pre-populated page map!?\n");
+                       return -EINVAL;
+               }
+               pgmap = devm_kzalloc(
+                       dev, struct_size(pgmap, ranges, dev_dax->nr_range - 1),
+                       GFP_KERNEL);
                if (!pgmap)
                        return -ENOMEM;
                pgmap->nr_range = dev_dax->nr_range;
+               dev_dax->pgmap = pgmap;
+               for (i = 0; i < dev_dax->nr_range; i++) {
+                       struct range *range = &dev_dax->ranges[i].range;
+
+                       pgmap->ranges[i] = *range;
+               }
        }

        for (i = 0; i < dev_dax->nr_range; i++) {
                struct range *range = &dev_dax->ranges[i].range;

-               if (!devm_request_mem_region(dev, range->start,
-                                       range_len(range), dev_name(dev))) {
-                       dev_warn(dev, "mapping%d: %#llx-%#llx could
not reserve range\n",
-                                       i, range->start, range->end);
-                       return -EBUSY;
-               }
-               /* don't update the range for static pgmap */
-               if (!dev_dax->pgmap)
-                       pgmap->ranges[i] = *range;
+               if (devm_request_mem_region(dev, range->start, range_len(range),
+                                           dev_name(dev)))
+                       continue;
+               dev_warn(dev,
+                        "mapping%d: %#llx-%#llx could not reserve range\n", i,
+                        range->start, range->end);
+               return -EBUSY;
        }

        pgmap->type = MEMORY_DEVICE_GENERIC;
@@ -473,3 +485,4 @@ MODULE_LICENSE("GPL v2");
 module_init(dax_init);
 module_exit(dax_exit);
 MODULE_ALIAS_DAX_DEVICE(0);
+MODULE_IMPORT_NS(DAX);



> +
>         addr = devm_memremap_pages(dev, pgmap);
>         if (IS_ERR(addr))
>                 return PTR_ERR(addr);
> --
> 2.17.1
>
Joao Martins Nov. 5, 2021, 12:09 p.m. UTC | #2
On 11/5/21 00:31, Dan Williams wrote:
> On Fri, Aug 27, 2021 at 7:59 AM Joao Martins <joao.m.martins@oracle.com> wrote:
>>
>> Right now, only static dax regions have a valid @pgmap pointer in its
>> struct dev_dax. Dynamic dax case however, do not.
>>
>> In preparation for device-dax compound devmap support, make sure that
>> dev_dax pgmap field is set after it has been allocated and initialized.
>>
>> dynamic dax device have the @pgmap is allocated at probe() and it's
>> managed by devm (contrast to static dax region which a pgmap is provided
>> and dax core kfrees it). So in addition to ensure a valid @pgmap, clear
>> the pgmap when the dynamic dax device is released to avoid the same
>> pgmap ranges to be re-requested across multiple region device reconfigs.
>>
>> Suggested-by: Dan Williams <dan.j.williams@intel.com>
>> Signed-off-by: Joao Martins <joao.m.martins@oracle.com>
>> ---
>>  drivers/dax/bus.c    | 8 ++++++++
>>  drivers/dax/device.c | 2 ++
>>  2 files changed, 10 insertions(+)
>>
>> diff --git a/drivers/dax/bus.c b/drivers/dax/bus.c
>> index 6cc4da4c713d..49dbff9ba609 100644
>> --- a/drivers/dax/bus.c
>> +++ b/drivers/dax/bus.c
>> @@ -363,6 +363,14 @@ void kill_dev_dax(struct dev_dax *dev_dax)
>>
>>         kill_dax(dax_dev);
>>         unmap_mapping_range(inode->i_mapping, 0, 0, 1);
>> +
>> +       /*
>> +        * Dynamic dax region have the pgmap allocated via dev_kzalloc()
>> +        * and thus freed by devm. Clear the pgmap to not have stale pgmap
>> +        * ranges on probe() from previous reconfigurations of region devices.
>> +        */
>> +       if (!is_static(dev_dax->region))
>> +               dev_dax->pgmap = NULL;
>>  }
>>  EXPORT_SYMBOL_GPL(kill_dev_dax);
>>
>> diff --git a/drivers/dax/device.c b/drivers/dax/device.c
>> index 0b82159b3564..6e348b5f9d45 100644
>> --- a/drivers/dax/device.c
>> +++ b/drivers/dax/device.c
>> @@ -426,6 +426,8 @@ int dev_dax_probe(struct dev_dax *dev_dax)
>>         }
>>
>>         pgmap->type = MEMORY_DEVICE_GENERIC;
>> +       dev_dax->pgmap = pgmap;
> 
> So I think I'd rather see a bigger patch that replaces some of the
> implicit dev_dax->pgmap == NULL checks with explicit is_static()
> checks. Something like the following only compile and boot tested...
> Note the struct_size() change probably wants to be its own cleanup,
> and the EXPORT_SYMBOL_NS_GPL(..., DAX) probably wants to be its own
> patch converting over the entirety of drivers/dax/. Thoughts?
> 
It's a good idea. Certainly the implicit pgmap == NULL made it harder
than the necessary to find where the problem was. So turning those checks
into explicit checks that differentiate static vs dynamic dax will help

With respect to this series converting those pgmap == NULL is going to need
to made me export the symbol (provided dax core and dax device can be built
as modules). So I don't know how this can be a patch converting entirety of
dax. Perhaps you mean that I would just EXPORT_SYMBOL() and then a bigger
patch introduces the MODULE_NS_IMPORT() And EXPORT_SYMBOL_NS*() separately.

The struct_size, yeah, should be a separate patch much like commit 7d18dd75a8af
("device-dax/kmem: use struct_size()").

minor comment below on your snippet.

> 
> diff --git a/drivers/dax/bus.c b/drivers/dax/bus.c
> index 6cc4da4c713d..67ab7e05b340 100644
> --- a/drivers/dax/bus.c
> +++ b/drivers/dax/bus.c
> @@ -134,6 +134,12 @@ static bool is_static(struct dax_region *dax_region)
>         return (dax_region->res.flags & IORESOURCE_DAX_STATIC) != 0;
>  }
> 
> +bool static_dev_dax(struct dev_dax *dev_dax)
> +{
> +       return is_static(dev_dax->region);
> +}
> +EXPORT_SYMBOL_NS_GPL(static_dev_dax, DAX);
> +
>  static u64 dev_dax_size(struct dev_dax *dev_dax)
>  {
>         u64 size = 0;
> @@ -363,6 +369,8 @@ void kill_dev_dax(struct dev_dax *dev_dax)
> 
>         kill_dax(dax_dev);
>         unmap_mapping_range(inode->i_mapping, 0, 0, 1);
> +       if (static_dev_dax(dev_dax))
> +               dev_dax->pgmap = NULL;
>  }

Here you probably meant !static_dev_dax() per my patch.

>  EXPORT_SYMBOL_GPL(kill_dev_dax);
> 
> diff --git a/drivers/dax/bus.h b/drivers/dax/bus.h
> index 1e946ad7780a..4acdfee7dd59 100644
> --- a/drivers/dax/bus.h
> +++ b/drivers/dax/bus.h
> @@ -48,6 +48,7 @@ int __dax_driver_register(struct dax_device_driver *dax_drv,
>         __dax_driver_register(driver, THIS_MODULE, KBUILD_MODNAME)
>  void dax_driver_unregister(struct dax_device_driver *dax_drv);
>  void kill_dev_dax(struct dev_dax *dev_dax);
> +bool static_dev_dax(struct dev_dax *dev_dax);
> 
>  #if IS_ENABLED(CONFIG_DEV_DAX_PMEM_COMPAT)
>  int dev_dax_probe(struct dev_dax *dev_dax);
> diff --git a/drivers/dax/device.c b/drivers/dax/device.c
> index dd8222a42808..87507aff2b10 100644
> --- a/drivers/dax/device.c
> +++ b/drivers/dax/device.c
> @@ -398,31 +398,43 @@ int dev_dax_probe(struct dev_dax *dev_dax)
>         void *addr;
>         int rc, i;
> 
> -       pgmap = dev_dax->pgmap;
> -       if (dev_WARN_ONCE(dev, pgmap && dev_dax->nr_range > 1,
> -                       "static pgmap / multi-range device conflict\n"))
> +       if (static_dev_dax(dev_dax) && dev_dax->nr_range > 1) {
> +               dev_warn(dev, "static pgmap / multi-range device conflict\n");
>                 return -EINVAL;
> +       }
> 
> -       if (!pgmap) {
> -               pgmap = devm_kzalloc(dev, sizeof(*pgmap) + sizeof(struct range)
> -                               * (dev_dax->nr_range - 1), GFP_KERNEL);
> +       if (static_dev_dax(dev_dax)) {
> +               pgmap = dev_dax->pgmap;
> +       } else {
> +               if (dev_dax->pgmap) {
> +                       dev_warn(dev,
> +                                "dynamic-dax with pre-populated page map!?\n");
> +                       return -EINVAL;
> +               }
> +               pgmap = devm_kzalloc(
> +                       dev, struct_size(pgmap, ranges, dev_dax->nr_range - 1),
> +                       GFP_KERNEL);
>                 if (!pgmap)
>                         return -ENOMEM;
>                 pgmap->nr_range = dev_dax->nr_range;
> +               dev_dax->pgmap = pgmap;
> +               for (i = 0; i < dev_dax->nr_range; i++) {
> +                       struct range *range = &dev_dax->ranges[i].range;
> +
> +                       pgmap->ranges[i] = *range;
> +               }
>         }
> 
This code move is probably not needed unless your point is to have a more clear
separation on what's initialization versus the mem region request (that's
applicable to both dynamic and static).

>         for (i = 0; i < dev_dax->nr_range; i++) {
>                 struct range *range = &dev_dax->ranges[i].range;
> 
> -               if (!devm_request_mem_region(dev, range->start,
> -                                       range_len(range), dev_name(dev))) {
> -                       dev_warn(dev, "mapping%d: %#llx-%#llx could
> not reserve range\n",
> -                                       i, range->start, range->end);
> -                       return -EBUSY;
> -               }
> -               /* don't update the range for static pgmap */
> -               if (!dev_dax->pgmap)
> -                       pgmap->ranges[i] = *range;
> +               if (devm_request_mem_region(dev, range->start, range_len(range),
> +                                           dev_name(dev)))
> +                       continue;
> +               dev_warn(dev,
> +                        "mapping%d: %#llx-%#llx could not reserve range\n", i,
> +                        range->start, range->end);
> +               return -EBUSY;
>         }
> 
>         pgmap->type = MEMORY_DEVICE_GENERIC;
> @@ -473,3 +485,4 @@ MODULE_LICENSE("GPL v2");
>  module_init(dax_init);
>  module_exit(dax_exit);
>  MODULE_ALIAS_DAX_DEVICE(0);
> +MODULE_IMPORT_NS(DAX);
Joao Martins Nov. 5, 2021, 4:14 p.m. UTC | #3
On 11/5/21 12:09, Joao Martins wrote:
> On 11/5/21 00:31, Dan Williams wrote:
>> On Fri, Aug 27, 2021 at 7:59 AM Joao Martins <joao.m.martins@oracle.com> wrote:
>>>
>>> Right now, only static dax regions have a valid @pgmap pointer in its
>>> struct dev_dax. Dynamic dax case however, do not.
>>>
>>> In preparation for device-dax compound devmap support, make sure that
>>> dev_dax pgmap field is set after it has been allocated and initialized.
>>>
>>> dynamic dax device have the @pgmap is allocated at probe() and it's
>>> managed by devm (contrast to static dax region which a pgmap is provided
>>> and dax core kfrees it). So in addition to ensure a valid @pgmap, clear
>>> the pgmap when the dynamic dax device is released to avoid the same
>>> pgmap ranges to be re-requested across multiple region device reconfigs.
>>>
>>> Suggested-by: Dan Williams <dan.j.williams@intel.com>
>>> Signed-off-by: Joao Martins <joao.m.martins@oracle.com>
>>> ---
>>>  drivers/dax/bus.c    | 8 ++++++++
>>>  drivers/dax/device.c | 2 ++
>>>  2 files changed, 10 insertions(+)
>>>
>>> diff --git a/drivers/dax/bus.c b/drivers/dax/bus.c
>>> index 6cc4da4c713d..49dbff9ba609 100644
>>> --- a/drivers/dax/bus.c
>>> +++ b/drivers/dax/bus.c
>>> @@ -363,6 +363,14 @@ void kill_dev_dax(struct dev_dax *dev_dax)
>>>
>>>         kill_dax(dax_dev);
>>>         unmap_mapping_range(inode->i_mapping, 0, 0, 1);
>>> +
>>> +       /*
>>> +        * Dynamic dax region have the pgmap allocated via dev_kzalloc()
>>> +        * and thus freed by devm. Clear the pgmap to not have stale pgmap
>>> +        * ranges on probe() from previous reconfigurations of region devices.
>>> +        */
>>> +       if (!is_static(dev_dax->region))
>>> +               dev_dax->pgmap = NULL;
>>>  }
>>>  EXPORT_SYMBOL_GPL(kill_dev_dax);
>>>
>>> diff --git a/drivers/dax/device.c b/drivers/dax/device.c
>>> index 0b82159b3564..6e348b5f9d45 100644
>>> --- a/drivers/dax/device.c
>>> +++ b/drivers/dax/device.c
>>> @@ -426,6 +426,8 @@ int dev_dax_probe(struct dev_dax *dev_dax)
>>>         }
>>>
>>>         pgmap->type = MEMORY_DEVICE_GENERIC;
>>> +       dev_dax->pgmap = pgmap;
>>
>> So I think I'd rather see a bigger patch that replaces some of the
>> implicit dev_dax->pgmap == NULL checks with explicit is_static()
>> checks. Something like the following only compile and boot tested...
>> Note the struct_size() change probably wants to be its own cleanup,
>> and the EXPORT_SYMBOL_NS_GPL(..., DAX) probably wants to be its own
>> patch converting over the entirety of drivers/dax/. Thoughts?
>>
> It's a good idea. Certainly the implicit pgmap == NULL made it harder
> than the necessary to find where the problem was. So turning those checks
> into explicit checks that differentiate static vs dynamic dax will help
> 
> With respect to this series converting those pgmap == NULL is going to need
> to made me export the symbol (provided dax core and dax device can be built
> as modules). So I don't know how this can be a patch converting entirety of
> dax. Perhaps you mean that I would just EXPORT_SYMBOL() and then a bigger
> patch introduces the MODULE_NS_IMPORT() And EXPORT_SYMBOL_NS*() separately.
> 
To be clear by "then a bigger patch" I actually meant that the
EXPORT_SYMBOL_NS()/MODULE_NS_IMPORT() would be a separate patch not included
this series. For this series I would just use EXPORT_SYMBOL().

> The struct_size, yeah, should be a separate patch much like commit 7d18dd75a8af
> ("device-dax/kmem: use struct_size()").
> 
This cleanup would still be included as a predecessor patch, as I will be
touching this area in this patch.

> minor comment below on your snippet.
> 
>>
>> diff --git a/drivers/dax/bus.c b/drivers/dax/bus.c
>> index 6cc4da4c713d..67ab7e05b340 100644
>> --- a/drivers/dax/bus.c
>> +++ b/drivers/dax/bus.c
>> @@ -134,6 +134,12 @@ static bool is_static(struct dax_region *dax_region)
>>         return (dax_region->res.flags & IORESOURCE_DAX_STATIC) != 0;
>>  }
>>
>> +bool static_dev_dax(struct dev_dax *dev_dax)
>> +{
>> +       return is_static(dev_dax->region);
>> +}
>> +EXPORT_SYMBOL_NS_GPL(static_dev_dax, DAX);
>> +
>>  static u64 dev_dax_size(struct dev_dax *dev_dax)
>>  {
>>         u64 size = 0;
>> @@ -363,6 +369,8 @@ void kill_dev_dax(struct dev_dax *dev_dax)
>>
>>         kill_dax(dax_dev);
>>         unmap_mapping_range(inode->i_mapping, 0, 0, 1);
>> +       if (static_dev_dax(dev_dax))
>> +               dev_dax->pgmap = NULL;
>>  }
> 
> Here you probably meant !static_dev_dax() per my patch.
> 
>>  EXPORT_SYMBOL_GPL(kill_dev_dax);
>>
>> diff --git a/drivers/dax/bus.h b/drivers/dax/bus.h
>> index 1e946ad7780a..4acdfee7dd59 100644
>> --- a/drivers/dax/bus.h
>> +++ b/drivers/dax/bus.h
>> @@ -48,6 +48,7 @@ int __dax_driver_register(struct dax_device_driver *dax_drv,
>>         __dax_driver_register(driver, THIS_MODULE, KBUILD_MODNAME)
>>  void dax_driver_unregister(struct dax_device_driver *dax_drv);
>>  void kill_dev_dax(struct dev_dax *dev_dax);
>> +bool static_dev_dax(struct dev_dax *dev_dax);
>>
>>  #if IS_ENABLED(CONFIG_DEV_DAX_PMEM_COMPAT)
>>  int dev_dax_probe(struct dev_dax *dev_dax);
>> diff --git a/drivers/dax/device.c b/drivers/dax/device.c
>> index dd8222a42808..87507aff2b10 100644
>> --- a/drivers/dax/device.c
>> +++ b/drivers/dax/device.c
>> @@ -398,31 +398,43 @@ int dev_dax_probe(struct dev_dax *dev_dax)
>>         void *addr;
>>         int rc, i;
>>
>> -       pgmap = dev_dax->pgmap;
>> -       if (dev_WARN_ONCE(dev, pgmap && dev_dax->nr_range > 1,
>> -                       "static pgmap / multi-range device conflict\n"))
>> +       if (static_dev_dax(dev_dax) && dev_dax->nr_range > 1) {
>> +               dev_warn(dev, "static pgmap / multi-range device conflict\n");
>>                 return -EINVAL;
>> +       }
>>
>> -       if (!pgmap) {
>> -               pgmap = devm_kzalloc(dev, sizeof(*pgmap) + sizeof(struct range)
>> -                               * (dev_dax->nr_range - 1), GFP_KERNEL);
>> +       if (static_dev_dax(dev_dax)) {
>> +               pgmap = dev_dax->pgmap;
>> +       } else {
>> +               if (dev_dax->pgmap) {
>> +                       dev_warn(dev,
>> +                                "dynamic-dax with pre-populated page map!?\n");
>> +                       return -EINVAL;
>> +               }
>> +               pgmap = devm_kzalloc(
>> +                       dev, struct_size(pgmap, ranges, dev_dax->nr_range - 1),
>> +                       GFP_KERNEL);
>>                 if (!pgmap)
>>                         return -ENOMEM;
>>                 pgmap->nr_range = dev_dax->nr_range;
>> +               dev_dax->pgmap = pgmap;
>> +               for (i = 0; i < dev_dax->nr_range; i++) {
>> +                       struct range *range = &dev_dax->ranges[i].range;
>> +
>> +                       pgmap->ranges[i] = *range;
>> +               }
>>         }
>>
> This code move is probably not needed unless your point is to have a more clear
> separation on what's initialization versus the mem region request (that's
> applicable to both dynamic and static).
> 
>>         for (i = 0; i < dev_dax->nr_range; i++) {
>>                 struct range *range = &dev_dax->ranges[i].range;
>>
>> -               if (!devm_request_mem_region(dev, range->start,
>> -                                       range_len(range), dev_name(dev))) {
>> -                       dev_warn(dev, "mapping%d: %#llx-%#llx could
>> not reserve range\n",
>> -                                       i, range->start, range->end);
>> -                       return -EBUSY;
>> -               }
>> -               /* don't update the range for static pgmap */
>> -               if (!dev_dax->pgmap)
>> -                       pgmap->ranges[i] = *range;
>> +               if (devm_request_mem_region(dev, range->start, range_len(range),
>> +                                           dev_name(dev)))
>> +                       continue;
>> +               dev_warn(dev,
>> +                        "mapping%d: %#llx-%#llx could not reserve range\n", i,
>> +                        range->start, range->end);
>> +               return -EBUSY;
>>         }
>>
>>         pgmap->type = MEMORY_DEVICE_GENERIC;
>> @@ -473,3 +485,4 @@ MODULE_LICENSE("GPL v2");
>>  module_init(dax_init);
>>  module_exit(dax_exit);
>>  MODULE_ALIAS_DAX_DEVICE(0);
>> +MODULE_IMPORT_NS(DAX);
>
Dan Williams Nov. 5, 2021, 4:46 p.m. UTC | #4
On Fri, Nov 5, 2021 at 5:10 AM Joao Martins <joao.m.martins@oracle.com> wrote:
>
> On 11/5/21 00:31, Dan Williams wrote:
> > On Fri, Aug 27, 2021 at 7:59 AM Joao Martins <joao.m.martins@oracle.com> wrote:
> >>
> >> Right now, only static dax regions have a valid @pgmap pointer in its
> >> struct dev_dax. Dynamic dax case however, do not.
> >>
> >> In preparation for device-dax compound devmap support, make sure that
> >> dev_dax pgmap field is set after it has been allocated and initialized.
> >>
> >> dynamic dax device have the @pgmap is allocated at probe() and it's
> >> managed by devm (contrast to static dax region which a pgmap is provided
> >> and dax core kfrees it). So in addition to ensure a valid @pgmap, clear
> >> the pgmap when the dynamic dax device is released to avoid the same
> >> pgmap ranges to be re-requested across multiple region device reconfigs.
> >>
> >> Suggested-by: Dan Williams <dan.j.williams@intel.com>
> >> Signed-off-by: Joao Martins <joao.m.martins@oracle.com>
> >> ---
> >>  drivers/dax/bus.c    | 8 ++++++++
> >>  drivers/dax/device.c | 2 ++
> >>  2 files changed, 10 insertions(+)
> >>
> >> diff --git a/drivers/dax/bus.c b/drivers/dax/bus.c
> >> index 6cc4da4c713d..49dbff9ba609 100644
> >> --- a/drivers/dax/bus.c
> >> +++ b/drivers/dax/bus.c
> >> @@ -363,6 +363,14 @@ void kill_dev_dax(struct dev_dax *dev_dax)
> >>
> >>         kill_dax(dax_dev);
> >>         unmap_mapping_range(inode->i_mapping, 0, 0, 1);
> >> +
> >> +       /*
> >> +        * Dynamic dax region have the pgmap allocated via dev_kzalloc()
> >> +        * and thus freed by devm. Clear the pgmap to not have stale pgmap
> >> +        * ranges on probe() from previous reconfigurations of region devices.
> >> +        */
> >> +       if (!is_static(dev_dax->region))
> >> +               dev_dax->pgmap = NULL;
> >>  }
> >>  EXPORT_SYMBOL_GPL(kill_dev_dax);
> >>
> >> diff --git a/drivers/dax/device.c b/drivers/dax/device.c
> >> index 0b82159b3564..6e348b5f9d45 100644
> >> --- a/drivers/dax/device.c
> >> +++ b/drivers/dax/device.c
> >> @@ -426,6 +426,8 @@ int dev_dax_probe(struct dev_dax *dev_dax)
> >>         }
> >>
> >>         pgmap->type = MEMORY_DEVICE_GENERIC;
> >> +       dev_dax->pgmap = pgmap;
> >
> > So I think I'd rather see a bigger patch that replaces some of the
> > implicit dev_dax->pgmap == NULL checks with explicit is_static()
> > checks. Something like the following only compile and boot tested...
> > Note the struct_size() change probably wants to be its own cleanup,
> > and the EXPORT_SYMBOL_NS_GPL(..., DAX) probably wants to be its own
> > patch converting over the entirety of drivers/dax/. Thoughts?
> >
> It's a good idea. Certainly the implicit pgmap == NULL made it harder
> than the necessary to find where the problem was. So turning those checks
> into explicit checks that differentiate static vs dynamic dax will help
>
> With respect to this series converting those pgmap == NULL is going to need
> to made me export the symbol (provided dax core and dax device can be built
> as modules). So I don't know how this can be a patch converting entirety of
> dax. Perhaps you mean that I would just EXPORT_SYMBOL() and then a bigger
> patch introduces the MODULE_NS_IMPORT() And EXPORT_SYMBOL_NS*() separately.

Yeah, either a lead-in patch to do the conversion, or a follow on to
convert everything after the fact. Either way works for me, but I have
a small preference for the lead-in patch.

>
> The struct_size, yeah, should be a separate patch much like commit 7d18dd75a8af
> ("device-dax/kmem: use struct_size()").

Yeah.

>
> minor comment below on your snippet.
>
> >
> > diff --git a/drivers/dax/bus.c b/drivers/dax/bus.c
> > index 6cc4da4c713d..67ab7e05b340 100644
> > --- a/drivers/dax/bus.c
> > +++ b/drivers/dax/bus.c
> > @@ -134,6 +134,12 @@ static bool is_static(struct dax_region *dax_region)
> >         return (dax_region->res.flags & IORESOURCE_DAX_STATIC) != 0;
> >  }
> >
> > +bool static_dev_dax(struct dev_dax *dev_dax)
> > +{
> > +       return is_static(dev_dax->region);
> > +}
> > +EXPORT_SYMBOL_NS_GPL(static_dev_dax, DAX);
> > +
> >  static u64 dev_dax_size(struct dev_dax *dev_dax)
> >  {
> >         u64 size = 0;
> > @@ -363,6 +369,8 @@ void kill_dev_dax(struct dev_dax *dev_dax)
> >
> >         kill_dax(dax_dev);
> >         unmap_mapping_range(inode->i_mapping, 0, 0, 1);
> > +       if (static_dev_dax(dev_dax))
> > +               dev_dax->pgmap = NULL;
> >  }
>
> Here you probably meant !static_dev_dax() per my patch.

Oops, yes.

>
> >  EXPORT_SYMBOL_GPL(kill_dev_dax);
> >
> > diff --git a/drivers/dax/bus.h b/drivers/dax/bus.h
> > index 1e946ad7780a..4acdfee7dd59 100644
> > --- a/drivers/dax/bus.h
> > +++ b/drivers/dax/bus.h
> > @@ -48,6 +48,7 @@ int __dax_driver_register(struct dax_device_driver *dax_drv,
> >         __dax_driver_register(driver, THIS_MODULE, KBUILD_MODNAME)
> >  void dax_driver_unregister(struct dax_device_driver *dax_drv);
> >  void kill_dev_dax(struct dev_dax *dev_dax);
> > +bool static_dev_dax(struct dev_dax *dev_dax);
> >
> >  #if IS_ENABLED(CONFIG_DEV_DAX_PMEM_COMPAT)
> >  int dev_dax_probe(struct dev_dax *dev_dax);
> > diff --git a/drivers/dax/device.c b/drivers/dax/device.c
> > index dd8222a42808..87507aff2b10 100644
> > --- a/drivers/dax/device.c
> > +++ b/drivers/dax/device.c
> > @@ -398,31 +398,43 @@ int dev_dax_probe(struct dev_dax *dev_dax)
> >         void *addr;
> >         int rc, i;
> >
> > -       pgmap = dev_dax->pgmap;
> > -       if (dev_WARN_ONCE(dev, pgmap && dev_dax->nr_range > 1,
> > -                       "static pgmap / multi-range device conflict\n"))
> > +       if (static_dev_dax(dev_dax) && dev_dax->nr_range > 1) {
> > +               dev_warn(dev, "static pgmap / multi-range device conflict\n");
> >                 return -EINVAL;
> > +       }
> >
> > -       if (!pgmap) {
> > -               pgmap = devm_kzalloc(dev, sizeof(*pgmap) + sizeof(struct range)
> > -                               * (dev_dax->nr_range - 1), GFP_KERNEL);
> > +       if (static_dev_dax(dev_dax)) {
> > +               pgmap = dev_dax->pgmap;
> > +       } else {
> > +               if (dev_dax->pgmap) {
> > +                       dev_warn(dev,
> > +                                "dynamic-dax with pre-populated page map!?\n");
> > +                       return -EINVAL;
> > +               }
> > +               pgmap = devm_kzalloc(
> > +                       dev, struct_size(pgmap, ranges, dev_dax->nr_range - 1),
> > +                       GFP_KERNEL);
> >                 if (!pgmap)
> >                         return -ENOMEM;
> >                 pgmap->nr_range = dev_dax->nr_range;
> > +               dev_dax->pgmap = pgmap;
> > +               for (i = 0; i < dev_dax->nr_range; i++) {
> > +                       struct range *range = &dev_dax->ranges[i].range;
> > +
> > +                       pgmap->ranges[i] = *range;
> > +               }
> >         }
> >
> This code move is probably not needed unless your point is to have a more clear
> separation on what's initialization versus the mem region request (that's
> applicable to both dynamic and static).

It was more of an RFC cleanup idea and yes, should be its own patch if
you think it helps make the init path clearer.
Joao Martins Nov. 5, 2021, 6:11 p.m. UTC | #5
On 11/5/21 16:46, Dan Williams wrote:
> On Fri, Nov 5, 2021 at 5:10 AM Joao Martins <joao.m.martins@oracle.com> wrote:
>>
>> On 11/5/21 00:31, Dan Williams wrote:
>>> On Fri, Aug 27, 2021 at 7:59 AM Joao Martins <joao.m.martins@oracle.com> wrote:
>>>>
>>>> Right now, only static dax regions have a valid @pgmap pointer in its
>>>> struct dev_dax. Dynamic dax case however, do not.
>>>>
>>>> In preparation for device-dax compound devmap support, make sure that
>>>> dev_dax pgmap field is set after it has been allocated and initialized.
>>>>
>>>> dynamic dax device have the @pgmap is allocated at probe() and it's
>>>> managed by devm (contrast to static dax region which a pgmap is provided
>>>> and dax core kfrees it). So in addition to ensure a valid @pgmap, clear
>>>> the pgmap when the dynamic dax device is released to avoid the same
>>>> pgmap ranges to be re-requested across multiple region device reconfigs.
>>>>
>>>> Suggested-by: Dan Williams <dan.j.williams@intel.com>
>>>> Signed-off-by: Joao Martins <joao.m.martins@oracle.com>
>>>> ---
>>>>  drivers/dax/bus.c    | 8 ++++++++
>>>>  drivers/dax/device.c | 2 ++
>>>>  2 files changed, 10 insertions(+)
>>>>
>>>> diff --git a/drivers/dax/bus.c b/drivers/dax/bus.c
>>>> index 6cc4da4c713d..49dbff9ba609 100644
>>>> --- a/drivers/dax/bus.c
>>>> +++ b/drivers/dax/bus.c
>>>> @@ -363,6 +363,14 @@ void kill_dev_dax(struct dev_dax *dev_dax)
>>>>
>>>>         kill_dax(dax_dev);
>>>>         unmap_mapping_range(inode->i_mapping, 0, 0, 1);
>>>> +
>>>> +       /*
>>>> +        * Dynamic dax region have the pgmap allocated via dev_kzalloc()
>>>> +        * and thus freed by devm. Clear the pgmap to not have stale pgmap
>>>> +        * ranges on probe() from previous reconfigurations of region devices.
>>>> +        */
>>>> +       if (!is_static(dev_dax->region))
>>>> +               dev_dax->pgmap = NULL;
>>>>  }
>>>>  EXPORT_SYMBOL_GPL(kill_dev_dax);
>>>>
>>>> diff --git a/drivers/dax/device.c b/drivers/dax/device.c
>>>> index 0b82159b3564..6e348b5f9d45 100644
>>>> --- a/drivers/dax/device.c
>>>> +++ b/drivers/dax/device.c
>>>> @@ -426,6 +426,8 @@ int dev_dax_probe(struct dev_dax *dev_dax)
>>>>         }
>>>>
>>>>         pgmap->type = MEMORY_DEVICE_GENERIC;
>>>> +       dev_dax->pgmap = pgmap;
>>>
>>> So I think I'd rather see a bigger patch that replaces some of the
>>> implicit dev_dax->pgmap == NULL checks with explicit is_static()
>>> checks. Something like the following only compile and boot tested...
>>> Note the struct_size() change probably wants to be its own cleanup,
>>> and the EXPORT_SYMBOL_NS_GPL(..., DAX) probably wants to be its own
>>> patch converting over the entirety of drivers/dax/. Thoughts?
>>>
>> It's a good idea. Certainly the implicit pgmap == NULL made it harder
>> than the necessary to find where the problem was. So turning those checks
>> into explicit checks that differentiate static vs dynamic dax will help
>>
>> With respect to this series converting those pgmap == NULL is going to need
>> to made me export the symbol (provided dax core and dax device can be built
>> as modules). So I don't know how this can be a patch converting entirety of
>> dax. Perhaps you mean that I would just EXPORT_SYMBOL() and then a bigger
>> patch introduces the MODULE_NS_IMPORT() And EXPORT_SYMBOL_NS*() separately.
> 
> Yeah, either a lead-in patch to do the conversion, or a follow on to
> convert everything after the fact. Either way works for me, but I have
> a small preference for the lead-in patch.
> 

The one reason I am leaning towards the after the fact conversion, is that the
addition of a new exported symbol looks unrelated to a drivers/dax wide conversion
to namespaced symbols. Looks like a separate cleanup on top of this series, as
opposed to a dependency cleanup.

>>>  EXPORT_SYMBOL_GPL(kill_dev_dax);
>>>
>>> diff --git a/drivers/dax/bus.h b/drivers/dax/bus.h
>>> index 1e946ad7780a..4acdfee7dd59 100644
>>> --- a/drivers/dax/bus.h
>>> +++ b/drivers/dax/bus.h
>>> @@ -48,6 +48,7 @@ int __dax_driver_register(struct dax_device_driver *dax_drv,
>>>         __dax_driver_register(driver, THIS_MODULE, KBUILD_MODNAME)
>>>  void dax_driver_unregister(struct dax_device_driver *dax_drv);
>>>  void kill_dev_dax(struct dev_dax *dev_dax);
>>> +bool static_dev_dax(struct dev_dax *dev_dax);
>>>
>>>  #if IS_ENABLED(CONFIG_DEV_DAX_PMEM_COMPAT)
>>>  int dev_dax_probe(struct dev_dax *dev_dax);
>>> diff --git a/drivers/dax/device.c b/drivers/dax/device.c
>>> index dd8222a42808..87507aff2b10 100644
>>> --- a/drivers/dax/device.c
>>> +++ b/drivers/dax/device.c
>>> @@ -398,31 +398,43 @@ int dev_dax_probe(struct dev_dax *dev_dax)
>>>         void *addr;
>>>         int rc, i;
>>>
>>> -       pgmap = dev_dax->pgmap;
>>> -       if (dev_WARN_ONCE(dev, pgmap && dev_dax->nr_range > 1,
>>> -                       "static pgmap / multi-range device conflict\n"))
>>> +       if (static_dev_dax(dev_dax) && dev_dax->nr_range > 1) {
>>> +               dev_warn(dev, "static pgmap / multi-range device conflict\n");
>>>                 return -EINVAL;
>>> +       }
>>>
>>> -       if (!pgmap) {
>>> -               pgmap = devm_kzalloc(dev, sizeof(*pgmap) + sizeof(struct range)
>>> -                               * (dev_dax->nr_range - 1), GFP_KERNEL);
>>> +       if (static_dev_dax(dev_dax)) {
>>> +               pgmap = dev_dax->pgmap;
>>> +       } else {
>>> +               if (dev_dax->pgmap) {
>>> +                       dev_warn(dev,
>>> +                                "dynamic-dax with pre-populated page map!?\n");
>>> +                       return -EINVAL;
>>> +               }
>>> +               pgmap = devm_kzalloc(
>>> +                       dev, struct_size(pgmap, ranges, dev_dax->nr_range - 1),
>>> +                       GFP_KERNEL);
>>>                 if (!pgmap)
>>>                         return -ENOMEM;
>>>                 pgmap->nr_range = dev_dax->nr_range;
>>> +               dev_dax->pgmap = pgmap;
>>> +               for (i = 0; i < dev_dax->nr_range; i++) {
>>> +                       struct range *range = &dev_dax->ranges[i].range;
>>> +
>>> +                       pgmap->ranges[i] = *range;
>>> +               }
>>>         }
>>>
>> This code move is probably not needed unless your point is to have a more clear
>> separation on what's initialization versus the mem region request (that's
>> applicable to both dynamic and static).
> 
> It was more of an RFC cleanup idea and yes, should be its own patch if
> you think it helps make the init path clearer.
> 
OK.
diff mbox series

Patch

diff --git a/drivers/dax/bus.c b/drivers/dax/bus.c
index 6cc4da4c713d..49dbff9ba609 100644
--- a/drivers/dax/bus.c
+++ b/drivers/dax/bus.c
@@ -363,6 +363,14 @@  void kill_dev_dax(struct dev_dax *dev_dax)
 
 	kill_dax(dax_dev);
 	unmap_mapping_range(inode->i_mapping, 0, 0, 1);
+
+	/*
+	 * Dynamic dax region have the pgmap allocated via dev_kzalloc()
+	 * and thus freed by devm. Clear the pgmap to not have stale pgmap
+	 * ranges on probe() from previous reconfigurations of region devices.
+	 */
+	if (!is_static(dev_dax->region))
+		dev_dax->pgmap = NULL;
 }
 EXPORT_SYMBOL_GPL(kill_dev_dax);
 
diff --git a/drivers/dax/device.c b/drivers/dax/device.c
index 0b82159b3564..6e348b5f9d45 100644
--- a/drivers/dax/device.c
+++ b/drivers/dax/device.c
@@ -426,6 +426,8 @@  int dev_dax_probe(struct dev_dax *dev_dax)
 	}
 
 	pgmap->type = MEMORY_DEVICE_GENERIC;
+	dev_dax->pgmap = pgmap;
+
 	addr = devm_memremap_pages(dev, pgmap);
 	if (IS_ERR(addr))
 		return PTR_ERR(addr);