diff mbox series

[3/7] cxl/pmem: Fix cxl_pmem_region and cxl_memdev leak

Message ID 166752183647.947915.2045230911503793901.stgit@dwillia2-xfh.jf.intel.com
State Accepted
Commit a0f1d22a9753d515957e1d2341e6af615494d387
Headers show
Series CXL region creation fixes for 6.1 | expand

Commit Message

Dan Williams Nov. 4, 2022, 12:30 a.m. UTC
When a cxl_nvdimm object goes through a ->remove() event (device
physically removed, nvdimm-bridge disabled, or nvdimm device disabled),
then any associated regions must also be disabled. As highlighted by the
cxl-create-region.sh test [1], a single device may host multiple
regions, but the driver was only tracking one region at a time. This
leads to a situation where only the last enabled region per nvdimm
device is cleaned up properly. Other regions are leaked, and this also
causes cxl_memdev reference leaks.

Fix the tracking by allowing cxl_nvdimm objects to track multiple region
associations.

Cc: <stable@vger.kernel.org>
Link: https://github.com/pmem/ndctl/blob/main/test/cxl-create-region.sh [1]
Reported-by: Vishal Verma <vishal.l.verma@intel.com>
Fixes: 04ad63f086d1 ("cxl/region: Introduce cxl_pmem_region objects")
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 drivers/cxl/core/pmem.c |    2 +
 drivers/cxl/cxl.h       |    2 -
 drivers/cxl/pmem.c      |  100 ++++++++++++++++++++++++++++++-----------------
 3 files changed, 67 insertions(+), 37 deletions(-)

Comments

Verma, Vishal L Nov. 4, 2022, 5:55 a.m. UTC | #1
On Thu, 2022-11-03 at 17:30 -0700, Dan Williams wrote:
> When a cxl_nvdimm object goes through a ->remove() event (device
> physically removed, nvdimm-bridge disabled, or nvdimm device disabled),
> then any associated regions must also be disabled. As highlighted by the
> cxl-create-region.sh test [1], a single device may host multiple
> regions, but the driver was only tracking one region at a time. This
> leads to a situation where only the last enabled region per nvdimm
> device is cleaned up properly. Other regions are leaked, and this also
> causes cxl_memdev reference leaks.
> 
> Fix the tracking by allowing cxl_nvdimm objects to track multiple region
> associations.
> 
> Cc: <stable@vger.kernel.org>
> Link: https://github.com/pmem/ndctl/blob/main/test/cxl-create-region.sh [1]
> Reported-by: Vishal Verma <vishal.l.verma@intel.com>
> Fixes: 04ad63f086d1 ("cxl/region: Introduce cxl_pmem_region objects")
> Signed-off-by: Dan Williams <dan.j.williams@intel.com>
> ---
>  drivers/cxl/core/pmem.c |    2 +
>  drivers/cxl/cxl.h       |    2 -
>  drivers/cxl/pmem.c      |  100 ++++++++++++++++++++++++++++++-----------------
>  3 files changed, 67 insertions(+), 37 deletions(-)

One minor nit below, otherwise looks good to me.

Reviewed-by: Vishal Verma <vishal.l.verma@intel.com>


[..]
> diff --git a/drivers/cxl/pmem.c b/drivers/cxl/pmem.c
> index 0bac05d804bc..c98ff5eb85a6 100644
> --- a/drivers/cxl/pmem.c
> +++ b/drivers/cxl/pmem.c
> @@ -30,17 +30,20 @@ static void unregister_nvdimm(void *nvdimm)
>         struct cxl_nvdimm *cxl_nvd = nvdimm_provider_data(nvdimm);
>         struct cxl_nvdimm_bridge *cxl_nvb = cxl_nvd->bridge;
>         struct cxl_pmem_region *cxlr_pmem;
> +       unsigned long index;
>  
>         device_lock(&cxl_nvb->dev);
> -       cxlr_pmem = cxl_nvd->region;
>         dev_set_drvdata(&cxl_nvd->dev, NULL);
> -       cxl_nvd->region = NULL;
> -       device_unlock(&cxl_nvb->dev);
> +       xa_for_each(&cxl_nvd->pmem_regions, index, cxlr_pmem) {
> +               get_device(&cxlr_pmem->dev);
> +               device_unlock(&cxl_nvb->dev);
>  
> -       if (cxlr_pmem) {
>                 device_release_driver(&cxlr_pmem->dev);
>                 put_device(&cxlr_pmem->dev);
> +
> +               device_lock(&cxl_nvb->dev);
>         }
> +       device_unlock(&cxl_nvb->dev);
>  
>         nvdimm_delete(nvdimm);
>         cxl_nvd->bridge = NULL;
> @@ -366,25 +369,48 @@ static int match_cxl_nvdimm(struct device *dev, void *data)
>  
>  static void unregister_nvdimm_region(void *nd_region)
>  {
> -       struct cxl_nvdimm_bridge *cxl_nvb;
> -       struct cxl_pmem_region *cxlr_pmem;
> +       nvdimm_region_delete(nd_region);
> +}
> +
> +static int cxl_nvdimm_add_region(struct cxl_nvdimm *cxl_nvd,
> +                                struct cxl_pmem_region *cxlr_pmem)
> +{
> +       int rc;
> +
> +       rc = xa_insert(&cxl_nvd->pmem_regions, (unsigned long)cxlr_pmem,
> +                      cxlr_pmem, GFP_KERNEL);
> +       if (rc)
> +               return rc;
> +
> +       get_device(&cxlr_pmem->dev);
> +       return 0;
> +}
> +
> +static void cxl_nvdimm_del_region(struct cxl_nvdimm *cxl_nvd, struct cxl_pmem_region *cxlr_pmem)

Split the long line?

> +{
> +       /*
> +        * It is possible this is called without a corresponding
> +        * cxl_nvdimm_add_region for @cxlr_pmem
> +        */
> +       cxlr_pmem = xa_erase(&cxl_nvd->pmem_regions, (unsigned long)cxlr_pmem);
> +       if (cxlr_pmem)
> +               put_device(&cxlr_pmem->dev);
> +}
> +
> +static void release_mappings(void *data)
> +{
>         int i;
> +       struct cxl_pmem_region *cxlr_pmem = data;
> +       struct cxl_nvdimm_bridge *cxl_nvb = cxlr_pmem->bridge;
>  
> -       cxlr_pmem = nd_region_provider_data(nd_region);
> -       cxl_nvb = cxlr_pmem->bridge;
>         device_lock(&cxl_nvb->dev);
>         for (i = 0; i < cxlr_pmem->nr_mappings; i++) {
>                 struct cxl_pmem_region_mapping *m = &cxlr_pmem->mapping[i];
>                 struct cxl_nvdimm *cxl_nvd = m->cxl_nvd;
>  
> -               if (cxl_nvd->region) {
> -                       put_device(&cxlr_pmem->dev);
> -                       cxl_nvd->region = NULL;
> -               }
> +               cxl_nvdimm_del_region(cxl_nvd, cxlr_pmem);
>         }
>         device_unlock(&cxl_nvb->dev);
> -
> -       nvdimm_region_delete(nd_region);
>  }
>  
>  static void cxlr_pmem_remove_resource(void *res)
> @@ -422,7 +448,7 @@ static int cxl_pmem_region_probe(struct device *dev)
>         if (!cxl_nvb->nvdimm_bus) {
>                 dev_dbg(dev, "nvdimm bus not found\n");
>                 rc = -ENXIO;
> -               goto err;
> +               goto out_nvb;
>         }
>  
>         memset(&mappings, 0, sizeof(mappings));
> @@ -431,7 +457,7 @@ static int cxl_pmem_region_probe(struct device *dev)
>         res = devm_kzalloc(dev, sizeof(*res), GFP_KERNEL);
>         if (!res) {
>                 rc = -ENOMEM;
> -               goto err;
> +               goto out_nvb;
>         }
>  
>         res->name = "Persistent Memory";
> @@ -442,11 +468,11 @@ static int cxl_pmem_region_probe(struct device *dev)
>  
>         rc = insert_resource(&iomem_resource, res);
>         if (rc)
> -               goto err;
> +               goto out_nvb;
>  
>         rc = devm_add_action_or_reset(dev, cxlr_pmem_remove_resource, res);
>         if (rc)
> -               goto err;
> +               goto out_nvb;
>  
>         ndr_desc.res = res;
>         ndr_desc.provider_data = cxlr_pmem;
> @@ -462,7 +488,7 @@ static int cxl_pmem_region_probe(struct device *dev)
>         nd_set = devm_kzalloc(dev, sizeof(*nd_set), GFP_KERNEL);
>         if (!nd_set) {
>                 rc = -ENOMEM;
> -               goto err;
> +               goto out_nvb;
>         }
>  
>         ndr_desc.memregion = cxlr->id;
> @@ -472,9 +498,13 @@ static int cxl_pmem_region_probe(struct device *dev)
>         info = kmalloc_array(cxlr_pmem->nr_mappings, sizeof(*info), GFP_KERNEL);
>         if (!info) {
>                 rc = -ENOMEM;
> -               goto err;
> +               goto out_nvb;
>         }
>  
> +       rc = devm_add_action_or_reset(dev, release_mappings, cxlr_pmem);
> +       if (rc)
> +               goto out_nvd;
> +
>         for (i = 0; i < cxlr_pmem->nr_mappings; i++) {
>                 struct cxl_pmem_region_mapping *m = &cxlr_pmem->mapping[i];
>                 struct cxl_memdev *cxlmd = m->cxlmd;
> @@ -486,7 +516,7 @@ static int cxl_pmem_region_probe(struct device *dev)
>                         dev_dbg(dev, "[%d]: %s: no cxl_nvdimm found\n", i,
>                                 dev_name(&cxlmd->dev));
>                         rc = -ENODEV;
> -                       goto err;
> +                       goto out_nvd;
>                 }
>  
>                 /* safe to drop ref now with bridge lock held */
> @@ -498,10 +528,17 @@ static int cxl_pmem_region_probe(struct device *dev)
>                         dev_dbg(dev, "[%d]: %s: no nvdimm found\n", i,
>                                 dev_name(&cxlmd->dev));
>                         rc = -ENODEV;
> -                       goto err;
> +                       goto out_nvd;
>                 }
> -               cxl_nvd->region = cxlr_pmem;
> -               get_device(&cxlr_pmem->dev);
> +
> +               /*
> +                * Pin the region per nvdimm device as those may be released
> +                * out-of-order with respect to the region, and a single nvdimm
> +                * maybe associated with multiple regions
> +                */
> +               rc = cxl_nvdimm_add_region(cxl_nvd, cxlr_pmem);
> +               if (rc)
> +                       goto out_nvd;
>                 m->cxl_nvd = cxl_nvd;
>                 mappings[i] = (struct nd_mapping_desc) {
>                         .nvdimm = nvdimm,
> @@ -527,27 +564,18 @@ static int cxl_pmem_region_probe(struct device *dev)
>                 nvdimm_pmem_region_create(cxl_nvb->nvdimm_bus, &ndr_desc);
>         if (!cxlr_pmem->nd_region) {
>                 rc = -ENOMEM;
> -               goto err;
> +               goto out_nvd;
>         }
>  
>         rc = devm_add_action_or_reset(dev, unregister_nvdimm_region,
>                                       cxlr_pmem->nd_region);
> -out:
> +out_nvd:
>         kfree(info);
> +out_nvb:
>         device_unlock(&cxl_nvb->dev);
>         put_device(&cxl_nvb->dev);
>  
>         return rc;
> -
> -err:
> -       dev_dbg(dev, "failed to create nvdimm region\n");
> -       for (i--; i >= 0; i--) {
> -               nvdimm = mappings[i].nvdimm;
> -               cxl_nvd = nvdimm_provider_data(nvdimm);
> -               put_device(&cxl_nvd->region->dev);
> -               cxl_nvd->region = NULL;
> -       }
> -       goto out;
>  }
>  
>  static struct cxl_driver cxl_pmem_region_driver = {
>
Dave Jiang Nov. 4, 2022, 9:42 p.m. UTC | #2
On 11/3/2022 5:30 PM, Dan Williams wrote:
> When a cxl_nvdimm object goes through a ->remove() event (device
> physically removed, nvdimm-bridge disabled, or nvdimm device disabled),
> then any associated regions must also be disabled. As highlighted by the
> cxl-create-region.sh test [1], a single device may host multiple
> regions, but the driver was only tracking one region at a time. This
> leads to a situation where only the last enabled region per nvdimm
> device is cleaned up properly. Other regions are leaked, and this also
> causes cxl_memdev reference leaks.
> 
> Fix the tracking by allowing cxl_nvdimm objects to track multiple region
> associations.
> 
> Cc: <stable@vger.kernel.org>
> Link: https://github.com/pmem/ndctl/blob/main/test/cxl-create-region.sh [1]
> Reported-by: Vishal Verma <vishal.l.verma@intel.com>
> Fixes: 04ad63f086d1 ("cxl/region: Introduce cxl_pmem_region objects")
> Signed-off-by: Dan Williams <dan.j.williams@intel.com>

Reviewed-by: Dave Jiang <dave.jiang@intel.com>

> ---
>   drivers/cxl/core/pmem.c |    2 +
>   drivers/cxl/cxl.h       |    2 -
>   drivers/cxl/pmem.c      |  100 ++++++++++++++++++++++++++++++-----------------
>   3 files changed, 67 insertions(+), 37 deletions(-)
> 
> diff --git a/drivers/cxl/core/pmem.c b/drivers/cxl/core/pmem.c
> index 1d12a8206444..36aa5070d902 100644
> --- a/drivers/cxl/core/pmem.c
> +++ b/drivers/cxl/core/pmem.c
> @@ -188,6 +188,7 @@ static void cxl_nvdimm_release(struct device *dev)
>   {
>   	struct cxl_nvdimm *cxl_nvd = to_cxl_nvdimm(dev);
>   
> +	xa_destroy(&cxl_nvd->pmem_regions);
>   	kfree(cxl_nvd);
>   }
>   
> @@ -230,6 +231,7 @@ static struct cxl_nvdimm *cxl_nvdimm_alloc(struct cxl_memdev *cxlmd)
>   
>   	dev = &cxl_nvd->dev;
>   	cxl_nvd->cxlmd = cxlmd;
> +	xa_init(&cxl_nvd->pmem_regions);
>   	device_initialize(dev);
>   	lockdep_set_class(&dev->mutex, &cxl_nvdimm_key);
>   	device_set_pm_not_required(dev);
> diff --git a/drivers/cxl/cxl.h b/drivers/cxl/cxl.h
> index f680450f0b16..1164ad49f3d3 100644
> --- a/drivers/cxl/cxl.h
> +++ b/drivers/cxl/cxl.h
> @@ -423,7 +423,7 @@ struct cxl_nvdimm {
>   	struct device dev;
>   	struct cxl_memdev *cxlmd;
>   	struct cxl_nvdimm_bridge *bridge;
> -	struct cxl_pmem_region *region;
> +	struct xarray pmem_regions;
>   };
>   
>   struct cxl_pmem_region_mapping {
> diff --git a/drivers/cxl/pmem.c b/drivers/cxl/pmem.c
> index 0bac05d804bc..c98ff5eb85a6 100644
> --- a/drivers/cxl/pmem.c
> +++ b/drivers/cxl/pmem.c
> @@ -30,17 +30,20 @@ static void unregister_nvdimm(void *nvdimm)
>   	struct cxl_nvdimm *cxl_nvd = nvdimm_provider_data(nvdimm);
>   	struct cxl_nvdimm_bridge *cxl_nvb = cxl_nvd->bridge;
>   	struct cxl_pmem_region *cxlr_pmem;
> +	unsigned long index;
>   
>   	device_lock(&cxl_nvb->dev);
> -	cxlr_pmem = cxl_nvd->region;
>   	dev_set_drvdata(&cxl_nvd->dev, NULL);
> -	cxl_nvd->region = NULL;
> -	device_unlock(&cxl_nvb->dev);
> +	xa_for_each(&cxl_nvd->pmem_regions, index, cxlr_pmem) {
> +		get_device(&cxlr_pmem->dev);
> +		device_unlock(&cxl_nvb->dev);
>   
> -	if (cxlr_pmem) {
>   		device_release_driver(&cxlr_pmem->dev);
>   		put_device(&cxlr_pmem->dev);
> +
> +		device_lock(&cxl_nvb->dev);
>   	}
> +	device_unlock(&cxl_nvb->dev);
>   
>   	nvdimm_delete(nvdimm);
>   	cxl_nvd->bridge = NULL;
> @@ -366,25 +369,48 @@ static int match_cxl_nvdimm(struct device *dev, void *data)
>   
>   static void unregister_nvdimm_region(void *nd_region)
>   {
> -	struct cxl_nvdimm_bridge *cxl_nvb;
> -	struct cxl_pmem_region *cxlr_pmem;
> +	nvdimm_region_delete(nd_region);
> +}
> +
> +static int cxl_nvdimm_add_region(struct cxl_nvdimm *cxl_nvd,
> +				 struct cxl_pmem_region *cxlr_pmem)
> +{
> +	int rc;
> +
> +	rc = xa_insert(&cxl_nvd->pmem_regions, (unsigned long)cxlr_pmem,
> +		       cxlr_pmem, GFP_KERNEL);
> +	if (rc)
> +		return rc;
> +
> +	get_device(&cxlr_pmem->dev);
> +	return 0;
> +}
> +
> +static void cxl_nvdimm_del_region(struct cxl_nvdimm *cxl_nvd, struct cxl_pmem_region *cxlr_pmem)
> +{
> +	/*
> +	 * It is possible this is called without a corresponding
> +	 * cxl_nvdimm_add_region for @cxlr_pmem
> +	 */
> +	cxlr_pmem = xa_erase(&cxl_nvd->pmem_regions, (unsigned long)cxlr_pmem);
> +	if (cxlr_pmem)
> +		put_device(&cxlr_pmem->dev);
> +}
> +
> +static void release_mappings(void *data)
> +{
>   	int i;
> +	struct cxl_pmem_region *cxlr_pmem = data;
> +	struct cxl_nvdimm_bridge *cxl_nvb = cxlr_pmem->bridge;
>   
> -	cxlr_pmem = nd_region_provider_data(nd_region);
> -	cxl_nvb = cxlr_pmem->bridge;
>   	device_lock(&cxl_nvb->dev);
>   	for (i = 0; i < cxlr_pmem->nr_mappings; i++) {
>   		struct cxl_pmem_region_mapping *m = &cxlr_pmem->mapping[i];
>   		struct cxl_nvdimm *cxl_nvd = m->cxl_nvd;
>   
> -		if (cxl_nvd->region) {
> -			put_device(&cxlr_pmem->dev);
> -			cxl_nvd->region = NULL;
> -		}
> +		cxl_nvdimm_del_region(cxl_nvd, cxlr_pmem);
>   	}
>   	device_unlock(&cxl_nvb->dev);
> -
> -	nvdimm_region_delete(nd_region);
>   }
>   
>   static void cxlr_pmem_remove_resource(void *res)
> @@ -422,7 +448,7 @@ static int cxl_pmem_region_probe(struct device *dev)
>   	if (!cxl_nvb->nvdimm_bus) {
>   		dev_dbg(dev, "nvdimm bus not found\n");
>   		rc = -ENXIO;
> -		goto err;
> +		goto out_nvb;
>   	}
>   
>   	memset(&mappings, 0, sizeof(mappings));
> @@ -431,7 +457,7 @@ static int cxl_pmem_region_probe(struct device *dev)
>   	res = devm_kzalloc(dev, sizeof(*res), GFP_KERNEL);
>   	if (!res) {
>   		rc = -ENOMEM;
> -		goto err;
> +		goto out_nvb;
>   	}
>   
>   	res->name = "Persistent Memory";
> @@ -442,11 +468,11 @@ static int cxl_pmem_region_probe(struct device *dev)
>   
>   	rc = insert_resource(&iomem_resource, res);
>   	if (rc)
> -		goto err;
> +		goto out_nvb;
>   
>   	rc = devm_add_action_or_reset(dev, cxlr_pmem_remove_resource, res);
>   	if (rc)
> -		goto err;
> +		goto out_nvb;
>   
>   	ndr_desc.res = res;
>   	ndr_desc.provider_data = cxlr_pmem;
> @@ -462,7 +488,7 @@ static int cxl_pmem_region_probe(struct device *dev)
>   	nd_set = devm_kzalloc(dev, sizeof(*nd_set), GFP_KERNEL);
>   	if (!nd_set) {
>   		rc = -ENOMEM;
> -		goto err;
> +		goto out_nvb;
>   	}
>   
>   	ndr_desc.memregion = cxlr->id;
> @@ -472,9 +498,13 @@ static int cxl_pmem_region_probe(struct device *dev)
>   	info = kmalloc_array(cxlr_pmem->nr_mappings, sizeof(*info), GFP_KERNEL);
>   	if (!info) {
>   		rc = -ENOMEM;
> -		goto err;
> +		goto out_nvb;
>   	}
>   
> +	rc = devm_add_action_or_reset(dev, release_mappings, cxlr_pmem);
> +	if (rc)
> +		goto out_nvd;
> +
>   	for (i = 0; i < cxlr_pmem->nr_mappings; i++) {
>   		struct cxl_pmem_region_mapping *m = &cxlr_pmem->mapping[i];
>   		struct cxl_memdev *cxlmd = m->cxlmd;
> @@ -486,7 +516,7 @@ static int cxl_pmem_region_probe(struct device *dev)
>   			dev_dbg(dev, "[%d]: %s: no cxl_nvdimm found\n", i,
>   				dev_name(&cxlmd->dev));
>   			rc = -ENODEV;
> -			goto err;
> +			goto out_nvd;
>   		}
>   
>   		/* safe to drop ref now with bridge lock held */
> @@ -498,10 +528,17 @@ static int cxl_pmem_region_probe(struct device *dev)
>   			dev_dbg(dev, "[%d]: %s: no nvdimm found\n", i,
>   				dev_name(&cxlmd->dev));
>   			rc = -ENODEV;
> -			goto err;
> +			goto out_nvd;
>   		}
> -		cxl_nvd->region = cxlr_pmem;
> -		get_device(&cxlr_pmem->dev);
> +
> +		/*
> +		 * Pin the region per nvdimm device as those may be released
> +		 * out-of-order with respect to the region, and a single nvdimm
> +		 * maybe associated with multiple regions
> +		 */
> +		rc = cxl_nvdimm_add_region(cxl_nvd, cxlr_pmem);
> +		if (rc)
> +			goto out_nvd;
>   		m->cxl_nvd = cxl_nvd;
>   		mappings[i] = (struct nd_mapping_desc) {
>   			.nvdimm = nvdimm,
> @@ -527,27 +564,18 @@ static int cxl_pmem_region_probe(struct device *dev)
>   		nvdimm_pmem_region_create(cxl_nvb->nvdimm_bus, &ndr_desc);
>   	if (!cxlr_pmem->nd_region) {
>   		rc = -ENOMEM;
> -		goto err;
> +		goto out_nvd;
>   	}
>   
>   	rc = devm_add_action_or_reset(dev, unregister_nvdimm_region,
>   				      cxlr_pmem->nd_region);
> -out:
> +out_nvd:
>   	kfree(info);
> +out_nvb:
>   	device_unlock(&cxl_nvb->dev);
>   	put_device(&cxl_nvb->dev);
>   
>   	return rc;
> -
> -err:
> -	dev_dbg(dev, "failed to create nvdimm region\n");
> -	for (i--; i >= 0; i--) {
> -		nvdimm = mappings[i].nvdimm;
> -		cxl_nvd = nvdimm_provider_data(nvdimm);
> -		put_device(&cxl_nvd->region->dev);
> -		cxl_nvd->region = NULL;
> -	}
> -	goto out;
>   }
>   
>   static struct cxl_driver cxl_pmem_region_driver = {
>
diff mbox series

Patch

diff --git a/drivers/cxl/core/pmem.c b/drivers/cxl/core/pmem.c
index 1d12a8206444..36aa5070d902 100644
--- a/drivers/cxl/core/pmem.c
+++ b/drivers/cxl/core/pmem.c
@@ -188,6 +188,7 @@  static void cxl_nvdimm_release(struct device *dev)
 {
 	struct cxl_nvdimm *cxl_nvd = to_cxl_nvdimm(dev);
 
+	xa_destroy(&cxl_nvd->pmem_regions);
 	kfree(cxl_nvd);
 }
 
@@ -230,6 +231,7 @@  static struct cxl_nvdimm *cxl_nvdimm_alloc(struct cxl_memdev *cxlmd)
 
 	dev = &cxl_nvd->dev;
 	cxl_nvd->cxlmd = cxlmd;
+	xa_init(&cxl_nvd->pmem_regions);
 	device_initialize(dev);
 	lockdep_set_class(&dev->mutex, &cxl_nvdimm_key);
 	device_set_pm_not_required(dev);
diff --git a/drivers/cxl/cxl.h b/drivers/cxl/cxl.h
index f680450f0b16..1164ad49f3d3 100644
--- a/drivers/cxl/cxl.h
+++ b/drivers/cxl/cxl.h
@@ -423,7 +423,7 @@  struct cxl_nvdimm {
 	struct device dev;
 	struct cxl_memdev *cxlmd;
 	struct cxl_nvdimm_bridge *bridge;
-	struct cxl_pmem_region *region;
+	struct xarray pmem_regions;
 };
 
 struct cxl_pmem_region_mapping {
diff --git a/drivers/cxl/pmem.c b/drivers/cxl/pmem.c
index 0bac05d804bc..c98ff5eb85a6 100644
--- a/drivers/cxl/pmem.c
+++ b/drivers/cxl/pmem.c
@@ -30,17 +30,20 @@  static void unregister_nvdimm(void *nvdimm)
 	struct cxl_nvdimm *cxl_nvd = nvdimm_provider_data(nvdimm);
 	struct cxl_nvdimm_bridge *cxl_nvb = cxl_nvd->bridge;
 	struct cxl_pmem_region *cxlr_pmem;
+	unsigned long index;
 
 	device_lock(&cxl_nvb->dev);
-	cxlr_pmem = cxl_nvd->region;
 	dev_set_drvdata(&cxl_nvd->dev, NULL);
-	cxl_nvd->region = NULL;
-	device_unlock(&cxl_nvb->dev);
+	xa_for_each(&cxl_nvd->pmem_regions, index, cxlr_pmem) {
+		get_device(&cxlr_pmem->dev);
+		device_unlock(&cxl_nvb->dev);
 
-	if (cxlr_pmem) {
 		device_release_driver(&cxlr_pmem->dev);
 		put_device(&cxlr_pmem->dev);
+
+		device_lock(&cxl_nvb->dev);
 	}
+	device_unlock(&cxl_nvb->dev);
 
 	nvdimm_delete(nvdimm);
 	cxl_nvd->bridge = NULL;
@@ -366,25 +369,48 @@  static int match_cxl_nvdimm(struct device *dev, void *data)
 
 static void unregister_nvdimm_region(void *nd_region)
 {
-	struct cxl_nvdimm_bridge *cxl_nvb;
-	struct cxl_pmem_region *cxlr_pmem;
+	nvdimm_region_delete(nd_region);
+}
+
+static int cxl_nvdimm_add_region(struct cxl_nvdimm *cxl_nvd,
+				 struct cxl_pmem_region *cxlr_pmem)
+{
+	int rc;
+
+	rc = xa_insert(&cxl_nvd->pmem_regions, (unsigned long)cxlr_pmem,
+		       cxlr_pmem, GFP_KERNEL);
+	if (rc)
+		return rc;
+
+	get_device(&cxlr_pmem->dev);
+	return 0;
+}
+
+static void cxl_nvdimm_del_region(struct cxl_nvdimm *cxl_nvd, struct cxl_pmem_region *cxlr_pmem)
+{
+	/*
+	 * It is possible this is called without a corresponding
+	 * cxl_nvdimm_add_region for @cxlr_pmem
+	 */
+	cxlr_pmem = xa_erase(&cxl_nvd->pmem_regions, (unsigned long)cxlr_pmem);
+	if (cxlr_pmem)
+		put_device(&cxlr_pmem->dev);
+}
+
+static void release_mappings(void *data)
+{
 	int i;
+	struct cxl_pmem_region *cxlr_pmem = data;
+	struct cxl_nvdimm_bridge *cxl_nvb = cxlr_pmem->bridge;
 
-	cxlr_pmem = nd_region_provider_data(nd_region);
-	cxl_nvb = cxlr_pmem->bridge;
 	device_lock(&cxl_nvb->dev);
 	for (i = 0; i < cxlr_pmem->nr_mappings; i++) {
 		struct cxl_pmem_region_mapping *m = &cxlr_pmem->mapping[i];
 		struct cxl_nvdimm *cxl_nvd = m->cxl_nvd;
 
-		if (cxl_nvd->region) {
-			put_device(&cxlr_pmem->dev);
-			cxl_nvd->region = NULL;
-		}
+		cxl_nvdimm_del_region(cxl_nvd, cxlr_pmem);
 	}
 	device_unlock(&cxl_nvb->dev);
-
-	nvdimm_region_delete(nd_region);
 }
 
 static void cxlr_pmem_remove_resource(void *res)
@@ -422,7 +448,7 @@  static int cxl_pmem_region_probe(struct device *dev)
 	if (!cxl_nvb->nvdimm_bus) {
 		dev_dbg(dev, "nvdimm bus not found\n");
 		rc = -ENXIO;
-		goto err;
+		goto out_nvb;
 	}
 
 	memset(&mappings, 0, sizeof(mappings));
@@ -431,7 +457,7 @@  static int cxl_pmem_region_probe(struct device *dev)
 	res = devm_kzalloc(dev, sizeof(*res), GFP_KERNEL);
 	if (!res) {
 		rc = -ENOMEM;
-		goto err;
+		goto out_nvb;
 	}
 
 	res->name = "Persistent Memory";
@@ -442,11 +468,11 @@  static int cxl_pmem_region_probe(struct device *dev)
 
 	rc = insert_resource(&iomem_resource, res);
 	if (rc)
-		goto err;
+		goto out_nvb;
 
 	rc = devm_add_action_or_reset(dev, cxlr_pmem_remove_resource, res);
 	if (rc)
-		goto err;
+		goto out_nvb;
 
 	ndr_desc.res = res;
 	ndr_desc.provider_data = cxlr_pmem;
@@ -462,7 +488,7 @@  static int cxl_pmem_region_probe(struct device *dev)
 	nd_set = devm_kzalloc(dev, sizeof(*nd_set), GFP_KERNEL);
 	if (!nd_set) {
 		rc = -ENOMEM;
-		goto err;
+		goto out_nvb;
 	}
 
 	ndr_desc.memregion = cxlr->id;
@@ -472,9 +498,13 @@  static int cxl_pmem_region_probe(struct device *dev)
 	info = kmalloc_array(cxlr_pmem->nr_mappings, sizeof(*info), GFP_KERNEL);
 	if (!info) {
 		rc = -ENOMEM;
-		goto err;
+		goto out_nvb;
 	}
 
+	rc = devm_add_action_or_reset(dev, release_mappings, cxlr_pmem);
+	if (rc)
+		goto out_nvd;
+
 	for (i = 0; i < cxlr_pmem->nr_mappings; i++) {
 		struct cxl_pmem_region_mapping *m = &cxlr_pmem->mapping[i];
 		struct cxl_memdev *cxlmd = m->cxlmd;
@@ -486,7 +516,7 @@  static int cxl_pmem_region_probe(struct device *dev)
 			dev_dbg(dev, "[%d]: %s: no cxl_nvdimm found\n", i,
 				dev_name(&cxlmd->dev));
 			rc = -ENODEV;
-			goto err;
+			goto out_nvd;
 		}
 
 		/* safe to drop ref now with bridge lock held */
@@ -498,10 +528,17 @@  static int cxl_pmem_region_probe(struct device *dev)
 			dev_dbg(dev, "[%d]: %s: no nvdimm found\n", i,
 				dev_name(&cxlmd->dev));
 			rc = -ENODEV;
-			goto err;
+			goto out_nvd;
 		}
-		cxl_nvd->region = cxlr_pmem;
-		get_device(&cxlr_pmem->dev);
+
+		/*
+		 * Pin the region per nvdimm device as those may be released
+		 * out-of-order with respect to the region, and a single nvdimm
+		 * maybe associated with multiple regions
+		 */
+		rc = cxl_nvdimm_add_region(cxl_nvd, cxlr_pmem);
+		if (rc)
+			goto out_nvd;
 		m->cxl_nvd = cxl_nvd;
 		mappings[i] = (struct nd_mapping_desc) {
 			.nvdimm = nvdimm,
@@ -527,27 +564,18 @@  static int cxl_pmem_region_probe(struct device *dev)
 		nvdimm_pmem_region_create(cxl_nvb->nvdimm_bus, &ndr_desc);
 	if (!cxlr_pmem->nd_region) {
 		rc = -ENOMEM;
-		goto err;
+		goto out_nvd;
 	}
 
 	rc = devm_add_action_or_reset(dev, unregister_nvdimm_region,
 				      cxlr_pmem->nd_region);
-out:
+out_nvd:
 	kfree(info);
+out_nvb:
 	device_unlock(&cxl_nvb->dev);
 	put_device(&cxl_nvb->dev);
 
 	return rc;
-
-err:
-	dev_dbg(dev, "failed to create nvdimm region\n");
-	for (i--; i >= 0; i--) {
-		nvdimm = mappings[i].nvdimm;
-		cxl_nvd = nvdimm_provider_data(nvdimm);
-		put_device(&cxl_nvd->region->dev);
-		cxl_nvd->region = NULL;
-	}
-	goto out;
 }
 
 static struct cxl_driver cxl_pmem_region_driver = {