Message ID | 166752183647.947915.2045230911503793901.stgit@dwillia2-xfh.jf.intel.com |
---|---|
State | Accepted |
Commit | a0f1d22a9753d515957e1d2341e6af615494d387 |
Headers | show |
Series | CXL region creation fixes for 6.1 | expand |
On Thu, 2022-11-03 at 17:30 -0700, Dan Williams wrote: > When a cxl_nvdimm object goes through a ->remove() event (device > physically removed, nvdimm-bridge disabled, or nvdimm device disabled), > then any associated regions must also be disabled. As highlighted by the > cxl-create-region.sh test [1], a single device may host multiple > regions, but the driver was only tracking one region at a time. This > leads to a situation where only the last enabled region per nvdimm > device is cleaned up properly. Other regions are leaked, and this also > causes cxl_memdev reference leaks. > > Fix the tracking by allowing cxl_nvdimm objects to track multiple region > associations. > > Cc: <stable@vger.kernel.org> > Link: https://github.com/pmem/ndctl/blob/main/test/cxl-create-region.sh [1] > Reported-by: Vishal Verma <vishal.l.verma@intel.com> > Fixes: 04ad63f086d1 ("cxl/region: Introduce cxl_pmem_region objects") > Signed-off-by: Dan Williams <dan.j.williams@intel.com> > --- > drivers/cxl/core/pmem.c | 2 + > drivers/cxl/cxl.h | 2 - > drivers/cxl/pmem.c | 100 ++++++++++++++++++++++++++++++----------------- > 3 files changed, 67 insertions(+), 37 deletions(-) One minor nit below, otherwise looks good to me. Reviewed-by: Vishal Verma <vishal.l.verma@intel.com> [..] > diff --git a/drivers/cxl/pmem.c b/drivers/cxl/pmem.c > index 0bac05d804bc..c98ff5eb85a6 100644 > --- a/drivers/cxl/pmem.c > +++ b/drivers/cxl/pmem.c > @@ -30,17 +30,20 @@ static void unregister_nvdimm(void *nvdimm) > struct cxl_nvdimm *cxl_nvd = nvdimm_provider_data(nvdimm); > struct cxl_nvdimm_bridge *cxl_nvb = cxl_nvd->bridge; > struct cxl_pmem_region *cxlr_pmem; > + unsigned long index; > > device_lock(&cxl_nvb->dev); > - cxlr_pmem = cxl_nvd->region; > dev_set_drvdata(&cxl_nvd->dev, NULL); > - cxl_nvd->region = NULL; > - device_unlock(&cxl_nvb->dev); > + xa_for_each(&cxl_nvd->pmem_regions, index, cxlr_pmem) { > + get_device(&cxlr_pmem->dev); > + device_unlock(&cxl_nvb->dev); > > - if (cxlr_pmem) { > device_release_driver(&cxlr_pmem->dev); > put_device(&cxlr_pmem->dev); > + > + device_lock(&cxl_nvb->dev); > } > + device_unlock(&cxl_nvb->dev); > > nvdimm_delete(nvdimm); > cxl_nvd->bridge = NULL; > @@ -366,25 +369,48 @@ static int match_cxl_nvdimm(struct device *dev, void *data) > > static void unregister_nvdimm_region(void *nd_region) > { > - struct cxl_nvdimm_bridge *cxl_nvb; > - struct cxl_pmem_region *cxlr_pmem; > + nvdimm_region_delete(nd_region); > +} > + > +static int cxl_nvdimm_add_region(struct cxl_nvdimm *cxl_nvd, > + struct cxl_pmem_region *cxlr_pmem) > +{ > + int rc; > + > + rc = xa_insert(&cxl_nvd->pmem_regions, (unsigned long)cxlr_pmem, > + cxlr_pmem, GFP_KERNEL); > + if (rc) > + return rc; > + > + get_device(&cxlr_pmem->dev); > + return 0; > +} > + > +static void cxl_nvdimm_del_region(struct cxl_nvdimm *cxl_nvd, struct cxl_pmem_region *cxlr_pmem) Split the long line? > +{ > + /* > + * It is possible this is called without a corresponding > + * cxl_nvdimm_add_region for @cxlr_pmem > + */ > + cxlr_pmem = xa_erase(&cxl_nvd->pmem_regions, (unsigned long)cxlr_pmem); > + if (cxlr_pmem) > + put_device(&cxlr_pmem->dev); > +} > + > +static void release_mappings(void *data) > +{ > int i; > + struct cxl_pmem_region *cxlr_pmem = data; > + struct cxl_nvdimm_bridge *cxl_nvb = cxlr_pmem->bridge; > > - cxlr_pmem = nd_region_provider_data(nd_region); > - cxl_nvb = cxlr_pmem->bridge; > device_lock(&cxl_nvb->dev); > for (i = 0; i < cxlr_pmem->nr_mappings; i++) { > struct cxl_pmem_region_mapping *m = &cxlr_pmem->mapping[i]; > struct cxl_nvdimm *cxl_nvd = m->cxl_nvd; > > - if (cxl_nvd->region) { > - put_device(&cxlr_pmem->dev); > - cxl_nvd->region = NULL; > - } > + cxl_nvdimm_del_region(cxl_nvd, cxlr_pmem); > } > device_unlock(&cxl_nvb->dev); > - > - nvdimm_region_delete(nd_region); > } > > static void cxlr_pmem_remove_resource(void *res) > @@ -422,7 +448,7 @@ static int cxl_pmem_region_probe(struct device *dev) > if (!cxl_nvb->nvdimm_bus) { > dev_dbg(dev, "nvdimm bus not found\n"); > rc = -ENXIO; > - goto err; > + goto out_nvb; > } > > memset(&mappings, 0, sizeof(mappings)); > @@ -431,7 +457,7 @@ static int cxl_pmem_region_probe(struct device *dev) > res = devm_kzalloc(dev, sizeof(*res), GFP_KERNEL); > if (!res) { > rc = -ENOMEM; > - goto err; > + goto out_nvb; > } > > res->name = "Persistent Memory"; > @@ -442,11 +468,11 @@ static int cxl_pmem_region_probe(struct device *dev) > > rc = insert_resource(&iomem_resource, res); > if (rc) > - goto err; > + goto out_nvb; > > rc = devm_add_action_or_reset(dev, cxlr_pmem_remove_resource, res); > if (rc) > - goto err; > + goto out_nvb; > > ndr_desc.res = res; > ndr_desc.provider_data = cxlr_pmem; > @@ -462,7 +488,7 @@ static int cxl_pmem_region_probe(struct device *dev) > nd_set = devm_kzalloc(dev, sizeof(*nd_set), GFP_KERNEL); > if (!nd_set) { > rc = -ENOMEM; > - goto err; > + goto out_nvb; > } > > ndr_desc.memregion = cxlr->id; > @@ -472,9 +498,13 @@ static int cxl_pmem_region_probe(struct device *dev) > info = kmalloc_array(cxlr_pmem->nr_mappings, sizeof(*info), GFP_KERNEL); > if (!info) { > rc = -ENOMEM; > - goto err; > + goto out_nvb; > } > > + rc = devm_add_action_or_reset(dev, release_mappings, cxlr_pmem); > + if (rc) > + goto out_nvd; > + > for (i = 0; i < cxlr_pmem->nr_mappings; i++) { > struct cxl_pmem_region_mapping *m = &cxlr_pmem->mapping[i]; > struct cxl_memdev *cxlmd = m->cxlmd; > @@ -486,7 +516,7 @@ static int cxl_pmem_region_probe(struct device *dev) > dev_dbg(dev, "[%d]: %s: no cxl_nvdimm found\n", i, > dev_name(&cxlmd->dev)); > rc = -ENODEV; > - goto err; > + goto out_nvd; > } > > /* safe to drop ref now with bridge lock held */ > @@ -498,10 +528,17 @@ static int cxl_pmem_region_probe(struct device *dev) > dev_dbg(dev, "[%d]: %s: no nvdimm found\n", i, > dev_name(&cxlmd->dev)); > rc = -ENODEV; > - goto err; > + goto out_nvd; > } > - cxl_nvd->region = cxlr_pmem; > - get_device(&cxlr_pmem->dev); > + > + /* > + * Pin the region per nvdimm device as those may be released > + * out-of-order with respect to the region, and a single nvdimm > + * maybe associated with multiple regions > + */ > + rc = cxl_nvdimm_add_region(cxl_nvd, cxlr_pmem); > + if (rc) > + goto out_nvd; > m->cxl_nvd = cxl_nvd; > mappings[i] = (struct nd_mapping_desc) { > .nvdimm = nvdimm, > @@ -527,27 +564,18 @@ static int cxl_pmem_region_probe(struct device *dev) > nvdimm_pmem_region_create(cxl_nvb->nvdimm_bus, &ndr_desc); > if (!cxlr_pmem->nd_region) { > rc = -ENOMEM; > - goto err; > + goto out_nvd; > } > > rc = devm_add_action_or_reset(dev, unregister_nvdimm_region, > cxlr_pmem->nd_region); > -out: > +out_nvd: > kfree(info); > +out_nvb: > device_unlock(&cxl_nvb->dev); > put_device(&cxl_nvb->dev); > > return rc; > - > -err: > - dev_dbg(dev, "failed to create nvdimm region\n"); > - for (i--; i >= 0; i--) { > - nvdimm = mappings[i].nvdimm; > - cxl_nvd = nvdimm_provider_data(nvdimm); > - put_device(&cxl_nvd->region->dev); > - cxl_nvd->region = NULL; > - } > - goto out; > } > > static struct cxl_driver cxl_pmem_region_driver = { >
On 11/3/2022 5:30 PM, Dan Williams wrote: > When a cxl_nvdimm object goes through a ->remove() event (device > physically removed, nvdimm-bridge disabled, or nvdimm device disabled), > then any associated regions must also be disabled. As highlighted by the > cxl-create-region.sh test [1], a single device may host multiple > regions, but the driver was only tracking one region at a time. This > leads to a situation where only the last enabled region per nvdimm > device is cleaned up properly. Other regions are leaked, and this also > causes cxl_memdev reference leaks. > > Fix the tracking by allowing cxl_nvdimm objects to track multiple region > associations. > > Cc: <stable@vger.kernel.org> > Link: https://github.com/pmem/ndctl/blob/main/test/cxl-create-region.sh [1] > Reported-by: Vishal Verma <vishal.l.verma@intel.com> > Fixes: 04ad63f086d1 ("cxl/region: Introduce cxl_pmem_region objects") > Signed-off-by: Dan Williams <dan.j.williams@intel.com> Reviewed-by: Dave Jiang <dave.jiang@intel.com> > --- > drivers/cxl/core/pmem.c | 2 + > drivers/cxl/cxl.h | 2 - > drivers/cxl/pmem.c | 100 ++++++++++++++++++++++++++++++----------------- > 3 files changed, 67 insertions(+), 37 deletions(-) > > diff --git a/drivers/cxl/core/pmem.c b/drivers/cxl/core/pmem.c > index 1d12a8206444..36aa5070d902 100644 > --- a/drivers/cxl/core/pmem.c > +++ b/drivers/cxl/core/pmem.c > @@ -188,6 +188,7 @@ static void cxl_nvdimm_release(struct device *dev) > { > struct cxl_nvdimm *cxl_nvd = to_cxl_nvdimm(dev); > > + xa_destroy(&cxl_nvd->pmem_regions); > kfree(cxl_nvd); > } > > @@ -230,6 +231,7 @@ static struct cxl_nvdimm *cxl_nvdimm_alloc(struct cxl_memdev *cxlmd) > > dev = &cxl_nvd->dev; > cxl_nvd->cxlmd = cxlmd; > + xa_init(&cxl_nvd->pmem_regions); > device_initialize(dev); > lockdep_set_class(&dev->mutex, &cxl_nvdimm_key); > device_set_pm_not_required(dev); > diff --git a/drivers/cxl/cxl.h b/drivers/cxl/cxl.h > index f680450f0b16..1164ad49f3d3 100644 > --- a/drivers/cxl/cxl.h > +++ b/drivers/cxl/cxl.h > @@ -423,7 +423,7 @@ struct cxl_nvdimm { > struct device dev; > struct cxl_memdev *cxlmd; > struct cxl_nvdimm_bridge *bridge; > - struct cxl_pmem_region *region; > + struct xarray pmem_regions; > }; > > struct cxl_pmem_region_mapping { > diff --git a/drivers/cxl/pmem.c b/drivers/cxl/pmem.c > index 0bac05d804bc..c98ff5eb85a6 100644 > --- a/drivers/cxl/pmem.c > +++ b/drivers/cxl/pmem.c > @@ -30,17 +30,20 @@ static void unregister_nvdimm(void *nvdimm) > struct cxl_nvdimm *cxl_nvd = nvdimm_provider_data(nvdimm); > struct cxl_nvdimm_bridge *cxl_nvb = cxl_nvd->bridge; > struct cxl_pmem_region *cxlr_pmem; > + unsigned long index; > > device_lock(&cxl_nvb->dev); > - cxlr_pmem = cxl_nvd->region; > dev_set_drvdata(&cxl_nvd->dev, NULL); > - cxl_nvd->region = NULL; > - device_unlock(&cxl_nvb->dev); > + xa_for_each(&cxl_nvd->pmem_regions, index, cxlr_pmem) { > + get_device(&cxlr_pmem->dev); > + device_unlock(&cxl_nvb->dev); > > - if (cxlr_pmem) { > device_release_driver(&cxlr_pmem->dev); > put_device(&cxlr_pmem->dev); > + > + device_lock(&cxl_nvb->dev); > } > + device_unlock(&cxl_nvb->dev); > > nvdimm_delete(nvdimm); > cxl_nvd->bridge = NULL; > @@ -366,25 +369,48 @@ static int match_cxl_nvdimm(struct device *dev, void *data) > > static void unregister_nvdimm_region(void *nd_region) > { > - struct cxl_nvdimm_bridge *cxl_nvb; > - struct cxl_pmem_region *cxlr_pmem; > + nvdimm_region_delete(nd_region); > +} > + > +static int cxl_nvdimm_add_region(struct cxl_nvdimm *cxl_nvd, > + struct cxl_pmem_region *cxlr_pmem) > +{ > + int rc; > + > + rc = xa_insert(&cxl_nvd->pmem_regions, (unsigned long)cxlr_pmem, > + cxlr_pmem, GFP_KERNEL); > + if (rc) > + return rc; > + > + get_device(&cxlr_pmem->dev); > + return 0; > +} > + > +static void cxl_nvdimm_del_region(struct cxl_nvdimm *cxl_nvd, struct cxl_pmem_region *cxlr_pmem) > +{ > + /* > + * It is possible this is called without a corresponding > + * cxl_nvdimm_add_region for @cxlr_pmem > + */ > + cxlr_pmem = xa_erase(&cxl_nvd->pmem_regions, (unsigned long)cxlr_pmem); > + if (cxlr_pmem) > + put_device(&cxlr_pmem->dev); > +} > + > +static void release_mappings(void *data) > +{ > int i; > + struct cxl_pmem_region *cxlr_pmem = data; > + struct cxl_nvdimm_bridge *cxl_nvb = cxlr_pmem->bridge; > > - cxlr_pmem = nd_region_provider_data(nd_region); > - cxl_nvb = cxlr_pmem->bridge; > device_lock(&cxl_nvb->dev); > for (i = 0; i < cxlr_pmem->nr_mappings; i++) { > struct cxl_pmem_region_mapping *m = &cxlr_pmem->mapping[i]; > struct cxl_nvdimm *cxl_nvd = m->cxl_nvd; > > - if (cxl_nvd->region) { > - put_device(&cxlr_pmem->dev); > - cxl_nvd->region = NULL; > - } > + cxl_nvdimm_del_region(cxl_nvd, cxlr_pmem); > } > device_unlock(&cxl_nvb->dev); > - > - nvdimm_region_delete(nd_region); > } > > static void cxlr_pmem_remove_resource(void *res) > @@ -422,7 +448,7 @@ static int cxl_pmem_region_probe(struct device *dev) > if (!cxl_nvb->nvdimm_bus) { > dev_dbg(dev, "nvdimm bus not found\n"); > rc = -ENXIO; > - goto err; > + goto out_nvb; > } > > memset(&mappings, 0, sizeof(mappings)); > @@ -431,7 +457,7 @@ static int cxl_pmem_region_probe(struct device *dev) > res = devm_kzalloc(dev, sizeof(*res), GFP_KERNEL); > if (!res) { > rc = -ENOMEM; > - goto err; > + goto out_nvb; > } > > res->name = "Persistent Memory"; > @@ -442,11 +468,11 @@ static int cxl_pmem_region_probe(struct device *dev) > > rc = insert_resource(&iomem_resource, res); > if (rc) > - goto err; > + goto out_nvb; > > rc = devm_add_action_or_reset(dev, cxlr_pmem_remove_resource, res); > if (rc) > - goto err; > + goto out_nvb; > > ndr_desc.res = res; > ndr_desc.provider_data = cxlr_pmem; > @@ -462,7 +488,7 @@ static int cxl_pmem_region_probe(struct device *dev) > nd_set = devm_kzalloc(dev, sizeof(*nd_set), GFP_KERNEL); > if (!nd_set) { > rc = -ENOMEM; > - goto err; > + goto out_nvb; > } > > ndr_desc.memregion = cxlr->id; > @@ -472,9 +498,13 @@ static int cxl_pmem_region_probe(struct device *dev) > info = kmalloc_array(cxlr_pmem->nr_mappings, sizeof(*info), GFP_KERNEL); > if (!info) { > rc = -ENOMEM; > - goto err; > + goto out_nvb; > } > > + rc = devm_add_action_or_reset(dev, release_mappings, cxlr_pmem); > + if (rc) > + goto out_nvd; > + > for (i = 0; i < cxlr_pmem->nr_mappings; i++) { > struct cxl_pmem_region_mapping *m = &cxlr_pmem->mapping[i]; > struct cxl_memdev *cxlmd = m->cxlmd; > @@ -486,7 +516,7 @@ static int cxl_pmem_region_probe(struct device *dev) > dev_dbg(dev, "[%d]: %s: no cxl_nvdimm found\n", i, > dev_name(&cxlmd->dev)); > rc = -ENODEV; > - goto err; > + goto out_nvd; > } > > /* safe to drop ref now with bridge lock held */ > @@ -498,10 +528,17 @@ static int cxl_pmem_region_probe(struct device *dev) > dev_dbg(dev, "[%d]: %s: no nvdimm found\n", i, > dev_name(&cxlmd->dev)); > rc = -ENODEV; > - goto err; > + goto out_nvd; > } > - cxl_nvd->region = cxlr_pmem; > - get_device(&cxlr_pmem->dev); > + > + /* > + * Pin the region per nvdimm device as those may be released > + * out-of-order with respect to the region, and a single nvdimm > + * maybe associated with multiple regions > + */ > + rc = cxl_nvdimm_add_region(cxl_nvd, cxlr_pmem); > + if (rc) > + goto out_nvd; > m->cxl_nvd = cxl_nvd; > mappings[i] = (struct nd_mapping_desc) { > .nvdimm = nvdimm, > @@ -527,27 +564,18 @@ static int cxl_pmem_region_probe(struct device *dev) > nvdimm_pmem_region_create(cxl_nvb->nvdimm_bus, &ndr_desc); > if (!cxlr_pmem->nd_region) { > rc = -ENOMEM; > - goto err; > + goto out_nvd; > } > > rc = devm_add_action_or_reset(dev, unregister_nvdimm_region, > cxlr_pmem->nd_region); > -out: > +out_nvd: > kfree(info); > +out_nvb: > device_unlock(&cxl_nvb->dev); > put_device(&cxl_nvb->dev); > > return rc; > - > -err: > - dev_dbg(dev, "failed to create nvdimm region\n"); > - for (i--; i >= 0; i--) { > - nvdimm = mappings[i].nvdimm; > - cxl_nvd = nvdimm_provider_data(nvdimm); > - put_device(&cxl_nvd->region->dev); > - cxl_nvd->region = NULL; > - } > - goto out; > } > > static struct cxl_driver cxl_pmem_region_driver = { >
diff --git a/drivers/cxl/core/pmem.c b/drivers/cxl/core/pmem.c index 1d12a8206444..36aa5070d902 100644 --- a/drivers/cxl/core/pmem.c +++ b/drivers/cxl/core/pmem.c @@ -188,6 +188,7 @@ static void cxl_nvdimm_release(struct device *dev) { struct cxl_nvdimm *cxl_nvd = to_cxl_nvdimm(dev); + xa_destroy(&cxl_nvd->pmem_regions); kfree(cxl_nvd); } @@ -230,6 +231,7 @@ static struct cxl_nvdimm *cxl_nvdimm_alloc(struct cxl_memdev *cxlmd) dev = &cxl_nvd->dev; cxl_nvd->cxlmd = cxlmd; + xa_init(&cxl_nvd->pmem_regions); device_initialize(dev); lockdep_set_class(&dev->mutex, &cxl_nvdimm_key); device_set_pm_not_required(dev); diff --git a/drivers/cxl/cxl.h b/drivers/cxl/cxl.h index f680450f0b16..1164ad49f3d3 100644 --- a/drivers/cxl/cxl.h +++ b/drivers/cxl/cxl.h @@ -423,7 +423,7 @@ struct cxl_nvdimm { struct device dev; struct cxl_memdev *cxlmd; struct cxl_nvdimm_bridge *bridge; - struct cxl_pmem_region *region; + struct xarray pmem_regions; }; struct cxl_pmem_region_mapping { diff --git a/drivers/cxl/pmem.c b/drivers/cxl/pmem.c index 0bac05d804bc..c98ff5eb85a6 100644 --- a/drivers/cxl/pmem.c +++ b/drivers/cxl/pmem.c @@ -30,17 +30,20 @@ static void unregister_nvdimm(void *nvdimm) struct cxl_nvdimm *cxl_nvd = nvdimm_provider_data(nvdimm); struct cxl_nvdimm_bridge *cxl_nvb = cxl_nvd->bridge; struct cxl_pmem_region *cxlr_pmem; + unsigned long index; device_lock(&cxl_nvb->dev); - cxlr_pmem = cxl_nvd->region; dev_set_drvdata(&cxl_nvd->dev, NULL); - cxl_nvd->region = NULL; - device_unlock(&cxl_nvb->dev); + xa_for_each(&cxl_nvd->pmem_regions, index, cxlr_pmem) { + get_device(&cxlr_pmem->dev); + device_unlock(&cxl_nvb->dev); - if (cxlr_pmem) { device_release_driver(&cxlr_pmem->dev); put_device(&cxlr_pmem->dev); + + device_lock(&cxl_nvb->dev); } + device_unlock(&cxl_nvb->dev); nvdimm_delete(nvdimm); cxl_nvd->bridge = NULL; @@ -366,25 +369,48 @@ static int match_cxl_nvdimm(struct device *dev, void *data) static void unregister_nvdimm_region(void *nd_region) { - struct cxl_nvdimm_bridge *cxl_nvb; - struct cxl_pmem_region *cxlr_pmem; + nvdimm_region_delete(nd_region); +} + +static int cxl_nvdimm_add_region(struct cxl_nvdimm *cxl_nvd, + struct cxl_pmem_region *cxlr_pmem) +{ + int rc; + + rc = xa_insert(&cxl_nvd->pmem_regions, (unsigned long)cxlr_pmem, + cxlr_pmem, GFP_KERNEL); + if (rc) + return rc; + + get_device(&cxlr_pmem->dev); + return 0; +} + +static void cxl_nvdimm_del_region(struct cxl_nvdimm *cxl_nvd, struct cxl_pmem_region *cxlr_pmem) +{ + /* + * It is possible this is called without a corresponding + * cxl_nvdimm_add_region for @cxlr_pmem + */ + cxlr_pmem = xa_erase(&cxl_nvd->pmem_regions, (unsigned long)cxlr_pmem); + if (cxlr_pmem) + put_device(&cxlr_pmem->dev); +} + +static void release_mappings(void *data) +{ int i; + struct cxl_pmem_region *cxlr_pmem = data; + struct cxl_nvdimm_bridge *cxl_nvb = cxlr_pmem->bridge; - cxlr_pmem = nd_region_provider_data(nd_region); - cxl_nvb = cxlr_pmem->bridge; device_lock(&cxl_nvb->dev); for (i = 0; i < cxlr_pmem->nr_mappings; i++) { struct cxl_pmem_region_mapping *m = &cxlr_pmem->mapping[i]; struct cxl_nvdimm *cxl_nvd = m->cxl_nvd; - if (cxl_nvd->region) { - put_device(&cxlr_pmem->dev); - cxl_nvd->region = NULL; - } + cxl_nvdimm_del_region(cxl_nvd, cxlr_pmem); } device_unlock(&cxl_nvb->dev); - - nvdimm_region_delete(nd_region); } static void cxlr_pmem_remove_resource(void *res) @@ -422,7 +448,7 @@ static int cxl_pmem_region_probe(struct device *dev) if (!cxl_nvb->nvdimm_bus) { dev_dbg(dev, "nvdimm bus not found\n"); rc = -ENXIO; - goto err; + goto out_nvb; } memset(&mappings, 0, sizeof(mappings)); @@ -431,7 +457,7 @@ static int cxl_pmem_region_probe(struct device *dev) res = devm_kzalloc(dev, sizeof(*res), GFP_KERNEL); if (!res) { rc = -ENOMEM; - goto err; + goto out_nvb; } res->name = "Persistent Memory"; @@ -442,11 +468,11 @@ static int cxl_pmem_region_probe(struct device *dev) rc = insert_resource(&iomem_resource, res); if (rc) - goto err; + goto out_nvb; rc = devm_add_action_or_reset(dev, cxlr_pmem_remove_resource, res); if (rc) - goto err; + goto out_nvb; ndr_desc.res = res; ndr_desc.provider_data = cxlr_pmem; @@ -462,7 +488,7 @@ static int cxl_pmem_region_probe(struct device *dev) nd_set = devm_kzalloc(dev, sizeof(*nd_set), GFP_KERNEL); if (!nd_set) { rc = -ENOMEM; - goto err; + goto out_nvb; } ndr_desc.memregion = cxlr->id; @@ -472,9 +498,13 @@ static int cxl_pmem_region_probe(struct device *dev) info = kmalloc_array(cxlr_pmem->nr_mappings, sizeof(*info), GFP_KERNEL); if (!info) { rc = -ENOMEM; - goto err; + goto out_nvb; } + rc = devm_add_action_or_reset(dev, release_mappings, cxlr_pmem); + if (rc) + goto out_nvd; + for (i = 0; i < cxlr_pmem->nr_mappings; i++) { struct cxl_pmem_region_mapping *m = &cxlr_pmem->mapping[i]; struct cxl_memdev *cxlmd = m->cxlmd; @@ -486,7 +516,7 @@ static int cxl_pmem_region_probe(struct device *dev) dev_dbg(dev, "[%d]: %s: no cxl_nvdimm found\n", i, dev_name(&cxlmd->dev)); rc = -ENODEV; - goto err; + goto out_nvd; } /* safe to drop ref now with bridge lock held */ @@ -498,10 +528,17 @@ static int cxl_pmem_region_probe(struct device *dev) dev_dbg(dev, "[%d]: %s: no nvdimm found\n", i, dev_name(&cxlmd->dev)); rc = -ENODEV; - goto err; + goto out_nvd; } - cxl_nvd->region = cxlr_pmem; - get_device(&cxlr_pmem->dev); + + /* + * Pin the region per nvdimm device as those may be released + * out-of-order with respect to the region, and a single nvdimm + * maybe associated with multiple regions + */ + rc = cxl_nvdimm_add_region(cxl_nvd, cxlr_pmem); + if (rc) + goto out_nvd; m->cxl_nvd = cxl_nvd; mappings[i] = (struct nd_mapping_desc) { .nvdimm = nvdimm, @@ -527,27 +564,18 @@ static int cxl_pmem_region_probe(struct device *dev) nvdimm_pmem_region_create(cxl_nvb->nvdimm_bus, &ndr_desc); if (!cxlr_pmem->nd_region) { rc = -ENOMEM; - goto err; + goto out_nvd; } rc = devm_add_action_or_reset(dev, unregister_nvdimm_region, cxlr_pmem->nd_region); -out: +out_nvd: kfree(info); +out_nvb: device_unlock(&cxl_nvb->dev); put_device(&cxl_nvb->dev); return rc; - -err: - dev_dbg(dev, "failed to create nvdimm region\n"); - for (i--; i >= 0; i--) { - nvdimm = mappings[i].nvdimm; - cxl_nvd = nvdimm_provider_data(nvdimm); - put_device(&cxl_nvd->region->dev); - cxl_nvd->region = NULL; - } - goto out; } static struct cxl_driver cxl_pmem_region_driver = {
When a cxl_nvdimm object goes through a ->remove() event (device physically removed, nvdimm-bridge disabled, or nvdimm device disabled), then any associated regions must also be disabled. As highlighted by the cxl-create-region.sh test [1], a single device may host multiple regions, but the driver was only tracking one region at a time. This leads to a situation where only the last enabled region per nvdimm device is cleaned up properly. Other regions are leaked, and this also causes cxl_memdev reference leaks. Fix the tracking by allowing cxl_nvdimm objects to track multiple region associations. Cc: <stable@vger.kernel.org> Link: https://github.com/pmem/ndctl/blob/main/test/cxl-create-region.sh [1] Reported-by: Vishal Verma <vishal.l.verma@intel.com> Fixes: 04ad63f086d1 ("cxl/region: Introduce cxl_pmem_region objects") Signed-off-by: Dan Williams <dan.j.williams@intel.com> --- drivers/cxl/core/pmem.c | 2 + drivers/cxl/cxl.h | 2 - drivers/cxl/pmem.c | 100 ++++++++++++++++++++++++++++++----------------- 3 files changed, 67 insertions(+), 37 deletions(-)