Message ID | 20230809163733.32008-1-dave@stgolabs.net |
---|---|
State | Superseded |
Headers | show |
Series | cxl/memdev: Decouple security init from devm_cxl_add_memdev() | expand |
On 8/9/23 09:37, Davidlohr Bueso wrote: > A crash was reported during type2 device entablement[0] which called > devm_cxl_add_memdev() without an mds, causing the security state > init steps to be bogus. > > BUG: kernel NULL pointer dereference, address: 0000000000000278 > [...] > RIP: 0010:devm_cxl_add_memdev+0x1de/0x2c0 [cxl_core] > [...] > Call Trace: > <TASK> > ? __die+0x1f/0x70 > ? page_fault_oops+0x149/0x420 > ? fixup_exception+0x22/0x310 > ? kernelmode_fixup_or_oops+0x84/0x110 > ? exc_page_fault+0x6d/0x150 > ? asm_exc_page_fault+0x22/0x30 > ? devm_cxl_add_memdev+0x1de/0x2c0 [cxl_core] > cxl_mock_mem_probe+0x632/0x870 [cxl_mock_mem] > platform_probe+0x40/0x90 > really_probe+0x19e/0x3e0 > ? __pfx___driver_attach+0x10/0x10 > __driver_probe_device+0x78/0x160 > driver_probe_device+0x1f/0x90 > __driver_attach+0xce/0x1c0 > bus_for_each_dev+0x63/0xa0 > bus_add_driver+0x112/0x210 > driver_register+0x55/0x100 > ? __pfx_cxl_mock_mem_driver_init+0x10/0x10 [cxl_mock_mem] > [...] > > Move out cxl_memdev_security_init() and allow the pci probing to call > it directly. > > The cxl_memdev_security_shutdown() counterpart will still be called > by the unregistering scenario, but is removed, however, from the > devm_cxl_add_memdev() cleanup path - as this is about ioctls and > there is no way for the sanitation to become active during that time. > > [0] https://lore.kernel.org/all/168592160379.1948938.12863272903570476312.stgit@dwillia2-xfh.jf.intel.com/ > > Reported-by: Ira Weiny <ira.weiny@intel.com> > Signed-off-by: Davidlohr Bueso <dave@stgolabs.net> Reviewed-by: Dave Jiang <dave.jiang@intel.com> > --- > drivers/cxl/core/memdev.c | 17 ++++++----------- > drivers/cxl/cxlmem.h | 1 + > drivers/cxl/pci.c | 4 ++++ > 3 files changed, 11 insertions(+), 11 deletions(-) > > diff --git a/drivers/cxl/core/memdev.c b/drivers/cxl/core/memdev.c > index 14b547c07f54..628d9d1fb795 100644 > --- a/drivers/cxl/core/memdev.c > +++ b/drivers/cxl/core/memdev.c > @@ -561,7 +561,7 @@ static void cxl_memdev_security_shutdown(struct device *dev) > struct cxl_memdev *cxlmd = to_cxl_memdev(dev); > struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlmd->cxlds); > > - if (mds->security.poll) > + if (mds && mds->security.poll) > cancel_delayed_work_sync(&mds->security.poll_dwork); > } > > @@ -570,7 +570,6 @@ static void cxl_memdev_shutdown(struct device *dev) > struct cxl_memdev *cxlmd = to_cxl_memdev(dev); > > down_write(&cxl_memdev_rwsem); > - cxl_memdev_security_shutdown(dev); > cxlmd->cxlds = NULL; > up_write(&cxl_memdev_rwsem); > } > @@ -580,6 +579,7 @@ static void cxl_memdev_unregister(void *_cxlmd) > struct cxl_memdev *cxlmd = _cxlmd; > struct device *dev = &cxlmd->dev; > > + cxl_memdev_security_shutdown(dev); > cxl_memdev_shutdown(dev); > cdev_device_del(&cxlmd->cdev, dev); > put_device(dev); > @@ -1009,11 +1009,10 @@ static void put_sanitize(void *data) > sysfs_put(mds->security.sanitize_node); > } > > -static int cxl_memdev_security_init(struct cxl_memdev *cxlmd) > +int cxl_memdev_security_state_init(struct cxl_memdev_state *mds) > { > - struct cxl_dev_state *cxlds = cxlmd->cxlds; > - struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlds); > - struct device *dev = &cxlmd->dev; > + struct cxl_dev_state *cxlds = &mds->cxlds; > + struct device *dev = &cxlds->cxlmd->dev; > struct kernfs_node *sec; > > sec = sysfs_get_dirent(dev->kobj.sd, "security"); > @@ -1029,7 +1028,7 @@ static int cxl_memdev_security_init(struct cxl_memdev *cxlmd) > } > > return devm_add_action_or_reset(cxlds->dev, put_sanitize, mds); > - } > +} > > struct cxl_memdev *devm_cxl_add_memdev(struct cxl_dev_state *cxlds) > { > @@ -1059,10 +1058,6 @@ struct cxl_memdev *devm_cxl_add_memdev(struct cxl_dev_state *cxlds) > if (rc) > goto err; > > - rc = cxl_memdev_security_init(cxlmd); > - if (rc) > - goto err; > - > rc = devm_add_action_or_reset(cxlds->dev, cxl_memdev_unregister, cxlmd); > if (rc) > return ERR_PTR(rc); > diff --git a/drivers/cxl/cxlmem.h b/drivers/cxl/cxlmem.h > index 706f8a6d1ef4..b99099eb2402 100644 > --- a/drivers/cxl/cxlmem.h > +++ b/drivers/cxl/cxlmem.h > @@ -884,6 +884,7 @@ static inline void cxl_mem_active_dec(void) > #endif > > int cxl_mem_sanitize(struct cxl_memdev_state *mds, u16 cmd); > +int cxl_memdev_security_state_init(struct cxl_memdev_state *mds); > > struct cxl_hdm { > struct cxl_component_regs regs; > diff --git a/drivers/cxl/pci.c b/drivers/cxl/pci.c > index 1cb1494c28fe..5242dbf0044d 100644 > --- a/drivers/cxl/pci.c > +++ b/drivers/cxl/pci.c > @@ -887,6 +887,10 @@ static int cxl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) > if (IS_ERR(cxlmd)) > return PTR_ERR(cxlmd); > > + rc = cxl_memdev_security_state_init(mds); > + if (rc) > + return rc; > + > rc = cxl_memdev_setup_fw_upload(mds); > if (rc) > return rc;
On 8/9/23 09:37, Davidlohr Bueso wrote: > A crash was reported during type2 device entablement[0] which called entablement? > devm_cxl_add_memdev() without an mds, causing the security state > init steps to be bogus. > > BUG: kernel NULL pointer dereference, address: 0000000000000278 > [...] > RIP: 0010:devm_cxl_add_memdev+0x1de/0x2c0 [cxl_core] > [...] > Call Trace: > <TASK> > ? __die+0x1f/0x70 > ? page_fault_oops+0x149/0x420 > ? fixup_exception+0x22/0x310 > ? kernelmode_fixup_or_oops+0x84/0x110 > ? exc_page_fault+0x6d/0x150 > ? asm_exc_page_fault+0x22/0x30 > ? devm_cxl_add_memdev+0x1de/0x2c0 [cxl_core] > cxl_mock_mem_probe+0x632/0x870 [cxl_mock_mem] > platform_probe+0x40/0x90 > really_probe+0x19e/0x3e0 > ? __pfx___driver_attach+0x10/0x10 > __driver_probe_device+0x78/0x160 > driver_probe_device+0x1f/0x90 > __driver_attach+0xce/0x1c0 > bus_for_each_dev+0x63/0xa0 > bus_add_driver+0x112/0x210 > driver_register+0x55/0x100 > ? __pfx_cxl_mock_mem_driver_init+0x10/0x10 [cxl_mock_mem] > [...] > > Move out cxl_memdev_security_init() and allow the pci probing to call > it directly. > > The cxl_memdev_security_shutdown() counterpart will still be called > by the unregistering scenario, but is removed, however, from the > devm_cxl_add_memdev() cleanup path - as this is about ioctls and > there is no way for the sanitation to become active during that time. > > [0] https://lore.kernel.org/all/168592160379.1948938.12863272903570476312.stgit@dwillia2-xfh.jf.intel.com/ > > Reported-by: Ira Weiny <ira.weiny@intel.com> > Signed-off-by: Davidlohr Bueso <dave@stgolabs.net> > --- > drivers/cxl/core/memdev.c | 17 ++++++----------- > drivers/cxl/cxlmem.h | 1 + > drivers/cxl/pci.c | 4 ++++ > 3 files changed, 11 insertions(+), 11 deletions(-) > > diff --git a/drivers/cxl/core/memdev.c b/drivers/cxl/core/memdev.c > index 14b547c07f54..628d9d1fb795 100644 > --- a/drivers/cxl/core/memdev.c > +++ b/drivers/cxl/core/memdev.c > @@ -561,7 +561,7 @@ static void cxl_memdev_security_shutdown(struct device *dev) > struct cxl_memdev *cxlmd = to_cxl_memdev(dev); > struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlmd->cxlds); > > - if (mds->security.poll) > + if (mds && mds->security.poll) > cancel_delayed_work_sync(&mds->security.poll_dwork); > } > > @@ -570,7 +570,6 @@ static void cxl_memdev_shutdown(struct device *dev) > struct cxl_memdev *cxlmd = to_cxl_memdev(dev); > > down_write(&cxl_memdev_rwsem); > - cxl_memdev_security_shutdown(dev); > cxlmd->cxlds = NULL; > up_write(&cxl_memdev_rwsem); > } > @@ -580,6 +579,7 @@ static void cxl_memdev_unregister(void *_cxlmd) > struct cxl_memdev *cxlmd = _cxlmd; > struct device *dev = &cxlmd->dev; > > + cxl_memdev_security_shutdown(dev); > cxl_memdev_shutdown(dev); > cdev_device_del(&cxlmd->cdev, dev); > put_device(dev); > @@ -1009,11 +1009,10 @@ static void put_sanitize(void *data) > sysfs_put(mds->security.sanitize_node); > } > > -static int cxl_memdev_security_init(struct cxl_memdev *cxlmd) > +int cxl_memdev_security_state_init(struct cxl_memdev_state *mds) > { > - struct cxl_dev_state *cxlds = cxlmd->cxlds; > - struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlds); > - struct device *dev = &cxlmd->dev; > + struct cxl_dev_state *cxlds = &mds->cxlds; > + struct device *dev = &cxlds->cxlmd->dev; > struct kernfs_node *sec; > > sec = sysfs_get_dirent(dev->kobj.sd, "security"); > @@ -1029,7 +1028,7 @@ static int cxl_memdev_security_init(struct cxl_memdev *cxlmd) > } > > return devm_add_action_or_reset(cxlds->dev, put_sanitize, mds); > - } > +} > > struct cxl_memdev *devm_cxl_add_memdev(struct cxl_dev_state *cxlds) > { > @@ -1059,10 +1058,6 @@ struct cxl_memdev *devm_cxl_add_memdev(struct cxl_dev_state *cxlds) > if (rc) > goto err; > > - rc = cxl_memdev_security_init(cxlmd); > - if (rc) > - goto err; > - > rc = devm_add_action_or_reset(cxlds->dev, cxl_memdev_unregister, cxlmd); > if (rc) > return ERR_PTR(rc); > diff --git a/drivers/cxl/cxlmem.h b/drivers/cxl/cxlmem.h > index 706f8a6d1ef4..b99099eb2402 100644 > --- a/drivers/cxl/cxlmem.h > +++ b/drivers/cxl/cxlmem.h > @@ -884,6 +884,7 @@ static inline void cxl_mem_active_dec(void) > #endif > > int cxl_mem_sanitize(struct cxl_memdev_state *mds, u16 cmd); > +int cxl_memdev_security_state_init(struct cxl_memdev_state *mds); > > struct cxl_hdm { > struct cxl_component_regs regs; > diff --git a/drivers/cxl/pci.c b/drivers/cxl/pci.c > index 1cb1494c28fe..5242dbf0044d 100644 > --- a/drivers/cxl/pci.c > +++ b/drivers/cxl/pci.c > @@ -887,6 +887,10 @@ static int cxl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) > if (IS_ERR(cxlmd)) > return PTR_ERR(cxlmd); > > + rc = cxl_memdev_security_state_init(mds); > + if (rc) > + return rc; > + > rc = cxl_memdev_setup_fw_upload(mds); > if (rc) > return rc;
Davidlohr Bueso wrote: > A crash was reported during type2 device entablement[0] which called > devm_cxl_add_memdev() without an mds, causing the security state > init steps to be bogus. > > BUG: kernel NULL pointer dereference, address: 0000000000000278 > [...] > RIP: 0010:devm_cxl_add_memdev+0x1de/0x2c0 [cxl_core] > [...] > Call Trace: > <TASK> > ? __die+0x1f/0x70 > ? page_fault_oops+0x149/0x420 > ? fixup_exception+0x22/0x310 > ? kernelmode_fixup_or_oops+0x84/0x110 > ? exc_page_fault+0x6d/0x150 > ? asm_exc_page_fault+0x22/0x30 > ? devm_cxl_add_memdev+0x1de/0x2c0 [cxl_core] > cxl_mock_mem_probe+0x632/0x870 [cxl_mock_mem] > platform_probe+0x40/0x90 > really_probe+0x19e/0x3e0 > ? __pfx___driver_attach+0x10/0x10 > __driver_probe_device+0x78/0x160 > driver_probe_device+0x1f/0x90 > __driver_attach+0xce/0x1c0 > bus_for_each_dev+0x63/0xa0 > bus_add_driver+0x112/0x210 > driver_register+0x55/0x100 > ? __pfx_cxl_mock_mem_driver_init+0x10/0x10 [cxl_mock_mem] > [...] > > Move out cxl_memdev_security_init() and allow the pci probing to call > it directly. > > The cxl_memdev_security_shutdown() counterpart will still be called > by the unregistering scenario, but is removed, however, from the > devm_cxl_add_memdev() cleanup path - as this is about ioctls and > there is no way for the sanitation to become active during that time. > > [0] https://lore.kernel.org/all/168592160379.1948938.12863272903570476312.stgit@dwillia2-xfh.jf.intel.com/ > > Reported-by: Ira Weiny <ira.weiny@intel.com> > Signed-off-by: Davidlohr Bueso <dave@stgolabs.net> > --- > drivers/cxl/core/memdev.c | 17 ++++++----------- > drivers/cxl/cxlmem.h | 1 + > drivers/cxl/pci.c | 4 ++++ > 3 files changed, 11 insertions(+), 11 deletions(-) > > diff --git a/drivers/cxl/core/memdev.c b/drivers/cxl/core/memdev.c > index 14b547c07f54..628d9d1fb795 100644 > --- a/drivers/cxl/core/memdev.c > +++ b/drivers/cxl/core/memdev.c > @@ -561,7 +561,7 @@ static void cxl_memdev_security_shutdown(struct device *dev) > struct cxl_memdev *cxlmd = to_cxl_memdev(dev); > struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlmd->cxlds); > > - if (mds->security.poll) > + if (mds && mds->security.poll) > cancel_delayed_work_sync(&mds->security.poll_dwork); > } > > @@ -570,7 +570,6 @@ static void cxl_memdev_shutdown(struct device *dev) > struct cxl_memdev *cxlmd = to_cxl_memdev(dev); > > down_write(&cxl_memdev_rwsem); > - cxl_memdev_security_shutdown(dev); > cxlmd->cxlds = NULL; > up_write(&cxl_memdev_rwsem); > } > @@ -580,6 +579,7 @@ static void cxl_memdev_unregister(void *_cxlmd) > struct cxl_memdev *cxlmd = _cxlmd; > struct device *dev = &cxlmd->dev; > > + cxl_memdev_security_shutdown(dev); NIT: Moving the call to cxl_memdev_security_shutdown() kind of threw me. I see why you did it. But is there is a way to make this call mirror better with pci probe? > cxl_memdev_shutdown(dev); > cdev_device_del(&cxlmd->cdev, dev); > put_device(dev); > @@ -1009,11 +1009,10 @@ static void put_sanitize(void *data) > sysfs_put(mds->security.sanitize_node); > } > > -static int cxl_memdev_security_init(struct cxl_memdev *cxlmd) > +int cxl_memdev_security_state_init(struct cxl_memdev_state *mds) This needs to be exported. I get the following error when trying this: MODPOST Module.symvers ERROR: modpost: "cxl_memdev_security_state_init" [drivers/cxl/cxl_pci.ko] undefined! make[2]: *** [scripts/Makefile.modpost:144: Module.symvers] Error 1 make[1]: *** [/home/iweiny/dev/linux-dcd-work/Makefile:1984: modpost] Error 2 make: *** [Makefile:234: __sub-make] Error 2 12:12:19 > git di diff --git a/drivers/cxl/core/memdev.c b/drivers/cxl/core/memdev.c index 18ecb9c90412..492486707fd0 100644 --- a/drivers/cxl/core/memdev.c +++ b/drivers/cxl/core/memdev.c @@ -1025,6 +1025,7 @@ int cxl_memdev_security_state_init(struct cxl_memdev_state *mds) return devm_add_action_or_reset(cxlds->dev, put_sanitize, mds); } +EXPORT_SYMBOL_NS_GPL(cxl_memdev_security_state_init, CXL); struct cxl_memdev *devm_cxl_add_memdev(struct cxl_dev_state *cxlds) { > { > - struct cxl_dev_state *cxlds = cxlmd->cxlds; > - struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlds); > - struct device *dev = &cxlmd->dev; > + struct cxl_dev_state *cxlds = &mds->cxlds; > + struct device *dev = &cxlds->cxlmd->dev; > struct kernfs_node *sec; > > sec = sysfs_get_dirent(dev->kobj.sd, "security"); > @@ -1029,7 +1028,7 @@ static int cxl_memdev_security_init(struct cxl_memdev *cxlmd) > } > > return devm_add_action_or_reset(cxlds->dev, put_sanitize, mds); > - } > +} NIT: this is an unrelated change. With the export fixed: Tested-by: Ira Weiny <ira.weiny@intel.com> Ira > > struct cxl_memdev *devm_cxl_add_memdev(struct cxl_dev_state *cxlds) > { > @@ -1059,10 +1058,6 @@ struct cxl_memdev *devm_cxl_add_memdev(struct cxl_dev_state *cxlds) > if (rc) > goto err; > > - rc = cxl_memdev_security_init(cxlmd); > - if (rc) > - goto err; > - > rc = devm_add_action_or_reset(cxlds->dev, cxl_memdev_unregister, cxlmd); > if (rc) > return ERR_PTR(rc); > diff --git a/drivers/cxl/cxlmem.h b/drivers/cxl/cxlmem.h > index 706f8a6d1ef4..b99099eb2402 100644 > --- a/drivers/cxl/cxlmem.h > +++ b/drivers/cxl/cxlmem.h > @@ -884,6 +884,7 @@ static inline void cxl_mem_active_dec(void) > #endif > > int cxl_mem_sanitize(struct cxl_memdev_state *mds, u16 cmd); > +int cxl_memdev_security_state_init(struct cxl_memdev_state *mds); > > struct cxl_hdm { > struct cxl_component_regs regs; > diff --git a/drivers/cxl/pci.c b/drivers/cxl/pci.c > index 1cb1494c28fe..5242dbf0044d 100644 > --- a/drivers/cxl/pci.c > +++ b/drivers/cxl/pci.c > @@ -887,6 +887,10 @@ static int cxl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) > if (IS_ERR(cxlmd)) > return PTR_ERR(cxlmd); > > + rc = cxl_memdev_security_state_init(mds); > + if (rc) > + return rc; > + > rc = cxl_memdev_setup_fw_upload(mds); > if (rc) > return rc; > -- > 2.41.0 >
On Thu, 10 Aug 2023, Ira Weiny wrote: >Davidlohr Bueso wrote: >> diff --git a/drivers/cxl/core/memdev.c b/drivers/cxl/core/memdev.c >> index 14b547c07f54..628d9d1fb795 100644 >> --- a/drivers/cxl/core/memdev.c >> +++ b/drivers/cxl/core/memdev.c >> @@ -561,7 +561,7 @@ static void cxl_memdev_security_shutdown(struct device *dev) >> struct cxl_memdev *cxlmd = to_cxl_memdev(dev); >> struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlmd->cxlds); >> >> - if (mds->security.poll) >> + if (mds && mds->security.poll) >> cancel_delayed_work_sync(&mds->security.poll_dwork); >> } >> >> @@ -570,7 +570,6 @@ static void cxl_memdev_shutdown(struct device *dev) >> struct cxl_memdev *cxlmd = to_cxl_memdev(dev); >> >> down_write(&cxl_memdev_rwsem); >> - cxl_memdev_security_shutdown(dev); >> cxlmd->cxlds = NULL; >> up_write(&cxl_memdev_rwsem); >> } >> @@ -580,6 +579,7 @@ static void cxl_memdev_unregister(void *_cxlmd) >> struct cxl_memdev *cxlmd = _cxlmd; >> struct device *dev = &cxlmd->dev; >> >> + cxl_memdev_security_shutdown(dev); > >NIT: Moving the call to cxl_memdev_security_shutdown() kind of threw me. >I see why you did it. But is there is a way to make this call mirror >better with pci probe? Right, this is the disklike I had with this patch, and have not come up with a cleaner way to do this except just doing without the call altogether and move the sanitize corner case directly into cxl_memdev_unregister(): static void cxl_memdev_unregister(void *_cxlmd) { struct cxl_memdev *cxlmd = _cxlmd; struct device *dev = &cxlmd->dev; struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlmd->cxlds); if (mds && mds->security.poll) cancel_delayed_work_sync(&mds->security.poll_dwork); cxl_memdev_shutdown(dev); cdev_device_del(&cxlmd->cdev, dev); put_device(dev); } Then we don't really need to mirror the init, specially considering that ad-hoc sanitize checks are all over the place due to its nature. Would this be preferable? > >> cxl_memdev_shutdown(dev); >> cdev_device_del(&cxlmd->cdev, dev); >> put_device(dev); >> @@ -1009,11 +1009,10 @@ static void put_sanitize(void *data) >> sysfs_put(mds->security.sanitize_node); >> } >> >> -static int cxl_memdev_security_init(struct cxl_memdev *cxlmd) >> +int cxl_memdev_security_state_init(struct cxl_memdev_state *mds) > >This needs to be exported. I get the following error when trying this: > > MODPOST Module.symvers >ERROR: modpost: "cxl_memdev_security_state_init" [drivers/cxl/cxl_pci.ko] undefined! >make[2]: *** [scripts/Makefile.modpost:144: Module.symvers] Error 1 >make[1]: *** [/home/iweiny/dev/linux-dcd-work/Makefile:1984: modpost] Error 2 >make: *** [Makefile:234: __sub-make] Error 2 brrr I had only run a 'make drivers/cxl/' on it. Will fixup in a v2 as well as the typo found by Dave and the above if there is agreement. > >12:12:19 > git di >diff --git a/drivers/cxl/core/memdev.c b/drivers/cxl/core/memdev.c >index 18ecb9c90412..492486707fd0 100644 >--- a/drivers/cxl/core/memdev.c >+++ b/drivers/cxl/core/memdev.c >@@ -1025,6 +1025,7 @@ int cxl_memdev_security_state_init(struct cxl_memdev_state *mds) > > return devm_add_action_or_reset(cxlds->dev, put_sanitize, mds); > } >+EXPORT_SYMBOL_NS_GPL(cxl_memdev_security_state_init, CXL); > > struct cxl_memdev *devm_cxl_add_memdev(struct cxl_dev_state *cxlds) > { > >> { >> - struct cxl_dev_state *cxlds = cxlmd->cxlds; >> - struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlds); >> - struct device *dev = &cxlmd->dev; >> + struct cxl_dev_state *cxlds = &mds->cxlds; >> + struct device *dev = &cxlds->cxlmd->dev; >> struct kernfs_node *sec; >> >> sec = sysfs_get_dirent(dev->kobj.sd, "security"); >> @@ -1029,7 +1028,7 @@ static int cxl_memdev_security_init(struct cxl_memdev *cxlmd) >> } >> >> return devm_add_action_or_reset(cxlds->dev, put_sanitize, mds); >> - } >> +} > >NIT: this is an unrelated change. imho such a small change makes more sense here than a separate patch only for that. > >With the export fixed: > >Tested-by: Ira Weiny <ira.weiny@intel.com> Thanks, Davidlohr
Davidlohr Bueso wrote: > On Thu, 10 Aug 2023, Ira Weiny wrote: > > >Davidlohr Bueso wrote: > >> diff --git a/drivers/cxl/core/memdev.c b/drivers/cxl/core/memdev.c > >> index 14b547c07f54..628d9d1fb795 100644 > >> --- a/drivers/cxl/core/memdev.c > >> +++ b/drivers/cxl/core/memdev.c > >> @@ -561,7 +561,7 @@ static void cxl_memdev_security_shutdown(struct device *dev) > >> struct cxl_memdev *cxlmd = to_cxl_memdev(dev); > >> struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlmd->cxlds); > >> > >> - if (mds->security.poll) > >> + if (mds && mds->security.poll) > >> cancel_delayed_work_sync(&mds->security.poll_dwork); > >> } > >> > >> @@ -570,7 +570,6 @@ static void cxl_memdev_shutdown(struct device *dev) > >> struct cxl_memdev *cxlmd = to_cxl_memdev(dev); > >> > >> down_write(&cxl_memdev_rwsem); > >> - cxl_memdev_security_shutdown(dev); > >> cxlmd->cxlds = NULL; > >> up_write(&cxl_memdev_rwsem); > >> } > >> @@ -580,6 +579,7 @@ static void cxl_memdev_unregister(void *_cxlmd) > >> struct cxl_memdev *cxlmd = _cxlmd; > >> struct device *dev = &cxlmd->dev; > >> > >> + cxl_memdev_security_shutdown(dev); > > > >NIT: Moving the call to cxl_memdev_security_shutdown() kind of threw me. > >I see why you did it. But is there is a way to make this call mirror > >better with pci probe? > > Right, this is the disklike I had with this patch, and have not come > up with a cleaner way to do this except just doing without the call > altogether and move the sanitize corner case directly into > cxl_memdev_unregister(): > > static void cxl_memdev_unregister(void *_cxlmd) > { > struct cxl_memdev *cxlmd = _cxlmd; > struct device *dev = &cxlmd->dev; > struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlmd->cxlds); > > if (mds && mds->security.poll) > cancel_delayed_work_sync(&mds->security.poll_dwork); > > cxl_memdev_shutdown(dev); > cdev_device_del(&cxlmd->cdev, dev); > put_device(dev); > } > > Then we don't really need to mirror the init, specially considering that > ad-hoc sanitize checks are all over the place due to its nature. Would > this be preferable? I'd be ok with this. In my mind it is more balanced. Especially since cxl_memdev_security_shutdown() does not exactly reverse something that cxl_memdev_security_init() does. It is really just cleaning up any outstanding work. But in reality we are bike shedding a bit here. [snip] > > struct cxl_memdev *devm_cxl_add_memdev(struct cxl_dev_state *cxlds) > > { > > > >> { > >> - struct cxl_dev_state *cxlds = cxlmd->cxlds; > >> - struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlds); > >> - struct device *dev = &cxlmd->dev; > >> + struct cxl_dev_state *cxlds = &mds->cxlds; > >> + struct device *dev = &cxlds->cxlmd->dev; > >> struct kernfs_node *sec; > >> > >> sec = sysfs_get_dirent(dev->kobj.sd, "security"); > >> @@ -1029,7 +1028,7 @@ static int cxl_memdev_security_init(struct cxl_memdev *cxlmd) > >> } > >> > >> return devm_add_action_or_reset(cxlds->dev, put_sanitize, mds); > >> - } > >> +} > > > >NIT: this is an unrelated change. > > imho such a small change makes more sense here than a separate patch only for that. I actually agree... Ira > > > > >With the export fixed: > > > >Tested-by: Ira Weiny <ira.weiny@intel.com> > > Thanks, > Davidlohr
diff --git a/drivers/cxl/core/memdev.c b/drivers/cxl/core/memdev.c index 14b547c07f54..628d9d1fb795 100644 --- a/drivers/cxl/core/memdev.c +++ b/drivers/cxl/core/memdev.c @@ -561,7 +561,7 @@ static void cxl_memdev_security_shutdown(struct device *dev) struct cxl_memdev *cxlmd = to_cxl_memdev(dev); struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlmd->cxlds); - if (mds->security.poll) + if (mds && mds->security.poll) cancel_delayed_work_sync(&mds->security.poll_dwork); } @@ -570,7 +570,6 @@ static void cxl_memdev_shutdown(struct device *dev) struct cxl_memdev *cxlmd = to_cxl_memdev(dev); down_write(&cxl_memdev_rwsem); - cxl_memdev_security_shutdown(dev); cxlmd->cxlds = NULL; up_write(&cxl_memdev_rwsem); } @@ -580,6 +579,7 @@ static void cxl_memdev_unregister(void *_cxlmd) struct cxl_memdev *cxlmd = _cxlmd; struct device *dev = &cxlmd->dev; + cxl_memdev_security_shutdown(dev); cxl_memdev_shutdown(dev); cdev_device_del(&cxlmd->cdev, dev); put_device(dev); @@ -1009,11 +1009,10 @@ static void put_sanitize(void *data) sysfs_put(mds->security.sanitize_node); } -static int cxl_memdev_security_init(struct cxl_memdev *cxlmd) +int cxl_memdev_security_state_init(struct cxl_memdev_state *mds) { - struct cxl_dev_state *cxlds = cxlmd->cxlds; - struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlds); - struct device *dev = &cxlmd->dev; + struct cxl_dev_state *cxlds = &mds->cxlds; + struct device *dev = &cxlds->cxlmd->dev; struct kernfs_node *sec; sec = sysfs_get_dirent(dev->kobj.sd, "security"); @@ -1029,7 +1028,7 @@ static int cxl_memdev_security_init(struct cxl_memdev *cxlmd) } return devm_add_action_or_reset(cxlds->dev, put_sanitize, mds); - } +} struct cxl_memdev *devm_cxl_add_memdev(struct cxl_dev_state *cxlds) { @@ -1059,10 +1058,6 @@ struct cxl_memdev *devm_cxl_add_memdev(struct cxl_dev_state *cxlds) if (rc) goto err; - rc = cxl_memdev_security_init(cxlmd); - if (rc) - goto err; - rc = devm_add_action_or_reset(cxlds->dev, cxl_memdev_unregister, cxlmd); if (rc) return ERR_PTR(rc); diff --git a/drivers/cxl/cxlmem.h b/drivers/cxl/cxlmem.h index 706f8a6d1ef4..b99099eb2402 100644 --- a/drivers/cxl/cxlmem.h +++ b/drivers/cxl/cxlmem.h @@ -884,6 +884,7 @@ static inline void cxl_mem_active_dec(void) #endif int cxl_mem_sanitize(struct cxl_memdev_state *mds, u16 cmd); +int cxl_memdev_security_state_init(struct cxl_memdev_state *mds); struct cxl_hdm { struct cxl_component_regs regs; diff --git a/drivers/cxl/pci.c b/drivers/cxl/pci.c index 1cb1494c28fe..5242dbf0044d 100644 --- a/drivers/cxl/pci.c +++ b/drivers/cxl/pci.c @@ -887,6 +887,10 @@ static int cxl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) if (IS_ERR(cxlmd)) return PTR_ERR(cxlmd); + rc = cxl_memdev_security_state_init(mds); + if (rc) + return rc; + rc = cxl_memdev_setup_fw_upload(mds); if (rc) return rc;
A crash was reported during type2 device entablement[0] which called devm_cxl_add_memdev() without an mds, causing the security state init steps to be bogus. BUG: kernel NULL pointer dereference, address: 0000000000000278 [...] RIP: 0010:devm_cxl_add_memdev+0x1de/0x2c0 [cxl_core] [...] Call Trace: <TASK> ? __die+0x1f/0x70 ? page_fault_oops+0x149/0x420 ? fixup_exception+0x22/0x310 ? kernelmode_fixup_or_oops+0x84/0x110 ? exc_page_fault+0x6d/0x150 ? asm_exc_page_fault+0x22/0x30 ? devm_cxl_add_memdev+0x1de/0x2c0 [cxl_core] cxl_mock_mem_probe+0x632/0x870 [cxl_mock_mem] platform_probe+0x40/0x90 really_probe+0x19e/0x3e0 ? __pfx___driver_attach+0x10/0x10 __driver_probe_device+0x78/0x160 driver_probe_device+0x1f/0x90 __driver_attach+0xce/0x1c0 bus_for_each_dev+0x63/0xa0 bus_add_driver+0x112/0x210 driver_register+0x55/0x100 ? __pfx_cxl_mock_mem_driver_init+0x10/0x10 [cxl_mock_mem] [...] Move out cxl_memdev_security_init() and allow the pci probing to call it directly. The cxl_memdev_security_shutdown() counterpart will still be called by the unregistering scenario, but is removed, however, from the devm_cxl_add_memdev() cleanup path - as this is about ioctls and there is no way for the sanitation to become active during that time. [0] https://lore.kernel.org/all/168592160379.1948938.12863272903570476312.stgit@dwillia2-xfh.jf.intel.com/ Reported-by: Ira Weiny <ira.weiny@intel.com> Signed-off-by: Davidlohr Bueso <dave@stgolabs.net> --- drivers/cxl/core/memdev.c | 17 ++++++----------- drivers/cxl/cxlmem.h | 1 + drivers/cxl/pci.c | 4 ++++ 3 files changed, 11 insertions(+), 11 deletions(-)