Message ID | 20220106214556.2461363-26-swboyd@chromium.org (mailing list archive) |
---|---|
State | Superseded |
Headers | show |
Series | component: Make into an aggregate bus | expand |
Hi Stephen, Thanks for helping update here. On Thu, 2022-01-06 at 13:45 -0800, Stephen Boyd wrote: > Use an aggregate driver instead of component ops so that we can get > proper driver probe ordering of the aggregate device with respect to > all > the component devices that make up the aggregate device. > > Cc: Yong Wu <yong.wu@mediatek.com> > Cc: Joerg Roedel <joro@8bytes.org> > Cc: Will Deacon <will@kernel.org> > Cc: Daniel Vetter <daniel.vetter@ffwll.ch> > Cc: "Rafael J. Wysocki" <rafael@kernel.org> > Cc: Rob Clark <robdclark@gmail.com> > Cc: Russell King <rmk+kernel@arm.linux.org.uk> > Cc: Saravana Kannan <saravanak@google.com> > Signed-off-by: Stephen Boyd <swboyd@chromium.org> When I test this on mt8195, which has two IOMMU HWs (calling component_aggregate_register() twice), it aborts like this. What should we do if we have two instances? Thanks. [ 2.652424] Error: Driver 'mtk_iommu_agg' is already registered, aborting... [ 2.654033] mtk-iommu: probe of 1c01f000.iommu failed with error -16 [ 2.662034] Unable to handle kernel NULL pointer dereference at virtual address 0000000000000020 ... [ 2.672413] pc : aggregate_device_match+0xa8/0x1c8 [ 2.673027] lr : aggregate_device_match+0x68/0x1c8 ... 
[ 2.683091] Call trace: [ 2.683403] aggregate_device_match+0xa8/0x1c8 [ 2.683970] __device_attach_driver+0x38/0xd0 [ 2.684526] bus_for_each_drv+0x68/0xd0 [ 2.685015] __device_attach+0xec/0x148 [ 2.685503] device_attach+0x14/0x20 [ 2.685960] bus_rescan_devices_helper+0x50/0x90 [ 2.686545] bus_for_each_dev+0x7c/0xd8 [ 2.687033] bus_rescan_devices+0x20/0x30 [ 2.687542] __component_add+0x7c/0xa0 [ 2.688022] component_add+0x14/0x20 [ 2.688479] mtk_smi_larb_probe+0xe0/0x120 > --- > drivers/iommu/mtk_iommu.c | 14 +++++++++----- > drivers/iommu/mtk_iommu.h | 6 ++++-- > drivers/iommu/mtk_iommu_v1.c | 14 +++++++++----- > 3 files changed, 22 insertions(+), 12 deletions(-) > > diff --git a/drivers/iommu/mtk_iommu.c b/drivers/iommu/mtk_iommu.c > index 25b834104790..8e722898cbe2 100644 > --- a/drivers/iommu/mtk_iommu.c > +++ b/drivers/iommu/mtk_iommu.c > @@ -752,9 +752,13 @@ static int mtk_iommu_hw_init(const struct > mtk_iommu_data *data) > return 0; > } > > -static const struct component_master_ops mtk_iommu_com_ops = { > - .bind = mtk_iommu_bind, > - .unbind = mtk_iommu_unbind, > +static struct aggregate_driver mtk_iommu_aggregate_driver = { > + .probe = mtk_iommu_bind, > + .remove = mtk_iommu_unbind, > + .driver = { > + .name = "mtk_iommu_agg", > + .owner = THIS_MODULE, > + }, > }; > > static int mtk_iommu_probe(struct platform_device *pdev) > @@ -895,7 +899,7 @@ static int mtk_iommu_probe(struct platform_device > *pdev) > goto out_list_del; > } > > - ret = component_master_add_with_match(dev, &mtk_iommu_com_ops, > match); > + ret = component_aggregate_register(dev, > &mtk_iommu_aggregate_driver, match); > if (ret) > goto out_bus_set_null; > return ret; > @@ -928,7 +932,7 @@ static int mtk_iommu_remove(struct > platform_device *pdev) > device_link_remove(data->smicomm_dev, &pdev->dev); > pm_runtime_disable(&pdev->dev); > devm_free_irq(&pdev->dev, data->irq, data); > - component_master_del(&pdev->dev, &mtk_iommu_com_ops); > + component_aggregate_unregister(&pdev->dev, > 
&mtk_iommu_aggregate_driver); > return 0; > } > > diff --git a/drivers/iommu/mtk_iommu.h b/drivers/iommu/mtk_iommu.h > index f81fa8862ed0..064fd4f4eade 100644 > --- a/drivers/iommu/mtk_iommu.h > +++ b/drivers/iommu/mtk_iommu.h > @@ -94,15 +94,17 @@ static inline void release_of(struct device *dev, > void *data) > of_node_put(data); > } > > -static inline int mtk_iommu_bind(struct device *dev) > +static inline int mtk_iommu_bind(struct aggregate_device *adev) > { > + struct device *dev = adev->parent; > struct mtk_iommu_data *data = dev_get_drvdata(dev); > > return component_bind_all(dev, &data->larb_imu); > } > > -static inline void mtk_iommu_unbind(struct device *dev) > +static inline void mtk_iommu_unbind(struct aggregate_device *adev) > { > + struct device *dev = adev->parent; > struct mtk_iommu_data *data = dev_get_drvdata(dev); > > component_unbind_all(dev, &data->larb_imu); > diff --git a/drivers/iommu/mtk_iommu_v1.c > b/drivers/iommu/mtk_iommu_v1.c > index be22fcf988ce..5fb29058a165 100644 > --- a/drivers/iommu/mtk_iommu_v1.c > +++ b/drivers/iommu/mtk_iommu_v1.c > @@ -534,9 +534,13 @@ static const struct of_device_id > mtk_iommu_of_ids[] = { > {} > }; > > -static const struct component_master_ops mtk_iommu_com_ops = { > - .bind = mtk_iommu_bind, > - .unbind = mtk_iommu_unbind, > +static struct aggregate_driver mtk_iommu_aggregate_driver = { > + .probe = mtk_iommu_bind, > + .remove = mtk_iommu_unbind, > + .driver = { > + .name = "mtk_iommu_agg", > + .owner = THIS_MODULE, > + }, > }; > > static int mtk_iommu_probe(struct platform_device *pdev) > @@ -624,7 +628,7 @@ static int mtk_iommu_probe(struct platform_device > *pdev) > goto out_dev_unreg; > } > > - ret = component_master_add_with_match(dev, &mtk_iommu_com_ops, > match); > + ret = component_aggregate_register(dev, > &mtk_iommu_aggregate_driver, match); > if (ret) > goto out_bus_set_null; > return ret; > @@ -650,7 +654,7 @@ static int mtk_iommu_remove(struct > platform_device *pdev) > > 
clk_disable_unprepare(data->bclk); > devm_free_irq(&pdev->dev, data->irq, data); > - component_master_del(&pdev->dev, &mtk_iommu_com_ops); > + component_aggregate_unregister(&pdev->dev, > &mtk_iommu_aggregate_driver); > return 0; > } >
Quoting Yong Wu (2022-01-11 04:22:23) > Hi Stephen, > > Thanks for helping update here. > > On Thu, 2022-01-06 at 13:45 -0800, Stephen Boyd wrote: > > Use an aggregate driver instead of component ops so that we can get > > proper driver probe ordering of the aggregate device with respect to > > all > > the component devices that make up the aggregate device. > > > > Cc: Yong Wu <yong.wu@mediatek.com> > > Cc: Joerg Roedel <joro@8bytes.org> > > Cc: Will Deacon <will@kernel.org> > > Cc: Daniel Vetter <daniel.vetter@ffwll.ch> > > Cc: "Rafael J. Wysocki" <rafael@kernel.org> > > Cc: Rob Clark <robdclark@gmail.com> > > Cc: Russell King <rmk+kernel@arm.linux.org.uk> > > Cc: Saravana Kannan <saravanak@google.com> > > Signed-off-by: Stephen Boyd <swboyd@chromium.org> > > When I test this on mt8195 which have two IOMMU HWs(calling > component_aggregate_regsiter twice), it will abort like this. Then what > should we do if we have two instances? > Thanks for testing it out. We can't register the struct driver more than once, but this driver calls component_aggregate_register() from the driver probe, and there are two devices bound to the mtk-iommu driver, so we try to register it more than once. Sigh! I see a couple of options. One is to do a deep copy of the driver structure and change the driver name. Then it's a one-to-one relationship between device and driver. That's not great because it leaves junk around, so it should probably be avoided. Another option is to reference count the driver registration calls when component_aggregate_register() is called multiple times. Then we would only register the driver once and keep it pinned until the last unregister call is made, but still remove devices that are created for the match table. Can you try the attached patch? It is based on the next version of this patch series, so the include part of the patch may not apply cleanly. 
---8<--- diff --git a/drivers/base/component.c b/drivers/base/component.c index 64ad7478c67a..97f253a41bdf 100644 --- a/drivers/base/component.c +++ b/drivers/base/component.c @@ -492,15 +492,30 @@ static struct aggregate_device *__aggregate_find(struct device *parent) return dev ? to_aggregate_device(dev) : NULL; } +static DEFINE_MUTEX(aggregate_mutex); + static int aggregate_driver_register(struct aggregate_driver *adrv) { - adrv->driver.bus = &aggregate_bus_type; - return driver_register(&adrv->driver); + int ret = 0; + + mutex_lock(&aggregate_mutex); + if (!refcount_inc_not_zero(&adrv->count)) { + adrv->driver.bus = &aggregate_bus_type; + ret = driver_register(&adrv->driver); + if (!ret) + refcount_inc(&adrv->count); + } + mutex_unlock(&aggregate_mutex); + + return ret; } static void aggregate_driver_unregister(struct aggregate_driver *adrv) { - driver_unregister(&adrv->driver); + if (refcount_dec_and_mutex_lock(&adrv->count, &aggregate_mutex)) { + driver_unregister(&adrv->driver); + mutex_unlock(&aggregate_mutex); + } } static struct aggregate_device *aggregate_device_add(struct device *parent, diff --git a/include/linux/component.h b/include/linux/component.h index 53d81203c095..b061341938aa 100644 --- a/include/linux/component.h +++ b/include/linux/component.h @@ -4,6 +4,7 @@ #include <linux/stddef.h> #include <linux/device.h> +#include <linux/refcount.h> struct aggregate_device; @@ -66,6 +67,7 @@ struct device *aggregate_device_parent(const struct aggregate_device *adev); /** * struct aggregate_driver - Aggregate driver (made up of other drivers) + * @count: driver registration refcount * @driver: device driver */ struct aggregate_driver { @@ -101,6 +103,7 @@ struct aggregate_driver { */ void (*shutdown)(struct aggregate_device *adev); + refcount_t count; struct device_driver driver; };
On Tue, 2022-01-11 at 16:27 -0800, Stephen Boyd wrote: > Quoting Yong Wu (2022-01-11 04:22:23) > > Hi Stephen, > > > > Thanks for helping update here. > > > > On Thu, 2022-01-06 at 13:45 -0800, Stephen Boyd wrote: > > > Use an aggregate driver instead of component ops so that we can > > > get > > > proper driver probe ordering of the aggregate device with respect > > > to > > > all > > > the component devices that make up the aggregate device. > > > > > > Cc: Yong Wu <yong.wu@mediatek.com> > > > Cc: Joerg Roedel <joro@8bytes.org> > > > Cc: Will Deacon <will@kernel.org> > > > Cc: Daniel Vetter <daniel.vetter@ffwll.ch> > > > Cc: "Rafael J. Wysocki" <rafael@kernel.org> > > > Cc: Rob Clark <robdclark@gmail.com> > > > Cc: Russell King <rmk+kernel@arm.linux.org.uk> > > > Cc: Saravana Kannan <saravanak@google.com> > > > Signed-off-by: Stephen Boyd <swboyd@chromium.org> > > > > When I test this on mt8195 which have two IOMMU HWs(calling > > component_aggregate_regsiter twice), it will abort like this. Then > > what > > should we do if we have two instances? > > > > Thanks for testing it out. We can't register the struct driver more > than > once but this driver is calling the component_aggregate_register() > function from the driver probe and there are two devices bound to the > mtk-iommu driver so we try to register it more than once. Sigh! > > I see a couple options. One is to do a deep copy of the driver > structure > and change the driver name. Then it's a one to one relationship > between > device and driver. That's not very great because it leaves around > junk > so it should probably be avoided. > > Another option is to reference count the driver registration calls > when > component_aggregate_register() is called multiple times. Then we > would > only register the driver once and keep it pinned until the last > unregister call is made, but still remove devices that are created > for > the match table. > > Can you try the attached patch? 
It is based on the next version of > this > patch series so the include part of the patch may not apply cleanly. > > ---8<--- > diff --git a/drivers/base/component.c b/drivers/base/component.c > index 64ad7478c67a..97f253a41bdf 100644 > --- a/drivers/base/component.c > +++ b/drivers/base/component.c > @@ -492,15 +492,30 @@ static struct aggregate_device > *__aggregate_find(struct device *parent) > return dev ? to_aggregate_device(dev) : NULL; > } > > +static DEFINE_MUTEX(aggregate_mutex); > + > static int aggregate_driver_register(struct aggregate_driver *adrv) > { > - adrv->driver.bus = &aggregate_bus_type; > - return driver_register(&adrv->driver); > + int ret = 0; > + > + mutex_lock(&aggregate_mutex); > + if (!refcount_inc_not_zero(&adrv->count)) { > + adrv->driver.bus = &aggregate_bus_type; > + ret = driver_register(&adrv->driver); > + if (!ret) > + refcount_inc(&adrv->count); This should be refcount_set(&adrv->count, 1)? Otherwise, it will warning like this: [ 2.654526] ------------[ cut here ]------------ [ 2.655558] refcount_t: addition on 0; use-after-free. [ 2.656219] WARNING: CPU: 7 PID: 74 at ../v5.16- rc1/kernel/mediatek/lib/refcount.c:25 refcount_warn_saturate+0x128/0x148 ... [ 2.672227] Call trace: [ 2.672539] refcount_warn_saturate+0x128/0x148 [ 2.673118] component_aggregate_register+0x388/0x390 [ 2.673763] mtk_iommu_probe+0x638/0x690 [ 2.686467] ------------[ cut here ]------------ [ 2.687049] refcount_t: saturated; leaking memory. 
[ 2.687666] WARNING: CPU: 5 PID: 74 at ../v5.16- rc1/kernel/mediatek/lib/refcount.c:19 refcount_warn_saturate+0xfc/0x148 [ 2.703805] Call trace: [ 2.704117] refcount_warn_saturate+0xfc/0x148 [ 2.704685] component_aggregate_register+0x1fc/0x390 [ 2.705330] mtk_iommu_probe+0x638/0x690 > + } > + mutex_unlock(&aggregate_mutex); > + > + return ret; > } > > static void aggregate_driver_unregister(struct aggregate_driver > *adrv) > { > - driver_unregister(&adrv->driver); > + if (refcount_dec_and_mutex_lock(&adrv->count, > &aggregate_mutex)) { > + driver_unregister(&adrv->driver); > + mutex_unlock(&aggregate_mutex); > + } > } > > static struct aggregate_device *aggregate_device_add(struct device > *parent, > diff --git a/include/linux/component.h b/include/linux/component.h > index 53d81203c095..b061341938aa 100644 > --- a/include/linux/component.h > +++ b/include/linux/component.h > @@ -4,6 +4,7 @@ > > #include <linux/stddef.h> > #include <linux/device.h> > +#include <linux/refcount.h> > > struct aggregate_device; > > @@ -66,6 +67,7 @@ struct device *aggregate_device_parent(const struct > aggregate_device *adev); > > /** > * struct aggregate_driver - Aggregate driver (made up of other > drivers) > + * @count: driver registration refcount > * @driver: device driver > */ > struct aggregate_driver { > @@ -101,6 +103,7 @@ struct aggregate_driver { > */ > void (*shutdown)(struct aggregate_device *adev); > > + refcount_t count; > struct device_driver driver; > }; After this patch, the aggregate_driver flow looks ok. But our driver still aborts like this: [ 2.721316] Unable to handle kernel NULL pointer dereference at virtual address 0000000000000000 ... [ 2.731658] pc : mtk_smi_larb_config_port_gen2_general+0xa4/0x138 [ 2.732434] lr : mtk_smi_larb_resume+0x54/0x98 ... 
[ 2.742457] Call trace: [ 2.742768] mtk_smi_larb_config_port_gen2_general+0xa4/0x138 [ 2.743496] pm_generic_runtime_resume+0x2c/0x48 [ 2.744090] __genpd_runtime_resume+0x30/0xa8 [ 2.744648] genpd_runtime_resume+0x94/0x2c8 [ 2.745191] __rpm_callback+0x44/0x150 [ 2.745669] rpm_callback+0x6c/0x78 [ 2.746114] rpm_resume+0x314/0x558 [ 2.746559] __pm_runtime_resume+0x3c/0x88 [ 2.747080] pm_runtime_get_suppliers+0x7c/0x110 [ 2.747668] __driver_probe_device+0x4c/0xe8 [ 2.748212] driver_probe_device+0x44/0x130 [ 2.748745] __device_attach_driver+0x98/0xd0 [ 2.749300] bus_for_each_drv+0x68/0xd0 [ 2.749787] __device_attach+0xec/0x148 [ 2.750277] device_attach+0x14/0x20 [ 2.750733] bus_rescan_devices_helper+0x50/0x90 [ 2.751319] bus_for_each_dev+0x7c/0xd8 [ 2.751806] bus_rescan_devices+0x20/0x30 [ 2.752315] __component_add+0x7c/0xa0 [ 2.752795] component_add+0x14/0x20 [ 2.753253] mtk_smi_larb_probe+0xe0/0x120 This is because the device runtime_resume is called before the bind operation(In our case this detailed function is mtk_smi_larb_bind). The issue doesn't happen without this patchset. I'm not sure the right sequence. If we should fix in mediatek driver, the patch could be: diff --git a/drivers/memory/mtk-smi.c b/drivers/memory/mtk-smi.c index b883dcc0bbfa..288841555067 100644 --- a/drivers/memory/mtk-smi.c +++ b/drivers/memory/mtk-smi.c @@ -483,8 +483,9 @@ static int __maybe_unused mtk_smi_larb_resume(struct device *dev) if (ret < 0) return ret; - /* Configure the basic setting for this larb */ - larb_gen->config_port(dev); + /* Configure the basic setting for this larb after it binds with iommu */ + if (larb->mmu) + larb_gen->config_port(dev); return 0; } Another nitpick, the title should be: iommu/mediatek: xxxx
Quoting Yong Wu (2022-01-12 01:09:19) > On Tue, 2022-01-11 at 16:27 -0800, Stephen Boyd wrote: > > ---8<--- > > diff --git a/drivers/base/component.c b/drivers/base/component.c > > index 64ad7478c67a..97f253a41bdf 100644 > > --- a/drivers/base/component.c > > +++ b/drivers/base/component.c > > @@ -492,15 +492,30 @@ static struct aggregate_device > > *__aggregate_find(struct device *parent) > > return dev ? to_aggregate_device(dev) : NULL; > > } > > > > +static DEFINE_MUTEX(aggregate_mutex); > > + > > static int aggregate_driver_register(struct aggregate_driver *adrv) > > { > > - adrv->driver.bus = &aggregate_bus_type; > > - return driver_register(&adrv->driver); > > + int ret = 0; > > + > > + mutex_lock(&aggregate_mutex); > > + if (!refcount_inc_not_zero(&adrv->count)) { > > + adrv->driver.bus = &aggregate_bus_type; > > + ret = driver_register(&adrv->driver); > > + if (!ret) > > + refcount_inc(&adrv->count); > > This should be refcount_set(&adrv->count, 1)? > > Otherwise, it will warning like this: Yeah I'll fix it, thanks. > > [ 2.654526] ------------[ cut here ]------------ > [ 2.655558] refcount_t: addition on 0; use-after-free. > > After this patch, the aggregate_driver flow looks ok. But our driver > still aborts like this: > > [ 2.721316] Unable to handle kernel NULL pointer dereference at > virtual address 0000000000000000 > ... > [ 2.731658] pc : mtk_smi_larb_config_port_gen2_general+0xa4/0x138 > [ 2.732434] lr : mtk_smi_larb_resume+0x54/0x98 > ... 
> [ 2.742457] Call trace: > [ 2.742768] mtk_smi_larb_config_port_gen2_general+0xa4/0x138 > [ 2.743496] pm_generic_runtime_resume+0x2c/0x48 > [ 2.744090] __genpd_runtime_resume+0x30/0xa8 > [ 2.744648] genpd_runtime_resume+0x94/0x2c8 > [ 2.745191] __rpm_callback+0x44/0x150 > [ 2.745669] rpm_callback+0x6c/0x78 > [ 2.746114] rpm_resume+0x314/0x558 > [ 2.746559] __pm_runtime_resume+0x3c/0x88 > [ 2.747080] pm_runtime_get_suppliers+0x7c/0x110 > [ 2.747668] __driver_probe_device+0x4c/0xe8 > [ 2.748212] driver_probe_device+0x44/0x130 > [ 2.748745] __device_attach_driver+0x98/0xd0 > [ 2.749300] bus_for_each_drv+0x68/0xd0 > [ 2.749787] __device_attach+0xec/0x148 > [ 2.750277] device_attach+0x14/0x20 > [ 2.750733] bus_rescan_devices_helper+0x50/0x90 > [ 2.751319] bus_for_each_dev+0x7c/0xd8 > [ 2.751806] bus_rescan_devices+0x20/0x30 > [ 2.752315] __component_add+0x7c/0xa0 > [ 2.752795] component_add+0x14/0x20 > [ 2.753253] mtk_smi_larb_probe+0xe0/0x120 > > This is because the device runtime_resume is called before the bind > operation(In our case this detailed function is mtk_smi_larb_bind). > The issue doesn't happen without this patchset. I'm not sure the right > sequence. If we should fix in mediatek driver, the patch could be: Oh, the runtime PM is moved around with these patches. The aggregate device is runtime PM enabled before the probe is called, and there are supplier links made to each component, so each component is runtime resumed before the aggregate probe function is called. It means that all the component drivers need to have their resources ready to power on before their component_bind() callback is made. Thinking more about it that may be wrong if something from the aggregate device is needed to fully power on the component. Is that what is happening here? 
> > > diff --git a/drivers/memory/mtk-smi.c b/drivers/memory/mtk-smi.c > index b883dcc0bbfa..288841555067 100644 > --- a/drivers/memory/mtk-smi.c > +++ b/drivers/memory/mtk-smi.c > @@ -483,8 +483,9 @@ static int __maybe_unused > mtk_smi_larb_resume(struct device *dev) > if (ret < 0) > return ret; > > - /* Configure the basic setting for this larb */ > - larb_gen->config_port(dev); > + /* Configure the basic setting for this larb after it binds > with iommu */ > + if (larb->mmu) > + larb_gen->config_port(dev); > > return 0; > } > > > Another nitpick, the title should be: iommu/mediatek: xxxx > Fixed it.
On Wed, 2022-01-12 at 20:25 -0800, Stephen Boyd wrote: > > > > [ 2.654526] ------------[ cut here ]------------ > > [ 2.655558] refcount_t: addition on 0; use-after-free. > > > > After this patch, the aggregate_driver flow looks ok. But our > > driver > > still aborts like this: > > > > [ 2.721316] Unable to handle kernel NULL pointer dereference at > > virtual address 0000000000000000 > > ... > > [ 2.731658] pc : > > mtk_smi_larb_config_port_gen2_general+0xa4/0x138 > > [ 2.732434] lr : mtk_smi_larb_resume+0x54/0x98 > > ... > > [ 2.742457] Call trace: > > [ 2.742768] mtk_smi_larb_config_port_gen2_general+0xa4/0x138 > > [ 2.743496] pm_generic_runtime_resume+0x2c/0x48 > > [ 2.744090] __genpd_runtime_resume+0x30/0xa8 > > [ 2.744648] genpd_runtime_resume+0x94/0x2c8 > > [ 2.745191] __rpm_callback+0x44/0x150 > > [ 2.745669] rpm_callback+0x6c/0x78 > > [ 2.746114] rpm_resume+0x314/0x558 > > [ 2.746559] __pm_runtime_resume+0x3c/0x88 > > [ 2.747080] pm_runtime_get_suppliers+0x7c/0x110 > > [ 2.747668] __driver_probe_device+0x4c/0xe8 > > [ 2.748212] driver_probe_device+0x44/0x130 > > [ 2.748745] __device_attach_driver+0x98/0xd0 > > [ 2.749300] bus_for_each_drv+0x68/0xd0 > > [ 2.749787] __device_attach+0xec/0x148 > > [ 2.750277] device_attach+0x14/0x20 > > [ 2.750733] bus_rescan_devices_helper+0x50/0x90 > > [ 2.751319] bus_for_each_dev+0x7c/0xd8 > > [ 2.751806] bus_rescan_devices+0x20/0x30 > > [ 2.752315] __component_add+0x7c/0xa0 > > [ 2.752795] component_add+0x14/0x20 > > [ 2.753253] mtk_smi_larb_probe+0xe0/0x120 > > > > This is because the device runtime_resume is called before the bind > > operation(In our case this detailed function is mtk_smi_larb_bind). > > The issue doesn't happen without this patchset. I'm not sure the > > right > > sequence. If we should fix in mediatek driver, the patch could be: > > Oh, the runtime PM is moved around with these patches. 
The aggregate > device is runtime PM enabled before the probe is called, In our case, the component device may probe before the aggregate device. thus the component device runtime PM has already been enabled when aggregate device probe. > and there are > supplier links made to each component, so each component is runtime > resumed before the aggregate probe function is called. Yes. This is the current flow. > It means that all > the component drivers need to have their resources ready to power on > before their component_bind() callback is made. Sorry, I don't understand here well. In this case, The component drivers prepare the resource for power on in the component_bind since the resource comes from the aggregate driver. Thus, we expect the component_bind run before the runtime resume callback. Another solution is moving the component's pm_runtime_enable into the component_bind(It's mtk_smi_larb_bind here), then the runtime callback is called after component_bind in which the resource for power on is ready. > Thinking more about it > that may be wrong if something from the aggregate device is needed to > fully power on the component. Is that what is happening here? > > > diff --git a/drivers/memory/mtk-smi.c b/drivers/memory/mtk-smi.c > > index b883dcc0bbfa..288841555067 100644 > > --- a/drivers/memory/mtk-smi.c > > +++ b/drivers/memory/mtk-smi.c > > @@ -483,8 +483,9 @@ static int __maybe_unused > > mtk_smi_larb_resume(struct device *dev) > > if (ret < 0) > > return ret; > > > > - /* Configure the basic setting for this larb */ > > - larb_gen->config_port(dev); > > + /* Configure the basic setting for this larb after it binds > > with iommu */ > > + if (larb->mmu) > > + larb_gen->config_port(dev); > > > > return 0; > > } > >
Quoting Yong Wu (2022-01-14 01:06:31) > On Wed, 2022-01-12 at 20:25 -0800, Stephen Boyd wrote: > > > > > > [ 2.654526] ------------[ cut here ]------------ > > > [ 2.655558] refcount_t: addition on 0; use-after-free. > > > > > > After this patch, the aggregate_driver flow looks ok. But our > > > driver > > > still aborts like this: > > > > > > [ 2.721316] Unable to handle kernel NULL pointer dereference at > > > virtual address 0000000000000000 > > > ... > > > [ 2.731658] pc : > > > mtk_smi_larb_config_port_gen2_general+0xa4/0x138 > > > [ 2.732434] lr : mtk_smi_larb_resume+0x54/0x98 > > > ... > > > [ 2.742457] Call trace: > > > [ 2.742768] mtk_smi_larb_config_port_gen2_general+0xa4/0x138 > > > [ 2.743496] pm_generic_runtime_resume+0x2c/0x48 > > > [ 2.744090] __genpd_runtime_resume+0x30/0xa8 > > > [ 2.744648] genpd_runtime_resume+0x94/0x2c8 > > > [ 2.745191] __rpm_callback+0x44/0x150 > > > [ 2.745669] rpm_callback+0x6c/0x78 > > > [ 2.746114] rpm_resume+0x314/0x558 > > > [ 2.746559] __pm_runtime_resume+0x3c/0x88 > > > [ 2.747080] pm_runtime_get_suppliers+0x7c/0x110 > > > [ 2.747668] __driver_probe_device+0x4c/0xe8 > > > [ 2.748212] driver_probe_device+0x44/0x130 > > > [ 2.748745] __device_attach_driver+0x98/0xd0 > > > [ 2.749300] bus_for_each_drv+0x68/0xd0 > > > [ 2.749787] __device_attach+0xec/0x148 > > > [ 2.750277] device_attach+0x14/0x20 > > > [ 2.750733] bus_rescan_devices_helper+0x50/0x90 > > > [ 2.751319] bus_for_each_dev+0x7c/0xd8 > > > [ 2.751806] bus_rescan_devices+0x20/0x30 > > > [ 2.752315] __component_add+0x7c/0xa0 > > > [ 2.752795] component_add+0x14/0x20 > > > [ 2.753253] mtk_smi_larb_probe+0xe0/0x120 > > > > > > This is because the device runtime_resume is called before the bind > > > operation(In our case this detailed function is mtk_smi_larb_bind). > > > The issue doesn't happen without this patchset. I'm not sure the > > > right > > > sequence. 
If we should fix in mediatek driver, the patch could be: > > > > Oh, the runtime PM is moved around with these patches. The aggregate > > device is runtime PM enabled before the probe is called, > > In our case, the component device may probe before the aggregate > device. thus the component device runtime PM has already been enabled > when aggregate device probe. This is always the case. The component device always probes before the aggregate device because the component device registers with the component framework. But the component device can decide to enable runtime PM during driver probe or during component bind. > > > and there are > > supplier links made to each component, so each component is runtime > > resumed before the aggregate probe function is called. > > Yes. This is the current flow. Got it. > > > It means that all > > the component drivers need to have their resources ready to power on > > before their component_bind() callback is made. > > Sorry, I don't understand here well. In this case, The component > drivers prepare the resource for power on in the component_bind since > the resource comes from the aggregate driver. Thus, we expect the > component_bind run before the runtime resume callback. What resource comes from the aggregate device that the component device manages in runtime PM? > > Another solution is moving the component's pm_runtime_enable into the > component_bind(It's mtk_smi_larb_bind here), then the runtime callback > is called after component_bind in which the resource for power on is > ready. This sounds more correct to me. I'm not an expert on runtime PM though as I always have to read the code to remember how it works. if the device isn't ready for runtime PM until the component bind function is called then runtime PM shouldn't be enabled on the component device.
On Fri, 2022-01-14 at 15:30 -0600, Stephen Boyd wrote: > Quoting Yong Wu (2022-01-14 01:06:31) > > On Wed, 2022-01-12 at 20:25 -0800, Stephen Boyd wrote: > > > > > > > > [ 2.654526] ------------[ cut here ]------------ > > > > [ 2.655558] refcount_t: addition on 0; use-after-free. > > > > > > > > After this patch, the aggregate_driver flow looks ok. But our > > > > driver > > > > still aborts like this: > > > > > > > > [ 2.721316] Unable to handle kernel NULL pointer dereference > > > > at > > > > virtual address 0000000000000000 > > > > ... > > > > [ 2.731658] pc : > > > > mtk_smi_larb_config_port_gen2_general+0xa4/0x138 > > > > [ 2.732434] lr : mtk_smi_larb_resume+0x54/0x98 > > > > ... > > > > [ 2.742457] Call trace: > > > > [ 2.742768] mtk_smi_larb_config_port_gen2_general+0xa4/0x13 > > > > 8 > > > > [ 2.743496] pm_generic_runtime_resume+0x2c/0x48 > > > > [ 2.744090] __genpd_runtime_resume+0x30/0xa8 > > > > [ 2.744648] genpd_runtime_resume+0x94/0x2c8 > > > > [ 2.745191] __rpm_callback+0x44/0x150 > > > > [ 2.745669] rpm_callback+0x6c/0x78 > > > > [ 2.746114] rpm_resume+0x314/0x558 > > > > [ 2.746559] __pm_runtime_resume+0x3c/0x88 > > > > [ 2.747080] pm_runtime_get_suppliers+0x7c/0x110 > > > > [ 2.747668] __driver_probe_device+0x4c/0xe8 > > > > [ 2.748212] driver_probe_device+0x44/0x130 > > > > [ 2.748745] __device_attach_driver+0x98/0xd0 > > > > [ 2.749300] bus_for_each_drv+0x68/0xd0 > > > > [ 2.749787] __device_attach+0xec/0x148 > > > > [ 2.750277] device_attach+0x14/0x20 > > > > [ 2.750733] bus_rescan_devices_helper+0x50/0x90 > > > > [ 2.751319] bus_for_each_dev+0x7c/0xd8 > > > > [ 2.751806] bus_rescan_devices+0x20/0x30 > > > > [ 2.752315] __component_add+0x7c/0xa0 > > > > [ 2.752795] component_add+0x14/0x20 > > > > [ 2.753253] mtk_smi_larb_probe+0xe0/0x120 > > > > > > > > This is because the device runtime_resume is called before the > > > > bind > > > > operation(In our case this detailed function is > > > > mtk_smi_larb_bind). 
> > > > The issue doesn't happen without this patchset. I'm not sure > > > > the > > > > right > > > > sequence. If we should fix in mediatek driver, the patch could > > > > be: > > > > > > Oh, the runtime PM is moved around with these patches. The > > > aggregate > > > device is runtime PM enabled before the probe is called, > > > > In our case, the component device may probe before the aggregate > > device. thus the component device runtime PM has already been > > enabled > > when aggregate device probe. > > This is always the case. The component device always probes before > the > aggregate device because the component device registers with the > component framework. But the component device can decide to enable > runtime PM during driver probe or during component bind. > > > > > > and there are > > > supplier links made to each component, so each component is > > > runtime > > > resumed before the aggregate probe function is called. > > > > Yes. This is the current flow. > > Got it. > > > > > > It means that all > > > the component drivers need to have their resources ready to power > > > on > > > before their component_bind() callback is made. > > > > Sorry, I don't understand here well. In this case, The component > > drivers prepare the resource for power on in the component_bind > > since > > the resource comes from the aggregate driver. Thus, we expect the > > component_bind run before the runtime resume callback. > > What resource comes from the aggregate device that the component > device manages in runtime PM? Here the aggregate device is the iommu device. It knows who enabled the iommu from the dtsi, then transfers this information to the component(smi_larb) devices. smi_larb will configure the registers to enable iommu for these masters in the runtime resume. 
> > > > Another solution is moving the component's pm_runtime_enable into > > the > > component_bind(It's mtk_smi_larb_bind here), then the runtime > > callback > > is called after component_bind in which the resource for power on > > is > > ready. > > This sounds more correct to me. I'm not an expert on runtime PM > though > as I always have to read the code to remember how it works. if the > device isn't ready for runtime PM until the component bind function > is > called then runtime PM shouldn't be enabled on the component device. Anyway, We should update the SMI driver for this case. I prepare a patch into this patchset or I send it independently? which way is better? The patch could be: diff --git a/drivers/memory/mtk-smi.c b/drivers/memory/mtk-smi.c index b883dcc0bbfa..88854c2270a1 100644 --- a/drivers/memory/mtk-smi.c +++ b/drivers/memory/mtk-smi.c @@ -175,6 +175,8 @@ mtk_smi_larb_bind(struct device *dev, struct device *master, void *data) larb->larbid = i; larb->mmu = &larb_mmu[i].mmu; larb->bank = larb_mmu[i].bank; + + pm_runtime_enable(dev); return 0; } } @@ -450,7 +452,6 @@ static int mtk_smi_larb_probe(struct platform_device *pdev) if (ret < 0) return ret; - pm_runtime_enable(dev); platform_set_drvdata(pdev, larb); ret = component_add(dev, &mtk_smi_larb_component_ops); if (ret) Thanks.
Quoting Yong Wu (2022-01-14 23:39:52) > On Fri, 2022-01-14 at 15:30 -0600, Stephen Boyd wrote: > > > > This sounds more correct to me. I'm not an expert on runtime PM > > though > > as I always have to read the code to remember how it works. if the > > device isn't ready for runtime PM until the component bind function > > is > > called then runtime PM shouldn't be enabled on the component device. > > Anyway, We should update the SMI driver for this case. I prepare a > patch into this patchset or I send it independently? which way is > better? I can roll it into this patch. It needs to be combined; otherwise, it breaks the bisectability of the series.
diff --git a/drivers/iommu/mtk_iommu.c b/drivers/iommu/mtk_iommu.c index 25b834104790..8e722898cbe2 100644 --- a/drivers/iommu/mtk_iommu.c +++ b/drivers/iommu/mtk_iommu.c @@ -752,9 +752,13 @@ static int mtk_iommu_hw_init(const struct mtk_iommu_data *data) return 0; } -static const struct component_master_ops mtk_iommu_com_ops = { - .bind = mtk_iommu_bind, - .unbind = mtk_iommu_unbind, +static struct aggregate_driver mtk_iommu_aggregate_driver = { + .probe = mtk_iommu_bind, + .remove = mtk_iommu_unbind, + .driver = { + .name = "mtk_iommu_agg", + .owner = THIS_MODULE, + }, }; static int mtk_iommu_probe(struct platform_device *pdev) @@ -895,7 +899,7 @@ static int mtk_iommu_probe(struct platform_device *pdev) goto out_list_del; } - ret = component_master_add_with_match(dev, &mtk_iommu_com_ops, match); + ret = component_aggregate_register(dev, &mtk_iommu_aggregate_driver, match); if (ret) goto out_bus_set_null; return ret; @@ -928,7 +932,7 @@ static int mtk_iommu_remove(struct platform_device *pdev) device_link_remove(data->smicomm_dev, &pdev->dev); pm_runtime_disable(&pdev->dev); devm_free_irq(&pdev->dev, data->irq, data); - component_master_del(&pdev->dev, &mtk_iommu_com_ops); + component_aggregate_unregister(&pdev->dev, &mtk_iommu_aggregate_driver); return 0; } diff --git a/drivers/iommu/mtk_iommu.h b/drivers/iommu/mtk_iommu.h index f81fa8862ed0..064fd4f4eade 100644 --- a/drivers/iommu/mtk_iommu.h +++ b/drivers/iommu/mtk_iommu.h @@ -94,15 +94,17 @@ static inline void release_of(struct device *dev, void *data) of_node_put(data); } -static inline int mtk_iommu_bind(struct device *dev) +static inline int mtk_iommu_bind(struct aggregate_device *adev) { + struct device *dev = adev->parent; struct mtk_iommu_data *data = dev_get_drvdata(dev); return component_bind_all(dev, &data->larb_imu); } -static inline void mtk_iommu_unbind(struct device *dev) +static inline void mtk_iommu_unbind(struct aggregate_device *adev) { + struct device *dev = adev->parent; struct 
mtk_iommu_data *data = dev_get_drvdata(dev); component_unbind_all(dev, &data->larb_imu); diff --git a/drivers/iommu/mtk_iommu_v1.c b/drivers/iommu/mtk_iommu_v1.c index be22fcf988ce..5fb29058a165 100644 --- a/drivers/iommu/mtk_iommu_v1.c +++ b/drivers/iommu/mtk_iommu_v1.c @@ -534,9 +534,13 @@ static const struct of_device_id mtk_iommu_of_ids[] = { {} }; -static const struct component_master_ops mtk_iommu_com_ops = { - .bind = mtk_iommu_bind, - .unbind = mtk_iommu_unbind, +static struct aggregate_driver mtk_iommu_aggregate_driver = { + .probe = mtk_iommu_bind, + .remove = mtk_iommu_unbind, + .driver = { + .name = "mtk_iommu_agg", + .owner = THIS_MODULE, + }, }; static int mtk_iommu_probe(struct platform_device *pdev) @@ -624,7 +628,7 @@ static int mtk_iommu_probe(struct platform_device *pdev) goto out_dev_unreg; } - ret = component_master_add_with_match(dev, &mtk_iommu_com_ops, match); + ret = component_aggregate_register(dev, &mtk_iommu_aggregate_driver, match); if (ret) goto out_bus_set_null; return ret; @@ -650,7 +654,7 @@ static int mtk_iommu_remove(struct platform_device *pdev) clk_disable_unprepare(data->bclk); devm_free_irq(&pdev->dev, data->irq, data); - component_master_del(&pdev->dev, &mtk_iommu_com_ops); + component_aggregate_unregister(&pdev->dev, &mtk_iommu_aggregate_driver); return 0; }
Use an aggregate driver instead of component ops so that we can get proper driver probe ordering of the aggregate device with respect to all the component devices that make up the aggregate device. Cc: Yong Wu <yong.wu@mediatek.com> Cc: Joerg Roedel <joro@8bytes.org> Cc: Will Deacon <will@kernel.org> Cc: Daniel Vetter <daniel.vetter@ffwll.ch> Cc: "Rafael J. Wysocki" <rafael@kernel.org> Cc: Rob Clark <robdclark@gmail.com> Cc: Russell King <rmk+kernel@arm.linux.org.uk> Cc: Saravana Kannan <saravanak@google.com> Signed-off-by: Stephen Boyd <swboyd@chromium.org> --- drivers/iommu/mtk_iommu.c | 14 +++++++++----- drivers/iommu/mtk_iommu.h | 6 ++++-- drivers/iommu/mtk_iommu_v1.c | 14 +++++++++----- 3 files changed, 22 insertions(+), 12 deletions(-)